Compare commits

...

260 Commits

Author SHA1 Message Date
Benoit Jacob
3f79884f03 bump to 2.92.0 2010-10-15 09:46:20 -04:00
Benoit Jacob
26129229ec doc updates/improvements 2010-10-15 09:44:43 -04:00
Benoit Jacob
fcee1903be update the porting guide 2010-10-15 08:48:44 -04:00
Benoit Jacob
6dc478fd77 doc typo 2010-10-14 10:19:46 -04:00
Benoit Jacob
65c01e2bf7 JacobiSVD doc fix 2010-10-14 10:17:40 -04:00
Benoit Jacob
8f0e80fe30 JacobiSVD:
* fix preallocating constructors, allocate U and V of the right size for computation options
  * complete documentation and internal comments
  * improve unit test, test inf/nan values
2010-10-14 10:14:43 -04:00
Gael Guennebaud
47197065da compilation fix 2010-10-14 10:19:55 +02:00
Benoit Jacob
bcb9068268 fix bug #44: use VERIFY_IS_APPROX instead of exact comparison to please x87 extended precision 2010-10-13 09:40:57 -04:00
Benoit Jacob
c8ecc897c0 add EIGEN_TEST_X87 option 2010-10-13 09:04:59 -04:00
Gael Guennebaud
3a2bb7f782 fix compilation and warnings with fcc 4.0.1 2010-10-13 10:21:28 +02:00
Gael Guennebaud
bf402dd9b8 add the possibility to disable OpenGL testing 2010-10-12 20:23:52 +02:00
Benoit Jacob
8eb0fc1e72 remove SVD class (was bad code taken from elsewhere)
Use JacobiSVD for now.
We do plan to reintroduce a bidiagonalizing SVD asap.
2010-10-12 10:19:59 -04:00
Benoit Jacob
dbedc70012 Jacobi improvements:
* add fixed-size vectorized path
  * add missing restrict keywords
  * use innerStride()
  * allow vectorization even if innerStride()>1, if PacketSize==1
    (think of the case of rows of std::complex<double>)
2010-10-12 09:58:53 -04:00
Benoit Jacob
12a152031d fix the Jacobi bug, expand unit test 2010-10-12 09:43:40 -04:00
Benoit Jacob
75e60121f4 add Jacobi unit test. jacobi_5 fails, exposing bug #39. 2010-10-12 09:12:36 -04:00
Gael Guennebaud
0308f64515 add support for uniform of double 2010-10-12 11:04:19 +02:00
Gael Guennebaud
fb30bb9e59 uncomment commented line for debug 2010-10-12 10:40:42 +02:00
Gael Guennebaud
20be8ad91e add support for uniforms 2010-10-12 10:39:28 +02:00
Benoit Jacob
b8bb804007 set ColPivHouseholderQR as default preconditioner for JacobiSVD 2010-10-11 21:00:42 -04:00
Benoit Jacob
5c3d21693b implement JacobiSVD::solve() and expand the unit test 2010-10-11 15:36:04 -04:00
Gael Guennebaud
eb105cace8 compilation fix 2010-10-08 22:51:10 +02:00
Benoit Jacob
d229f99ba2 adapt Quaternion to JacobiSVD API changes. 2010-10-08 10:42:41 -04:00
Benoit Jacob
8ba8d90063 add option to compute thin U/V.
By default nothing is computed. You have to ask explicitly for thin/full U/V if you want them.
2010-10-08 10:42:40 -04:00
Benoit Jacob
6fad2eb97b Rework JacobiSVD api / template parameters.
There is now an integer QRPreconditioner template parameter, defaulting to full-piv QR.
Since we have to special-case each QR dec anyway, a template template parameter didn't add much value here.
There is an option NoQRPreconditioner if you know your matrices are already square (auto-detected for fixed-size matrices).
2010-10-08 10:42:32 -04:00
Benoit Jacob
58e0cce0f7 merge backout 2010-10-08 10:42:25 -04:00
Benoit Jacob
4a98cada26 Backed out changeset 2334291157
Sorry Thomas, these doc fixes are no longer relevant with the JacobiSVD API changes, and they are preventing me from applying my patches cleanly.
2010-10-08 10:42:06 -04:00
Gael Guennebaud
a76ce042e6 MSVC for windows mobile does not have the errno.h file 2010-10-07 18:09:15 +02:00
Gael Guennebaud
af22364988 an attempt to fix compilation on windows mobile 2010-10-07 17:54:46 +02:00
Gael Guennebaud
d9c131de5b remove the Taucs backend : Taucs is not maintained anymore and the backend was crap anyway 2010-10-06 17:42:17 +02:00
Gael Guennebaud
423f88aa1e improve FindCholmod 2010-10-06 17:38:02 +02:00
Romain Bossart
c6503e03eb Updates to the Sparse unsupported solvers module.
* change Sparse* specialization's signatures from <..., int Backend> to <..., typename Backend>. Update SparseExtra accordingly to use structs instead of the SparseBackend enum.
* add SparseLDLT Cholmod specialization
* for Cholmod and UmfPack, SparseLU, SparseLLT and SparseLDLT now use ei_solve_retval and have the new solve() method (to be closer to the 3.0 API).

* fix doc
2010-10-04 20:56:54 +02:00
Gael Guennebaud
e3d01f85b2 extend OpenGL support module with true unit tests and support for Transform, Translation, etc. 2010-10-06 13:28:13 +02:00
Gael Guennebaud
b5f32830fd fix geometry tutorial regarding the need to specify the "mode" 2010-10-06 13:27:14 +02:00
Gael Guennebaud
01fad14d78 mark LLT/LDLT solveInPlace func internal and rm their boolean returned value 2010-10-05 15:56:50 +02:00
Thomas Capricelli
2334291157 fix doc 2010-10-04 04:08:32 +02:00
Benoit Jacob
71f023de3e fix compilation on ubuntu 9.04's version of gcc 4.3 (yes, wtf) 2010-09-27 09:57:57 -04:00
Radu Bogdan Rusu
94ea1eed9a fix warning 2010-09-27 09:56:54 -04:00
Hauke Heibel
327ed3d1d3 Added a note to the Gram Schmidt code and improved some formatting. 2010-09-25 14:15:35 +02:00
Hauke Heibel
72d4d45133 Merge. 2010-09-24 17:34:49 +02:00
Hauke Heibel
316dadc8e4 Fixed some SVD issues.
Make the SVD's output unitary.
Improved unit tests.
Added an assert to the SVD ctor to check whether rows>=cols.
2010-09-24 17:32:44 +02:00
Hauke Heibel
053261de88 Make the SVD's output unitary and improved unit tests. 2010-09-24 16:28:20 +02:00
Benoit Jacob
1c54514bfc merge 2010-09-23 09:53:21 -04:00
Benoit Jacob
c253cc3d53 SVD:
* fix unit test for rectangular matrices.
 * enforce that rows >= cols since various places in the code assume that.
2010-09-23 09:51:08 -04:00
Hauke Heibel
947f84633b Fixed bad memory access in the SVD. 2010-09-23 11:15:36 +02:00
Hauke Heibel
62bf04b339 Fixed bad memory access in the SVD. 2010-09-23 11:15:36 +02:00
Gael Guennebaud
82e4a16759 remove superfluous #ifdef 2010-09-15 15:24:21 +02:00
Benoit Jacob
77c943670e add cmakelists for 2 subdirs and make sure all subdirs are installed (GLOB) 2010-09-14 04:11:15 -04:00
Gael Guennebaud
91e9344be9 fix vectorization logic and code of cross3 which was never enabled.. 2010-09-08 14:10:01 +02:00
Gael Guennebaud
f9123df772 fix uninitialized quaternion 2010-09-08 12:57:33 +02:00
Gael Guennebaud
d591b0466d add a bench to compare various transformation methods 2010-09-07 18:21:36 +02:00
Gael Guennebaud
9bb75937cc fix += return by value like operations 2010-09-06 11:51:42 +02:00
Gael Guennebaud
62eb4dc99b noalias was wrongly skipping automatic transposition 2010-09-02 19:18:34 +02:00
Gael Guennebaud
4824db6444 add the possibility to extend QuaternionBase 2010-09-02 17:28:07 +02:00
Eamon Nerbonne
d17bb02ccd Fixes mingw32 compile issues 2010-09-02 10:38:23 +02:00
Gael Guennebaud
e0ea25fc21 add missing copyrights 2010-09-01 12:59:38 +02:00
Gael Guennebaud
b49dde01dc fix bad mat * mat * scalar when the implicit conversion operator to a Matrix is used 2010-08-31 09:54:38 +02:00
Hauke Heibel
dd94f10442 Docs: Improved the docs for writing functions taking Eigen types.
- Removed the wrong statement about the MSVC compiler.
- Reformulated "simple functions" usage.
- Reformulated the summary paragraph about writable parameters.
2010-08-27 08:19:09 +02:00
Gael Guennebaud
dcff9ba785 fix bad "using typename" 2010-08-25 13:34:35 +02:00
Gael Guennebaud
cb7a72d5b0 Fix Sun CC parsing of Eigen/Core. In particular,
I moved all the block related methods to a plugin file. This also
significantly reduces code verbosity.
2010-08-25 13:09:56 +02:00
Benoit Jacob
e17d17cea3 didn't want to commit that bench change. 2010-08-24 10:57:22 -04:00
Benoit Jacob
bd8d06033d make a couple of typedefs public so stuff compiles 2010-08-24 10:53:33 -04:00
Gael Guennebaud
a47bbf664c fix 4x4 SSE inversion when storage orders don't match 2010-08-24 13:00:59 +02:00
Gael Guennebaud
548ecc2fe5 update inverse unit test to highlight another bug in SSE 4x4 inversion code 2010-08-24 12:38:20 +02:00
Gael Guennebaud
ad9a7c69bc fix inversion of 4x4 unaligned matrices 2010-08-24 12:28:42 +02:00
Benoit Jacob
6924d4eec5 update this test to build against current eigen.
remove the 'normal' path as it was not compiling anymore and I couldn't see the point of it (?)
2010-08-23 23:21:25 -04:00
Gael Guennebaud
6261f4629f add TriangularMatrix::conjugate to be consistent since we have adjoint 2010-08-23 23:38:35 +02:00
Jitse Niesen
474c2996bd Docs: add section on resolving the aliasing issue. 2010-08-23 17:23:30 +01:00
Jitse Niesen
d1111d625c Docs: Typos in ArrayBase doxygen comments 2010-08-23 11:44:51 +01:00
Jitse Niesen
103b9351fd Docs: Add references to TopicClassHierarchy 2010-08-22 18:28:19 +01:00
Jitse Niesen
a6da803873 Document DenseCoeffsBase 2010-08-22 17:30:31 +01:00
Hauke Heibel
60aad09878 Fixed DiagonalMatrix assignment. 2010-08-21 16:34:46 +02:00
Hauke Heibel
92b1674c79 Fixed typos. 2010-08-19 20:11:06 +02:00
Hauke Heibel
610d79e686 Simplified the product templates to a minimum of template parameters.
Removed the ei_is_any_projective helper and added ei_transform_traits.
2010-08-19 20:02:46 +02:00
Hauke Heibel
a64aabf73c Removed unused code. 2010-08-19 19:33:13 +02:00
Hauke Heibel
55c7848877 Matrix product refactoring (rhs products only).
Added strong inlines required for MSVC for proper inlining.
Added specializations for DiagonalMatrix products to RotationBase.
Added left- and right-hand-side products with DiagonalMatrix to Transform.
RHS Transform products now return Matrix objects only.
Split the geo_transformations unit test. Some tests were not made for projectivities.
Removed unused variables from main.h that caused warnings.
2010-08-19 19:25:35 +02:00
Gael Guennebaud
d4b664c4cd fix ugly conversion from double[2] to complex 2010-08-19 14:47:58 +02:00
Gael Guennebaud
5354ffbb4f add missing specialization for vector * selfadjoint 2010-08-19 14:05:21 +02:00
Gael Guennebaud
6264755dd3 merge 2010-08-18 15:34:55 +02:00
Gael Guennebaud
ab41c18d60 quickly mention how to solve a sparse problem 2010-08-18 15:33:58 +02:00
Benoit Jacob
216c9125e9 disable NonLinearOptimization test until it's fixed 2010-08-18 09:11:01 -04:00
Gael Guennebaud
ddbbd7065d * disable unalignment detection when vectorization is not enabled
* revert MapBase unalignment detection
2010-08-18 09:35:55 +02:00
Hauke Heibel
85fdcdf055 Fixed Geometry module failures.
Removed default parameter from Transform.
Removed the TransformXX typedefs.
Removed references to TransformXX from unit tests and docs.
Assigning Transforms to a sub-group is now forbidden at compile time.
Products should now properly support the Isometry flag.
Fixed alignment checks in MapBase.
2010-08-17 20:03:50 +02:00
Benoit Jacob
87aafc9169 fix Transform() constructor taking a Transform with other mode.
Not really tested as the geometry tests are currently busted.
2010-08-16 12:30:33 -04:00
Benoit Jacob
19d9c835e0 fix warnings 2010-08-16 11:11:43 -04:00
Gael Guennebaud
b37551f62a further improve compilation error message for array+=matrix 2010-08-16 11:13:02 +02:00
Gael Guennebaud
c625a6a85b improve compilation error message for array+=matrix and the likes 2010-08-16 11:07:17 +02:00
Gael Guennebaud
453d54325e fix declaration of AffineTransformType in Translation 2010-08-16 10:44:27 +02:00
Gael Guennebaud
ba212aeaa9 fix misdetection of GLUT 2010-08-16 09:50:24 +02:00
Gael Guennebaud
aa2b46aa91 allow vectorization of mat44.col() by adding a InnerPanel boolean
template parameter to Block
2010-07-23 16:29:29 +02:00
Gael Guennebaud
853c0e15df slightly generalize the alignment assert in MapBase 2010-08-16 09:41:07 +02:00
Gael Guennebaud
8566ef805b remove the aligned bit flag for non vectorizable types 2010-08-16 09:38:49 +02:00
Benoit Jacob
3a30a2bc3e forgot to remove a #endif 2010-08-13 14:03:38 -04:00
Benoit Jacob
b80d9dd42e fix determination of number of registers on sse:
__i386__ was not defined by MSVC 2010.
fixed as (2*sizeof(void*)).
also move that to SSE/ and let the default for unknown arch's be just 8.
2010-08-13 13:55:28 -04:00
Benoit Jacob
8bbe556e35 merge the backout 2010-08-11 00:06:31 -04:00
Benoit Jacob
97ced33b33 Backed out changeset 40f6e26a24
See thread on mailing list: "InnerPanel change mis-detects alignment?"
2010-08-11 00:04:06 -04:00
Jitse Niesen
76fbe94279 Document EIGEN_NO_DEBUG macro.
I needed some doxygen tricks to get this to work, so it may not be worth it.
2010-08-10 11:37:23 +01:00
Jitse Niesen
530b328769 Aliasing doc: explain that some cases are detected, reverse order examples. 2010-08-08 21:20:14 +01:00
Hauke Heibel
3dd8225862 Added more detailed docs to the QR decompositions classes. 2010-08-05 08:56:19 +02:00
Benoit Jacob
976d7c19e8 some small improvements to the page on functions taking eigen objects.
- make the beginning more precise
 - make the first example be a full selfcontained compiled example, no need for all the others, but having the first one doesn't hurt.
2010-08-04 21:42:32 -04:00
Hauke Heibel
5c7cb3c05c Added more examples to the function writing tutorial including EigenBase, DenseBase, etc. 2010-08-04 17:50:46 +02:00
Hauke Heibel
d558e84f0b Fixed some typos and reformulated a few sentences. 2010-08-04 16:40:33 +02:00
Hauke Heibel
224dd66e10 Added a tutorial on writing functions taking Eigen types. 2010-08-04 12:01:19 +02:00
Benoit Jacob
d90d7a006f fix warnings. The one in Reverse was potentially serious: coeff() methods should return CoeffReturnType, not "Scalar", if the expression is potentially a Lvalue. 2010-08-03 10:38:48 -04:00
Hauke Heibel
cc25edd5de Fixed Affine transform typedef. 2010-08-02 21:33:48 +02:00
Jitse Niesen
508b51cb62 Add page giving an overview of the class hierarchy.
This is mostly copied from the wiki, which in turn copies Benoit's email at
http://listengine.tuxfamily.org/lists.tuxfamily.org/eigen/2010/06/msg00576.html
I used ASCII art for the inheritance diagrams for now, but I don't mind
moving to GraphViz/dot as discussed earlier.
2010-08-02 11:36:44 +01:00
Jitse Niesen
a9fe75efc4 Documentation: Start special topic page on aliasing. 2010-07-31 21:37:29 +01:00
Hauke Heibel
7cefa75901 Added static method Identity() to the Translation class. 2010-07-29 17:30:37 +02:00
Hauke Heibel
e92993d7b9 Safeguarded some Transform functions with compile time asserts.
Added missing static Identity() to Rotation2D, AngleAxis.
2010-07-29 16:17:42 +02:00
Hauke Heibel
6b89ee0095 Transform is now per default Projective.
Improved invert() in the Transform class.
RotationBase offers matrix() to conform to Transform's naming scheme.
Added Translation::translation() to conform to Transform's naming scheme.
2010-07-29 15:54:32 +02:00
Hauke Heibel
2f0e8904f1 Removed debug outputs. 2010-07-28 10:47:58 +02:00
Kenneth Riddile
b038a4bb71 * added EIGEN_ALIGNED_ALLOCATOR macro to allow specifying a different aligned allocator
* attempted to add support for std::deque by copying and modifying the std::vector implementation...MSVC still fails to compile with the std::deque::resize() "will not be aligned" error...probably missing something simple but I'm not sure how to make it work
2010-07-26 19:06:47 -04:00
Jitse Niesen
1420f8b3a1 Several changes in comments to keep Doxygen happy. 2010-07-25 20:29:07 +01:00
Jitse Niesen
3d9764ee24 Add some more examples for the API documentation.
The only missing examples now are for homogeneous() and hnormalized();
I don't know what they're used for ...
2010-07-24 16:43:07 +01:00
Jitse Niesen
425444428c Add examples for API documentation of block methods in DenseBase. 2010-07-23 22:20:00 +01:00
Jitse Niesen
2b5a0060b4 Add examples for API documentation of MatrixBase::cwiseXxx() methods. 2010-07-23 20:32:33 +01:00
Jitse Niesen
072ee3c07d Set Doxygen config variable INCLUDE_PATH to plugins directory.
This is necessary to get functions like MatrixBase::cwiseAbs() documented;
otherwise doxygen can't find the include file in which they are defined.
2010-07-23 19:57:21 +01:00
Jitse Niesen
ae8425c74c Tutorial page 7: more typical example for .all(), minor copy-editing. 2010-07-23 19:20:10 +01:00
User Martin Senst
145830e067 Add newline at the end of Dense. 2010-07-23 19:00:02 +02:00
Gael Guennebaud
40f6e26a24 allow vectorization of mat44.col() by adding a InnerPanel boolean
template parameter to Block
2010-07-23 16:29:29 +02:00
Jitse Niesen
d0f6b1c21f Tutorial page 6: Fix typo, add table of contents. 2010-07-22 21:52:04 +01:00
Gael Guennebaud
9daa66f262 fix merge conflicts 2010-07-22 17:23:11 +02:00
Gael Guennebaud
5d98fa235d merge with complex branch 2010-07-22 16:57:14 +02:00
Jitse Niesen
403e672587 Extend tutorial page 5: Advanced initialization. 2010-07-22 15:53:21 +01:00
Gael Guennebaud
7020f30da3 sync with default branch 2010-07-22 16:29:35 +02:00
Gael Guennebaud
b9edd6fb85 oops 2010-07-22 16:24:01 +02:00
Gael Guennebaud
96ba7cd655 add an OpenGL module simplifying the way you can pass Eigen's objects to GL 2010-07-22 16:08:58 +02:00
Gael Guennebaud
fa6d36e0f7 fix SparseView: clean the nested matrix type 2010-07-22 15:57:01 +02:00
Hauke Heibel
734469e43f Unified LinSpaced in order to be conform with other setter methods as e.g. Constant. 2010-07-22 14:04:00 +02:00
Gael Guennebaud
c7f40e522e merge 2010-07-22 13:21:06 +02:00
Gael Guennebaud
06250a154c add matlab-like mixed product 2010-07-22 13:19:09 +02:00
Gael Guennebaud
bec3f9bfe4 rename indices to a common scheme 2010-07-22 13:17:39 +02:00
Gael Guennebaud
0916d69ca5 fix inner vectorization logic 2010-07-22 13:17:12 +02:00
Gael Guennebaud
0dfc5b296b fix strict aliasing issue 2010-07-22 13:16:53 +02:00
Gael Guennebaud
8a96b0080d now that we properly support mixing real-complex: clean mixingtypes test 2010-07-22 13:15:49 +02:00
Thomas Capricelli
8e21cef80a fix typo 2010-07-22 13:15:15 +02:00
Gael Guennebaud
4393f20fea fix compilation of quaternion demo 2010-07-21 17:34:32 +02:00
Gael Guennebaud
f1104a3b0f fix mandelbrot compilation, and make it use Array instead of Matrix 2010-07-21 17:13:02 +02:00
Gael Guennebaud
35f0bc70d8 fix a strict aliasing issue with gcc 4.3 2010-07-20 22:43:55 +02:00
Gael Guennebaud
b5f2b7d087 fix storage order request 2010-07-20 22:08:48 +02:00
Gael Guennebaud
7dbbc6ffd1 fix static allocation of workspace 2010-07-20 17:06:14 +02:00
Gael Guennebaud
ced1a45f82 add NEON ploaddup and pcplxflip functions 2010-07-20 14:24:01 +02:00
Gael Guennebaud
193eedbfe2 one more fix for openmp 2010-07-20 14:19:00 +02:00
Gael Guennebaud
d7fa09bf05 improve block-size heuristic 2010-07-20 13:23:50 +02:00
Gael Guennebaud
4824ac1363 fix openmp version 2010-07-20 13:23:19 +02:00
Gael Guennebaud
b551a2d77a fix declaration of pack_lhs in trsm 2010-07-20 12:58:22 +02:00
Gael Guennebaud
10a7668035 uncomment commented code for debug 2010-07-20 12:57:46 +02:00
Gael Guennebaud
7b23fad4c9 report a true assert when not checking for an assertion 2010-07-20 12:54:53 +02:00
Gael Guennebaud
44cb1e4802 it appears only the "on the left" case was tested 2010-07-20 10:32:56 +02:00
Gael Guennebaud
872523844a fix trmm and symm wrt lhs packing 2010-07-20 10:06:41 +02:00
Gael Guennebaud
76eb9c9fd9 fix compilation by including file in correct order 2010-07-19 23:32:13 +02:00
Gael Guennebaud
70b1ce11c6 * fix SelfCwiseBinaryOp traits and handling of mixed types
* improve compilation error in case of type mismatch
2010-07-19 23:31:08 +02:00
Gael Guennebaud
8b0b121c9e explicitly disable vectorization for mixed coeff based products 2010-07-19 23:28:57 +02:00
Gael Guennebaud
08c841eb87 fix lhs packing in the case of real * complex products 2010-07-19 23:16:03 +02:00
Gael Guennebaud
1ed4233fd2 port Jacobi to new ei_pset1/ei_pload API 2010-07-19 16:51:38 +02:00
Gael Guennebaud
c2ee454df4 * fix compilation of mixed scalar product
* optimize mixed scalar products
2010-07-19 16:49:09 +02:00
Gael Guennebaud
6e157dd7c6 * fix a couple of remaining issues with previous commit,
* merge ei_product_blocking_traits into ei_gepb_traits
2010-07-19 15:45:13 +02:00
Gael Guennebaud
f8aae7a908 * _mm_loaddup_pd is slow
* optimize SSE ei_ploaddup<Packet4f>
2010-07-19 15:43:27 +02:00
Gael Guennebaud
cd0e5dca9b wip: extend the gebp kernel to optimize complex and mixed products 2010-07-19 08:50:59 +02:00
Gael Guennebaud
45362f4eae update mixing type test 2010-07-15 08:40:09 +02:00
Gael Guennebaud
3f532edc6d update unit test for new API 2010-07-15 08:38:31 +02:00
Gael Guennebaud
1dc9aaaf36 add support for mixing type in trsv 2010-07-13 16:03:49 +02:00
Gael Guennebaud
36d9b51a44 optimize non fused MADD, and add a flatten attribute macro to enforce
inlining within a function
2010-07-13 15:16:34 +02:00
Gael Guennebaud
b72b7ab76f matrix product: move the alpha factor to gebp instead of the packing,
clean some temporaries, etc.
2010-07-12 16:31:46 +02:00
Gael Guennebaud
f8678272a4 mixing types step 3:
- improve support of colmajor by vector and matrix - matrix
- now all configurations are well handled, but the perf are not always very good
2010-07-11 23:57:23 +02:00
Gael Guennebaud
8e3c4283f5 make colmaj * vector uses pointers only 2010-07-11 16:01:48 +02:00
Gael Guennebaud
ff96c94043 mixing types in product step 2:
* pload* and pset1 are now templated on the packet type
* gemv routines are now embedded into a structure with
  a consistent API with respect to gemm
* some configurations of vector * matrix and matrix * matrix works fine,
  some need more work...
2010-07-11 15:48:30 +02:00
Gael Guennebaud
4161b8be67 sync 2010-07-10 22:58:51 +02:00
Gael Guennebaud
e5bc9526f1 * generalize rowmajor by vector
* fix weird compilation error when constructing a matrix with a row by matrix product
2010-07-10 22:53:27 +02:00
Gael Guennebaud
c4ef69b5bd fix compilation: make the check_coordinates* functions const 2010-07-10 22:37:16 +02:00
Benoit Jacob
6dcd373b9d let ei_pset1 use _mm_loaddup_pd. Not a significant speed improvement, but also not a speed regression, and replaces 3 instructions by 1 single instruction. 2010-07-09 18:51:17 -04:00
Konstantinos Margaritis
6ad3f1ab1f Added NEON/Complex.h, ~3.5x faster than scalar std::complex<float>
minor fix in AltiVec Complex.h
2010-07-10 00:09:29 +03:00
Gael Guennebaud
96f9015807 disable MSVC optimization when the underlying compiler is ICC 2010-07-09 19:33:43 +02:00
Gael Guennebaud
b2effa2b2c move ei_conj_if to a more appropriate file 2010-07-09 18:05:57 +02:00
Konstantinos Margaritis
642cc27eb1 forgot to commit ei_p4f_FORWARD; 2010-07-09 18:08:18 +03:00
Konstantinos Margaritis
f6bd508351 forgot to add the Complex.h include for AltiVec. 2010-07-09 17:56:53 +03:00
Konstantinos Margaritis
d9e134c73c Altivec port of Complex.h.
Note: For some reason g++ 4.4 is >200% slower than g++ 4.3 on altivec code.
The same benchmark (bench_gemm) was tested, on the same hardware/OS (G4/Debian testing),
with same CFLAGS. With some code reorganizing I managed to get some minor gain
on 4.4, but I just could not reach 4.3 speed. This is most likely a bug, but I'm waiting
to see if it's fixed on 4.5. I'll look into this a bit more.
2010-07-09 17:54:41 +03:00
Jitse Niesen
26cfe5a958 Be consistent in how the tutorial pages link together. 2010-07-09 11:59:29 +01:00
Jitse Niesen
2c03ca3325 Small changes to tutorial page 2 (matrix arithmetic):
* slightly more extensive discussion of aliasing
* layout: put example code and output side-by-side
* add some links, etc
2010-07-09 11:46:07 +01:00
Gael Guennebaud
b1a17dbfe4 fix a few weird issues with gcc 4.3 32bits and complex<float> 2010-07-09 08:27:58 +02:00
Thomas Capricelli
551cb9b7b4 bench: use of Eigen/Array is deprecated + fix includes for iostream 2010-07-09 03:59:36 +02:00
Gael Guennebaud
504d3a3586 fix SliceVectorizedTraversal for packetsize==1 2010-07-08 23:31:14 +02:00
Gael Guennebaud
51ec188da0 extend vectorization_logic 2010-07-08 23:30:16 +02:00
Carlos Becker
951da96f14 Added more redux types/examples in tutorial and fixed some display issues 2010-07-08 18:16:39 +01:00
Carlos Becker
cb3aad1d91 Reductions/Broadcasting/Visitor Tutorial added to index 2010-07-08 17:45:25 +01:00
Carlos Becker
9852e7b9cb Reductions/Broadcasting/Visitor Tutorial added 2010-07-08 17:42:23 +01:00
Gael Guennebaud
300a226ffa scalars fitting in a single packet requires more work, step 1
* add an Alignable trait
* update LinearVectorization assignment
2010-07-08 14:27:47 +02:00
Gael Guennebaud
2a1500915a compilation fix 2010-07-08 14:26:00 +02:00
Gael Guennebaud
2066ed91de enabling aligned loads/store for complex<double> is much more tricky,
so the temporary fix is to always perform unaligned load/store
2010-07-07 22:50:19 +02:00
Gael Guennebaud
d89925e6de an attempt to fix wrong unaligned store 2010-07-07 22:35:06 +02:00
Gael Guennebaud
02fd3acd81 update to support mixin types 2010-07-07 19:49:48 +02:00
Gael Guennebaud
31a36aa9c4 support for real * complex matrix product - step 1 (works for some special cases) 2010-07-07 19:49:09 +02:00
Gael Guennebaud
fc3fd8ab57 mention that array = matrix is fine too 2010-07-07 18:10:11 +02:00
Gael Guennebaud
861962c55f sync 2010-07-07 16:44:05 +02:00
Gael Guennebaud
0f2d480af0 add support for complex 2010-07-07 16:41:29 +02:00
Gael Guennebaud
a2415388ef optimized conjugate products for SSE3 2010-07-07 16:37:20 +02:00
Gael Guennebaud
65257f6b29 optimize for SSE3 => significant speed up !! 2010-07-07 15:34:46 +02:00
Gael Guennebaud
dd18b22f0b optimize pmul for complex<double> 2010-07-07 15:29:04 +02:00
Gael Guennebaud
845994f18f optimize gemv for complex<double> and fix gcc alignment issue in 32bits 2010-07-07 15:28:41 +02:00
Gael Guennebaud
e07c0f6bb5 cleaning 2010-07-07 11:41:29 +02:00
Gael Guennebaud
3a7f16a655 typo 2010-07-07 11:13:30 +02:00
Gael Guennebaud
b0896382a3 s/IsVectorized/Vectorizable 2010-07-07 11:10:46 +02:00
Gael Guennebaud
74cf12cbe0 add a compile time error if someone calls packet on Diagonal (instead of an infinite runtime loop) 2010-07-07 11:07:12 +02:00
Gael Guennebaud
d5e0efaf69 fix vectorization rule of diagonal-product 2010-07-07 11:06:31 +02:00
Gael Guennebaud
c851044eae fix row cwise-prod column in coeff based products...
I really don't know why this worked so far...
2010-07-07 10:52:59 +02:00
Gael Guennebaud
55495dcbae extend product unit tests 2010-07-07 10:50:40 +02:00
Gael Guennebaud
e38fc9692d add a conj_product functor and optimize dot products 2010-07-07 10:00:08 +02:00
Gael Guennebaud
f8d3b4c060 fix mixing types in DiagonalProduct 2010-07-07 09:43:29 +02:00
Gael Guennebaud
bfa606d16f * add a IsVectorized mechanism (instead of packet-size>1...)
* vectorize complex<double>
2010-07-06 23:36:00 +02:00
Gael Guennebaud
38d0a0d5d6 add a unit test for previous bug 2010-07-06 20:54:35 +02:00
Gael Guennebaud
2dba4b7ce7 add a unit test for conj_helper and ei_pconj 2010-07-06 20:54:14 +02:00
Gael Guennebaud
bc57c68cf5 bug fix forgot to conjugate the scalar factor when needed 2010-07-06 20:53:48 +02:00
Gael Guennebaud
e04c3f2cc0 reduce code generation and minor speed up 2010-07-06 19:15:02 +02:00
Gael Guennebaud
d6454788d9 add support for vectorized conjugated products 2010-07-06 19:10:24 +02:00
Gael Guennebaud
291fef5760 fix range 2010-07-06 19:09:31 +02:00
Jitse Niesen
49747fa4a9 Various documentation improvements.
* Add short documentation for Array class
* Put all classes explicitly in Core module (where applicable)
* Section on Modules in Quick Reference Guide
* Put Page 7 after Page 6 in Contents :)
2010-07-06 13:10:08 +01:00
Jitse Niesen
3428d80d20 Small changes to tutorial page 1. 2010-07-06 10:48:25 +01:00
Jens Mueller
d849bc4401 Avoid calling resizeLike, if EIGEN_NO_AUTOMATIC_RESIZING is defined 2010-07-06 10:11:18 +02:00
Jens Mueller
5322b670c8 Add all unsupported modules and fix header file paths 2010-07-06 10:25:52 +02:00
Gael Guennebaud
7d23e7f9f1 indentation 2010-07-06 11:02:01 +02:00
Benoit Jacob
d1243b393e Added tag 3.0-beta1 for changeset 8cfbf33f60 2010-07-06 00:50:30 -04:00
Gael Guennebaud
c69a226192 * extend the Has* packet traits and makes all functor use it
* extend the packing routines to support conjugation
2010-07-05 23:27:54 +02:00
Gael Guennebaud
e1eccfad3f add initial support for the vectorization of complex<float> 2010-07-05 16:18:09 +02:00
Manoj Rajagopalan
c64c0f382f Examples for DenseBase::middle{Rows,Cols}() 2010-06-30 11:26:31 -04:00
Manoj Rajagopalan
5c58582a08 Renamed DenseBase::{row,col}Range() to DenseBase::middle{Rows,Cols}() 2010-06-29 14:31:39 -04:00
Manoj Rajagopalan
6e5bed69dc Included tests for middleRows() and middleCols() 2010-06-29 12:39:58 -04:00
Manoj Rajagopalan
464fc297cf Included definitions for rowRange() and colRange() member functions of DenseBase 2010-06-26 17:37:17 -04:00
Martin Senst
4b474fdb34 Relax assertion to allow for matrices with cols() == 0 and/or rows() == 0. 2010-07-20 21:25:43 +02:00
Gael Guennebaud
95f2e7f3f5 introduce a new LvalueBit flag and split DenseCoeffBase into three level of accessors 2010-07-21 10:57:01 +02:00
Jitse Niesen
3abbdfd621 Add (set)LinSpaced to quick reference guide. 2010-07-20 21:55:22 +01:00
Jitse Niesen
abd5faf784 Require at least MPFR version 2.3.0, because we use mpfr_signbit.
Code in FindMPFR.cmake is taken from FindEigen2.cmake .
2010-07-19 12:26:52 +01:00
Gael Guennebaud
cac147ba10 add support for determinant on empty matrix 2010-07-19 10:45:06 +02:00
Gael Guennebaud
78d3c54631 add a small bench demoing the possibilities of a direct 3x3 eigen decomposition 2010-07-18 17:26:06 +02:00
Gael Guennebaud
ea27678153 fix compilation of ei_tridiagonalization_inplace_selector for 1x1 matrix 2010-07-18 17:10:11 +02:00
Gael Guennebaud
2a820d41df finish/fix level1 blas, all test pass 2010-07-17 13:49:43 +02:00
Gael Guennebaud
dd27e10360 fix level3 blas: it now passes all computational tests 2010-07-17 11:59:09 +02:00
Gael Guennebaud
2d78023815 fix hemm to not use the imaginary part of the diagonal entries 2010-07-17 11:57:54 +02:00
Gael Guennebaud
cbd6fe323c fix a couple of issues with blas (new TRMM api, and enforce column major) 2010-07-16 23:30:06 +02:00
Gael Guennebaud
f59226e901 fix compilation of blas lib 2010-07-16 22:27:24 +02:00
Gael Guennebaud
4c19024fbf re-enable writing to reversed objects 2010-07-16 22:26:07 +02:00
Gael Guennebaud
fb041c260c fix for empty matrices 2010-07-16 22:25:35 +02:00
Gael Guennebaud
883a8cbb2c disable the optimized 3x3 path for complexes which was not working at all 2010-07-16 18:22:00 +02:00
Gael Guennebaud
6ab9e8632f fix bad fuzzy comparison in 3x3 tridiagonalization 2010-07-16 16:38:58 +02:00
Gael Guennebaud
044424b0e2 fix sum()/prod() on empty matrix making sure this does not affect fixed sized object, extend related unit tests including partial reduction 2010-07-16 14:02:20 +02:00
Gael Guennebaud
6a370f50c7 MPRealSupport was missing 2010-07-15 20:45:45 +02:00
Gael Guennebaud
b08c26aefa merge 2010-07-15 20:41:33 +02:00
Gael Guennebaud
84fdbded4d add support for strictly triangular matrix in trmm though it is not really useful 2010-07-15 20:39:20 +02:00
Gael Guennebaud
87e89fea4e add a support module for MPFR C++ with basic unit testing 2010-07-15 16:29:04 +02:00
Gael Guennebaud
bfbe61454e merge 2010-07-15 09:54:31 +02:00
Gael Guennebaud
cf9edd9958 fix compilation for non trivial types 2010-07-14 23:31:38 +02:00
Gael Guennebaud
b6fac91998 merge 2010-07-14 22:51:53 +02:00
Gael Guennebaud
d4d4382b18 use dummy_precision by default instead of 0 2010-07-14 22:50:03 +02:00
Gael Guennebaud
90d6fc0e28 fix ei_aligned_delete for null pointers and non trivial dtors 2010-07-14 22:49:34 +02:00
Jitse Niesen
b0bd1cfa05 Tutorial page 4: add some text, diversify examples.
Use \verbinclude for output text to disable syntax highlighting.
Give tables consistent look.
2010-07-14 10:16:12 +01:00
Gael Guennebaud
e4f3759c4d add a bench for quaternion multiplication 2010-07-13 13:29:35 +02:00
Jitse Niesen
c36316f284 Change EXPAND_AS_DEFINED doxygen configuration option.
Add macros so that MatrixBase::cwiseProduct() and ArrayBase::min() are
documented, and remove one macro which is no longer used.
2010-07-13 10:14:58 +01:00
Jitse Niesen
140ad0908d Tutorial page 3: add more cwise operations, condense rest. 2010-07-12 22:45:57 +01:00
Christoph Hertzberg
6ba5d2c90c Implemented SSE optimized double-precision Quaternion multiplication 2010-07-12 23:30:47 +02:00
Jitse Niesen
8e776c94c1 Tutorial page 1: Put code and output side-by-side. 2010-07-12 12:02:31 +01:00
Gael Guennebaud
19a70ae939 fix doc compilation on non 32bits systems 2010-07-11 11:01:17 +02:00
Gael Guennebaud
850c6d8a2b fix unused warning 2010-07-11 10:58:58 +02:00
315 changed files with 14108 additions and 4812 deletions

View File

@@ -150,7 +150,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
if(EIGEN_TEST_NEON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp -mfpu=neon -mcpu=cortex-a8")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=hard -mfpu=neon -mcpu=cortex-a8")
message("Enabling NEON in tests/examples")
endif()
@@ -198,6 +198,18 @@ if(MSVC)
endif(MSVC)
option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF)
option(EIGEN_TEST_X87 "Force using X87 instructions. Implies no vectorization." OFF)
if(EIGEN_TEST_X87)
set(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION ON)
if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpmath=387")
message("Forcing use of x87 instructions in tests/examples")
else()
message("EIGEN_TEST_X87 ignored on your compiler")
endif()
endif()
if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
add_definitions(-DEIGEN_DONT_VECTORIZE=1)
message("Disabling vectorization in tests/examples")

View File

@@ -26,13 +26,21 @@
#ifndef EIGEN_CORE_H
#define EIGEN_CORE_H
#define EIGEN_NO_STATIC_ASSERT
// first thing Eigen does: prevent MSVC from committing suicide
#include "src/Core/util/DisableMSVCWarnings.h"
// then include this file where all our macros are defined. It's really important to do it first because
// it's where we do all the alignment settings (platform detection and honoring the user's will if he
// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
#include "src/Core/util/Macros.h"
#ifndef EIGEN_PARSED_BY_DOXYGEN
#include "src/Core/util/Macros.h"
#else
namespace Eigen { // for some reason Doxygen needs this namespace
#include "src/Core/util/Macros.h"
}
#endif
// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into
// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks
@@ -87,7 +95,14 @@
#endif
// include files
#if (defined __GNUC__) && (defined __MINGW32__)
#include <intrin.h>
//including intrin.h works around a MINGW bug http://sourceforge.net/tracker/?func=detail&atid=102435&aid=2962480&group_id=2435
//in essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do). However,
//intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations with conflicting linkage. The linkage for intrinsics
//doesn't matter, but at that stage the compiler doesn't know; so, to avoid compile errors when windows.h is included after Eigen/Core,
//include intrin here.
#endif
#include <emmintrin.h>
#include <xmmintrin.h>
#ifdef EIGEN_VECTORIZE_SSE3
@@ -126,7 +141,14 @@
#include <omp.h>
#endif
// MSVC for windows mobile does not have the errno.h file
#if !(defined(_MSC_VER) && defined(_WIN32_WCE))
#define EIGEN_HAS_ERRNO
#endif
#ifdef EIGEN_HAS_ERRNO
#include <cerrno>
#endif
#include <cstdlib>
#include <cmath>
#include <complex>
@@ -145,7 +167,7 @@
#endif
// required for __cpuid, needs to be included after cmath
#ifdef _MSC_VER
#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_IX64))
#include <intrin.h>
#endif
@@ -221,10 +243,13 @@ using std::size_t;
#if defined EIGEN_VECTORIZE_SSE
#include "src/Core/arch/SSE/PacketMath.h"
#include "src/Core/arch/SSE/MathFunctions.h"
#include "src/Core/arch/SSE/Complex.h"
#elif defined EIGEN_VECTORIZE_ALTIVEC
#include "src/Core/arch/AltiVec/PacketMath.h"
#include "src/Core/arch/AltiVec/Complex.h"
#elif defined EIGEN_VECTORIZE_NEON
#include "src/Core/arch/NEON/PacketMath.h"
#include "src/Core/arch/NEON/Complex.h"
#endif
#include "src/Core/arch/Default/Settings.h"
@@ -248,11 +273,11 @@ using std::size_t;
#include "src/Core/NoAlias.h"
#include "src/Core/DenseStorageBase.h"
#include "src/Core/Matrix.h"
#include "src/Core/SelfCwiseBinaryOp.h"
#include "src/Core/CwiseBinaryOp.h"
#include "src/Core/CwiseUnaryOp.h"
#include "src/Core/CwiseNullaryOp.h"
#include "src/Core/CwiseUnaryView.h"
#include "src/Core/SelfCwiseBinaryOp.h"
#include "src/Core/Dot.h"
#include "src/Core/StableNorm.h"
#include "src/Core/MapBase.h"

View File

@@ -4,4 +4,4 @@
#include "QR"
#include "SVD"
#include "Geometry"
#include "Eigenvalues"
#include "Eigenvalues"

View File

@@ -23,7 +23,6 @@ namespace Eigen {
*/
#include "src/misc/Solve.h"
#include "src/SVD/SVD.h"
#include "src/SVD/JacobiSVD.h"
#include "src/SVD/UpperBidiagonalization.h"

42
Eigen/StdDeque Normal file
View File

@@ -0,0 +1,42 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2009 Hauke Heibel <hauke.heibel@googlemail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_STDDEQUE_MODULE_H
#define EIGEN_STDDEQUE_MODULE_H
#include "Core"
#include <deque>
#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */
#define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...)
#else
#include "src/StlSupport/StdDeque.h"
#endif
#endif // EIGEN_STDDEQUE_MODULE_H

View File

@@ -1,12 +1,6 @@
ADD_SUBDIRECTORY(Core)
ADD_SUBDIRECTORY(LU)
ADD_SUBDIRECTORY(QR)
ADD_SUBDIRECTORY(SVD)
ADD_SUBDIRECTORY(Cholesky)
ADD_SUBDIRECTORY(Geometry)
ADD_SUBDIRECTORY(Sparse)
ADD_SUBDIRECTORY(Jacobi)
ADD_SUBDIRECTORY(Householder)
ADD_SUBDIRECTORY(Eigenvalues)
ADD_SUBDIRECTORY(misc)
ADD_SUBDIRECTORY(plugins)
file(GLOB Eigen_src_subdirectories "*")
foreach(f ${Eigen_src_subdirectories})
if(NOT f MATCHES ".txt")
add_subdirectory(${f})
endif()
endforeach()

View File

@@ -363,7 +363,9 @@ struct ei_solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
}
};
/** This is the \em in-place version of solve().
/** \internal use x = ldlt_object.solve(x);
*
* This is the \em in-place version of solve().
*
* \param bAndX represents both the right-hand side matrix b and result x.
*

View File

@@ -76,11 +76,11 @@ template<typename _MatrixType, int _UpLo> class LLT
typedef LLT_Traits<MatrixType,UpLo> Traits;
/**
* \brief Default Constructor.
*
* The default constructor is useful in cases in which the user intends to
* perform decompositions via LLT::compute(const MatrixType&).
*/
* \brief Default Constructor.
*
* The default constructor is useful in cases in which the user intends to
* perform decompositions via LLT::compute(const MatrixType&).
*/
LLT() : m_matrix(), m_isInitialized(false) {}
/** \brief Default Constructor with memory preallocation
@@ -134,7 +134,7 @@ template<typename _MatrixType, int _UpLo> class LLT
}
template<typename Derived>
bool solveInPlace(MatrixBase<Derived> &bAndX) const;
void solveInPlace(MatrixBase<Derived> &bAndX) const;
LLT& compute(const MatrixType& matrix);
@@ -309,7 +309,9 @@ struct ei_solve_retval<LLT<_MatrixType, UpLo>, Rhs>
}
};
/** This is the \em in-place version of solve().
/** \internal use x = llt_object.solve(x);
*
* This is the \em in-place version of solve().
*
* \param bAndX represents both the right-hand side matrix b and result x.
*
@@ -322,13 +324,12 @@ struct ei_solve_retval<LLT<_MatrixType, UpLo>, Rhs>
*/
template<typename MatrixType, int _UpLo>
template<typename Derived>
bool LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
void LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
{
ei_assert(m_isInitialized && "LLT is not initialized.");
ei_assert(m_matrix.rows()==bAndX.rows());
matrixL().solveInPlace(bAndX);
matrixU().solveInPlace(bAndX);
return true;
}
/** \returns the matrix represented by the decomposition,

View File

@@ -25,6 +25,20 @@
#ifndef EIGEN_ARRAY_H
#define EIGEN_ARRAY_H
/** \class Array
* \ingroup Core_Module
*
* \brief General-purpose arrays with easy API for coefficient-wise operations
*
* The %Array class is very similar to the Matrix class. It provides
* general-purpose one- and two-dimensional arrays. The difference between the
* %Array and the %Matrix class is primarily in the API: the API for the
* %Array class provides easy access to coefficient-wise operations, while the
* API for the %Matrix class provides easy access to linear-algebra
* operations.
*
* \sa \ref TutorialArrayClass, \ref TopicClassHierarchy
*/
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct ei_traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : ei_traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{
@@ -231,6 +245,7 @@ class Array
};
/** \defgroup arraytypedefs Global array typedefs
* \ingroup Core_Module
*
* Eigen defines several typedef shortcuts for most common 1D and 2D array types.
*
@@ -251,7 +266,7 @@ class Array
#define EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \
/** \ingroup arraytypedefs */ \
typedef Array<Type, Size, Size> Array##SizeSuffix##SizeSuffix##TypeSuffix; \
/** \ingroup matrixtypedefs */ \
/** \ingroup arraytypedefs */ \
typedef Array<Type, Size, 1> Array##SizeSuffix##TypeSuffix;
#define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \

View File

@@ -28,6 +28,7 @@
template<typename ExpressionType> class MatrixWrapper;
/** \class ArrayBase
* \ingroup Core_Module
*
* \brief Base class for all 1D and 2D array, and related expressions
*
@@ -43,7 +44,7 @@ template<typename ExpressionType> class MatrixWrapper;
*
* \param Derived is the derived type, e.g., an array or an expression type.
*
* \sa class MatrixBase
* \sa class MatrixBase, \ref TopicClassHierarchy
*/
template<typename Derived> class ArrayBase
: public DenseBase<Derived>
@@ -166,6 +167,13 @@ template<typename Derived> class ArrayBase
explicit ArrayBase(Index);
ArrayBase(Index,Index);
template<typename OtherDerived> explicit ArrayBase(const ArrayBase<OtherDerived>&);
protected:
// mixing arrays and matrices is not legal
// This overload takes a MatrixBase argument and sits in the protected section of ArrayBase.
// Its body is only a static assertion carrying the message YOU_CANNOT_MIX_ARRAYS_AND_MATRICES;
// the tested condition compares a sizeof (which involves OtherDerived) with -1.
// NOTE(review): presumably the dependence on OtherDerived delays the failure until the
// operator is actually instantiated - confirm against the EIGEN_STATIC_ASSERT definition.
// There is no return statement; the function is not meant to be callable.
template<typename OtherDerived> Derived& operator+=(const MatrixBase<OtherDerived>& mat)
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
// mixing arrays and matrices is not legal
// This overload takes a MatrixBase argument and sits in the protected section of ArrayBase.
// Its body is only a static assertion carrying the message YOU_CANNOT_MIX_ARRAYS_AND_MATRICES;
// the tested condition compares a sizeof (which involves OtherDerived) with -1.
// NOTE(review): presumably the dependence on OtherDerived delays the failure until the
// operator is actually instantiated - confirm against the EIGEN_STATIC_ASSERT definition.
// There is no return statement; the function is not meant to be callable.
template<typename OtherDerived> Derived& operator-=(const MatrixBase<OtherDerived>& mat)
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
};
/** replaces \c *this by \c *this - \a other.
@@ -177,7 +185,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
{
SelfCwiseBinaryOp<ei_scalar_difference_op<Scalar>, Derived> tmp(derived());
SelfCwiseBinaryOp<ei_scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other;
return derived();
}
@@ -191,7 +199,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
{
SelfCwiseBinaryOp<ei_scalar_sum_op<Scalar>, Derived> tmp(derived());
SelfCwiseBinaryOp<ei_scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other.derived();
return derived();
}
@@ -205,7 +213,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
{
SelfCwiseBinaryOp<ei_scalar_product_op<Scalar>, Derived> tmp(derived());
SelfCwiseBinaryOp<ei_scalar_product_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other.derived();
return derived();
}
@@ -219,7 +227,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
{
SelfCwiseBinaryOp<ei_scalar_quotient_op<Scalar>, Derived> tmp(derived());
SelfCwiseBinaryOp<ei_scalar_quotient_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other.derived();
return derived();
}

View File

@@ -26,6 +26,7 @@
#define EIGEN_ARRAYWRAPPER_H
/** \class ArrayWrapper
* \ingroup Core_Module
*
* \brief Expression of a mathematical vector or matrix as an array object
*
@@ -110,6 +111,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
};
/** \class MatrixWrapper
* \ingroup Core_Module
*
* \brief Expression of an array as a mathematical vector or matrix
*

View File

@@ -256,6 +256,12 @@ struct ei_assign_impl;
*** Default traversal ***
************************/
/** \internal
  * Partial specialization of ei_assign_impl for InvalidTraversal, valid for any Unrolling value.
  * run() ignores both arguments and has an empty body, so no coefficient is copied.
  * NOTE(review): appears to be the implementation picked by DenseBase::lazyAssign when the
  * two scalar types differ - confirm against the caller.
  */
template<typename Derived1, typename Derived2, int Unrolling>
struct ei_assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling>
{
// Deliberately empty: parameters are unnamed because they are never used.
inline static void run(Derived1 &, const Derived2 &) { }
};
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
{
@@ -397,7 +403,12 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
const Index size = dst.size();
const Index packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
typedef ei_packet_traits<typename Derived1::Scalar> PacketTraits;
enum {
packetSize = PacketTraits::size,
dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(ei_assign_traits<Derived1,Derived2>::DstIsAligned) ,
srcAlignment = ei_assign_traits<Derived1,Derived2>::JointAlignment
};
const Index alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
: ei_first_aligned(&dst.coeffRef(0), size);
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
@@ -406,7 +417,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling
for(Index index = alignedStart; index < alignedEnd; index += packetSize)
{
dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::JointAlignment>(index, src);
dst.template copyPacket<Derived2, dstAlignment, srcAlignment>(index, src);
}
ei_unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
@@ -438,12 +449,18 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
{
const Index packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
typedef ei_packet_traits<typename Derived1::Scalar> PacketTraits;
enum {
packetSize = PacketTraits::size,
alignable = PacketTraits::AlignedOnScalar,
dstAlignment = alignable ? Aligned : int(ei_assign_traits<Derived1,Derived2>::DstIsAligned) ,
srcAlignment = ei_assign_traits<Derived1,Derived2>::JointAlignment
};
const Index packetAlignedMask = packetSize - 1;
const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize();
const Index alignedStep = (packetSize - dst.outerStride() % packetSize) & packetAlignedMask;
Index alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
Index alignedStart = ((!alignable) || ei_assign_traits<Derived1,Derived2>::DstIsAligned) ? 0
: ei_first_aligned(&dst.coeffRef(0,0), innerSize);
for(Index outer = 0; outer < outerSize; ++outer)
@@ -475,14 +492,21 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
::lazyAssign(const DenseBase<OtherDerived>& other)
{
enum{
SameType = ei_is_same_type<typename Derived::Scalar,typename OtherDerived::Scalar>::ret
};
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
EIGEN_STATIC_ASSERT((ei_is_same_type<typename Derived::Scalar, typename OtherDerived::Scalar>::ret),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
#ifdef EIGEN_DEBUG_ASSIGN
ei_assign_traits<Derived, OtherDerived>::debug();
#endif
ei_assert(rows() == other.rows() && cols() == other.cols());
ei_assign_impl<Derived, OtherDerived>::run(derived(),other.derived());
ei_assign_impl<Derived, OtherDerived, int(SameType) ? int(ei_assign_traits<Derived, OtherDerived>::Traversal)
: int(InvalidTraversal)>::run(derived(),other.derived());
#ifndef EIGEN_NO_DEBUG
checkTransposeAliasing(other.derived());
#endif

View File

@@ -27,6 +27,7 @@
/**
* \class BandMatrix
* \ingroup Core_Module
*
* \brief Represents a rectangular matrix with a banded storage
*
@@ -54,7 +55,7 @@ struct ei_traits<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
ColsAtCompileTime = Cols,
MaxRowsAtCompileTime = Rows,
MaxColsAtCompileTime = Cols,
Flags = 0
Flags = LvalueBit
};
};
@@ -205,6 +206,7 @@ class BandMatrix : public EigenBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Opt
/**
* \class TridiagonalMatrix
* \ingroup Core_Module
*
* \brief Represents a tridiagonal matrix
*

View File

@@ -27,6 +27,7 @@
#define EIGEN_BLOCK_H
/** \class Block
* \ingroup Core_Module
*
* \brief Expression of a fixed-size or dynamic-size block
*
@@ -57,8 +58,8 @@
*
* \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
*/
template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess>
struct ei_traits<Block<XprType, BlockRows, BlockCols, HasDirectAccess> > : ei_traits<XprType>
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess>
struct ei_traits<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> > : ei_traits<XprType>
{
typedef typename ei_traits<XprType>::Scalar Scalar;
typedef typename ei_traits<XprType>::StorageKind StorageKind;
@@ -91,15 +92,16 @@ struct ei_traits<Block<XprType, BlockRows, BlockCols, HasDirectAccess> > : ei_tr
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % ei_packet_traits<Scalar>::size) == 0)
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && ((OuterStrideAtCompileTime % ei_packet_traits<Scalar>::size) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
Flags0 = ei_traits<XprType>::Flags & (HereditaryBits | MaskPacketAccessBit | DirectAccessBit),
Flags0 = ei_traits<XprType>::Flags & (HereditaryBits | MaskPacketAccessBit | LvalueBit | DirectAccessBit | MaskAlignedBit),
Flags1 = Flags0 | FlagsLinearAccessBit,
Flags = (Flags1 & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0)
};
};
template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess> class Block
: public ei_dense_xpr_base<Block<XprType, BlockRows, BlockCols, HasDirectAccess> >::type
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> class Block
: public ei_dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> >::type
{
public:
@@ -228,9 +230,9 @@ template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess> c
};
/** \internal */
template<typename XprType, int BlockRows, int BlockCols>
class Block<XprType,BlockRows,BlockCols,true>
: public MapBase<Block<XprType, BlockRows, BlockCols,true> >
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
: public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel, true> >
{
public:
@@ -321,546 +323,5 @@ class Block<XprType,BlockRows,BlockCols,true>
int m_outerStride;
};
/** Builds a run-time sized view onto a rectangular sub-region of *this.
  *
  * \param startRow  index of the first row covered by the block
  * \param startCol  index of the first column covered by the block
  * \param blockRows how many rows the block spans
  * \param blockCols how many columns the block spans
  * \returns a Block<Derived> expression built from derived() and the four arguments
  *
  * Example: \include MatrixBase_block_int_int_int_int.cpp
  * Output: \verbinclude MatrixBase_block_int_int_int_int.out
  *
  * \note The returned expression has dynamic size. When it is taken from a
  * fixed-size matrix it still inherits a fixed maximal size, so evaluating it
  * does not cause a dynamic memory allocation.
  *
  * \sa class Block, block(Index,Index)
  */
template<typename Derived>
inline Block<Derived>
DenseBase<Derived>::block(Index startRow, Index startCol, Index blockRows, Index blockCols)
{
  typedef Block<Derived> DynamicBlock;
  return DynamicBlock(derived(), startRow, startCol, blockRows, blockCols);
}

/** Const counterpart of block(Index,Index,Index,Index); the result is a const Block. */
template<typename Derived>
inline const Block<Derived>
DenseBase<Derived>::block(Index startRow, Index startCol, Index blockRows, Index blockCols) const
{
  typedef Block<Derived> DynamicBlock;
  return DynamicBlock(derived(), startRow, startCol, blockRows, blockCols);
}
/** Builds a run-time sized view onto the top-right corner of *this.
  *
  * The corner begins at row 0 and at column cols() - cCols.
  *
  * \param cRows how many rows the corner spans
  * \param cCols how many columns the corner spans
  * \returns a Block<Derived> expression of that corner
  *
  * Example: \include MatrixBase_topRightCorner_int_int.cpp
  * Output: \verbinclude MatrixBase_topRightCorner_int_int.out
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
template<typename Derived>
inline Block<Derived>
DenseBase<Derived>::topRightCorner(Index cRows, Index cCols)
{
  typedef Block<Derived> CornerXpr;
  return CornerXpr(derived(), 0, cols() - cCols, cRows, cCols);
}

/** Const counterpart of topRightCorner(Index, Index); the result is a const Block. */
template<typename Derived>
inline const Block<Derived>
DenseBase<Derived>::topRightCorner(Index cRows, Index cCols) const
{
  typedef Block<Derived> CornerXpr;
  return CornerXpr(derived(), 0, cols() - cCols, cRows, cCols);
}
/** Builds a compile-time sized view onto the top-right corner of *this.
  *
  * \tparam CRows how many rows the corner spans
  * \tparam CCols how many columns the corner spans
  * \returns a Block<Derived, CRows, CCols> expression starting at row 0 and
  *          column cols() - CCols
  *
  * Example: \include MatrixBase_template_int_int_topRightCorner.cpp
  * Output: \verbinclude MatrixBase_template_int_int_topRightCorner.out
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
template<typename Derived>
template<int CRows, int CCols>
inline Block<Derived, CRows, CCols>
DenseBase<Derived>::topRightCorner()
{
  typedef Block<Derived, CRows, CCols> FixedCornerXpr;
  return FixedCornerXpr(derived(), 0, cols() - CCols);
}

/** Const counterpart of topRightCorner<int, int>(); the result is a const Block. */
template<typename Derived>
template<int CRows, int CCols>
inline const Block<Derived, CRows, CCols>
DenseBase<Derived>::topRightCorner() const
{
  typedef Block<Derived, CRows, CCols> FixedCornerXpr;
  return FixedCornerXpr(derived(), 0, cols() - CCols);
}
/** Builds a run-time sized view onto the top-left corner of *this.
  *
  * The corner begins at row 0 and column 0.
  *
  * \param cRows how many rows the corner spans
  * \param cCols how many columns the corner spans
  * \returns a Block<Derived> expression of that corner
  *
  * Example: \include MatrixBase_topLeftCorner_int_int.cpp
  * Output: \verbinclude MatrixBase_topLeftCorner_int_int.out
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
template<typename Derived>
inline Block<Derived>
DenseBase<Derived>::topLeftCorner(Index cRows, Index cCols)
{
  typedef Block<Derived> CornerXpr;
  return CornerXpr(derived(), 0, 0, cRows, cCols);
}

/** Const counterpart of topLeftCorner(Index, Index); the result is a const Block. */
template<typename Derived>
inline const Block<Derived>
DenseBase<Derived>::topLeftCorner(Index cRows, Index cCols) const
{
  typedef Block<Derived> CornerXpr;
  return CornerXpr(derived(), 0, 0, cRows, cCols);
}
/** Builds a compile-time sized view onto the top-left corner of *this.
  *
  * \tparam CRows how many rows the corner spans
  * \tparam CCols how many columns the corner spans
  * \returns a Block<Derived, CRows, CCols> expression starting at row 0 and column 0
  *
  * Example: \include MatrixBase_template_int_int_topLeftCorner.cpp
  * Output: \verbinclude MatrixBase_template_int_int_topLeftCorner.out
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
template<typename Derived>
template<int CRows, int CCols>
inline Block<Derived, CRows, CCols>
DenseBase<Derived>::topLeftCorner()
{
  typedef Block<Derived, CRows, CCols> FixedCornerXpr;
  return FixedCornerXpr(derived(), 0, 0);
}

/** Const counterpart of topLeftCorner<int, int>(); the result is a const Block. */
template<typename Derived>
template<int CRows, int CCols>
inline const Block<Derived, CRows, CCols>
DenseBase<Derived>::topLeftCorner() const
{
  typedef Block<Derived, CRows, CCols> FixedCornerXpr;
  return FixedCornerXpr(derived(), 0, 0);
}
/** Builds a run-time sized view onto the bottom-right corner of *this.
  *
  * The corner begins at row rows() - cRows and at column cols() - cCols.
  *
  * \param cRows how many rows the corner spans
  * \param cCols how many columns the corner spans
  * \returns a Block<Derived> expression of that corner
  *
  * Example: \include MatrixBase_bottomRightCorner_int_int.cpp
  * Output: \verbinclude MatrixBase_bottomRightCorner_int_int.out
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
template<typename Derived>
inline Block<Derived>
DenseBase<Derived>::bottomRightCorner(Index cRows, Index cCols)
{
  typedef Block<Derived> CornerXpr;
  return CornerXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
}

/** Const counterpart of bottomRightCorner(Index, Index); the result is a const Block. */
template<typename Derived>
inline const Block<Derived>
DenseBase<Derived>::bottomRightCorner(Index cRows, Index cCols) const
{
  typedef Block<Derived> CornerXpr;
  return CornerXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
}
/** Builds a compile-time sized view onto the bottom-right corner of *this.
  *
  * \tparam CRows how many rows the corner spans
  * \tparam CCols how many columns the corner spans
  * \returns a Block<Derived, CRows, CCols> expression starting at row
  *          rows() - CRows and column cols() - CCols
  *
  * Example: \include MatrixBase_template_int_int_bottomRightCorner.cpp
  * Output: \verbinclude MatrixBase_template_int_int_bottomRightCorner.out
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
template<typename Derived>
template<int CRows, int CCols>
inline Block<Derived, CRows, CCols>
DenseBase<Derived>::bottomRightCorner()
{
  typedef Block<Derived, CRows, CCols> FixedCornerXpr;
  return FixedCornerXpr(derived(), rows() - CRows, cols() - CCols);
}

/** Const counterpart of bottomRightCorner<int, int>(); the result is a const Block. */
template<typename Derived>
template<int CRows, int CCols>
inline const Block<Derived, CRows, CCols>
DenseBase<Derived>::bottomRightCorner() const
{
  typedef Block<Derived, CRows, CCols> FixedCornerXpr;
  return FixedCornerXpr(derived(), rows() - CRows, cols() - CCols);
}
/** Builds a run-time sized view onto the bottom-left corner of *this.
  *
  * The corner begins at row rows() - cRows and at column 0.
  *
  * \param cRows how many rows the corner spans
  * \param cCols how many columns the corner spans
  * \returns a Block<Derived> expression of that corner
  *
  * Example: \include MatrixBase_bottomLeftCorner_int_int.cpp
  * Output: \verbinclude MatrixBase_bottomLeftCorner_int_int.out
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
template<typename Derived>
inline Block<Derived>
DenseBase<Derived>::bottomLeftCorner(Index cRows, Index cCols)
{
  typedef Block<Derived> CornerXpr;
  return CornerXpr(derived(), rows() - cRows, 0, cRows, cCols);
}

/** Const counterpart of bottomLeftCorner(Index, Index); the result is a const Block. */
template<typename Derived>
inline const Block<Derived>
DenseBase<Derived>::bottomLeftCorner(Index cRows, Index cCols) const
{
  typedef Block<Derived> CornerXpr;
  return CornerXpr(derived(), rows() - cRows, 0, cRows, cCols);
}
/** Builds a compile-time sized view onto the bottom-left corner of *this.
  *
  * \tparam CRows how many rows the corner spans
  * \tparam CCols how many columns the corner spans
  * \returns a Block<Derived, CRows, CCols> expression starting at row
  *          rows() - CRows and column 0
  *
  * Example: \include MatrixBase_template_int_int_bottomLeftCorner.cpp
  * Output: \verbinclude MatrixBase_template_int_int_bottomLeftCorner.out
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
template<typename Derived>
template<int CRows, int CCols>
inline Block<Derived, CRows, CCols>
DenseBase<Derived>::bottomLeftCorner()
{
  typedef Block<Derived, CRows, CCols> FixedCornerXpr;
  return FixedCornerXpr(derived(), rows() - CRows, 0);
}

/** Const counterpart of bottomLeftCorner<int, int>(); the result is a const Block. */
template<typename Derived>
template<int CRows, int CCols>
inline const Block<Derived, CRows, CCols>
DenseBase<Derived>::bottomLeftCorner() const
{
  typedef Block<Derived, CRows, CCols> FixedCornerXpr;
  return FixedCornerXpr(derived(), rows() - CRows, 0);
}
/** Builds a view onto the first \a n rows of *this, across every column.
  *
  * \param n how many rows the block spans
  * \returns a RowsBlockXpr starting at row 0, column 0, with n rows and cols() columns
  *
  * Example: \include MatrixBase_topRows_int.cpp
  * Output: \verbinclude MatrixBase_topRows_int.out
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
template<typename Derived>
inline typename DenseBase<Derived>::RowsBlockXpr
DenseBase<Derived>::topRows(Index n)
{
  const Index fullWidth = cols();
  return RowsBlockXpr(derived(), 0, 0, n, fullWidth);
}

/** Const counterpart of topRows(Index). */
template<typename Derived>
inline const typename DenseBase<Derived>::RowsBlockXpr
DenseBase<Derived>::topRows(Index n) const
{
  const Index fullWidth = cols();
  return RowsBlockXpr(derived(), 0, 0, n, fullWidth);
}
/** Builds a view onto the first \a N rows of *this, with N fixed at compile time.
  *
  * \tparam N how many rows the block spans
  * \returns an NRowsBlockXpr<N>::Type starting at row 0, column 0, with N rows
  *          and cols() columns
  *
  * Example: \include MatrixBase_template_int_topRows.cpp
  * Output: \verbinclude MatrixBase_template_int_topRows.out
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
template<typename Derived>
template<int N>
inline typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type
DenseBase<Derived>::topRows()
{
  typedef typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type FixedRowsXpr;
  return FixedRowsXpr(derived(), 0, 0, N, cols());
}

/** Const counterpart of topRows<int>(). */
template<typename Derived>
template<int N>
inline const typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type
DenseBase<Derived>::topRows() const
{
  typedef typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type FixedRowsXpr;
  return FixedRowsXpr(derived(), 0, 0, N, cols());
}
/** Builds a view onto the last \a n rows of *this, across every column.
  *
  * \param n how many rows the block spans
  * \returns a RowsBlockXpr starting at row rows() - n, column 0, with n rows
  *          and cols() columns
  *
  * Example: \include MatrixBase_bottomRows_int.cpp
  * Output: \verbinclude MatrixBase_bottomRows_int.out
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
template<typename Derived>
inline typename DenseBase<Derived>::RowsBlockXpr
DenseBase<Derived>::bottomRows(Index n)
{
  const Index firstRow = rows() - n;
  return RowsBlockXpr(derived(), firstRow, 0, n, cols());
}

/** Const counterpart of bottomRows(Index). */
template<typename Derived>
inline const typename DenseBase<Derived>::RowsBlockXpr
DenseBase<Derived>::bottomRows(Index n) const
{
  const Index firstRow = rows() - n;
  return RowsBlockXpr(derived(), firstRow, 0, n, cols());
}
/** Builds a view onto the last \a N rows of *this, with N fixed at compile time.
  *
  * \tparam N how many rows the block spans
  * \returns an NRowsBlockXpr<N>::Type starting at row rows() - N, column 0,
  *          with N rows and cols() columns
  *
  * Example: \include MatrixBase_template_int_bottomRows.cpp
  * Output: \verbinclude MatrixBase_template_int_bottomRows.out
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
template<typename Derived>
template<int N>
inline typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type
DenseBase<Derived>::bottomRows()
{
  typedef typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type FixedRowsXpr;
  return FixedRowsXpr(derived(), rows() - N, 0, N, cols());
}

/** Const counterpart of bottomRows<int>(). */
template<typename Derived>
template<int N>
inline const typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type
DenseBase<Derived>::bottomRows() const
{
  typedef typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type FixedRowsXpr;
  return FixedRowsXpr(derived(), rows() - N, 0, N, cols());
}
/** Builds a view onto the \a n leftmost columns of *this, across every row.
  *
  * \param n how many columns the block spans
  * \returns a ColsBlockXpr starting at row 0, column 0, with rows() rows and n columns
  *
  * Example: \include MatrixBase_leftCols_int.cpp
  * Output: \verbinclude MatrixBase_leftCols_int.out
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
template<typename Derived>
inline typename DenseBase<Derived>::ColsBlockXpr
DenseBase<Derived>::leftCols(Index n)
{
  const Index fullHeight = rows();
  return ColsBlockXpr(derived(), 0, 0, fullHeight, n);
}

/** Const counterpart of leftCols(Index). */
template<typename Derived>
inline const typename DenseBase<Derived>::ColsBlockXpr
DenseBase<Derived>::leftCols(Index n) const
{
  const Index fullHeight = rows();
  return ColsBlockXpr(derived(), 0, 0, fullHeight, n);
}
/** Builds a view onto the \a N leftmost columns of *this, with N fixed at compile time.
  *
  * \tparam N how many columns the block spans
  * \returns an NColsBlockXpr<N>::Type starting at row 0, column 0, with rows()
  *          rows and N columns
  *
  * Example: \include MatrixBase_template_int_leftCols.cpp
  * Output: \verbinclude MatrixBase_template_int_leftCols.out
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
template<typename Derived>
template<int N>
inline typename DenseBase<Derived>::template NColsBlockXpr<N>::Type
DenseBase<Derived>::leftCols()
{
  typedef typename DenseBase<Derived>::template NColsBlockXpr<N>::Type FixedColsXpr;
  return FixedColsXpr(derived(), 0, 0, rows(), N);
}

/** Const counterpart of leftCols<int>(). */
template<typename Derived>
template<int N>
inline const typename DenseBase<Derived>::template NColsBlockXpr<N>::Type
DenseBase<Derived>::leftCols() const
{
  typedef typename DenseBase<Derived>::template NColsBlockXpr<N>::Type FixedColsXpr;
  return FixedColsXpr(derived(), 0, 0, rows(), N);
}
/** \returns a block made of the last \a n (right-most) columns of *this, across all of its rows.
  *
  * \param n the number of columns in the block
  *
  * Example: \include MatrixBase_rightCols_int.cpp
  * Output: \verbinclude MatrixBase_rightCols_int.out
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
template<typename Derived>
inline typename DenseBase<Derived>::ColsBlockXpr
DenseBase<Derived>::rightCols(Index n)
{
  // Column at which the trailing block of n columns begins.
  const Index firstCol = cols() - n;
  return ColsBlockXpr(derived(), 0, firstCol, rows(), n);
}
/** Const overload of rightCols(Index): builds the same block from a const object. */
template<typename Derived>
inline const typename DenseBase<Derived>::ColsBlockXpr
DenseBase<Derived>::rightCols(Index n) const
{
  // Column at which the trailing block of n columns begins.
  const Index firstCol = cols() - n;
  return ColsBlockXpr(derived(), 0, firstCol, rows(), n);
}
/** \returns a block made of the last \a N (right-most) columns of *this, across all of its rows.
  *
  * \tparam N the number of columns in the block (known at compile time)
  *
  * Example: \include MatrixBase_template_int_rightCols.cpp
  * Output: \verbinclude MatrixBase_template_int_rightCols.out
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
template<typename Derived>
template<int N>
inline typename DenseBase<Derived>::template NColsBlockXpr<N>::Type
DenseBase<Derived>::rightCols()
{
  typedef typename DenseBase<Derived>::template NColsBlockXpr<N>::Type RightBlock;
  // Start N columns before the end, at row 0, and take rows() rows by N columns.
  return RightBlock(derived(), 0, cols() - N, rows(), N);
}
/** Const overload of rightCols<int>(): builds the same block from a const object. */
template<typename Derived>
template<int N>
inline const typename DenseBase<Derived>::template NColsBlockXpr<N>::Type
DenseBase<Derived>::rightCols() const
{
  typedef typename DenseBase<Derived>::template NColsBlockXpr<N>::Type RightBlock;
  // Start N columns before the end, at row 0, and take rows() rows by N columns.
  return RightBlock(derived(), 0, cols() - N, rows(), N);
}
/** \returns a fixed-size expression of a block in *this.
  *
  * The block dimensions are given at compile time through the template
  * parameters \a BlockRows (number of rows) and \a BlockCols (number of columns).
  *
  * \param startRow the first row in the block
  * \param startCol the first column in the block
  *
  * Example: \include MatrixBase_block_int_int.cpp
  * Output: \verbinclude MatrixBase_block_int_int.out
  *
  * \note since block is a templated member, the keyword template has to be used
  * if the matrix type is also a template parameter: \code m.template block<3,3>(1,1); \endcode
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
template<typename Derived>
template<int BlockRows, int BlockCols>
inline Block<Derived, BlockRows, BlockCols>
DenseBase<Derived>::block(Index startRow, Index startCol)
{
  typedef Block<Derived, BlockRows, BlockCols> FixedBlock;
  return FixedBlock(derived(), startRow, startCol);
}
/** Const overload of block<>(Index, Index): builds the same block from a const object. */
template<typename Derived>
template<int BlockRows, int BlockCols>
inline const Block<Derived, BlockRows, BlockCols>
DenseBase<Derived>::block(Index startRow, Index startCol) const
{
  typedef Block<Derived, BlockRows, BlockCols> FixedBlock;
  return FixedBlock(derived(), startRow, startCol);
}
/** \returns an expression of the column of *this with index \a i. Column indices are zero-based.
  *
  * Example: \include MatrixBase_col.cpp
  * Output: \verbinclude MatrixBase_col.out
  *
  * \sa row(), class Block */
template<typename Derived>
inline typename DenseBase<Derived>::ColXpr
DenseBase<Derived>::col(Index i)
{
  typedef typename DenseBase<Derived>::ColXpr ColumnExpr;
  return ColumnExpr(derived(), i);
}
/** Const overload of col(): builds the same column expression from a const object. */
template<typename Derived>
inline const typename DenseBase<Derived>::ColXpr
DenseBase<Derived>::col(Index i) const
{
  typedef typename DenseBase<Derived>::ColXpr ColumnExpr;
  return ColumnExpr(derived(), i);
}
/** \returns an expression of the row of *this with index \a i. Row indices are zero-based.
  *
  * Example: \include MatrixBase_row.cpp
  * Output: \verbinclude MatrixBase_row.out
  *
  * \sa col(), class Block */
template<typename Derived>
inline typename DenseBase<Derived>::RowXpr
DenseBase<Derived>::row(Index i)
{
  typedef typename DenseBase<Derived>::RowXpr RowExpr;
  return RowExpr(derived(), i);
}
/** Const overload of row(): builds the same row expression from a const object. */
template<typename Derived>
inline const typename DenseBase<Derived>::RowXpr
DenseBase<Derived>::row(Index i) const
{
  typedef typename DenseBase<Derived>::RowXpr RowExpr;
  return RowExpr(derived(), i);
}
#endif // EIGEN_BLOCK_H

View File

@@ -27,6 +27,7 @@
#define EIGEN_COMMAINITIALIZER_H
/** \class CommaInitializer
* \ingroup Core_Module
*
* \brief Helper class used by the comma initializer operator
*

View File

@@ -27,6 +27,7 @@
#define EIGEN_CWISE_BINARY_OP_H
/** \class CwiseBinaryOp
* \ingroup Core_Module
*
* \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
*
@@ -79,13 +80,14 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
RhsCoeffReadCost = _RhsNested::CoeffReadCost,
LhsFlags = _LhsNested::Flags,
RhsFlags = _RhsNested::Flags,
SameType = ei_is_same_type<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::ret,
StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit),
Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
HereditaryBits
| (int(LhsFlags) & int(RhsFlags) &
( AlignedBit
| (StorageOrdersAgree ? LinearAccessBit : 0)
| (ei_functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree ? PacketAccessBit : 0)
| (ei_functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
)
)
),
@@ -94,6 +96,19 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
};
};
// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor
// that would take two operands of different types. If there were such an example, then this check should be
// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as
// currently they take only one typename Scalar template parameter.
// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
// add together a float matrix and a double matrix.
//
// EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) is a compile-time check on the two scalar types LHS and RHS:
//  - if ei_functor_allows_mixing_real_and_complex<BINOP>::ret is set, only NumTraits<LHS>::Real and
//    NumTraits<RHS>::Real have to be the same type;
//  - otherwise LHS and RHS themselves have to be the same type.
// A failed check is reported through the YOU_MIXED_DIFFERENT_NUMERIC_TYPES... static assertion message.
#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
EIGEN_STATIC_ASSERT((ei_functor_allows_mixing_real_and_complex<BINOP>::ret \
? int(ei_is_same_type<typename NumTraits<LHS>::Real, typename NumTraits<RHS>::Real>::ret) \
: int(ei_is_same_type<LHS, RHS>::ret)), \
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
class CwiseBinaryOpImpl;
@@ -120,17 +135,7 @@ class CwiseBinaryOp : ei_no_assignment_operator,
EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& lhs, const Rhs& rhs, const BinaryOp& func = BinaryOp())
: m_lhs(lhs), m_rhs(rhs), m_functor(func)
{
// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor
// that would take two operands of different types. If there were such an example, then this check should be
// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as
// currently they take only one typename Scalar template parameter.
// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
// add together a float matrix and a double matrix.
EIGEN_STATIC_ASSERT((ei_functor_allows_mixing_real_and_complex<BinaryOp>::ret
? int(ei_is_same_type<typename Lhs::RealScalar, typename Rhs::RealScalar>::ret)
: int(ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret)),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar);
// require the sizes to match
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs)
ei_assert(lhs.rows() == rhs.rows() && lhs.cols() == rhs.cols());
@@ -210,8 +215,8 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
{
SelfCwiseBinaryOp<ei_scalar_difference_op<Scalar>, Derived> tmp(derived());
tmp = other;
SelfCwiseBinaryOp<ei_scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other.derived();
return derived();
}
@@ -224,7 +229,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
{
SelfCwiseBinaryOp<ei_scalar_sum_op<Scalar>, Derived> tmp(derived());
SelfCwiseBinaryOp<ei_scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other.derived();
return derived();
}

View File

@@ -26,6 +26,7 @@
#define EIGEN_CWISE_NULLARY_OP_H
/** \class CwiseNullaryOp
* \ingroup Core_Module
*
* \brief Generic expression of a matrix where all coefficients are defined by a functor
*
@@ -239,16 +240,29 @@ DenseBase<Derived>::Constant(const Scalar& value)
* Example: \include DenseBase_LinSpaced_seq.cpp
* Output: \verbinclude DenseBase_LinSpaced_seq.out
*
* \sa setLinSpaced(const Scalar&,const Scalar&,Index), LinSpaced(Scalar,Scalar,Index), CwiseNullaryOp
* \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Index,Scalar,Scalar), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high, Index size)
DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return DenseBase<Derived>::NullaryExpr(size, ei_linspaced_op<Scalar,false>(low,high,size));
}
/**
  * \copydoc DenseBase::LinSpaced(Sequential_t, Index, const Scalar&, const Scalar&)
  * Overload for fixed-size vector types: the size is taken from
  * Derived::SizeAtCompileTime, so no size argument is needed.
  */
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
{
  // Only meaningful for vectors whose size is known at compile time.
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
  typedef ei_linspaced_op<Scalar,false> SequentialFunctor;
  return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime,
                                         SequentialFunctor(low,high,Derived::SizeAtCompileTime));
}
/**
* \brief Sets a linearly spaced vector.
*
@@ -259,16 +273,29 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
* Example: \include DenseBase_LinSpaced.cpp
* Output: \verbinclude DenseBase_LinSpaced.out
*
* \sa setLinSpaced(const Scalar&,const Scalar&,Index), LinSpaced(Sequential_t,const Scalar&,const Scalar&,Index), CwiseNullaryOp
* \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Sequential_t,Index,const Scalar&,const Scalar&), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high, Index size)
DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return DenseBase<Derived>::NullaryExpr(size, ei_linspaced_op<Scalar,true>(low,high,size));
}
/**
  * \copydoc DenseBase::LinSpaced(Index, const Scalar&, const Scalar&)
  * Overload for fixed-size vector types: the size is taken from
  * Derived::SizeAtCompileTime, so no size argument is needed.
  */
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
{
  // Only meaningful for vectors whose size is known at compile time.
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
  typedef ei_linspaced_op<Scalar,true> RandomAccessFunctor;
  return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime,
                                         RandomAccessFunctor(low,high,Derived::SizeAtCompileTime));
}
/** \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
template<typename Derived>
bool DenseBase<Derived>::isApproxToConstant
@@ -332,6 +359,7 @@ DenseStorageBase<Derived>::setConstant(Index size, const Scalar& value)
*
* \param rows the new number of rows
* \param cols the new number of columns
* \param value the value to which all coefficients are set
*
* Example: \include Matrix_setConstant_int_int.cpp
* Output: \verbinclude Matrix_setConstant_int_int.out
@@ -359,7 +387,7 @@ DenseStorageBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val
* \sa CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high, Index size)
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index size, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return derived() = Derived::NullaryExpr(size, ei_linspaced_op<Scalar,false>(low,high,size));

View File

@@ -27,6 +27,7 @@
#define EIGEN_CWISE_UNARY_OP_H
/** \class CwiseUnaryOp
* \ingroup Core_Module
*
* \brief Generic expression where a coefficient-wise unary operator is applied to an expression
*

View File

@@ -26,6 +26,7 @@
#define EIGEN_CWISE_UNARY_VIEW_H
/** \class CwiseUnaryView
* \ingroup Core_Module
*
* \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
*
@@ -47,7 +48,7 @@ struct ei_traits<CwiseUnaryView<ViewOp, MatrixType> >
typedef typename MatrixType::Nested MatrixTypeNested;
typedef typename ei_cleantype<MatrixTypeNested>::type _MatrixTypeNested;
enum {
Flags = (ei_traits<_MatrixTypeNested>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)),
Flags = (ei_traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)),
CoeffReadCost = ei_traits<_MatrixTypeNested>::CoeffReadCost + ei_functor_traits<ViewOp>::Cost,
MatrixTypeInnerStride = ei_inner_stride_at_compile_time<MatrixType>::ret,
// need to cast the sizeof's from size_t to int explicitly, otherwise:

View File

@@ -27,6 +27,7 @@
#define EIGEN_DENSEBASE_H
/** \class DenseBase
* \ingroup Core_Module
*
* \brief Base class for all dense matrices, vectors, and arrays
*
@@ -34,6 +35,8 @@
* and related expression types). The common Eigen API for dense objects is contained in this class.
*
* \param Derived is the derived type, e.g., a matrix type or an expression.
*
* \sa \ref TopicClassHierarchy
*/
template<typename Derived> class DenseBase
#ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -44,14 +47,13 @@ template<typename Derived> class DenseBase
#endif // not EIGEN_PARSED_BY_DOXYGEN
{
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
using ei_special_scalar_op_base<Derived,typename ei_traits<Derived>::Scalar,
typename NumTraits<typename ei_traits<Derived>::Scalar>::Real>::operator*;
class InnerIterator;
typedef typename ei_traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index;
typedef typename ei_traits<Derived>::Index Index; /**< The type of indices */
typedef typename ei_traits<Derived>::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
@@ -87,9 +89,7 @@ template<typename Derived> class DenseBase
using Base::outerStride;
using Base::rowStride;
using Base::colStride;
using typename Base::CoeffReturnType;
#endif // not EIGEN_PARSED_BY_DOXYGEN
typedef typename Base::CoeffReturnType CoeffReturnType;
enum {
@@ -234,19 +234,6 @@ template<typename Derived> class DenseBase
typedef CwiseNullaryOp<ei_linspaced_op<Scalar,true>,Derived> RandomAccessLinSpacedReturnType;
/** \internal the return type of MatrixBase::eigenvalues() */
typedef Matrix<typename NumTraits<typename ei_traits<Derived>::Scalar>::Real, ei_traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
/** \internal expression type of a column */
typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, 1> ColXpr;
/** \internal expression type of a row */
typedef Block<Derived, 1, ei_traits<Derived>::ColsAtCompileTime> RowXpr;
/** \internal expression type of a block of whole columns */
typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, Dynamic> ColsBlockXpr;
/** \internal expression type of a block of whole rows */
typedef Block<Derived, Dynamic, ei_traits<Derived>::ColsAtCompileTime> RowsBlockXpr;
/** \internal expression type of a block of whole columns */
template<int N> struct NColsBlockXpr { typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, N> Type; };
/** \internal expression type of a block of whole rows */
template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, ei_traits<Derived>::ColsAtCompileTime> Type; };
#endif // not EIGEN_PARSED_BY_DOXYGEN
@@ -295,15 +282,6 @@ template<typename Derived> class DenseBase
public:
#endif
RowXpr row(Index i);
const RowXpr row(Index i) const;
ColXpr col(Index i);
const ColXpr col(Index i) const;
Block<Derived> block(Index startRow, Index startCol, Index blockRows, Index blockCols);
const Block<Derived> block(Index startRow, Index startCol, Index blockRows, Index blockCols) const;
VectorBlock<Derived> segment(Index start, Index size);
const VectorBlock<Derived> segment(Index start, Index size) const;
@@ -313,47 +291,6 @@ template<typename Derived> class DenseBase
VectorBlock<Derived> tail(Index size);
const VectorBlock<Derived> tail(Index size) const;
Block<Derived> topLeftCorner(Index cRows, Index cCols);
const Block<Derived> topLeftCorner(Index cRows, Index cCols) const;
Block<Derived> topRightCorner(Index cRows, Index cCols);
const Block<Derived> topRightCorner(Index cRows, Index cCols) const;
Block<Derived> bottomLeftCorner(Index cRows, Index cCols);
const Block<Derived> bottomLeftCorner(Index cRows, Index cCols) const;
Block<Derived> bottomRightCorner(Index cRows, Index cCols);
const Block<Derived> bottomRightCorner(Index cRows, Index cCols) const;
RowsBlockXpr topRows(Index n);
const RowsBlockXpr topRows(Index n) const;
RowsBlockXpr bottomRows(Index n);
const RowsBlockXpr bottomRows(Index n) const;
ColsBlockXpr leftCols(Index n);
const ColsBlockXpr leftCols(Index n) const;
ColsBlockXpr rightCols(Index n);
const ColsBlockXpr rightCols(Index n) const;
template<int CRows, int CCols> Block<Derived, CRows, CCols> topLeftCorner();
template<int CRows, int CCols> const Block<Derived, CRows, CCols> topLeftCorner() const;
template<int CRows, int CCols> Block<Derived, CRows, CCols> topRightCorner();
template<int CRows, int CCols> const Block<Derived, CRows, CCols> topRightCorner() const;
template<int CRows, int CCols> Block<Derived, CRows, CCols> bottomLeftCorner();
template<int CRows, int CCols> const Block<Derived, CRows, CCols> bottomLeftCorner() const;
template<int CRows, int CCols> Block<Derived, CRows, CCols> bottomRightCorner();
template<int CRows, int CCols> const Block<Derived, CRows, CCols> bottomRightCorner() const;
template<int NRows> typename NRowsBlockXpr<NRows>::Type topRows();
template<int NRows> const typename NRowsBlockXpr<NRows>::Type topRows() const;
template<int NRows> typename NRowsBlockXpr<NRows>::Type bottomRows();
template<int NRows> const typename NRowsBlockXpr<NRows>::Type bottomRows() const;
template<int NCols> typename NColsBlockXpr<NCols>::Type leftCols();
template<int NCols> const typename NColsBlockXpr<NCols>::Type leftCols() const;
template<int NCols> typename NColsBlockXpr<NCols>::Type rightCols();
template<int NCols> const typename NColsBlockXpr<NCols>::Type rightCols() const;
template<int BlockRows, int BlockCols>
Block<Derived, BlockRows, BlockCols> block(Index startRow, Index startCol);
template<int BlockRows, int BlockCols>
const Block<Derived, BlockRows, BlockCols> block(Index startRow, Index startCol) const;
template<int Size> VectorBlock<Derived,Size> head(void);
template<int Size> const VectorBlock<Derived,Size> head() const;
@@ -389,9 +326,13 @@ template<typename Derived> class DenseBase
Constant(const Scalar& value);
static const SequentialLinSpacedReturnType
LinSpaced(Sequential_t, const Scalar& low, const Scalar& high, Index size);
LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high);
static const RandomAccessLinSpacedReturnType
LinSpaced(const Scalar& low, const Scalar& high, Index size);
LinSpaced(Index size, const Scalar& low, const Scalar& high);
static const SequentialLinSpacedReturnType
LinSpaced(Sequential_t, const Scalar& low, const Scalar& high);
static const RandomAccessLinSpacedReturnType
LinSpaced(const Scalar& low, const Scalar& high);
template<typename CustomNullaryOp>
static const CwiseNullaryOp<CustomNullaryOp, Derived>
@@ -412,7 +353,8 @@ template<typename Derived> class DenseBase
void fill(const Scalar& value);
Derived& setConstant(const Scalar& value);
Derived& setLinSpaced(const Scalar& low, const Scalar& high, Index size);
Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
Derived& setLinSpaced(const Scalar& low, const Scalar& high);
Derived& setZero();
Derived& setOnes();
Derived& setRandom();
@@ -518,6 +460,13 @@ template<typename Derived> class DenseBase
const Eigen::Reverse<Derived, BothDirections> reverse() const;
void reverseInPlace();
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
# include "../plugins/BlockMethods.h"
# ifdef EIGEN_DENSEBASE_PLUGIN
# include EIGEN_DENSEBASE_PLUGIN
# endif
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
#ifdef EIGEN2_SUPPORT
Block<Derived> corner(CornerType type, Index cRows, Index cCols);
@@ -529,9 +478,6 @@ template<typename Derived> class DenseBase
#endif // EIGEN2_SUPPORT
#ifdef EIGEN_DENSEBASE_PLUGIN
#include EIGEN_DENSEBASE_PLUGIN
#endif
// disable the use of evalTo for dense objects with a nice compilation error
template<typename Dest> inline void evalTo(Dest& ) const

View File

@@ -25,15 +25,26 @@
#ifndef EIGEN_DENSECOEFFSBASE_H
#define EIGEN_DENSECOEFFSBASE_H
template<typename Derived, bool EnableDirectAccessAPI>
class DenseCoeffsBase : public EigenBase<Derived>
/** \brief Base class providing read-only coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam ReadOnlyAccessors Constant indicating read-only access
*
* This class defines the \c operator() \c const function and friends, which can be used to read specific
* entries of a matrix or array.
*
* \sa DenseCoeffsBase<Derived, WriteAccessors>, DenseCoeffsBase<Derived, DirectAccessors>,
* \ref TopicClassHierarchy
*/
template<typename Derived>
class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
{
public:
typedef typename ei_traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index;
typedef typename ei_traits<Derived>::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename ei_meta_if<ei_has_direct_access<Derived>::ret,
typedef typename ei_meta_if<bool(ei_traits<Derived>::Flags&LvalueBit),
const Scalar&,
typename ei_meta_if<ei_is_arithmetic<Scalar>::ret, Scalar, const Scalar>::ret
>::ret CoeffReturnType;
@@ -238,12 +249,23 @@ class DenseCoeffsBase : public EigenBase<Derived>
void colStride();
};
/** \brief Base class providing read/write coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam WriteAccessors Constant indicating read/write access
*
* This class defines the non-const \c operator() function and friends, which can be used to write specific
* entries of a matrix or array. This class inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which
* defines the const variant for reading specific entries.
*
* \sa DenseCoeffsBase<Derived, DirectAccessors>, \ref TopicClassHierarchy
*/
template<typename Derived>
class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
{
public:
typedef DenseCoeffsBase<Derived, false> Base;
typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
typedef typename ei_traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index;
@@ -512,6 +534,34 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
}
#endif
};
/** \brief Base class providing direct coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam DirectAccessors Constant indicating direct access
*
* This class defines functions to work with strides which can be used to access entries directly. This class
* inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries using
* \c operator() .
*
* \sa \ref TopicClassHierarchy
*/
template<typename Derived>
class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, WriteAccessors>
{
public:
typedef DenseCoeffsBase<Derived, WriteAccessors> Base;
typedef typename ei_traits<Derived>::Index Index;
typedef typename ei_traits<Derived>::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
using Base::rows;
using Base::cols;
using Base::size;
using Base::derived;
/** \returns the pointer increment between two consecutive elements within a slice in the inner direction.
*
* \sa outerStride(), rowStride(), colStride()
@@ -531,6 +581,7 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
return derived().outerStride();
}
// FIXME shall we remove it ?
inline Index stride() const
{
return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();

View File

@@ -36,8 +36,9 @@ template <typename Derived, typename OtherDerived = Derived, bool IsVector = sta
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct ei_matrix_swap_impl;
/**
* \brief Dense storage base class for matrices and arrays.
**/
* \brief %Dense storage base class for matrices and arrays.
* \sa \ref TopicClassHierarchy
*/
template<typename Derived>
class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
{
@@ -108,7 +109,7 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
{
return ei_ploadt<Scalar, LoadMode>
return ei_ploadt<PacketScalar, LoadMode>
(m_storage.data() + (Flags & RowMajorBit
? col + row * m_storage.cols()
: row + col * m_storage.rows()));
@@ -117,7 +118,7 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
{
return ei_ploadt<Scalar, LoadMode>(m_storage.data() + index);
return ei_ploadt<PacketScalar, LoadMode>(m_storage.data() + index);
}
template<int StoreMode>
@@ -432,8 +433,9 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
ei_assert((this->size()==0 || (IsVectorAtCompileTime ? (this->size() == other.size())
: (rows() == other.rows() && cols() == other.cols())))
&& "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
#endif
#else
resizeLike(other);
#endif
}
/**
@@ -482,8 +484,8 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
template<typename T0, typename T1>
EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename ei_enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
{
ei_assert(rows > 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols > 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
ei_assert(rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
m_storage.resize(rows*cols,rows,cols);
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
}

View File

@@ -26,6 +26,7 @@
#define EIGEN_DIAGONAL_H
/** \class Diagonal
* \ingroup Core_Module
*
* \brief Expression of a diagonal/subdiagonal/superdiagonal in a matrix
*
@@ -61,7 +62,7 @@ struct ei_traits<Diagonal<MatrixType,DiagIndex> >
MatrixType::MaxColsAtCompileTime)
: (EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::MaxRowsAtCompileTime, MatrixType::MaxColsAtCompileTime) - AbsDiagIndex),
MaxColsAtCompileTime = 1,
Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit,
Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | LvalueBit | DirectAccessBit) & ~RowMajorBit,
CoeffReadCost = _MatrixTypeNested::CoeffReadCost,
MatrixTypeOuterStride = ei_outer_stride_at_compile_time<MatrixType>::ret,
InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1,
@@ -125,6 +126,9 @@ template<typename MatrixType, int DiagIndex> class Diagonal
EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); }
EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
// trigger a compile-time error if someone tries to call packet
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index,Index) const;
};
/** \returns an expression of the main diagonal of the matrix \c *this

View File

@@ -87,6 +87,7 @@ void DiagonalBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
#endif
/** \class DiagonalMatrix
* \ingroup Core_Module
*
* \brief Represents a diagonal matrix with its storage
*
@@ -104,6 +105,9 @@ struct ei_traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime>
typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType;
typedef Dense StorageKind;
typedef DenseIndex Index;
enum {
Flags = LvalueBit
};
};
template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime>
@@ -170,7 +174,7 @@ class DiagonalMatrix
*/
DiagonalMatrix& operator=(const DiagonalMatrix& other)
{
m_diagonal = other.m_diagonal();
m_diagonal = other.diagonal();
return *this;
}
#endif
@@ -188,6 +192,7 @@ class DiagonalMatrix
};
/** \class DiagonalWrapper
* \ingroup Core_Module
*
* \brief Expression of a diagonal matrix
*
@@ -211,7 +216,7 @@ struct ei_traits<DiagonalWrapper<_DiagonalVectorType> >
ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
MaxRowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
MaxColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
Flags = 0
Flags = ei_traits<DiagonalVectorType>::Flags & LvalueBit
};
};

View File

@@ -36,8 +36,16 @@ struct ei_traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
Flags = (HereditaryBits & (unsigned int)(MatrixType::Flags))
| (PacketAccessBit & (unsigned int)(MatrixType::Flags) & (unsigned int)(DiagonalType::DiagonalVectorType::Flags)),
_StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor,
_PacketOnDiag = !((int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft)
||(int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)),
_SameTypes = ei_is_same_type<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ret,
// FIXME currently we need same types, but in the future the next rule should be the one
//_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::Flags)&PacketAccessBit))),
_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && ((!_PacketOnDiag) || (bool(int(DiagonalType::Flags)&PacketAccessBit))),
Flags = (HereditaryBits & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0),
CoeffReadCost = NumTraits<Scalar>::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost
};
};
@@ -69,26 +77,34 @@ class DiagonalProduct : ei_no_assignment_operator,
EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
{
enum {
StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor,
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
DiagonalVectorPacketLoadMode = (LoadMode == Aligned && ((InnerSize%16) == 0)) ? Aligned : Unaligned
StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor
};
const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? row : col;
if((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft)
||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight))
{
return ei_pmul(m_matrix.template packet<LoadMode>(row, col),
ei_pset1(m_diagonal.diagonal().coeff(indexInDiagonalVector)));
}
else
{
return ei_pmul(m_matrix.template packet<LoadMode>(row, col),
m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(indexInDiagonalVector));
}
return packet_impl<LoadMode>(row,col,indexInDiagonalVector,typename ei_meta_if<
((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft)
||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), ei_meta_true, ei_meta_false>::ret());
}
protected:
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, ei_meta_true) const
{
return ei_pmul(m_matrix.template packet<LoadMode>(row, col),
ei_pset1<PacketScalar>(m_diagonal.diagonal().coeff(id)));
}
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, ei_meta_false) const
{
enum {
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
DiagonalVectorPacketLoadMode = (LoadMode == Aligned && ((InnerSize%16) == 0)) ? Aligned : Unaligned
};
return ei_pmul(m_matrix.template packet<LoadMode>(row, col),
m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(id));
}
const typename MatrixType::Nested m_matrix;
const typename DiagonalType::Nested m_diagonal;
};

View File

@@ -41,7 +41,7 @@ struct ei_dot_nocheck
{
static inline typename ei_traits<T>::Scalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{
return a.conjugate().cwiseProduct(b).sum();
return a.template binaryExpr<ei_scalar_conj_product_op<typename ei_traits<T>::Scalar> >(b).sum();
}
};
@@ -50,7 +50,7 @@ struct ei_dot_nocheck<T, U, true>
{
static inline typename ei_traits<T>::Scalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{
return a.adjoint().cwiseProduct(b).sum();
return a.transpose().template binaryExpr<ei_scalar_conj_product_op<typename ei_traits<T>::Scalar> >(b).sum();
}
};

View File

@@ -34,6 +34,8 @@
* Besides MatrixBase-derived classes, this also includes special matrix classes such as diagonal matrices, etc.
*
* Notice that this class is trivial, it is only used to disambiguate overloaded functions.
*
* \sa \ref TopicClassHierarchy
*/
template<typename Derived> struct EigenBase
{

View File

@@ -26,6 +26,7 @@
#define EIGEN_FLAGGED_H
/** \class Flagged
* \ingroup Core_Module
*
* \brief Expression with modified flags
*

View File

@@ -26,6 +26,7 @@
#define EIGEN_FORCEALIGNEDACCESS_H
/** \class ForceAlignedAccess
* \ingroup Core_Module
*
* \brief Enforce aligned packet loads and stores regardless of what is requested
*

View File

@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -35,18 +35,18 @@
template<typename Scalar> struct ei_scalar_sum_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_sum_op)
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return ei_padd(a,b); }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
template<typename Packet>
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
{ return ei_predux(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_sum_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = ei_packet_traits<Scalar>::size>1
PacketAccess = ei_packet_traits<Scalar>::HasAdd
};
};
@@ -55,21 +55,47 @@ struct ei_functor_traits<ei_scalar_sum_op<Scalar> > {
*
* \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux()
*/
template<typename Scalar> struct ei_scalar_product_op {
template<typename LhsScalar,typename RhsScalar> struct ei_scalar_product_op {
enum {
Vectorizable = ei_is_same_type<LhsScalar,RhsScalar>::ret && ei_packet_traits<LhsScalar>::HasMul && ei_packet_traits<RhsScalar>::HasMul
};
typedef typename ei_scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_product_op)
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a * b; }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return ei_pmul(a,b); }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
template<typename Packet>
EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
{ return ei_predux_mul(a); }
};
template<typename LhsScalar,typename RhsScalar>
struct ei_functor_traits<ei_scalar_product_op<LhsScalar,RhsScalar> > {
enum {
Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
PacketAccess = ei_scalar_product_op<LhsScalar,RhsScalar>::Vectorizable
};
};
/** \internal
* \brief Template functor to compute the conjugate product of two scalars
*
* This is a short cut for ei_conj(x) * y which is needed for optimization purpose
*/
template<typename Scalar> struct ei_scalar_conj_product_op {
enum { Conj = NumTraits<Scalar>::IsComplex };
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_conj_product_op)
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const
{ return ei_conj_helper<Scalar,Scalar,Conj,false>().pmul(a,b); }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return ei_conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_product_op<Scalar> > {
struct ei_functor_traits<ei_scalar_conj_product_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::MulCost,
PacketAccess = ei_packet_traits<Scalar>::size>1
PacketAccess = ei_packet_traits<Scalar>::HasMul
};
};
@@ -81,18 +107,18 @@ struct ei_functor_traits<ei_scalar_product_op<Scalar> > {
template<typename Scalar> struct ei_scalar_min_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_min_op)
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::min(a, b); }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return ei_pmin(a,b); }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
template<typename Packet>
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
{ return ei_predux_min(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_min_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = ei_packet_traits<Scalar>::size>1
PacketAccess = ei_packet_traits<Scalar>::HasMin
};
};
@@ -104,18 +130,18 @@ struct ei_functor_traits<ei_scalar_min_op<Scalar> > {
template<typename Scalar> struct ei_scalar_max_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_max_op)
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::max(a, b); }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return ei_pmax(a,b); }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
template<typename Packet>
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
{ return ei_predux_max(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_max_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = ei_packet_traits<Scalar>::size>1
PacketAccess = ei_packet_traits<Scalar>::HasMax
};
};
@@ -150,15 +176,15 @@ struct ei_functor_traits<ei_scalar_hypot_op<Scalar> > {
template<typename Scalar> struct ei_scalar_difference_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_difference_op)
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return ei_psub(a,b); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_difference_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = ei_packet_traits<Scalar>::size>1
PacketAccess = ei_packet_traits<Scalar>::HasSub
};
};
@@ -170,18 +196,15 @@ struct ei_functor_traits<ei_scalar_difference_op<Scalar> > {
template<typename Scalar> struct ei_scalar_quotient_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_quotient_op)
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a / b; }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return ei_pdiv(a,b); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_quotient_op<Scalar> > {
enum {
Cost = 2 * NumTraits<Scalar>::MulCost,
PacketAccess = ei_packet_traits<Scalar>::size>1
#if (defined EIGEN_VECTORIZE)
&& !NumTraits<Scalar>::IsInteger
#endif
PacketAccess = ei_packet_traits<Scalar>::HasDiv
};
};
@@ -195,15 +218,15 @@ struct ei_functor_traits<ei_scalar_quotient_op<Scalar> > {
template<typename Scalar> struct ei_scalar_opposite_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_opposite_op)
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return ei_pnegate(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_opposite_op<Scalar> >
{ enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = int(ei_packet_traits<Scalar>::size)>1 };
PacketAccess = ei_packet_traits<Scalar>::HasNegate };
};
/** \internal
@@ -215,8 +238,8 @@ template<typename Scalar> struct ei_scalar_abs_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_abs_op)
typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return ei_abs(a); }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return ei_pabs(a); }
};
template<typename Scalar>
@@ -224,7 +247,7 @@ struct ei_functor_traits<ei_scalar_abs_op<Scalar> >
{
enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = int(ei_packet_traits<Scalar>::size)>1
PacketAccess = ei_packet_traits<Scalar>::HasAbs
};
};
@@ -237,13 +260,13 @@ template<typename Scalar> struct ei_scalar_abs2_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_abs2_op)
typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return ei_abs2(a); }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return ei_pmul(a,a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_abs2_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasAbs2 }; };
/** \internal
* \brief Template functor to compute the conjugate of a complex value
@@ -253,15 +276,15 @@ struct ei_functor_traits<ei_scalar_abs2_op<Scalar> >
template<typename Scalar> struct ei_scalar_conjugate_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_conjugate_op)
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return ei_conj(a); }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const { return a; }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return ei_pconj(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_conjugate_op<Scalar> >
{
enum {
Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0,
PacketAccess = int(ei_packet_traits<Scalar>::size)>1
PacketAccess = ei_packet_traits<Scalar>::HasConj
};
};
@@ -378,27 +401,27 @@ struct ei_functor_traits<ei_scalar_log_op<Scalar> >
* \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/
*/
/* NOTE why doing the ei_pset1() in packetOp *is* an optimization ?
* indeed it seems better to declare m_other as a PacketScalar and do the ei_pset1() once
* indeed it seems better to declare m_other as a Packet and do the ei_pset1() once
* in the constructor. However, in practice:
* - GCC does not like m_other as a PacketScalar and generate a load every time it needs it
* - GCC does not like m_other as a Packet and generate a load every time it needs it
* - on the other hand GCC is able to moves the ei_pset1() away the loop :)
* - simpler code ;)
* (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y)
*/
template<typename Scalar>
struct ei_scalar_multiple_op {
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
// FIXME default copy constructors seems bugged with std::complex<>
EIGEN_STRONG_INLINE ei_scalar_multiple_op(const ei_scalar_multiple_op& other) : m_other(other.m_other) { }
EIGEN_STRONG_INLINE ei_scalar_multiple_op(const Scalar& other) : m_other(other) { }
EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
{ return ei_pmul(a, ei_pset1(m_other)); }
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return ei_pmul(a, ei_pset1<Packet>(m_other)); }
typename ei_makeconst<typename NumTraits<Scalar>::Nested>::type m_other;
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_multiple_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; };
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
template<typename Scalar1, typename Scalar2>
struct ei_scalar_multiple2_op {
@@ -414,18 +437,18 @@ struct ei_functor_traits<ei_scalar_multiple2_op<Scalar1,Scalar2> >
template<typename Scalar, bool IsInteger>
struct ei_scalar_quotient1_impl {
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
// FIXME default copy constructors seems bugged with std::complex<>
EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const ei_scalar_quotient1_impl& other) : m_other(other.m_other) { }
EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const Scalar& other) : m_other(static_cast<Scalar>(1) / other) {}
EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
{ return ei_pmul(a, ei_pset1(m_other)); }
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return ei_pmul(a, ei_pset1<Packet>(m_other)); }
const Scalar m_other;
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_quotient1_impl<Scalar,false> >
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; };
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
template<typename Scalar>
struct ei_scalar_quotient1_impl<Scalar,true> {
@@ -461,18 +484,19 @@ struct ei_functor_traits<ei_scalar_quotient1_op<Scalar> >
template<typename Scalar>
struct ei_scalar_constant_op {
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
EIGEN_STRONG_INLINE ei_scalar_constant_op(const ei_scalar_constant_op& other) : m_other(other.m_other) { }
EIGEN_STRONG_INLINE ei_scalar_constant_op(const Scalar& other) : m_other(other) { }
template<typename Index>
EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; }
template<typename Index>
EIGEN_STRONG_INLINE const PacketScalar packetOp(Index, Index = 0) const { return ei_pset1(m_other); }
EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return ei_pset1<Packet>(m_other); }
const Scalar m_other;
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_constant_op<Scalar> >
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };
// FIXME replace this packet test by a safe one
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };
template<typename Scalar> struct ei_scalar_identity_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_identity_op)
@@ -493,22 +517,22 @@ template <typename Scalar, bool RandomAccess> struct ei_linspaced_op_impl;
template <typename Scalar>
struct ei_linspaced_op_impl<Scalar,false>
{
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
ei_linspaced_op_impl(Scalar low, Scalar step) :
m_low(low), m_step(step),
m_packetStep(ei_pset1(ei_packet_traits<Scalar>::size*step)),
m_base(ei_padd(ei_pset1(low),ei_pmul(ei_pset1(step),ei_plset<Scalar>(-ei_packet_traits<Scalar>::size)))) {}
m_packetStep(ei_pset1<Packet>(ei_packet_traits<Scalar>::size*step)),
m_base(ei_padd(ei_pset1<Packet>(low),ei_pmul(ei_pset1<Packet>(step),ei_plset<Scalar>(-ei_packet_traits<Scalar>::size)))) {}
template<typename Index>
EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
template<typename Index>
EIGEN_STRONG_INLINE const PacketScalar packetOp(Index) const { return m_base = ei_padd(m_base,m_packetStep); }
EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = ei_padd(m_base,m_packetStep); }
const Scalar m_low;
const Scalar m_step;
const PacketScalar m_packetStep;
mutable PacketScalar m_base;
const Packet m_packetStep;
mutable Packet m_base;
};
// random access for packet ops:
@@ -517,23 +541,23 @@ struct ei_linspaced_op_impl<Scalar,false>
template <typename Scalar>
struct ei_linspaced_op_impl<Scalar,true>
{
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
ei_linspaced_op_impl(Scalar low, Scalar step) :
m_low(low), m_step(step),
m_lowPacket(ei_pset1(m_low)), m_stepPacket(ei_pset1(m_step)), m_interPacket(ei_plset<Scalar>(0)) {}
m_lowPacket(ei_pset1<Packet>(m_low)), m_stepPacket(ei_pset1<Packet>(m_step)), m_interPacket(ei_plset<Scalar>(0)) {}
template<typename Index>
EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
template<typename Index>
EIGEN_STRONG_INLINE const PacketScalar packetOp(Index i) const
{ return ei_padd(m_lowPacket, ei_pmul(m_stepPacket, ei_padd(ei_pset1<Scalar>(i),m_interPacket))); }
EIGEN_STRONG_INLINE const Packet packetOp(Index i) const
{ return ei_padd(m_lowPacket, ei_pmul(m_stepPacket, ei_padd(ei_pset1<Packet>(i),m_interPacket))); }
const Scalar m_low;
const Scalar m_step;
const PacketScalar m_lowPacket;
const PacketScalar m_stepPacket;
const PacketScalar m_interPacket;
const Packet m_lowPacket;
const Packet m_stepPacket;
const Packet m_interPacket;
};
// ----- Linspace functor ----------------------------------------------------------------
@@ -543,17 +567,17 @@ struct ei_linspaced_op_impl<Scalar,true>
// nested expressions).
template <typename Scalar, bool RandomAccess = true> struct ei_linspaced_op;
template <typename Scalar, bool RandomAccess> struct ei_functor_traits< ei_linspaced_op<Scalar,RandomAccess> >
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
template <typename Scalar, bool RandomAccess> struct ei_linspaced_op
{
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
ei_linspaced_op(Scalar low, Scalar high, int num_steps) : impl(low, (high-low)/(num_steps-1)) {}
template<typename Index>
EIGEN_STRONG_INLINE const Scalar operator() (Index i, Index = 0) const { return impl(i); }
template<typename Index>
EIGEN_STRONG_INLINE const PacketScalar packetOp(Index i, Index = 0) const { return impl.packetOp(i); }
EIGEN_STRONG_INLINE const Packet packetOp(Index i, Index = 0) const { return impl.packetOp(i); }
// This proxy object handles the actual required temporaries, the different
// implementations (random vs. sequential access) as well as the piping
// implementations (random vs. sequential access) as well as the
// correct piping to size 2/4 packet operations.
const ei_linspaced_op_impl<Scalar,RandomAccess> impl;
};
@@ -561,13 +585,15 @@ template <typename Scalar, bool RandomAccess> struct ei_linspaced_op
// all functors allow linear access, except ei_scalar_identity_op. So we fix here a quick meta
// to indicate whether a functor allows linear access, just always answering 'yes' except for
// ei_scalar_identity_op.
// FIXME move this to ei_functor_traits adding a ei_functor_default
template<typename Functor> struct ei_functor_has_linear_access { enum { ret = 1 }; };
template<typename Scalar> struct ei_functor_has_linear_access<ei_scalar_identity_op<Scalar> > { enum { ret = 0 }; };
// in CwiseBinaryOp, we require the Lhs and Rhs to have the same scalar type, except for multiplication
// where we only require them to have the same _real_ scalar type so one may multiply, say, float by complex<float>.
// FIXME move this to ei_functor_traits adding a ei_functor_default
template<typename Functor> struct ei_functor_allows_mixing_real_and_complex { enum { ret = 0 }; };
template<typename Scalar> struct ei_functor_allows_mixing_real_and_complex<ei_scalar_product_op<Scalar> > { enum { ret = 1 }; };
template<typename LhsScalar,typename RhsScalar> struct ei_functor_allows_mixing_real_and_complex<ei_scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
/** \internal
@@ -577,18 +603,18 @@ template<typename Scalar> struct ei_functor_allows_mixing_real_and_complex<ei_sc
/* If you wonder why doing the ei_pset1() in packetOp() is an optimization check ei_scalar_multiple_op */
template<typename Scalar>
struct ei_scalar_add_op {
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
// FIXME default copy constructors seems bugged with std::complex<>
inline ei_scalar_add_op(const ei_scalar_add_op& other) : m_other(other.m_other) { }
inline ei_scalar_add_op(const Scalar& other) : m_other(other) { }
inline Scalar operator() (const Scalar& a) const { return a + m_other; }
inline const PacketScalar packetOp(const PacketScalar& a) const
{ return ei_padd(a, ei_pset1(m_other)); }
inline const Packet packetOp(const Packet& a) const
{ return ei_padd(a, ei_pset1<Packet>(m_other)); }
const Scalar m_other;
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_add_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; };
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = ei_packet_traits<Scalar>::HasAdd }; };
/** \internal
* \brief Template functor to compute the square root of a scalar
@@ -670,13 +696,13 @@ template<typename Scalar>
struct ei_scalar_inverse_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_inverse_op)
inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
template<typename PacketScalar>
inline const PacketScalar packetOp(const PacketScalar& a) const
{ return ei_pdiv(ei_pset1(Scalar(1)),a); }
template<typename Packet>
inline const Packet packetOp(const Packet& a) const
{ return ei_pdiv(ei_pset1<Packet>(Scalar(1)),a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_inverse_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasDiv }; };
/** \internal
* \brief Template functor to compute the square of a scalar
@@ -686,13 +712,13 @@ template<typename Scalar>
struct ei_scalar_square_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_square_op)
inline Scalar operator() (const Scalar& a) const { return a*a; }
template<typename PacketScalar>
inline const PacketScalar packetOp(const PacketScalar& a) const
template<typename Packet>
inline const Packet packetOp(const Packet& a) const
{ return ei_pmul(a,a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_square_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
/** \internal
* \brief Template functor to compute the cube of a scalar
@@ -702,13 +728,13 @@ template<typename Scalar>
struct ei_scalar_cube_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_cube_op)
inline Scalar operator() (const Scalar& a) const { return a*a*a; }
template<typename PacketScalar>
inline const PacketScalar packetOp(const PacketScalar& a) const
template<typename Packet>
inline const Packet packetOp(const Packet& a) const
{ return ei_pmul(a,ei_pmul(a,a)); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_cube_op<Scalar> >
{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };
{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
// default functor traits for STL functors:

View File

@@ -58,8 +58,11 @@ struct ei_default_packet_traits
HasMul = 1,
HasNegate = 1,
HasAbs = 1,
HasAbs2 = 1,
HasMin = 1,
HasMax = 1,
HasConj = 1,
HasSetLinear = 1,
HasDiv = 0,
HasSqrt = 0,
@@ -79,15 +82,22 @@ struct ei_default_packet_traits
template<typename T> struct ei_packet_traits : ei_default_packet_traits
{
typedef T type;
enum {size=1};
enum {
Vectorizable = 0,
size = 1,
AlignedOnScalar = 0
};
enum {
HasAdd = 0,
HasSub = 0,
HasMul = 0,
HasNegate = 0,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0
HasMax = 0,
HasConj = 0,
HasSetLinear = 0
};
};
@@ -105,6 +115,10 @@ ei_psub(const Packet& a,
template<typename Packet> inline Packet
ei_pnegate(const Packet& a) { return -a; }
/** \internal \returns conj(a) (coeff-wise) */
template<typename Packet> inline Packet
ei_pconj(const Packet& a) { return ei_conj(a); }
/** \internal \returns a * b (coeff-wise) */
template<typename Packet> inline Packet
ei_pmul(const Packet& a,
@@ -146,16 +160,20 @@ template<typename Packet> inline Packet
ei_pandnot(const Packet& a, const Packet& b) { return a & (!b); }
/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type
ei_pload(const Scalar* from) { return *from; }
template<typename Packet> inline Packet
ei_pload(const typename ei_unpacket_traits<Packet>::type* from) { return *from; }
/** \internal \returns a packet version of \a *from, (un-aligned load) */
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type
ei_ploadu(const Scalar* from) { return *from; }
template<typename Packet> inline Packet
ei_ploadu(const typename ei_unpacket_traits<Packet>::type* from) { return *from; }
/** \internal \returns a packet with elements of \a *from duplicated, e.g.: (from[0],from[0],from[1],from[1]) */
template<typename Packet> inline Packet
ei_ploaddup(const typename ei_unpacket_traits<Packet>::type* from) { return *from; }
/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type
ei_pset1(const Scalar& a) { return a; }
template<typename Packet> inline Packet
ei_pset1(const typename ei_unpacket_traits<Packet>::type& a) { return a; }
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type
@@ -242,13 +260,13 @@ ei_pmadd(const Packet& a,
/** \internal \returns a packet version of \a *from.
* \If LoadMode equals Aligned, \a from must be 16 bytes aligned */
template<typename Scalar, int LoadMode>
inline typename ei_packet_traits<Scalar>::type ei_ploadt(const Scalar* from)
template<typename Packet, int LoadMode>
inline Packet ei_ploadt(const typename ei_unpacket_traits<Packet>::type* from)
{
if(LoadMode == Aligned)
return ei_pload(from);
return ei_pload<Packet>(from);
else
return ei_ploadu(from);
return ei_ploadu<Packet>(from);
}
/** \internal copy the packet \a from to \a *to.

View File

@@ -31,6 +31,7 @@ enum { StreamPrecision = -1,
FullPrecision = -2 };
/** \class IOFormat
* \ingroup Core_Module
*
* \brief Stores a set of parameters controlling the way matrices are printed
*
@@ -80,6 +81,7 @@ struct IOFormat
};
/** \class WithFormat
* \ingroup Core_Module
*
* \brief Pseudo expression providing matrix output with given format
*

View File

@@ -27,6 +27,7 @@
#define EIGEN_MAP_H
/** \class Map
* \ingroup Core_Module
*
* \brief A matrix or vector expression mapping an existing array of data.
*
@@ -99,7 +100,7 @@ struct ei_traits<Map<PlainObjectType, MapOptions, StrideType> >
|| ( OuterStrideAtCompileTime!=Dynamic
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ),
Flags0 = ei_traits<PlainObjectType>::Flags,
Flags1 = IsAligned ? int(Flags0) | AlignedBit : int(Flags0) & ~AlignedBit,
Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
Flags2 = HasNoStride ? int(Flags1) : int(Flags1 & ~LinearAccessBit),
Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit)
};

View File

@@ -27,6 +27,7 @@
#define EIGEN_MAPBASE_H
/** \class MapBase
* \ingroup Core_Module
*
* \brief Base class for Map and Block expression with direct access
*
@@ -123,14 +124,14 @@ template<typename Derived> class MapBase
template<int LoadMode>
inline PacketScalar packet(Index row, Index col) const
{
return ei_ploadt<Scalar, LoadMode>
return ei_ploadt<PacketScalar, LoadMode>
(m_data + (col * colStride() + row * rowStride()));
}
template<int LoadMode>
inline PacketScalar packet(Index index) const
{
return ei_ploadt<Scalar, LoadMode>(m_data + index * innerStride());
return ei_ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
}
template<int StoreMode>
@@ -188,8 +189,8 @@ template<typename Derived> class MapBase
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(ei_traits<Derived>::Flags&PacketAccessBit,
ei_inner_stride_at_compile_time<Derived>::ret==1),
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
ei_assert(EIGEN_IMPLIES(ei_traits<Derived>::Flags&AlignedBit, (size_t(m_data)&0xf)==0)
&& "data is not aligned");
ei_assert(EIGEN_IMPLIES(ei_traits<Derived>::Flags&AlignedBit, (size_t(m_data) % (sizeof(Scalar)*ei_packet_traits<Scalar>::size)) == 0)
&& "data is not aligned");
}
const Scalar* EIGEN_RESTRICT m_data;

View File

@@ -27,6 +27,7 @@
#define EIGEN_MATRIX_H
/** \class Matrix
* \ingroup Core_Module
*
* \brief The matrix class, also used for vectors and row-vectors
*
@@ -106,7 +107,7 @@
* are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.</dd>
* </dl>
*
* \see MatrixBase for the majority of the API methods for matrices
* \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy
*/
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >

View File

@@ -27,6 +27,7 @@
#define EIGEN_MATRIXBASE_H
/** \class MatrixBase
* \ingroup Core_Module
*
* \brief Base class for all dense matrices, vectors, and expressions
*
@@ -51,6 +52,8 @@
cout << x.row(0) << endl;
}
* \endcode
*
* \sa \ref TopicClassHierarchy
*/
template<typename Derived> class MatrixBase
: public DenseBase<Derived>
@@ -180,7 +183,7 @@ template<typename Derived> class MatrixBase
operator*(const MatrixBase<OtherDerived> &other) const;
template<typename OtherDerived>
const typename ProductReturnType<Derived,OtherDerived,LazyCoeffBasedProductMode>::Type
const typename LazyProductReturnType<Derived,OtherDerived>::Type
lazyProduct(const MatrixBase<OtherDerived> &other) const;
template<typename OtherDerived>
@@ -328,8 +331,6 @@ template<typename Derived> class MatrixBase
/////////// SVD module ///////////
SVD<PlainObject> svd() const;
/////////// Geometry module ///////////
template<typename OtherDerived>
@@ -338,7 +339,7 @@ template<typename Derived> class MatrixBase
PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
PlainObject unitOrthogonal(void) const;
Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
const ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
enum {
SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
};
@@ -348,9 +349,13 @@ template<typename Derived> class MatrixBase
typedef CwiseUnaryOp<ei_scalar_quotient1_op<typename ei_traits<Derived>::Scalar>,
StartMinusOne > HNormalizedReturnType;
const HNormalizedReturnType hnormalized() const;
typedef Homogeneous<Derived,MatrixBase<Derived>::ColsAtCompileTime==1?Vertical:Horizontal> HomogeneousReturnType;
const HomogeneousReturnType homogeneous() const;
HNormalizedReturnType hnormalized() const;
// put this as separate enum value to work around possible GCC 4.3 bug (?)
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1?Vertical:Horizontal };
typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
HomogeneousReturnType homogeneous() const;
////////// Householder module ///////////
@@ -426,6 +431,13 @@ template<typename Derived> class MatrixBase
explicit MatrixBase(int);
MatrixBase(int,int);
template<typename OtherDerived> explicit MatrixBase(const MatrixBase<OtherDerived>&);
protected:
// Mixing arrays and matrices is not legal: this overload exists only to reject
// `matrix += array` at compile time. The asserted condition depends on the
// template parameter and compares a sizeof result against -1, so it is meant to
// fail whenever this overload is actually instantiated, reporting
// YOU_CANNOT_MIX_ARRAYS_AND_MATRICES.
template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& array)
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
// Mixing arrays and matrices is not legal: this overload exists only to reject
// `matrix -= array` at compile time. The asserted condition depends on the
// template parameter and compares a sizeof result against -1, so it is meant to
// fail whenever this overload is actually instantiated, reporting
// YOU_CANNOT_MIX_ARRAYS_AND_MATRICES.
template<typename OtherDerived> Derived& operator-=(const ArrayBase<OtherDerived>& array)
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
};
#endif // EIGEN_MATRIXBASE_H

View File

@@ -79,6 +79,7 @@ struct ei_matrix_array<T, 0, MatrixOptions, Alignment>
/** \internal
*
* \class ei_matrix_storage
* \ingroup Core_Module
*
* \brief Stores the data of a matrix
*

View File

@@ -27,6 +27,7 @@
#define EIGEN_NESTBYVALUE_H
/** \class NestByValue
* \ingroup Core_Module
*
* \brief Expression which must be nested by value
*

View File

@@ -26,6 +26,7 @@
#define EIGEN_NOALIAS_H
/** \class NoAlias
* \ingroup Core_Module
*
* \brief Pseudo expression providing an operator = assuming no aliasing
*
@@ -42,6 +43,7 @@
template<typename ExpressionType, template <typename> class StorageBase>
class NoAlias
{
typedef typename ExpressionType::Scalar Scalar;
public:
NoAlias(ExpressionType& expression) : m_expression(expression) {}
@@ -49,17 +51,31 @@ class NoAlias
* \sa MatrixBase::lazyAssign() */
template<typename OtherDerived>
EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
{ return m_expression.lazyAssign(other.derived()); }
{ return ei_assign_selector<ExpressionType,OtherDerived,false>::run(m_expression,other.derived()); }
/** \sa MatrixBase::operator+= */
template<typename OtherDerived>
EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
{ return m_expression.lazyAssign(m_expression + other.derived()); }
{
typedef SelfCwiseBinaryOp<ei_scalar_sum_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
SelfAdder tmp(m_expression);
typedef typename ei_nested<OtherDerived>::type OtherDerivedNested;
typedef typename ei_cleantype<OtherDerivedNested>::type _OtherDerivedNested;
ei_assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
return m_expression;
}
/** \sa MatrixBase::operator-= */
template<typename OtherDerived>
EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
{ return m_expression.lazyAssign(m_expression - other.derived()); }
{
typedef SelfCwiseBinaryOp<ei_scalar_difference_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
SelfAdder tmp(m_expression);
typedef typename ei_nested<OtherDerived>::type OtherDerivedNested;
typedef typename ei_cleantype<OtherDerivedNested>::type _OtherDerivedNested;
ei_assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
return m_expression;
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename ProductDerived, typename Lhs, typename Rhs>

View File

@@ -26,6 +26,7 @@
#define EIGEN_NUMTRAITS_H
/** \class NumTraits
* \ingroup Core_Module
*
* \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
*

View File

@@ -27,6 +27,7 @@
#define EIGEN_PERMUTATIONMATRIX_H
/** \class PermutationMatrix
* \ingroup Core_Module
*
* \brief Permutation matrix
*

View File

@@ -27,6 +27,7 @@
#define EIGEN_PRODUCT_H
/** \class GeneralProduct
* \ingroup Core_Module
*
* \brief Expression of the product of two general matrices or vectors
*
@@ -120,6 +121,7 @@ template<> struct ei_product_type_selector<Small,Large,Small> { en
template<> struct ei_product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
/** \class ProductReturnType
* \ingroup Core_Module
*
* \brief Helper class to get the correct and optimized returned type of operator*
*
@@ -161,6 +163,10 @@ struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
};
// this is a workaround for sun CC
// LazyProductReturnType<Lhs,Rhs> derives from
// ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode> and adds nothing of its
// own, so its nested members (e.g. Type) are those of that base. It gives
// callers a two-parameter spelling of the lazy product return type.
template<typename Lhs, typename Rhs>
struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
{};
/***********************************************************************
* Implementation of Inner Vector Vector Product
@@ -280,10 +286,13 @@ class GeneralProduct<Lhs, Rhs, GemvProduct>
public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{
EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
// EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret),
// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
}
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
@@ -317,42 +326,66 @@ template<> struct ei_gemv_selector<OnTheRight,ColMajor,true>
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename ProductType::Scalar Scalar;
typedef typename ProductType::Index Index;
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
typedef typename ProductType::Scalar ResScalar;
typedef typename ProductType::RealScalar RealScalar;
typedef typename ProductType::ActualLhsType ActualLhsType;
typedef typename ProductType::ActualRhsType ActualRhsType;
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
enum {
// FIXME find a way to allow an inner stride on the result if ei_packet_traits<Scalar>::size==1
EvalToDest = Dest::InnerStrideAtCompileTime==1
EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex)
};
Scalar* EIGEN_RESTRICT actualDest;
if (EvalToDest)
bool alphaIsCompatible = (!ComplexByReal) || (ei_imag(actualAlpha)==RealScalar(0));
bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
RhsScalar compatibleAlpha = ei_get_factor<ResScalar,RhsScalar>::run(actualAlpha);
ResScalar* actualDest;
if (evalToDest)
{
actualDest = &dest.coeffRef(0);
}
else
{
actualDest = ei_aligned_stack_new(Scalar,dest.size());
Map<typename Dest::PlainObject>(actualDest, dest.size()) = dest;
actualDest = ei_aligned_stack_new(ResScalar,dest.size());
if(!alphaIsCompatible)
{
MappedDest(actualDest, dest.size()).setZero();
compatibleAlpha = RhsScalar(1);
}
else
MappedDest(actualDest, dest.size()) = dest;
}
ei_cache_friendly_product_colmajor_times_vector
<LhsBlasTraits::NeedToConjugate,RhsBlasTraits::NeedToConjugate>(
dest.size(),
&actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(),
actualRhs, actualDest, actualAlpha);
ei_general_matrix_vector_product
<Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
&actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(),
actualRhs.data(), actualRhs.innerStride(),
actualDest, 1,
compatibleAlpha);
if (!EvalToDest)
if (!evalToDest)
{
dest = Map<typename Dest::PlainObject>(actualDest, dest.size());
ei_aligned_stack_delete(Scalar, actualDest, dest.size());
if(!alphaIsCompatible)
dest += actualAlpha * MappedDest(actualDest, dest.size());
else
dest = MappedDest(actualDest, dest.size());
ei_aligned_stack_delete(ResScalar, actualDest, dest.size());
}
}
};
@@ -362,7 +395,10 @@ template<> struct ei_gemv_selector<OnTheRight,RowMajor,true>
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename ProductType::Scalar Scalar;
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
typedef typename ProductType::Scalar ResScalar;
typedef typename ProductType::Index Index;
typedef typename ProductType::ActualLhsType ActualLhsType;
typedef typename ProductType::ActualRhsType ActualRhsType;
typedef typename ProductType::_ActualRhsType _ActualRhsType;
@@ -372,29 +408,34 @@ template<> struct ei_gemv_selector<OnTheRight,RowMajor,true>
ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
enum {
DirectlyUseRhs = ((ei_packet_traits<Scalar>::size==1) || (_ActualRhsType::Flags&ActualPacketAccessBit))
// FIXME I think here we really have to check for ei_packet_traits<Scalar>::size==1
// because in this case it is fine to have an inner stride
DirectlyUseRhs = ((ei_packet_traits<RhsScalar>::size==1) || (_ActualRhsType::Flags&ActualPacketAccessBit))
&& (!(_ActualRhsType::Flags & RowMajorBit))
};
Scalar* EIGEN_RESTRICT rhs_data;
RhsScalar* rhs_data;
if (DirectlyUseRhs)
rhs_data = reinterpret_cast<Scalar* EIGEN_RESTRICT>(&actualRhs.const_cast_derived().coeffRef(0));
rhs_data = &actualRhs.const_cast_derived().coeffRef(0);
else
{
rhs_data = ei_aligned_stack_new(Scalar, actualRhs.size());
Map<typename _ActualRhsType::PlainObject>(reinterpret_cast<Scalar*>(rhs_data), actualRhs.size()) = actualRhs;
rhs_data = ei_aligned_stack_new(RhsScalar, actualRhs.size());
Map<typename _ActualRhsType::PlainObject>(rhs_data, actualRhs.size()) = actualRhs;
}
ei_cache_friendly_product_rowmajor_times_vector
<LhsBlasTraits::NeedToConjugate,RhsBlasTraits::NeedToConjugate>(
ei_general_matrix_vector_product
<Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
&actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(),
rhs_data, prod.rhs().size(), dest, actualAlpha);
rhs_data, 1,
&dest.coeffRef(0,0), dest.innerStride(),
actualAlpha);
if (!DirectlyUseRhs) ei_aligned_stack_delete(Scalar, rhs_data, prod.rhs().size());
if (!DirectlyUseRhs) ei_aligned_stack_delete(RhsScalar, rhs_data, prod.rhs().size());
}
};
@@ -477,7 +518,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
*/
template<typename Derived>
template<typename OtherDerived>
const typename ProductReturnType<Derived,OtherDerived,LazyCoeffBasedProductMode>::Type
const typename LazyProductReturnType<Derived,OtherDerived>::Type
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
{
enum {
@@ -496,7 +537,7 @@ MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
return typename ProductReturnType<Derived,OtherDerived,LazyCoeffBasedProductMode>::Type(derived(), other.derived());
return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
}
#endif // EIGEN_PRODUCT_H

View File

@@ -26,6 +26,7 @@
#define EIGEN_PRODUCTBASE_H
/** \class ProductBase
* \ingroup Core_Module
*
*/
template<typename Derived, typename _Lhs, typename _Rhs>
@@ -124,7 +125,7 @@ class ProductBase : public MatrixBase<Derived>
operator const PlainObject& () const
{
m_result.resize(m_lhs.rows(), m_rhs.cols());
this->evalTo(m_result);
derived().evalTo(m_result);
return m_result;
}
@@ -215,6 +216,7 @@ class ScaledProduct
typename NestedProduct::_LhsNested,
typename NestedProduct::_RhsNested> Base;
typedef typename Base::Scalar Scalar;
typedef typename Base::PlainObject PlainObject;
// EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct)
ScaledProduct(const NestedProduct& prod, Scalar x)
@@ -231,7 +233,7 @@ class ScaledProduct
template<typename Dest>
inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { m_prod.derived().scaleAndAddTo(dst,alpha); }
protected:
const NestedProduct& m_prod;
Scalar m_alpha;

View File

@@ -183,7 +183,7 @@ struct ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
typedef typename Derived::Index Index;
static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
{
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix");
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
Scalar res;
res = mat.coeffByOuterInner(0, 0);
for(Index i = 1; i < mat.innerSize(); ++i)
@@ -210,6 +210,7 @@ struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
static Scalar run(const Derived& mat, const Func& func)
{
const Index size = mat.size();
ei_assert(size && "you are using an empty matrix");
const Index packetSize = ei_packet_traits<Scalar>::size;
const Index alignedStart = ei_first_aligned(mat);
enum {
@@ -253,6 +254,7 @@ struct ei_redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
static Scalar run(const Derived& mat, const Func& func)
{
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
const Index innerSize = mat.innerSize();
const Index outerSize = mat.outerSize();
enum {
@@ -294,6 +296,7 @@ struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling
};
EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func)
{
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
Scalar res = func.predux(ei_redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
if (VectorizedSize != Size)
res = func(res,ei_redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
@@ -345,6 +348,8 @@ template<typename Derived>
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
DenseBase<Derived>::sum() const
{
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
return Scalar(0);
return this->redux(Eigen::ei_scalar_sum_op<Scalar>());
}
@@ -370,6 +375,8 @@ template<typename Derived>
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
DenseBase<Derived>::prod() const
{
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
return Scalar(1);
return this->redux(Eigen::ei_scalar_product_op<Scalar>());
}

View File

@@ -27,6 +27,7 @@
/**
* \class Replicate
* \ingroup Core_Module
*
* \brief Expression of the multiple replication of a matrix or vector
*

View File

@@ -27,6 +27,7 @@
#define EIGEN_RETURNBYVALUE_H
/** \class ReturnByValue
* \ingroup Core_Module
*
*/
template<typename Derived>

View File

@@ -28,6 +28,7 @@
#define EIGEN_REVERSE_H
/** \class Reverse
* \ingroup Core_Module
*
* \brief Expression of the reverse of a vector or matrix
*
@@ -58,7 +59,7 @@ struct ei_traits<Reverse<MatrixType, Direction> >
LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) )
? LinearAccessBit : 0,
Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | PacketAccessBit | LinearAccess),
Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess),
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
};
@@ -108,6 +109,11 @@ template<typename MatrixType, int Direction> class Reverse
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
/** \returns the inner stride of the nested expression, negated.
* NOTE(review): presumably negative because the reversed expression walks the
* nested data backwards -- confirm against the code that consumes this stride. */
inline Index innerStride() const
{
return -m_matrix.innerStride();
}
inline Scalar& operator()(Index row, Index col)
{
ei_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
@@ -120,13 +126,13 @@ template<typename MatrixType, int Direction> class Reverse
ReverseCol ? m_matrix.cols() - col - 1 : col);
}
inline const Scalar coeff(Index row, Index col) const
inline CoeffReturnType coeff(Index row, Index col) const
{
return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row,
ReverseCol ? m_matrix.cols() - col - 1 : col);
}
inline const Scalar coeff(Index index) const
inline CoeffReturnType coeff(Index index) const
{
return m_matrix.coeff(m_matrix.size() - index - 1);
}

View File

@@ -26,6 +26,7 @@
#define EIGEN_SELECT_H
/** \class Select
* \ingroup Core_Module
*
* \brief Expression of a coefficient wise version of the C++ ternary operator ?:
*

View File

@@ -26,6 +26,7 @@
#define EIGEN_SELFADJOINTMATRIX_H
/** \class SelfAdjointView
* \ingroup Core_Module
*
*
* \brief Expression of a selfadjoint matrix from a triangular part of a dense matrix
@@ -64,7 +65,10 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
public:
typedef TriangularBase<SelfAdjointView> Base;
typedef typename ei_traits<SelfAdjointView>::Scalar Scalar;
/** \brief The type of coefficients in this matrix */
typedef typename ei_traits<SelfAdjointView>::Scalar Scalar;
typedef typename MatrixType::Index Index;
enum {

View File

@@ -26,6 +26,7 @@
#define EIGEN_SELFCWISEBINARYOP_H
/** \class SelfCwiseBinaryOp
* \ingroup Core_Module
*
* \internal
*
@@ -38,14 +39,21 @@
*
* \sa class SwapWrapper for a similar trick.
*/
template<typename BinaryOp, typename MatrixType>
struct ei_traits<SelfCwiseBinaryOp<BinaryOp,MatrixType> > : ei_traits<MatrixType>
// Traits of SelfCwiseBinaryOp<BinaryOp,Lhs,Rhs>: everything is inherited from
// the traits of the corresponding CwiseBinaryOp<BinaryOp,Lhs,Rhs>, except the
// Flags and the compile-time strides, which are overridden below using Lhs.
template<typename BinaryOp, typename Lhs, typename Rhs>
struct ei_traits<SelfCwiseBinaryOp<BinaryOp,Lhs,Rhs> >
: ei_traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >
{
enum {
// Note that it is still a good idea to preserve the DirectAccessBit
// so that assign can correctly align the data.
// The CwiseBinaryOp flags are extended with the DirectAccessBit and
// LvalueBit of Lhs (each only if Lhs has it set).
Flags = ei_traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit),
// Strides are those of Lhs.
OuterStrideAtCompileTime = Lhs::OuterStrideAtCompileTime,
InnerStrideAtCompileTime = Lhs::InnerStrideAtCompileTime
};
};
template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
: public ei_dense_xpr_base< SelfCwiseBinaryOp<BinaryOp, MatrixType> >::type
template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
: public ei_dense_xpr_base< SelfCwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
{
public:
@@ -54,9 +62,7 @@ template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
typedef typename ei_packet_traits<Scalar>::type Packet;
using Base::operator=;
inline SelfCwiseBinaryOp(MatrixType& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}
inline SelfCwiseBinaryOp(Lhs& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
@@ -121,12 +127,8 @@ template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
template<typename RhsDerived>
EIGEN_STRONG_INLINE SelfCwiseBinaryOp& lazyAssign(const DenseBase<RhsDerived>& rhs)
{
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(MatrixType,RhsDerived)
EIGEN_STATIC_ASSERT((ei_functor_allows_mixing_real_and_complex<BinaryOp>::ret
? int(ei_is_same_type<typename MatrixType::RealScalar, typename RhsDerived::RealScalar>::ret)
: int(ei_is_same_type<typename MatrixType::Scalar, typename RhsDerived::Scalar>::ret)),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs,RhsDerived)
EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename RhsDerived::Scalar);
#ifdef EIGEN_DEBUG_ASSIGN
ei_assign_traits<SelfCwiseBinaryOp, RhsDerived>::debug();
@@ -138,9 +140,18 @@ template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
#endif
return *this;
}
// overloaded to honor evaluation of special matrices
// maybe another solution would be to not use SelfCwiseBinaryOp
// at first...
// The argument is first bound to an object of type ei_nested<Rhs>::type, and
// that object (not _rhs itself) is what gets forwarded to Base::operator=.
SelfCwiseBinaryOp& operator=(const Rhs& _rhs)
{
typename ei_nested<Rhs>::type rhs(_rhs);
return Base::operator=(rhs);
}
protected:
MatrixType& m_matrix;
Lhs& m_matrix;
const BinaryOp& m_functor;
private:
@@ -150,8 +161,8 @@ template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
template<typename Derived>
inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
{
SelfCwiseBinaryOp<ei_scalar_product_op<Scalar>, Derived> tmp(derived());
typedef typename Derived::PlainObject PlainObject;
SelfCwiseBinaryOp<ei_scalar_product_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
tmp = PlainObject::Constant(rows(),cols(),other);
return derived();
}
@@ -159,10 +170,11 @@ inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
template<typename Derived>
inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
{
SelfCwiseBinaryOp<typename ei_meta_if<NumTraits<Scalar>::IsInteger,
typedef typename ei_meta_if<NumTraits<Scalar>::IsInteger,
ei_scalar_quotient_op<Scalar>,
ei_scalar_product_op<Scalar> >::ret, Derived> tmp(derived());
ei_scalar_product_op<Scalar> >::ret BinOp;
typedef typename Derived::PlainObject PlainObject;
SelfCwiseBinaryOp<BinOp, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
tmp = PlainObject::Constant(rows(),cols(), NumTraits<Scalar>::IsInteger ? other : Scalar(1)/other);
return derived();
}

View File

@@ -53,7 +53,8 @@ struct ei_triangular_solver_selector;
template<typename Lhs, typename Rhs, int Mode>
struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,RowMajor,1>
{
typedef typename Rhs::Scalar Scalar;
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef ei_blas_traits<Lhs> LhsProductTraits;
typedef typename LhsProductTraits::ExtractType ActualLhsType;
typedef typename Lhs::Index Index;
@@ -80,12 +81,13 @@ struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,RowMajor
// 2 - it is slightly faster at runtime
Index startRow = IsLower ? pi : pi-actualPanelWidth;
Index startCol = IsLower ? 0 : pi;
VectorBlock<Rhs,Dynamic> target(other,startRow,actualPanelWidth);
ei_cache_friendly_product_rowmajor_times_vector<LhsProductTraits::NeedToConjugate,false>(
ei_general_matrix_vector_product<Index,LhsScalar,RowMajor,LhsProductTraits::NeedToConjugate,RhsScalar,false>::run(
actualPanelWidth, r,
&(actualLhs.const_cast_derived().coeffRef(startRow,startCol)), actualLhs.outerStride(),
&(other.coeffRef(startCol)), r,
target, Scalar(-1));
&(other.coeffRef(startCol)), other.innerStride(),
&other.coeffRef(startRow), other.innerStride(),
RhsScalar(-1));
}
for(Index k=0; k<actualPanelWidth; ++k)
@@ -106,13 +108,12 @@ struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,RowMajor
template<typename Lhs, typename Rhs, int Mode>
struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,ColMajor,1>
{
typedef typename Rhs::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef ei_blas_traits<Lhs> LhsProductTraits;
typedef typename LhsProductTraits::ExtractType ActualLhsType;
typedef typename Lhs::Index Index;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
IsLower = ((Mode&Lower)==Lower)
};
@@ -147,12 +148,11 @@ struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,ColMajor
// let's directly call the low level product function because:
// 1 - it is faster to compile
// 2 - it is slightly faster at runtime
ei_cache_friendly_product_colmajor_times_vector<LhsProductTraits::NeedToConjugate,false>(
r,
&(actualLhs.const_cast_derived().coeffRef(endBlock,startBlock)), actualLhs.outerStride(),
other.segment(startBlock, actualPanelWidth),
&(other.coeffRef(endBlock, 0)),
Scalar(-1));
ei_general_matrix_vector_product<Index,LhsScalar,ColMajor,LhsProductTraits::NeedToConjugate,RhsScalar,false>::run(
r, actualPanelWidth,
&(actualLhs.const_cast_derived().coeffRef(endBlock,startBlock)), actualLhs.outerStride(),
&other.coeff(startBlock), other.innerStride(),
&(other.coeffRef(endBlock, 0)), other.innerStride(), RhsScalar(-1));
}
}
}

View File

@@ -26,6 +26,7 @@
#define EIGEN_STRIDE_H
/** \class Stride
* \ingroup Core_Module
*
* \brief Holds strides information for Map
*

View File

@@ -26,6 +26,7 @@
#define EIGEN_SWAP_H
/** \class SwapWrapper
* \ingroup Core_Module
*
* \internal
*

View File

@@ -27,6 +27,7 @@
#define EIGEN_TRANSPOSE_H
/** \class Transpose
* \ingroup Core_Module
*
* \brief Expression of the transpose of a matrix
*
@@ -302,11 +303,11 @@ inline void MatrixBase<Derived>::adjointInPlace()
// The following is to detect aliasing problems in most common cases.
template<typename BinOp,typename NestedXpr>
struct ei_blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr> >
template<typename BinOp,typename NestedXpr,typename Rhs>
struct ei_blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> >
: ei_blas_traits<NestedXpr>
{
typedef SelfCwiseBinaryOp<BinOp,NestedXpr> XprType;
typedef SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> XprType;
static inline const XprType extract(const XprType& x) { return x; }
};

View File

@@ -26,6 +26,7 @@
#define EIGEN_TRANSPOSITIONS_H
/** \class Transpositions
* \ingroup Core_Module
*
* \brief Represents a sequence of transpositions (row/column interchange)
*

View File

@@ -29,6 +29,7 @@
/** \internal
*
* \class TriangularBase
* \ingroup Core_Module
*
* \brief Base class for triangular part in a matrix
*/
@@ -89,7 +90,7 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
protected:
void check_coordinates(Index row, Index col)
void check_coordinates(Index row, Index col) const
{
EIGEN_ONLY_USED_FOR_DEBUG(row);
EIGEN_ONLY_USED_FOR_DEBUG(col);
@@ -101,17 +102,18 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
}
#ifdef EIGEN_INTERNAL_DEBUGGING
void check_coordinates_internal(Index row, Index col)
void check_coordinates_internal(Index row, Index col) const
{
check_coordinates(row, col);
}
#else
void check_coordinates_internal(Index , Index ) {}
void check_coordinates_internal(Index , Index ) const {}
#endif
};
/** \class TriangularView
* \ingroup Core_Module
*
* \brief Expression of a triangular part in a matrix
*
@@ -152,11 +154,18 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
typedef TriangularBase<TriangularView> Base;
typedef typename ei_traits<TriangularView>::Scalar Scalar;
typedef _MatrixType MatrixType;
typedef typename MatrixType::PlainObject DenseMatrixType;
protected:
typedef typename MatrixType::Nested MatrixTypeNested;
typedef typename ei_cleantype<MatrixTypeNested>::type _MatrixTypeNested;
typedef typename ei_cleantype<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
public:
using Base::evalToLazy;
typedef typename ei_traits<TriangularView>::StorageKind StorageKind;
typedef typename ei_traits<TriangularView>::Index Index;
@@ -233,6 +242,12 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
template<typename OtherDerived>
void lazyAssign(const MatrixBase<OtherDerived>& other);
/** \returns a TriangularView with the same Mode, built from m_matrix.conjugate().
* \sa MatrixBase::conjugate() */
inline TriangularView<MatrixConjugateReturnType,Mode> conjugate()
{ return m_matrix.conjugate(); }
/** Const overload.
* \returns a const TriangularView with the same Mode, built from m_matrix.conjugate().
* \sa MatrixBase::conjugate() const */
inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const
{ return m_matrix.conjugate(); }
/** \sa MatrixBase::adjoint() */
inline TriangularView<typename MatrixType::AdjointReturnType,TransposeMode> adjoint()

View File

@@ -27,6 +27,7 @@
#define EIGEN_VECTORBLOCK_H
/** \class VectorBlock
* \ingroup Core_Module
*
* \brief Expression of a fixed-size or dynamic-size sub-vector
*

View File

@@ -27,6 +27,7 @@
#define EIGEN_PARTIAL_REDUX_H
/** \class PartialReduxExpr
* \ingroup Core_Module
*
* \brief Generic expression of a partially reduxed matrix
*
@@ -154,6 +155,7 @@ struct ei_member_redux {
};
/** \class VectorwiseOp
* \ingroup Core_Module
*
* \brief Pseudo expression providing partial reduction operations
*
@@ -438,7 +440,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
}
/** Returns the expression of the sum of the vector \a other to each subvector of \c *this */
template<typename OtherDerived>
template<typename OtherDerived> EIGEN_STRONG_INLINE
CwiseBinaryOp<ei_scalar_sum_op<Scalar>,
ExpressionType,
typename ExtendedType<OtherDerived>::Type>
@@ -461,7 +463,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/////////// Geometry module ///////////
const Homogeneous<ExpressionType,Direction> homogeneous() const;
Homogeneous<ExpressionType,Direction> homogeneous() const;
typedef typename ExpressionType::PlainObject CrossReturnType;
template<typename OtherDerived>
@@ -489,7 +491,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >
HNormalizedReturnType;
const HNormalizedReturnType hnormalized() const;
HNormalizedReturnType hnormalized() const;
protected:
ExpressionTypeNested m_matrix;

View File

@@ -0,0 +1,215 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_COMPLEX_ALTIVEC_H
#define EIGEN_COMPLEX_ALTIVEC_H
// Constant vectors used by the std::complex<float> primitives below.
// They are derived from ei_p4i_ZERO, ei_p4f_ZERO_, ei_p16uc_FORWARD and ei_p16uc_REVERSE
// (declared outside this file); the trailing comment on each line gives the intended contents.
// XOR mask applied by ei_pconj/ei_pmul to flip selected sign bits.
static Packet4ui ei_p4ui_CONJ_XOR = vec_mergeh((Packet4ui)ei_p4i_ZERO, (Packet4ui)ei_p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
// vec_perm byte selectors used by ei_pmul to pick the first / second float of each complex.
static Packet16uc ei_p16uc_COMPLEX_RE = vec_sld((Packet16uc) vec_splat((Packet4ui)ei_p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)ei_p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
static Packet16uc ei_p16uc_COMPLEX_IM = vec_sld((Packet16uc) vec_splat((Packet4ui)ei_p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)ei_p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
// vec_perm byte selector swapping the two floats inside each complex (used by ei_pmul and ei_pdiv).
static Packet16uc ei_p16uc_COMPLEX_REV = vec_sld(ei_p16uc_REVERSE, ei_p16uc_REVERSE, 8);//{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
// vec_perm byte selector swapping the two 8-byte halves (used by ei_preverse).
static Packet16uc ei_p16uc_COMPLEX_REV2 = vec_sld(ei_p16uc_FORWARD, ei_p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
// vec_perm byte selectors duplicating the first / second 8-byte half (used by ei_pset1).
static Packet16uc ei_p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)ei_p16uc_FORWARD, 0), (Packet4ui) vec_splat((Packet4ui)ei_p16uc_FORWARD, 1));//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
static Packet16uc ei_p16uc_PSET_LO = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)ei_p16uc_FORWARD, 2), (Packet4ui) vec_splat((Packet4ui)ei_p16uc_FORWARD, 3));//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
//---------- float ----------
/** Packet holding two std::complex<float> values inside a single Packet4f.
  * The default constructor leaves \c v uninitialized. */
struct Packet2cf
{
  Packet4f v;

  EIGEN_STRONG_INLINE Packet2cf() {}
  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
};
/** Packet traits for std::complex<float> on AltiVec: the packet type is Packet2cf,
  * which carries \c size = 2 complex scalars. The Has* flags override the values
  * inherited from ei_default_packet_traits (whose defaults are not visible here). */
template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits
{
typedef Packet2cf type;
enum {
Vectorizable = 1,
size = 2,
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasSetLinear = 0
};
};
/** Reverse mapping: a Packet2cf holds 2 scalars of type std::complex<float>. */
template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
/** Broadcasts the complex value \a from into both slots of a Packet2cf.
  *
  * \param from the scalar to replicate; it may sit at any address.
  * \returns a packet whose two complex elements both equal \a from.
  */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pset1<Packet2cf>(const std::complex<float>& from)
{
  Packet2cf res;
  /* On AltiVec we cannot load 64-bit registers, so we have to take care of alignment */
  if ((ptrdiff_t)&from % 16 == 0)
    res.v = ei_pload<Packet4f>((const float *)&from);
  else
    res.v = ei_ploadu<Packet4f>((const float *)&from);
  // Both loads return the 16 bytes starting at &from, so the value always sits
  // in the first 8 bytes: duplicate that half in either case.
  res.v = vec_perm(res.v, res.v, ei_p16uc_PSET_HI);
  return res;
}
/** Element-wise sum of two complex packets. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4f sum = vec_add(a.v, b.v);
  return Packet2cf(sum);
}

/** Element-wise difference a - b of two complex packets. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4f diff = vec_sub(a.v, b.v);
  return Packet2cf(diff);
}

/** Negation, computed as 0 - a on the underlying floats. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a)
{
  const Packet4f negated = ei_psub<Packet4f>(ei_p4f_ZERO, a.v);
  return Packet2cf(negated);
}

/** Complex conjugate: XORs the packet bits with ei_p4ui_CONJ_XOR. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a)
{
  const Packet4ui flipped = vec_xor((Packet4ui)a.v, ei_p4ui_CONJ_XOR);
  return Packet2cf((Packet4f)flipped);
}
/** Complex product a * b for each of the two complex elements.
  * Computed as a_re * b + swap(conj-flip(a_im * b)). */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
Packet4f v1, v2;
// Broadcast the real part of each element of a over its two float slots
v1 = vec_perm(a.v, a.v, ei_p16uc_COMPLEX_RE);
// Broadcast the imaginary part of each element of a over its two float slots
v2 = vec_perm(a.v, a.v, ei_p16uc_COMPLEX_IM);
// multiply a_re * b
v1 = vec_madd(v1, b.v, ei_p4f_ZERO);
// multiply a_im * b, then flip the sign bits selected by the CONJ_XOR mask
v2 = vec_madd(v2, b.v, ei_p4f_ZERO);
v2 = (Packet4f) vec_xor((Packet4ui)v2, ei_p4ui_CONJ_XOR);
// swap the two floats of each complex so the terms line up with v1
v2 = vec_perm(v2, v2, ei_p16uc_COMPLEX_REV);
return Packet2cf(vec_add(v1, v2));
}
/** Bitwise AND of the raw packet bits. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  return Packet2cf(vec_and(a.v, b.v));
}

/** Bitwise OR of the raw packet bits. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_por <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  return Packet2cf(vec_or(a.v, b.v));
}

/** Bitwise XOR of the raw packet bits. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  return Packet2cf(vec_xor(a.v, b.v));
}

/** Bitwise a AND (NOT b); the complement of b is obtained with vec_nor(b,b). */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  return Packet2cf(vec_and(a.v, vec_nor(b.v, b.v)));
}
/** Aligned load of two consecutive std::complex<float> starting at \a from.
  * Specialized on the packet type, like the Packet4f loads of the AltiVec backend. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(ei_pload<Packet4f>((const float*)from)); }
/** Unaligned load of two consecutive std::complex<float> starting at \a from. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ei_ploadu<Packet4f>((const float*)from)); }
/** Aligned store: forwards the underlying Packet4f to the float ei_pstore. */
template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstore((float*)to, from.v); }
/** Unaligned store: forwards the underlying Packet4f to the float ei_pstoreu. */
template<> EIGEN_STRONG_INLINE void ei_pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((float*)to, from.v); }
/** Prefetch hint for \a addr, issued through vec_dstt on channel DST_CHAN. */
template<> EIGEN_STRONG_INLINE void ei_prefetch<std::complex<float> >(const std::complex<float> * addr) { vec_dstt((float *)addr, DST_CTRL(2,2,32), DST_CHAN); }
/** Returns the first complex element of \a a by spilling the packet to an aligned buffer. */
template<> EIGEN_STRONG_INLINE std::complex<float> ei_pfirst<Packet2cf>(const Packet2cf& a)
{
  std::complex<float> EIGEN_ALIGN16 scratch[2];
  float* dst = (float *)&scratch;
  ei_pstore(dst, a.v);
  return scratch[0];
}
/** Reverses the order of the two complex elements (permutation with COMPLEX_REV2). */
template<> EIGEN_STRONG_INLINE Packet2cf ei_preverse(const Packet2cf& a)
{
  return Packet2cf(vec_perm(a.v, a.v, ei_p16uc_COMPLEX_REV2));
}
/** Horizontal sum: returns the sum of the two complex elements of \a a. */
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux<Packet2cf>(const Packet2cf& a)
{
  Packet4f b;
  // Shift by 8 bytes so each complex lines up with the other one...
  b = (Packet4f) vec_sld(a.v, a.v, 8);
  // ...and add: every complex slot of b now holds the total.
  b = ei_padd(a.v, b);
  return ei_pfirst(Packet2cf(b));
}
/** Reduces the two packets vecs[0] and vecs[1] into one packet by combining
  * 8-byte-shifted views of both inputs and adding them. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_preduxp<Packet2cf>(const Packet2cf* vecs)
{
  const Packet4f first = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8);
  Packet4f second = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8);
  second = (Packet4f) vec_sld(second, second, 8);
  return Packet2cf(ei_padd(first, second));
}
/** Horizontal product: multiplies \a a by its 8-byte-shifted copy and returns the first element. */
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux_mul<Packet2cf>(const Packet2cf& a)
{
  const Packet4f shifted = (Packet4f) vec_sld(a.v, a.v, 8);
  const Packet2cf product = ei_pmul(a, Packet2cf(shifted));
  return ei_pfirst(product);
}
/** Alignment helper for Packet2cf: only Offset==1 modifies \a first,
  * replacing it by vec_sld(first, second, 8). */
template<int Offset>
struct ei_palign_impl<Offset,Packet2cf>
{
  EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
  {
    if (Offset!=1)
      return;
    first.v = vec_sld(first.v, second.v, 8);
  }
};
/** Product helper conjugating the right-hand operand: pmul(a,b) = a * conj(b). */
template<> struct ei_conj_helper<Packet2cf, Packet2cf, false,true>
{
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  { return ei_pmul(a, ei_pconj(b)); }

  /** pmul(x,y) + c */
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    const Packet2cf prod = pmul(x,y);
    return ei_padd(prod, c);
  }
};

/** Product helper conjugating the left-hand operand: pmul(a,b) = conj(a) * b. */
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,false>
{
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  { return ei_pmul(ei_pconj(a), b); }

  /** pmul(x,y) + c */
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    const Packet2cf prod = pmul(x,y);
    return ei_padd(prod, c);
  }
};

/** Product helper conjugating both operands: pmul(a,b) = conj(a * b). */
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,true>
{
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  { return ei_pconj(ei_pmul(a, b)); }

  /** pmul(x,y) + c */
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    const Packet2cf prod = pmul(x,y);
    return ei_padd(prod, c);
  }
};
/** Complex division a / b, evaluated as (a * conj(b)) / (b_re^2 + b_im^2). */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // TODO optimize it for AltiVec
  const ei_conj_helper<Packet2cf,Packet2cf,false,true> conj_rhs_product;
  Packet2cf numerator = conj_rhs_product.pmul(a,b);
  // Squares of every float of b; adding the re/im-swapped copy gives the squared norm in both slots.
  Packet4f squares = vec_madd(b.v, b.v, ei_p4f_ZERO);
  Packet4f swapped = vec_perm(squares, squares, ei_p16uc_COMPLEX_REV);
  return Packet2cf(ei_pdiv(numerator.v, vec_add(squares, swapped)));
}
#endif // EIGEN_COMPLEX_ALTIVEC_H

View File

@@ -59,13 +59,13 @@ typedef __vector unsigned char Packet16uc;
Packet4i ei_p4i_##NAME = vec_splat_s32(X)
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
Packet4f ei_p4f_##NAME = ei_pset1<float>(X)
Packet4f ei_p4f_##NAME = ei_pset1<Packet4f>(X)
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
Packet4f ei_p4f_##NAME = vreinterpretq_f32_u32(ei_pset1<int>(X))
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
Packet4i ei_p4i_##NAME = ei_pset1<int>(X)
Packet4i ei_p4i_##NAME = ei_pset1<Packet4i>(X)
#define DST_CHAN 1
#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
@@ -74,6 +74,7 @@ typedef __vector unsigned char Packet16uc;
static Packet4f ei_p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 };
static Packet4i ei_p4i_COUNTDOWN = { 3, 2, 1, 0 };
static Packet16uc ei_p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
static Packet16uc ei_p16uc_FORWARD = vec_lvsl(0, (float*)0);
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0);
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0);
@@ -85,8 +86,13 @@ static Packet4f ei_p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)ei_p4i_MINUS1, (Pack
template<> struct ei_packet_traits<float> : ei_default_packet_traits
{
typedef Packet4f type; enum {size=4};
typedef Packet4f type;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=4,
// FIXME check the Has*
HasSin = 0,
HasCos = 0,
HasLog = 0,
@@ -95,7 +101,15 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
};
};
template<> struct ei_packet_traits<int> : ei_default_packet_traits
{ typedef Packet4i type; enum {size=4}; };
{
typedef Packet4i type;
enum {
// FIXME check the Has*
Vectorizable = 1,
AlignedOnScalar = 1,
size=4
};
};
template<> struct ei_unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
@@ -144,7 +158,7 @@ inline std::ostream & operator <<(std::ostream & s, const Packetbi & v)
return s;
}
*/
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) {
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) {
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
float EIGEN_ALIGN16 af[4];
af[0] = from;
@@ -153,7 +167,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) {
return vc;
}
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) {
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<Packet4i>(const int& from) {
int EIGEN_ALIGN16 ai[4];
ai[0] = from;
Packet4i vc = vec_ld(0, ai);
@@ -161,8 +175,8 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) {
return vc;
}
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) { return vec_add(ei_pset1(a), ei_p4f_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) { return vec_add(ei_pset1(a), ei_p4i_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) { return vec_add(ei_pset1<Packet4f>(a), ei_p4f_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) { return vec_add(ei_pset1<Packet4i>(a), ei_p4i_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); }
@@ -227,7 +241,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, con
template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
{ ei_assert(false && "packet integer division are not supported by AltiVec");
return ei_pset1<int>(0);
return ei_pset1<Packet4i>(0);
}
// for some weird reason, it has to be overloaded for packets of integers
@@ -253,10 +267,10 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pxor<Packet4i>(const Packet4i& a, con
template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from)
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu<Packet4f>(const float* from)
{
EIGEN_DEBUG_ALIGNED_LOAD
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
@@ -268,7 +282,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from)
return (Packet4f) vec_perm(MSQ, LSQ, mask); // align the data
}
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from)
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<Packet4i>(const int* from)
{
EIGEN_DEBUG_ALIGNED_LOAD
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html

View File

@@ -67,12 +67,7 @@
* Currently it must be 8 or 16. Other values will fail.
*/
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
#if (defined __i386__)
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8
#else
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
#endif
#endif
#endif // EIGEN_DEFAULT_SETTINGS_H

View File

@@ -0,0 +1,262 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
// NOTE(review): this is the NEON implementation (uint32x4_t, vld1_f32, ...) but the
// include guard carries the AltiVec name, presumably a copy-paste leftover - confirm and rename.
#ifndef EIGEN_COMPLEX_ALTIVEC_H
#define EIGEN_COMPLEX_ALTIVEC_H
// Masks with the sign bit set in the odd 32-bit lanes; XOR-ed into float data by
// ei_pconj, ei_pmul (128-bit form) and ei_predux_mul (64-bit form).
static uint32x4_t ei_p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
static uint32x2_t ei_p2ui_CONJ_XOR = { 0x00000000, 0x80000000 };
//---------- float ----------
/** Packet holding two std::complex<float> values inside a single Packet4f.
  * The default constructor leaves \c v uninitialized. */
struct Packet2cf
{
  Packet4f v;

  EIGEN_STRONG_INLINE Packet2cf() {}
  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
};
/** Packet traits for std::complex<float> on NEON: the packet type is Packet2cf,
  * which carries \c size = 2 complex scalars. The Has* flags override the values
  * inherited from ei_default_packet_traits (whose defaults are not visible here). */
template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits
{
typedef Packet2cf type;
enum {
Vectorizable = 1,
size = 2,
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasSetLinear = 0
};
};
/** Reverse mapping: a Packet2cf holds 2 scalars of type std::complex<float>. */
template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
/** Broadcasts \a from into both complex slots: one 64-bit load, then combined with itself. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pset1<Packet2cf>(const std::complex<float>& from)
{
  const float32x2_t pair = vld1_f32((float *)&from);
  return Packet2cf(vcombine_f32(pair, pair));
}
/** Element-wise sum, delegated to the Packet4f implementation. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4f sum = ei_padd<Packet4f>(a.v, b.v);
  return Packet2cf(sum);
}

/** Element-wise difference a - b, delegated to the Packet4f implementation. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4f diff = ei_psub<Packet4f>(a.v, b.v);
  return Packet2cf(diff);
}

/** Negation, delegated to the Packet4f implementation. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a)
{
  const Packet4f negated = ei_pnegate<Packet4f>(a.v);
  return Packet2cf(negated);
}

/** Complex conjugate: XORs the packet bits with ei_p4ui_CONJ_XOR. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a)
{
  const uint32x4_t bits = vreinterpretq_u32_f32(a.v);
  const uint32x4_t flipped = veorq_u32(bits, ei_p4ui_CONJ_XOR);
  return Packet2cf(vreinterpretq_f32_u32(flipped));
}
/** Complex product a * b for each of the two complex elements.
  * Computed as a_re * b + swap(conj-flip(a_im * b)). */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
Packet4f v1, v2;
float32x2_t a_lo, a_hi;
// Get the real values of a | a1_re | a1_re | a2_re | a2_re |
v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
// Get the imaginary values of a | a1_im | a1_im | a2_im | a2_im |
v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
// Multiply the real a with b
v1 = vmulq_f32(v1, b.v);
// Multiply the imag a with b
v2 = vmulq_f32(v2, b.v);
// Conjugate v2 (flip the sign bit of the odd lanes)
v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), ei_p4ui_CONJ_XOR));
// Swap real/imag elements in v2.
a_lo = vrev64_f32(vget_low_f32(v2));
a_hi = vrev64_f32(vget_high_f32(v2));
v2 = vcombine_f32(a_lo, a_hi);
// Add and return the result
return Packet2cf(vaddq_f32(v1, v2));
}
/** Bitwise AND of the raw bits of \a a and \a b.
  * The floats are reinterpreted as 32-bit unsigned lanes because NEON has no float bitwise ops. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
}
/** Bitwise OR of the raw packet bits. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_por <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const uint32x4_t lhs = vreinterpretq_u32_f32(a.v);
  const uint32x4_t rhs = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(lhs, rhs)));
}

/** Bitwise XOR of the raw packet bits. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const uint32x4_t lhs = vreinterpretq_u32_f32(a.v);
  const uint32x4_t rhs = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(veorq_u32(lhs, rhs)));
}

/** Bit-clear of the raw packet bits through vbicq_u32(a, b). */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const uint32x4_t lhs = vreinterpretq_u32_f32(a.v);
  const uint32x4_t rhs = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(lhs, rhs)));
}
/** Aligned load of two consecutive std::complex<float>, forwarded to the float ei_pload. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pload <std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(ei_pload((const float*)from)); }
/** Unaligned load of two consecutive std::complex<float>, forwarded to the float ei_ploadu. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_ploadu<std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ei_ploadu((const float*)from)); }
/** Aligned store of the underlying Packet4f through the float ei_pstore. */
template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstore((float*)to, from.v); }
/** Unaligned store of the underlying Packet4f through the float ei_pstoreu. */
template<> EIGEN_STRONG_INLINE void ei_pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((float*)to, from.v); }
/** Prefetch hint for \a addr, issued through the __pld macro/intrinsic. */
template<> EIGEN_STRONG_INLINE void ei_prefetch<std::complex<float> >(const std::complex<float> * addr) { __pld((float *)addr); }
/** Returns the first complex element of \a a by spilling the packet to an aligned buffer. */
template<> EIGEN_STRONG_INLINE std::complex<float> ei_pfirst<Packet2cf>(const Packet2cf& a)
{
  std::complex<float> EIGEN_ALIGN16 scratch[2];
  float* dst = (float *)scratch;
  vst1q_f32(dst, a.v);
  return scratch[0];
}
/** Reverses the order of the two complex elements by swapping the 64-bit halves. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_preverse(const Packet2cf& a)
{
  const float32x2_t lower = vget_low_f32(a.v);
  const float32x2_t upper = vget_high_f32(a.v);
  return Packet2cf(vcombine_f32(upper, lower));
}
/** Swaps the two floats inside each complex element of \a x
  * (vrev64q_f32 reverses the 32-bit lanes within each 64-bit half). */
EIGEN_STRONG_INLINE Packet2cf ei_pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
{
  return Packet2cf(vrev64q_f32(x.v));
}
/** Horizontal sum: adds the low and high 64-bit halves of \a a and returns the result as one complex. */
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux<Packet2cf>(const Packet2cf& a)
{
  std::complex<float> result;
  const float32x2_t total = vadd_f32(vget_low_f32(a.v), vget_high_f32(a.v));
  vst1_f32((float *)&result, total);
  return result;
}
/** Reduces two packets at once: the result's low half is low(vecs[0]) + high(vecs[0]),
  * its high half is low(vecs[1]) + high(vecs[1]). */
template<> EIGEN_STRONG_INLINE Packet2cf ei_preduxp<Packet2cf>(const Packet2cf* vecs)
{
  // Gather the low halves of both inputs, then the high halves, and add them pairwise.
  const Packet4f lows  = vcombine_f32(vget_low_f32(vecs[0].v),  vget_low_f32(vecs[1].v));
  const Packet4f highs = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
  return Packet2cf(vaddq_f32(lows, highs));
}
/** Horizontal product: returns the complex product of the two elements of \a a,
  * using the same scheme as ei_pmul on 64-bit halves (a1 = low half, a2 = high half). */
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux_mul<Packet2cf>(const Packet2cf& a)
{
float32x2_t a1, a2, v1, v2, prod;
std::complex<float> s;
a1 = vget_low_f32(a.v);
a2 = vget_high_f32(a.v);
// Broadcast the real part of a1 | a1_re | a1_re |
v1 = vdup_lane_f32(a1, 0);
// Broadcast the imaginary part of a1 | a1_im | a1_im |
v2 = vdup_lane_f32(a1, 1);
// Multiply a1_re with a2
v1 = vmul_f32(v1, a2);
// Multiply a1_im with a2
v2 = vmul_f32(v2, a2);
// Conjugate v2 (flip the sign bit of the odd lane)
v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), ei_p2ui_CONJ_XOR));
// Swap real/imag elements in v2.
v2 = vrev64_f32(v2);
// Add v1, v2
prod = vadd_f32(v1, v2);
vst1_f32((float *)&s, prod);
return s;
}
/** Alignment helper for Packet2cf: only Offset==1 modifies \a first,
  * replacing it by vextq_f32(first, second, 2). */
template<int Offset>
struct ei_palign_impl<Offset,Packet2cf>
{
  EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
  {
    if (Offset!=1)
      return;
    first.v = vextq_f32(first.v, second.v, 2);
  }
};
/** Product helper conjugating the right-hand operand: pmul(a,b) = a * conj(b). */
template<> struct ei_conj_helper<Packet2cf, Packet2cf, false,true>
{
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  { return ei_pmul(a, ei_pconj(b)); }

  /** pmul(x,y) + c */
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    const Packet2cf prod = pmul(x,y);
    return ei_padd(prod, c);
  }
};

/** Product helper conjugating the left-hand operand: pmul(a,b) = conj(a) * b. */
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,false>
{
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  { return ei_pmul(ei_pconj(a), b); }

  /** pmul(x,y) + c */
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    const Packet2cf prod = pmul(x,y);
    return ei_padd(prod, c);
  }
};

/** Product helper conjugating both operands: pmul(a,b) = conj(a * b). */
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,true>
{
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  { return ei_pconj(ei_pmul(a, b)); }

  /** pmul(x,y) + c */
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    const Packet2cf prod = pmul(x,y);
    return ei_padd(prod, c);
  }
};
/** Complex division a / b, evaluated as (a * conj(b)) / (b_re^2 + b_im^2). */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // TODO optimize it for NEON
  const ei_conj_helper<Packet2cf,Packet2cf,false,true> conj_rhs_product;
  Packet2cf numerator = conj_rhs_product.pmul(a,b);
  // Squares of every float of b; adding the re/im-swapped copy gives the squared norm in both slots.
  Packet4f squares = vmulq_f32(b.v, b.v);
  Packet4f swapped = vcombine_f32(vrev64_f32(vget_low_f32(squares)),
                                  vrev64_f32(vget_high_f32(squares)));
  return Packet2cf(ei_pdiv(numerator.v, vaddq_f32(squares, swapped)));
}
#endif // EIGEN_COMPLEX_ALTIVEC_H

View File

@@ -45,13 +45,13 @@ typedef float32x4_t Packet4f;
typedef int32x4_t Packet4i;
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
const Packet4f ei_p4f_##NAME = ei_pset1<float>(X)
const Packet4f ei_p4f_##NAME = ei_pset1<Packet4f>(X)
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
const Packet4f ei_p4f_##NAME = vreinterpretq_f32_u32(ei_pset1<int>(X))
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i ei_p4i_##NAME = ei_pset1<int>(X)
const Packet4i ei_p4i_##NAME = ei_pset1<Packet4i>(X)
#ifndef __pld
#define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" );
@@ -59,8 +59,14 @@ typedef int32x4_t Packet4i;
template<> struct ei_packet_traits<float> : ei_default_packet_traits
{
typedef Packet4f type; enum {size=4};
typedef Packet4f type;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
HasDiv = 1,
// FIXME check the Has*
HasSin = 0,
HasCos = 0,
HasLog = 0,
@@ -69,23 +75,31 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
};
};
template<> struct ei_packet_traits<int> : ei_default_packet_traits
{ typedef Packet4i type; enum {size=4}; };
{
typedef Packet4i type;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=4
// FIXME check the Has*
};
};
template<> struct ei_unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { return vdupq_n_f32(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) { return vdupq_n_s32(from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a)
{
Packet4f countdown = { 3, 2, 1, 0 };
return vaddq_f32(ei_pset1(a), countdown);
return vaddq_f32(ei_pset1<Packet4f>(a), countdown);
}
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a)
{
Packet4i countdown = { 3, 2, 1, 0 };
return vaddq_s32(ei_pset1(a), countdown);
return vaddq_s32(ei_pset1<Packet4i>(a), countdown);
}
template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); }
@@ -123,7 +137,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, con
}
template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
{ ei_assert(false && "packet integer division are not supported by NEON");
return ei_pset1<int>(0);
return ei_pset1<Packet4i>(0);
}
// for some weird reason, it has to be overloaded for packets of integers
@@ -163,8 +177,23 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a,
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); }
/** Loads two floats from \a from and duplicates each one.
  * \returns the packet (from[0], from[0], from[1], from[1]).
  */
template<> EIGEN_STRONG_INLINE Packet4f ei_ploaddup<Packet4f>(const float* from)
{
  float32x2_t lo, hi;
  lo = vdup_n_f32(from[0]);
  hi = vdup_n_f32(from[1]);
  return vcombine_f32(lo, hi);
}
/** Loads two ints from \a from and duplicates each one.
  * \returns the packet (from[0], from[0], from[1], from[1]).
  */
template<> EIGEN_STRONG_INLINE Packet4i ei_ploaddup<Packet4i>(const int* from)
{
  int32x2_t lo, hi;
  lo = vdup_n_s32(from[0]);
  hi = vdup_n_s32(from[1]);
  return vcombine_s32(lo, hi);
}
template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
@@ -181,25 +210,21 @@ template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { i
template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a) {
float32x2_t a_lo, a_hi;
Packet4f a_r64, a_r128;
Packet4f a_r64;
a_r64 = vrev64q_f32(a);
a_lo = vget_low_f32(a_r64);
a_hi = vget_high_f32(a_r64);
a_r128 = vcombine_f32(a_hi, a_lo);
return a_r128;
return vcombine_f32(a_hi, a_lo);
}
template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a) {
int32x2_t a_lo, a_hi;
Packet4i a_r64, a_r128;
Packet4i a_r64;
a_r64 = vrev64q_s32(a);
a_lo = vget_low_s32(a_r64);
a_hi = vget_high_s32(a_r64);
a_r128 = vcombine_s32(a_hi, a_lo);
return a_r128;
return vcombine_s32(a_hi, a_lo);
}
template<> EIGEN_STRONG_INLINE Packet4f ei_pabs(const Packet4f& a) { return vabsq_f32(a); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a) { return vabsq_s32(a); }

View File

@@ -0,0 +1,426 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_COMPLEX_SSE_H
#define EIGEN_COMPLEX_SSE_H
//---------- float ----------
/** Packet holding two std::complex<float> values inside a single __m128.
  * The default constructor leaves \c v uninitialized. */
struct Packet2cf
{
  __m128 v;

  EIGEN_STRONG_INLINE Packet2cf() {}
  EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
};
/** Packet traits for std::complex<float> on SSE: the packet type is Packet2cf,
  * which carries \c size = 2 complex scalars. The Has* flags override the values
  * inherited from ei_default_packet_traits (whose defaults are not visible here). */
template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits
{
typedef Packet2cf type;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasSetLinear = 0
};
};
/** Reverse mapping: a Packet2cf holds 2 scalars of type std::complex<float>. */
template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
/** Element-wise sum of two complex packets. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const __m128 sum = _mm_add_ps(a.v, b.v);
  return Packet2cf(sum);
}

/** Element-wise difference a - b of two complex packets. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const __m128 diff = _mm_sub_ps(a.v, b.v);
  return Packet2cf(diff);
}
/** Negation: flips the sign bit of all four floats. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a)
{
  const __m128 sign_bits = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
  const __m128 negated = _mm_xor_ps(a.v, sign_bits);
  return Packet2cf(negated);
}
/** Complex conjugate: flips the sign bit of floats 1 and 3 (the imaginary parts). */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a)
{
  const __m128 imag_sign_bits = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
  const __m128 conjugated = _mm_xor_ps(a.v, imag_sign_bits);
  return Packet2cf(conjugated);
}
/** Complex product a * b for each of the two complex elements.
  * Both paths combine (a_re * b) with (a_im * swapped(b)), where swapped(b) exchanges the
  * real and imaginary floats of b. With EIGEN_VECTORIZE_SSE3 the combination is done by
  * _mm_addsub_ps; otherwise floats 0 and 2 of the second term are sign-flipped before adding. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for SSE3 and 4
#ifdef EIGEN_VECTORIZE_SSE3
return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
_mm_mul_ps(_mm_movehdup_ps(a.v),
ei_vec4f_swizzle1(b.v, 1, 0, 3, 2))));
// return Packet2cf(_mm_addsub_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
// _mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
// ei_vec4f_swizzle1(b.v, 1, 0, 3, 2))));
#else
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
return Packet2cf(_mm_add_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
_mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
ei_vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
#endif
}
/** Bitwise AND of the raw packet bits. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  return Packet2cf(_mm_and_ps(a.v, b.v));
}

/** Bitwise OR of the raw packet bits. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_por <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  return Packet2cf(_mm_or_ps(a.v, b.v));
}

/** Bitwise XOR of the raw packet bits. */
template<> EIGEN_STRONG_INLINE Packet2cf ei_pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  return Packet2cf(_mm_xor_ps(a.v, b.v));
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); }
// Memory access for packets of two std::complex<float>.
// The complex array is addressed through its first real part (ei_real_ref) and
// moved as four consecutive floats by the corresponding Packet4f routines.
// Load through the aligned Packet4f path.
template<> EIGEN_STRONG_INLINE Packet2cf ei_pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(ei_pload<Packet4f>(&ei_real_ref(*from))); }
// Load through the unaligned Packet4f path.
template<> EIGEN_STRONG_INLINE Packet2cf ei_ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ei_ploadu<Packet4f>(&ei_real_ref(*from))); }
// Store through the aligned float path.
template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstore(&ei_real_ref(*to), from.v); }
// Store through the unaligned float path.
template<> EIGEN_STRONG_INLINE void ei_pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu(&ei_real_ref(*to), from.v); }
// Issues an _mm_prefetch with the _MM_HINT_T0 hint for the given address.
template<> EIGEN_STRONG_INLINE void ei_prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
// Broadcasts a single complex value into both slots of a packet.
//
// \param from  the complex scalar to replicate
// \returns     a packet laid out as (re, im, re, im)
template<> EIGEN_STRONG_INLINE Packet2cf ei_pset1<Packet2cf>(const std::complex<float>& from)
{
  // _mm_loadl_pi only writes the lower 64 bits and passes the upper half of its
  // first operand through. Seed it with a zeroed register instead of a
  // default-constructed (uninitialized) Packet2cf, so that no indeterminate value
  // is ever read; this also avoids "may be used uninitialized" warnings.
  const __m128 lower = _mm_loadl_pi(_mm_setzero_ps(), (const __m64*)&from);
  // Duplicate the lower 64 bits into the upper half; the zero seed is discarded here.
  return Packet2cf(_mm_movelh_ps(lower, lower));
}
// Extracts the first complex value of the packet by storing the lower 64 bits
// of the register straight into a std::complex<float>.
template<> EIGEN_STRONG_INLINE std::complex<float> ei_pfirst<Packet2cf>(const Packet2cf& a)
{
  std::complex<float> first;
  __m64* const destination = (__m64*)&first;
  _mm_storel_pi(destination, a.v);
  return first;
}
// Swaps the two complex values. Each complex float occupies 64 bits, so the
// register is reinterpreted as two doubles and reversed with the double routine.
template<> EIGEN_STRONG_INLINE Packet2cf ei_preverse(const Packet2cf& a)
{
  const __m128d as_doubles = _mm_castps_pd(a.v);
  const __m128d swapped = ei_preverse(as_doubles);
  return Packet2cf(_mm_castpd_ps(swapped));
}
// Horizontal sum: adds the second complex value onto the first and returns it.
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux<Packet2cf>(const Packet2cf& a)
{
  // bring the upper complex value down to the lower 64 bits
  const __m128 upper = _mm_movehl_ps(a.v, a.v);
  const Packet2cf folded(_mm_add_ps(a.v, upper));
  return ei_pfirst(folded);
}
// Reduces two packets at once: slot 0 of the result holds the horizontal sum of
// vecs[0], slot 1 the horizontal sum of vecs[1].
template<> EIGEN_STRONG_INLINE Packet2cf ei_preduxp<Packet2cf>(const Packet2cf* vecs)
{
  // (vecs[0] lower element, vecs[1] lower element)
  const __m128 lower_elements = _mm_movelh_ps(vecs[0].v, vecs[1].v);
  // (vecs[0] upper element, vecs[1] upper element)
  const __m128 upper_elements = _mm_movehl_ps(vecs[1].v, vecs[0].v);
  return Packet2cf(_mm_add_ps(lower_elements, upper_elements));
}
// Horizontal product: complex-multiplies the two values held in the packet.
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux_mul<Packet2cf>(const Packet2cf& a)
{
  // packet whose first slot is the second complex value of a
  const Packet2cf upper(_mm_movehl_ps(a.v, a.v));
  return ei_pfirst(ei_pmul(a, upper));
}
// Packet realignment for Packet2cf. For Offset==1, `first` becomes
// (upper element of first, lower element of second); any other offset leaves
// `first` unchanged.
template<int Offset>
struct ei_palign_impl<Offset,Packet2cf>
{
  EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
  {
    if (Offset != 1)
      return;

    const __m128 upper_of_first = _mm_movehl_ps(first.v, first.v);
    first.v = _mm_movelh_ps(upper_of_first, second.v);
  }
};
// Product helper computing a * conj(b) on packets of complex floats:
//   (ar + i*ai) * (br - i*bi) = (ar*br + ai*bi) + i*(ai*br - ar*bi)
template<> struct ei_conj_helper<Packet2cf, Packet2cf, false,true>
{
  // Returns x * conj(y) + c.
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    const Packet2cf product = pmul(x, y);
    return ei_padd(product, c);
  }

  // Returns a * conj(b).
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  {
#ifdef EIGEN_VECTORIZE_SSE3
    const Packet2cf b_conj = ei_pconj(b);
    return ei_pmul(a, b_conj);
#else
    // Negating the odd lanes of ar*b folds the conjugation of b into the product.
    const __m128 negate_odd = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
    const __m128 a_re = ei_vec4f_swizzle1(a.v, 0, 0, 2, 2);
    const __m128 a_im = ei_vec4f_swizzle1(a.v, 1, 1, 3, 3);
    const __m128 b_flipped = ei_vec4f_swizzle1(b.v, 1, 0, 3, 2);
    const __m128 re_terms = _mm_xor_ps(_mm_mul_ps(a_re, b.v), negate_odd);
    const __m128 im_terms = _mm_mul_ps(a_im, b_flipped);
    return Packet2cf(_mm_add_ps(re_terms, im_terms));
#endif
  }
};
// Product helper computing conj(a) * b on packets of complex floats:
//   (ar - i*ai) * (br + i*bi) = (ar*br + ai*bi) + i*(ar*bi - ai*br)
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,false>
{
  // Returns conj(x) * y + c.
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    const Packet2cf product = pmul(x, y);
    return ei_padd(product, c);
  }

  // Returns conj(a) * b.
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  {
#ifdef EIGEN_VECTORIZE_SSE3
    const Packet2cf a_conj = ei_pconj(a);
    return ei_pmul(a_conj, b);
#else
    // Negating the odd lanes of ai*flip(b) folds the conjugation of a into the product.
    const __m128 negate_odd = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
    const __m128 a_re = ei_vec4f_swizzle1(a.v, 0, 0, 2, 2);
    const __m128 a_im = ei_vec4f_swizzle1(a.v, 1, 1, 3, 3);
    const __m128 b_flipped = ei_vec4f_swizzle1(b.v, 1, 0, 3, 2);
    const __m128 re_terms = _mm_mul_ps(a_re, b.v);
    const __m128 im_terms = _mm_xor_ps(_mm_mul_ps(a_im, b_flipped), negate_odd);
    return Packet2cf(_mm_add_ps(re_terms, im_terms));
#endif
  }
};
// Product helper computing conj(a) * conj(b) == conj(a * b) on packets of complex floats:
//   (ar*br - ai*bi) - i*(ar*bi + ai*br)
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,true>
{
  // Returns conj(x) * conj(y) + c.
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    const Packet2cf product = pmul(x, y);
    return ei_padd(product, c);
  }

  // Returns conj(a * b).
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  {
#ifdef EIGEN_VECTORIZE_SSE3
    const Packet2cf product = ei_pmul(a, b);
    return ei_pconj(product);
#else
    // (ar*br, -ar*bi) - (ai*bi, ai*br) gives the conjugated product directly.
    const __m128 negate_odd = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
    const __m128 a_re = ei_vec4f_swizzle1(a.v, 0, 0, 2, 2);
    const __m128 a_im = ei_vec4f_swizzle1(a.v, 1, 1, 3, 3);
    const __m128 b_flipped = ei_vec4f_swizzle1(b.v, 1, 0, 3, 2);
    const __m128 re_terms = _mm_xor_ps(_mm_mul_ps(a_re, b.v), negate_odd);
    const __m128 im_terms = _mm_mul_ps(a_im, b_flipped);
    return Packet2cf(_mm_sub_ps(re_terms, im_terms));
#endif
  }
};
// Mixed product helper: a packet of four floats times a packet of two complex
// floats, without conjugation. The product is a plain lane-wise float multiply.
template<> struct ei_conj_helper<Packet4f, Packet2cf, false,false>
{
  // Returns c + x * y.
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
  {
    const Packet2cf product = pmul(x, y);
    return ei_padd(c, product);
  }

  // Lane-wise product of x with the raw lanes of y.
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
  {
    const Packet4f scaled = ei_pmul(x, y.v);
    return Packet2cf(scaled);
  }
};
// Mixed product helper: a packet of two complex floats times a packet of four
// floats, without conjugation. The product is a plain lane-wise float multiply.
template<> struct ei_conj_helper<Packet2cf, Packet4f, false,false>
{
  // Returns c + x * y.
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
  {
    const Packet2cf product = pmul(x, y);
    return ei_padd(c, product);
  }

  // Lane-wise product of the raw lanes of x with y.
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
  {
    const Packet4f scaled = ei_pmul(x.v, y);
    return Packet2cf(scaled);
  }
};
// Complex division, element by element, using a / b = a * conj(b) / |b|^2.
template<> EIGEN_STRONG_INLINE Packet2cf ei_pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // TODO optimize it for SSE3 and 4
  const Packet2cf numerator = ei_conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
  // (br^2, bi^2) for each element
  const __m128 squares = _mm_mul_ps(b.v,b.v);
  // 0xb1 swaps neighbouring lanes, giving (bi^2, br^2) for each element
  const __m128 squares_swapped = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(squares), 0xb1));
  // br^2 + bi^2 replicated in both lanes of each element
  const __m128 squared_norm = _mm_add_ps(squares, squares_swapped);
  return Packet2cf(_mm_div_ps(numerator.v, squared_norm));
}
// Exchanges the real and imaginary part of each complex value in the packet.
EIGEN_STRONG_INLINE Packet2cf ei_pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
{
  const __m128 parts_swapped = ei_vec4f_swizzle1(x.v, 1, 0, 3, 2);
  return Packet2cf(parts_swapped);
}
//---------- double ----------
// Packet type for std::complex<double>: a thin wrapper around one __m128d.
// The packet traits in this file give it a size of one complex value.
struct Packet1cd
{
// Default constructor: leaves v uninitialized.
EIGEN_STRONG_INLINE Packet1cd() {}
// Wraps an existing SSE register; explicit so that a raw __m128d is never
// silently treated as a complex packet.
EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
// Underlying register.
__m128d v;
};
// Packet traits for std::complex<double>: maps the scalar to Packet1cd and
// declares which packet operations are available for it.
template<> struct ei_packet_traits<std::complex<double> > : ei_default_packet_traits
{
typedef Packet1cd type;
enum {
Vectorizable = 1,
// presumably 0 because a std::complex<double> is not guaranteed to sit on a
// 16-byte boundary (see the FIXME in ei_palign_impl<Offset,Packet1cd>) -- TODO confirm
AlignedOnScalar = 0,
// one complex<double> per packet
size = 1,
// arithmetic flagged as available for this packet type
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
// operations flagged as not available for this packet type
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasSetLinear = 0
};
};
// Reverse trait lookup for Packet1cd: the scalar type is std::complex<double>
// and one packet carries exactly one of them.
template<> struct ei_unpacket_traits<Packet1cd>
{
  typedef std::complex<double> type;
  enum { size = 1 };
};

// Complex sum, computed component-wise on the two doubles.
template<> EIGEN_STRONG_INLINE Packet1cd ei_padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const __m128d sum = _mm_add_pd(a.v, b.v);
  return Packet1cd(sum);
}

// Complex difference, computed component-wise on the two doubles.
template<> EIGEN_STRONG_INLINE Packet1cd ei_psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const __m128d difference = _mm_sub_pd(a.v, b.v);
  return Packet1cd(difference);
}

// Negation, delegated to the routine for packets of two doubles.
template<> EIGEN_STRONG_INLINE Packet1cd ei_pnegate(const Packet1cd& a)
{
  const __m128d negated = ei_pnegate(a.v);
  return Packet1cd(negated);
}
// Complex conjugate: toggles the sign bit of the upper double (imaginary part).
template<> EIGEN_STRONG_INLINE Packet1cd ei_pconj(const Packet1cd& a)
{
  // _mm_set_epi32 lists the words from most to least significant, so the first
  // argument is the top word of the upper double, where its sign bit lives.
  const __m128i imag_sign_bit = _mm_set_epi32(0x80000000,0x0,0x0,0x0);
  const __m128d flip_imag = _mm_castsi128_pd(imag_sign_bit);
  return Packet1cd(_mm_xor_pd(a.v, flip_imag));
}
// Complex product:
//   (ar + i*ai) * (br + i*bi) = (ar*br - ai*bi) + i*(ar*bi + ai*br)
template<> EIGEN_STRONG_INLINE Packet1cd ei_pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  // TODO optimize it for SSE3 and 4
  const __m128d a_re = ei_vec2d_swizzle1(a.v, 0, 0);       // (ar, ar)
  const __m128d a_im = ei_vec2d_swizzle1(a.v, 1, 1);       // (ai, ai)
  const __m128d b_flipped = ei_vec2d_swizzle1(b.v, 1, 0);  // (bi, br)
#ifdef EIGEN_VECTORIZE_SSE3
  // addsub subtracts in the lower lane (real part) and adds in the upper one.
  return Packet1cd(_mm_addsub_pd(_mm_mul_pd(a_re, b.v), _mm_mul_pd(a_im, b_flipped)));
#else
  // Without addsub, negate the lower lane of the second product and add.
  const __m128d negate_lower = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
  const __m128d re_terms = _mm_mul_pd(a_re, b.v);
  const __m128d im_terms = _mm_xor_pd(_mm_mul_pd(a_im, b_flipped), negate_lower);
  return Packet1cd(_mm_add_pd(re_terms, im_terms));
#endif
}
// Bitwise logic on the raw 128 bits of two Packet1cd values.

// Bitwise AND.
template<> EIGEN_STRONG_INLINE Packet1cd ei_pand<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const __m128d bits = _mm_and_pd(a.v, b.v);
  return Packet1cd(bits);
}

// Bitwise OR.
template<> EIGEN_STRONG_INLINE Packet1cd ei_por<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const __m128d bits = _mm_or_pd(a.v, b.v);
  return Packet1cd(bits);
}

// Bitwise XOR.
template<> EIGEN_STRONG_INLINE Packet1cd ei_pxor<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const __m128d bits = _mm_xor_pd(a.v, b.v);
  return Packet1cd(bits);
}

// Forwards to _mm_andnot_pd(a.v, b.v); note that this intrinsic complements its
// FIRST operand, i.e. it yields (~a) & b.
template<> EIGEN_STRONG_INLINE Packet1cd ei_pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const __m128d bits = _mm_andnot_pd(a.v, b.v);
  return Packet1cd(bits);
}
// Memory access for Packet1cd. The std::complex<double> pointer is reinterpreted
// as a pointer to two consecutive doubles and handed to the Packet2d routines.
// FIXME force unaligned load, this is a temporary fix
// NOTE(review): as written, ei_pload forwards to the ALIGNED ei_pload<Packet2d>,
// so the FIXME above does not describe the current code -- confirm which is intended.
template<> EIGEN_STRONG_INLINE Packet1cd ei_pload <Packet1cd>(const std::complex<double>* from)
{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(ei_pload<Packet2d>((const double*)from)); }
// Unaligned load through ei_ploadu<Packet2d>.
template<> EIGEN_STRONG_INLINE Packet1cd ei_ploadu<Packet1cd>(const std::complex<double>* from)
{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ei_ploadu<Packet2d>((const double*)from)); }
// A packet holds a single complex<double>, so "broadcasting" is just loading the
// scalar; it goes through the unaligned path.
template<> EIGEN_STRONG_INLINE Packet1cd ei_pset1<Packet1cd>(const std::complex<double>& from)
{ /* here we really have to use unaligned loads :( */ return ei_ploadu<Packet1cd>(&from); }
// FIXME force unaligned store, this is a temporary fix
// NOTE(review): as written, ei_pstore goes through the ALIGNED double store
// (EIGEN_DEBUG_ALIGNED_STORE + ei_pstore); the FIXME above looks stale -- confirm.
template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstore((double*)to, from.v); }
// Unaligned store through the double ei_pstoreu.
template<> EIGEN_STRONG_INLINE void ei_pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((double*)to, from.v); }
// Issues an _mm_prefetch with the _MM_HINT_T0 hint for the given address.
template<> EIGEN_STRONG_INLINE void ei_prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
// Extracts the (only) complex value of the packet by spilling the register
// into a 16-byte aligned scratch array.
template<> EIGEN_STRONG_INLINE std::complex<double> ei_pfirst<Packet1cd>(const Packet1cd& a)
{
  EIGEN_ALIGN16 double lanes[2];
  _mm_store_pd(lanes, a.v);
  const double real_part = lanes[0];
  const double imag_part = lanes[1];
  return std::complex<double>(real_part, imag_part);
}
// Reductions and reversal are trivial for Packet1cd since it holds one element.

// Reversing a single-element packet returns it unchanged.
template<> EIGEN_STRONG_INLINE Packet1cd ei_preverse(const Packet1cd& a)
{
  return a;
}

// Sum over a single element: the element itself.
template<> EIGEN_STRONG_INLINE std::complex<double> ei_predux<Packet1cd>(const Packet1cd& a)
{
  const std::complex<double> only_element = ei_pfirst(a);
  return only_element;
}

// Only vecs[0] is read; it is returned as is.
template<> EIGEN_STRONG_INLINE Packet1cd ei_preduxp<Packet1cd>(const Packet1cd* vecs)
{
  const Packet1cd& only_packet = vecs[0];
  return only_packet;
}

// Product over a single element: the element itself.
template<> EIGEN_STRONG_INLINE std::complex<double> ei_predux_mul<Packet1cd>(const Packet1cd& a)
{
  const std::complex<double> only_element = ei_pfirst(a);
  return only_element;
}
// Packet realignment for Packet1cd: intentionally a no-op for every Offset.
// Both arguments are ignored (their names are commented out to silence
// unused-parameter warnings).
template<int Offset>
struct ei_palign_impl<Offset,Packet1cd>
{
EIGEN_STRONG_INLINE static void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
{
// FIXME is it sure we never have to align a Packet1cd?
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
}
};
// Product helper computing a * conj(b) for complex doubles:
//   (ar + i*ai) * (br - i*bi) = (ar*br + ai*bi) + i*(ai*br - ar*bi)
template<> struct ei_conj_helper<Packet1cd, Packet1cd, false,true>
{
  // Returns x * conj(y) + c.
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
  {
    const Packet1cd product = pmul(x, y);
    return ei_padd(product, c);
  }

  // Returns a * conj(b).
  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
  {
#ifdef EIGEN_VECTORIZE_SSE3
    const Packet1cd b_conj = ei_pconj(b);
    return ei_pmul(a, b_conj);
#else
    // Negating the upper lane of ar*b folds the conjugation of b into the product.
    const __m128d negate_upper = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
    const __m128d a_re = ei_vec2d_swizzle1(a.v, 0, 0);
    const __m128d a_im = ei_vec2d_swizzle1(a.v, 1, 1);
    const __m128d b_flipped = ei_vec2d_swizzle1(b.v, 1, 0);
    const __m128d re_terms = _mm_xor_pd(_mm_mul_pd(a_re, b.v), negate_upper);
    const __m128d im_terms = _mm_mul_pd(a_im, b_flipped);
    return Packet1cd(_mm_add_pd(re_terms, im_terms));
#endif
  }
};
// Product helper computing conj(a) * b for complex doubles:
//   (ar - i*ai) * (br + i*bi) = (ar*br + ai*bi) + i*(ar*bi - ai*br)
template<> struct ei_conj_helper<Packet1cd, Packet1cd, true,false>
{
  // Returns conj(x) * y + c.
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
  {
    const Packet1cd product = pmul(x, y);
    return ei_padd(product, c);
  }

  // Returns conj(a) * b.
  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
  {
#ifdef EIGEN_VECTORIZE_SSE3
    const Packet1cd a_conj = ei_pconj(a);
    return ei_pmul(a_conj, b);
#else
    // Negating the upper lane of ai*flip(b) folds the conjugation of a into the product.
    const __m128d negate_upper = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
    const __m128d a_re = ei_vec2d_swizzle1(a.v, 0, 0);
    const __m128d a_im = ei_vec2d_swizzle1(a.v, 1, 1);
    const __m128d b_flipped = ei_vec2d_swizzle1(b.v, 1, 0);
    const __m128d re_terms = _mm_mul_pd(a_re, b.v);
    const __m128d im_terms = _mm_xor_pd(_mm_mul_pd(a_im, b_flipped), negate_upper);
    return Packet1cd(_mm_add_pd(re_terms, im_terms));
#endif
  }
};
// Product helper computing conj(a) * conj(b) == conj(a * b) for complex doubles:
//   (ar*br - ai*bi) - i*(ar*bi + ai*br)
template<> struct ei_conj_helper<Packet1cd, Packet1cd, true,true>
{
  // Returns conj(x) * conj(y) + c.
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
  {
    const Packet1cd product = pmul(x, y);
    return ei_padd(product, c);
  }

  // Returns conj(a * b).
  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
  {
#ifdef EIGEN_VECTORIZE_SSE3
    const Packet1cd product = ei_pmul(a, b);
    return ei_pconj(product);
#else
    // (ar*br, -ar*bi) - (ai*bi, ai*br) gives the conjugated product directly.
    const __m128d negate_upper = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
    const __m128d a_re = ei_vec2d_swizzle1(a.v, 0, 0);
    const __m128d a_im = ei_vec2d_swizzle1(a.v, 1, 1);
    const __m128d b_flipped = ei_vec2d_swizzle1(b.v, 1, 0);
    const __m128d re_terms = _mm_xor_pd(_mm_mul_pd(a_re, b.v), negate_upper);
    const __m128d im_terms = _mm_mul_pd(a_im, b_flipped);
    return Packet1cd(_mm_sub_pd(re_terms, im_terms));
#endif
  }
};
// Mixed product helper: a packet of two doubles times a complex double, without
// conjugation. The product is a plain lane-wise double multiply.
template<> struct ei_conj_helper<Packet2d, Packet1cd, false,false>
{
  // Returns c + x * y.
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
  {
    const Packet1cd product = pmul(x, y);
    return ei_padd(c, product);
  }

  // Lane-wise product of x with the raw lanes of y.
  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
  {
    const Packet2d scaled = ei_pmul(x, y.v);
    return Packet1cd(scaled);
  }
};
// Mixed product helper: a complex double times a packet of two doubles, without
// conjugation. The product is a plain lane-wise double multiply.
template<> struct ei_conj_helper<Packet1cd, Packet2d, false,false>
{
  // Returns c + x * y.
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
  {
    const Packet1cd product = pmul(x, y);
    return ei_padd(c, product);
  }

  // Lane-wise product of the raw lanes of x with y.
  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
  {
    const Packet2d scaled = ei_pmul(x.v, y);
    return Packet1cd(scaled);
  }
};
// Complex division using a / b = a * conj(b) / |b|^2.
template<> EIGEN_STRONG_INLINE Packet1cd ei_pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  // TODO optimize it for SSE3 and 4
  const Packet1cd numerator = ei_conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
  // (br^2, bi^2)
  const __m128d squares = _mm_mul_pd(b.v,b.v);
  // shuffle mask 0x1 swaps the two lanes: (bi^2, br^2)
  const __m128d squares_swapped = _mm_shuffle_pd(squares, squares, 0x1);
  // br^2 + bi^2 in both lanes
  const __m128d squared_norm = _mm_add_pd(squares, squares_swapped);
  return Packet1cd(_mm_div_pd(numerator.v, squared_norm));
}
// Exchanges the real and imaginary part by reversing the two doubles of the packet.
EIGEN_STRONG_INLINE Packet1cd ei_pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
{
  const Packet2d parts_swapped = ei_preverse(x.v);
  return Packet1cd(parts_swapped);
}
#endif // EIGEN_COMPLEX_SSE_H

View File

@@ -373,19 +373,19 @@ Packet4f ei_pcos<Packet4f>(const Packet4f& _x)
return _mm_xor_ps(y, sign_bit);
}
// This is Quake3's fast inverse square root.
// This is based on Quake3's fast inverse square root.
// For detail see here: http://www.beyond3d.com/content/articles/8/
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f ei_psqrt<Packet4f>(const Packet4f& _x)
{
Packet4f half = ei_pmul(_x, ei_pset1(.5f));
/* select only the inverse sqrt of non-zero inputs */
Packet4f non_zero_mask = _mm_cmpgt_ps(_x, ei_pset1(std::numeric_limits<float>::epsilon()));
Packet4f x = _mm_and_ps(non_zero_mask, _mm_rsqrt_ps(_x));
Packet4f half = ei_pmul(_x, ei_pset1<Packet4f>(.5f));
x = ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x))));
return ei_pmul(_x,x);
/* select only the inverse sqrt of non-zero inputs */
Packet4f non_zero_mask = _mm_cmpgt_ps(_x, ei_pset1<Packet4f>(std::numeric_limits<float>::epsilon()));
Packet4f x = _mm_and_ps(non_zero_mask, _mm_rsqrt_ps(_x));
x = ei_pmul(x, ei_psub(ei_pset1<Packet4f>(1.5f), ei_pmul(half, ei_pmul(x,x))));
return ei_pmul(_x,x);
}
#endif // EIGEN_MATH_FUNCTIONS_SSE_H

View File

@@ -29,6 +29,10 @@
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
#endif
typedef __m128 Packet4f;
typedef __m128i Packet4i;
typedef __m128d Packet2d;
@@ -43,6 +47,9 @@ template<> struct ei_is_arithmetic<__m128d> { enum { ret = true }; };
#define ei_vec4i_swizzle1(v,p,q,r,s) \
(_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
#define ei_vec2d_swizzle1(v,p,q) \
(_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
#define ei_vec4f_swizzle2(a,b,p,q,r,s) \
(_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
@@ -50,18 +57,24 @@ template<> struct ei_is_arithmetic<__m128d> { enum { ret = true }; };
(_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
const Packet4f ei_p4f_##NAME = ei_pset1<float>(X)
const Packet4f ei_p4f_##NAME = ei_pset1<Packet4f>(X)
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
const Packet4f ei_p4f_##NAME = _mm_castsi128_ps(ei_pset1<int>(X))
const Packet4f ei_p4f_##NAME = _mm_castsi128_ps(ei_pset1<Packet4i>(X))
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i ei_p4i_##NAME = ei_pset1<int>(X)
const Packet4i ei_p4i_##NAME = ei_pset1<Packet4i>(X)
template<> struct ei_packet_traits<float> : ei_default_packet_traits
{
typedef Packet4f type; enum {size=4};
typedef Packet4f type;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=4,
HasDiv = 1,
HasSin = EIGEN_FAST_MATH,
HasCos = EIGEN_FAST_MATH,
HasLog = 1,
@@ -70,9 +83,26 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
};
};
template<> struct ei_packet_traits<double> : ei_default_packet_traits
{ typedef Packet2d type; enum {size=2}; };
{
typedef Packet2d type;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=2,
HasDiv = 1
};
};
template<> struct ei_packet_traits<int> : ei_default_packet_traits
{ typedef Packet4i type; enum {size=4}; };
{
typedef Packet4i type;
enum {
// FIXME check the Has*
Vectorizable = 1,
AlignedOnScalar = 1,
size=4
};
};
template<> struct ei_unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
template<> struct ei_unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
@@ -81,23 +111,24 @@ template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size
#ifdef __GNUC__
// Sometimes GCC implements _mm_set1_p* using multiple moves,
// that is inefficient :( (e.g., see ei_gemm_pack_rhs)
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) {
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) {
Packet4f res = _mm_set_ss(from);
return _mm_shuffle_ps(res,res,0);
return ei_vec4f_swizzle1(res,0,0,0,0);
}
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) {
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<Packet2d>(const double& from) {
// NOTE the SSE3 intrinsic _mm_loaddup_pd is never faster but sometimes much slower
Packet2d res = _mm_set_sd(from);
return _mm_unpacklo_pd(res,res);
return ei_vec2d_swizzle1(res, 0, 0);
}
#else
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { return _mm_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) { return _mm_set1_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
#endif
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) { return _mm_set1_epi32(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) { return _mm_add_ps(ei_pset1(a), _mm_set_ps(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet2d ei_plset<double>(const double& a) { return _mm_add_pd(ei_pset1(a),_mm_set_pd(1,0)); }
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) { return _mm_add_epi32(ei_pset1(a),_mm_set_epi32(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) { return _mm_add_ps(ei_pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet2d ei_plset<double>(const double& a) { return _mm_add_pd(ei_pset1<Packet2d>(a),_mm_set_pd(1,0)); }
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) { return _mm_add_epi32(ei_pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
@@ -144,7 +175,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, con
template<> EIGEN_STRONG_INLINE Packet2d ei_pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
{ ei_assert(false && "packet integer division are not supported by SSE");
return ei_pset1<int>(0);
return ei_pset1<Packet4i>(0);
}
// for some weird reasons, it has to be overloaded for packets of integers
@@ -184,14 +215,14 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a,
template<> EIGEN_STRONG_INLINE Packet2d ei_pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pload<double>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); }
#if defined(_MSC_VER)
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu<double>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<int>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); }
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); }
#else
// Fast unaligned loads. Note that here we cannot directly use intrinsics: this would
// require pointer casting to incompatible pointer types and leads to invalid code
@@ -199,7 +230,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_D
// a correct instruction dependency.
// TODO: do the same for MSVC (ICC is compatible)
// NOTE: with the code below, MSVC's compiler crashes!
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from)
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu<Packet4f>(const float* from)
{
EIGEN_DEBUG_UNALIGNED_LOAD
__m128d res;
@@ -207,7 +238,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from)
res = _mm_loadh_pd(res, (const double*)(from+2)) ;
return _mm_castpd_ps(res);
}
template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu(const double* from)
template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu<Packet2d>(const double* from)
{
EIGEN_DEBUG_UNALIGNED_LOAD
__m128d res;
@@ -215,7 +246,7 @@ template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu(const double* from)
res = _mm_loadh_pd(res,from+1);
return res;
}
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from)
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<Packet4i>(const int* from)
{
EIGEN_DEBUG_UNALIGNED_LOAD
__m128d res;
@@ -225,6 +256,19 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from)
}
#endif
template<> EIGEN_STRONG_INLINE Packet4f ei_ploaddup<Packet4f>(const float* from)
{
return ei_vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd((const double*)from)), 0, 0, 1, 1);
}
template<> EIGEN_STRONG_INLINE Packet2d ei_ploaddup<Packet2d>(const double* from)
{ return ei_pset1<Packet2d>(from[0]); }
template<> EIGEN_STRONG_INLINE Packet4i ei_ploaddup<Packet4i>(const int* from)
{
Packet4i tmp;
tmp = _mm_loadl_epi64(reinterpret_cast<const Packet4i*>(from));
return ei_vec4i_swizzle1(tmp, 0, 0, 1, 1);
}
template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
template<> EIGEN_STRONG_INLINE void ei_pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<Packet4i*>(to), from); }
@@ -241,13 +285,13 @@ template<> EIGEN_STRONG_INLINE void ei_prefetch<float>(const float* addr) { _m
template<> EIGEN_STRONG_INLINE void ei_prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void ei_prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
#if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64)
#if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64) && !defined(__INTEL_COMPILER)
// The temporary variable fixes an internal compilation error.
// Direct access to the struct members fixes bug #62.
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
#elif defined(_MSC_VER) && (_MSC_VER <= 1500)
#elif defined(_MSC_VER) && (_MSC_VER <= 1500) && !defined(__INTEL_COMPILER)
// The temporary variable fixes an internal compilation error.
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }

View File

@@ -42,7 +42,7 @@
template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl;
template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl;
template<typename LhsNested, typename RhsNested, int NestingFlags>
@@ -73,6 +73,8 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
LhsRowMajor = LhsFlags & RowMajorBit,
RhsRowMajor = RhsFlags & RowMajorBit,
SameType = ei_is_same_type<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::ret,
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
&& (ColsAtCompileTime == Dynamic
|| ( (ColsAtCompileTime % ei_packet_traits<Scalar>::size) == 0
@@ -94,7 +96,8 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
| (EvalToRowMajor ? RowMajorBit : 0)
| NestingFlags
| (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0),
// TODO enable vectorization for mixed types
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
CoeffReadCost = InnerSize == Dynamic ? Dynamic
: InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
@@ -105,7 +108,8 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
* the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
*/
CanVectorizeInner = LhsRowMajor
CanVectorizeInner = SameType
&& LhsRowMajor
&& (!RhsRowMajor)
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
&& (LhsFlags & RhsFlags & AlignedBit)
@@ -195,7 +199,7 @@ class CoeffBasedProduct
}
// Implicit conversion to the nested type (trigger the evaluation of the product)
operator const PlainObject& () const
EIGEN_STRONG_INLINE operator const PlainObject& () const
{
m_result.lazyAssign(*this);
return m_result;
@@ -275,20 +279,20 @@ struct ei_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
*** Scalar path with inner vectorization ***
*******************************************/
template<int UnrollingIndex, typename Lhs, typename Rhs, typename PacketScalar>
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
struct ei_product_coeff_vectorized_unroller
{
typedef typename Lhs::Index Index;
enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size };
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
{
ei_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres);
ei_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
pres = ei_padd(pres, ei_pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
}
};
template<typename Lhs, typename Rhs, typename PacketScalar>
struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, PacketScalar>
template<typename Lhs, typename Rhs, typename Packet>
struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
@@ -300,13 +304,13 @@ struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, PacketScalar>
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::PacketScalar PacketScalar;
typedef typename Lhs::PacketScalar Packet;
typedef typename Lhs::Index Index;
enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size };
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
PacketScalar pres;
ei_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres);
Packet pres;
ei_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
ei_product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
res = ei_predux(pres);
}
@@ -318,7 +322,7 @@ struct ei_product_coeff_vectorized_dyn_selector
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
res = lhs.row(row).cwiseProduct(rhs.col(col)).sum();
res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
}
};
@@ -330,7 +334,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
res = lhs.cwiseProduct(rhs.col(col)).sum();
res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
}
};
@@ -340,7 +344,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
res = lhs.row(row).cwiseProduct(rhs).sum();
res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
}
};
@@ -350,7 +354,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
res = lhs.cwiseProduct(rhs).sum();
res = lhs.transpose().cwiseProduct(rhs).sum();
}
};
@@ -368,71 +372,71 @@ struct ei_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetSca
*** Packet path ***
*******************/
template<int UnrollingIndex, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
struct ei_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, PacketScalar, LoadMode>
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
ei_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res);
res = ei_pmadd(ei_pset1(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
ei_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
res = ei_pmadd(ei_pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
}
};
template<int UnrollingIndex, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
struct ei_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, PacketScalar, LoadMode>
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
ei_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res);
res = ei_pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), ei_pset1(rhs.coeff(UnrollingIndex, col)), res);
ei_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
res = ei_pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), ei_pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
}
};
template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
struct ei_product_packet_impl<RowMajor, 0, Lhs, Rhs, PacketScalar, LoadMode>
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
res = ei_pmul(ei_pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
}
};
template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
struct ei_product_packet_impl<ColMajor, 0, Lhs, Rhs, PacketScalar, LoadMode>
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1(rhs.coeff(0, col)));
res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1<Packet>(rhs.coeff(0, col)));
}
};
template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
struct ei_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, PacketScalar, LoadMode>
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
{
ei_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
res = ei_pmul(ei_pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
for(Index i = 1; i < lhs.cols(); ++i)
res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
res = ei_pmadd(ei_pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
}
};
template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
struct ei_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, PacketScalar, LoadMode>
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
{
ei_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1(rhs.coeff(0, col)));
res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1<Packet>(rhs.coeff(0, col)));
for(Index i = 1; i < lhs.cols(); ++i)
res = ei_pmadd(lhs.template packet<LoadMode>(row, i), ei_pset1(rhs.coeff(i, col)), res);
res = ei_pmadd(lhs.template packet<LoadMode>(row, i), ei_pset1<Packet>(rhs.coeff(i, col)), res);
}
};

File diff suppressed because it is too large Load Diff

View File

@@ -29,26 +29,25 @@ template<typename _LhsScalar, typename _RhsScalar> class ei_level3_blocking;
/* Specialization for a row-major destination matrix => simple transposition of the product */
template<
typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
struct ei_general_matrix_matrix_product<Scalar,Index,LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,RowMajor>
typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct ei_general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
{
typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(
Index rows, Index cols, Index depth,
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha,
ei_level3_blocking<Scalar,Scalar>& blocking,
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsStride,
ResScalar* res, Index resStride,
ResScalar alpha,
ei_level3_blocking<RhsScalar,LhsScalar>& blocking,
GemmParallelInfo<Index>* info = 0)
{
// transpose the product such that the result is column major
ei_general_matrix_matrix_product<Scalar, Index,
RhsStorageOrder==RowMajor ? ColMajor : RowMajor,
ConjugateRhs,
LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
ConjugateLhs,
ei_general_matrix_matrix_product<Index,
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
ColMajor>
::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking,info);
}
@@ -57,35 +56,32 @@ struct ei_general_matrix_matrix_product<Scalar,Index,LhsStorageOrder,ConjugateLh
/* Specialization for a col-major destination matrix
* => Blocking algorithm following Goto's paper */
template<
typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
struct ei_general_matrix_matrix_product<Scalar,Index,LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor>
typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct ei_general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
{
typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static void run(Index rows, Index cols, Index depth,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha,
ei_level3_blocking<Scalar,Scalar>& blocking,
const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsStride,
ResScalar* res, Index resStride,
ResScalar alpha,
ei_level3_blocking<LhsScalar,RhsScalar>& blocking,
GemmParallelInfo<Index>* info = 0)
{
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
ei_const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
ei_const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
if (ConjugateRhs)
alpha = ei_conj(alpha);
typedef typename ei_packet_traits<Scalar>::type PacketType;
typedef ei_product_blocking_traits<Scalar> Blocking;
typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits;
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = std::min(rows,blocking.mc()); // cache block size along the M direction
//Index nc = blocking.nc(); // cache block size along the N direction
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr, RhsStorageOrder> pack_rhs;
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr, LhsStorageOrder> pack_lhs;
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp;
ei_gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
ei_gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
ei_gebp_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
#ifdef EIGEN_HAS_OPENMP
if(info)
@@ -94,10 +90,10 @@ static void run(Index rows, Index cols, Index depth,
Index tid = omp_get_thread_num();
Index threads = omp_get_num_threads();
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeW = kc*Blocking::PacketSize*Blocking::nr*8;
Scalar* w = ei_aligned_stack_new(Scalar, sizeW);
Scalar* blockB = blocking.blockB();
LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, kc*mc);
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
RhsScalar* w = ei_aligned_stack_new(RhsScalar, sizeW);
RhsScalar* blockB = blocking.blockB();
ei_internal_assert(blockB!=0);
// For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
@@ -118,7 +114,7 @@ static void run(Index rows, Index cols, Index depth,
while(info[tid].users!=0) {}
info[tid].users += threads;
pack_rhs(blockB+info[tid].rhs_start*kc, &rhs(k,info[tid].rhs_start), rhsStride, alpha, actual_kc, info[tid].rhs_length);
pack_rhs(blockB+info[tid].rhs_start*kc, &rhs(k,info[tid].rhs_start), rhsStride, actual_kc, info[tid].rhs_length);
// Notify the other threads that the part B'_j is ready to go.
info[tid].sync = k;
@@ -134,7 +130,7 @@ static void run(Index rows, Index cols, Index depth,
if(shift>0)
while(info[j].sync!=k) {}
gebp(res+info[j].rhs_start*resStride, resStride, blockA, blockB+info[j].rhs_start*kc, mc, actual_kc, info[j].rhs_length, -1,-1,0,0, w);
gebp(res+info[j].rhs_start*resStride, resStride, blockA, blockB+info[j].rhs_start*kc, mc, actual_kc, info[j].rhs_length, alpha, -1,-1,0,0, w);
}
// Then keep going as usual with the remaining A'
@@ -146,7 +142,7 @@ static void run(Index rows, Index cols, Index depth,
pack_lhs(blockA, &lhs(i,k), lhsStride, actual_kc, actual_mc);
// C_i += A' * B'
gebp(res+i, resStride, blockA, blockB, actual_mc, actual_kc, cols, -1,-1,0,0, w);
gebp(res+i, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1,-1,0,0, w);
}
// Release all the sub blocks B'_j of B' for the current thread,
@@ -156,8 +152,8 @@ static void run(Index rows, Index cols, Index depth,
--(info[j].users);
}
ei_aligned_stack_delete(Scalar, blockA, kc*mc);
ei_aligned_stack_delete(Scalar, w, sizeW);
ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
ei_aligned_stack_delete(RhsScalar, w, sizeW);
}
else
#endif // EIGEN_HAS_OPENMP
@@ -167,10 +163,10 @@ static void run(Index rows, Index cols, Index depth,
// this is the sequential version!
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Blocking::PacketSize*Blocking::nr;
Scalar *blockA = blocking.blockA()==0 ? ei_aligned_stack_new(Scalar, sizeA) : blocking.blockA();
Scalar *blockB = blocking.blockB()==0 ? ei_aligned_stack_new(Scalar, sizeB) : blocking.blockB();
Scalar *blockW = blocking.blockW()==0 ? ei_aligned_stack_new(Scalar, sizeW) : blocking.blockW();
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
LhsScalar *blockA = blocking.blockA()==0 ? ei_aligned_stack_new(LhsScalar, sizeA) : blocking.blockA();
RhsScalar *blockB = blocking.blockB()==0 ? ei_aligned_stack_new(RhsScalar, sizeB) : blocking.blockB();
RhsScalar *blockW = blocking.blockW()==0 ? ei_aligned_stack_new(RhsScalar, sizeW) : blocking.blockW();
// For each horizontal panel of the rhs, and corresponding panel of the lhs...
// (==GEMM_VAR1)
@@ -182,7 +178,7 @@ static void run(Index rows, Index cols, Index depth,
// => Pack rhs's panel into a sequential chunk of memory (L2 caching)
// Note that this panel will be read as many times as the number of blocks in the lhs's
// vertical panel which is, in practice, a very low number.
pack_rhs(blockB, &rhs(k2,0), rhsStride, alpha, actual_kc, cols);
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
// For each mc x kc block of the lhs's vertical panel...
@@ -197,14 +193,14 @@ static void run(Index rows, Index cols, Index depth,
pack_lhs(blockA, &lhs(i2,k2), lhsStride, actual_kc, actual_mc);
// Everything is packed, we can now call the block * panel kernel:
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, -1, -1, 0, 0, blockW);
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
}
}
if(blocking.blockA()==0) ei_aligned_stack_delete(Scalar, blockA, kc*mc);
if(blocking.blockB()==0) ei_aligned_stack_delete(Scalar, blockB, sizeB);
if(blocking.blockW()==0) ei_aligned_stack_delete(Scalar, blockW, sizeW);
if(blocking.blockA()==0) ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
if(blocking.blockB()==0) ei_aligned_stack_delete(RhsScalar, blockB, sizeB);
if(blocking.blockW()==0) ei_aligned_stack_delete(RhsScalar, blockW, sizeW);
}
}
@@ -237,10 +233,10 @@ struct ei_gemm_functor
{
if(cols==-1)
cols = m_rhs.cols();
Gemm::run(rows, cols, m_lhs.cols(),
(const Scalar*)&(m_lhs.const_cast_derived().coeffRef(row,0)), m_lhs.outerStride(),
(const Scalar*)&(m_rhs.const_cast_derived().coeffRef(0,col)), m_rhs.outerStride(),
/*(const Scalar*)*/&(m_lhs.const_cast_derived().coeffRef(row,0)), m_lhs.outerStride(),
/*(const Scalar*)*/&(m_rhs.const_cast_derived().coeffRef(0,col)), m_rhs.outerStride(),
(Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
m_actualAlpha, m_blocking, info);
}
@@ -299,11 +295,11 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
};
typedef typename ei_meta_if<Transpose,_RhsScalar,_LhsScalar>::ret LhsScalar;
typedef typename ei_meta_if<Transpose,_LhsScalar,_RhsScalar>::ret RhsScalar;
typedef ei_product_blocking_traits<RhsScalar> Blocking;
typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits;
enum {
SizeA = ActualRows * MaxDepth,
SizeB = ActualCols * MaxDepth,
SizeW = MaxDepth * Blocking::nr * ei_packet_traits<RhsScalar>::size
SizeW = MaxDepth * Traits::WorkSpaceFactor
};
EIGEN_ALIGN16 LhsScalar m_staticA[SizeA];
@@ -339,7 +335,7 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
};
typedef typename ei_meta_if<Transpose,_RhsScalar,_LhsScalar>::ret LhsScalar;
typedef typename ei_meta_if<Transpose,_LhsScalar,_RhsScalar>::ret RhsScalar;
typedef ei_product_blocking_traits<RhsScalar> Blocking;
typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits;
DenseIndex m_sizeA;
DenseIndex m_sizeB;
@@ -356,7 +352,7 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
computeProductBlockingSizes<LhsScalar,RhsScalar>(this->m_kc, this->m_mc, this->m_nc);
m_sizeA = this->m_mc * this->m_kc;
m_sizeB = this->m_kc * this->m_nc;
m_sizeW = this->m_kc*ei_packet_traits<RhsScalar>::size*Blocking::nr;
m_sizeW = this->m_kc*Traits::WorkSpaceFactor;
}
void allocateA()
@@ -401,11 +397,15 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
};
public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef Scalar ResScalar;
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{
EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
typedef ei_scalar_product_op<LhsScalar,RhsScalar> BinOp;
EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
}
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
@@ -418,15 +418,15 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
typedef ei_gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
typedef ei_gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
typedef ei_gemm_functor<
Scalar, Index,
ei_general_matrix_matrix_product<
Scalar, Index,
(_ActualLhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
(_ActualRhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
Index,
LhsScalar, (_ActualLhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
RhsScalar, (_ActualRhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
_ActualLhsType, _ActualRhsType, Dest, BlockingType> GemmFunctor;

View File

@@ -30,52 +30,80 @@
* the number of load/stores of the result by a factor 4 and to reduce
* the instruction dependency. Moreover, we know that all bands have the
* same alignment pattern.
* TODO: since rhs gets evaluated only once, no need to evaluate it
*
* Mixing type logic: C += alpha * A * B
* | A | B |alpha| comments
* |real |cplx |cplx | no vectorization
* |real |cplx |real | alpha is converted to a cplx when calling the run function, no vectorization
* |cplx |real |cplx | invalid, the caller has to do tmp = A * B; C += alpha*tmp
* |cplx |real |real | optimal case, vectorization possible via real-cplx mul
*/
template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index, typename RhsType>
static EIGEN_DONT_INLINE
void ei_cache_friendly_product_colmajor_times_vector(
Index size,
const Scalar* lhs, Index lhsStride,
const RhsType& rhs,
Scalar* res,
Scalar alpha)
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct ei_general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
{
typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
Vectorizable = ei_packet_traits<LhsScalar>::Vectorizable && ei_packet_traits<RhsScalar>::Vectorizable
&& int(ei_packet_traits<LhsScalar>::size)==int(ei_packet_traits<RhsScalar>::size),
LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1,
RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1,
ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1
};
typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket;
typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket;
typedef typename ei_packet_traits<ResScalar>::type _ResPacket;
typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
EIGEN_DONT_INLINE static void run(
Index rows, Index cols,
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsIncr,
ResScalar* res, Index
#ifdef EIGEN_INTERNAL_DEBUGGING
resIncr
#endif
, RhsScalar alpha)
{
ei_internal_assert(resIncr==1);
#ifdef _EIGEN_ACCUMULATE_PACKETS
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
#endif
#define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) \
ei_pstore(&res[j], \
ei_padd(ei_pload(&res[j]), \
ei_padd(ei_pload<ResPacket>(&res[j]), \
ei_padd( \
ei_padd(cj.pmul(EIGEN_CAT(ei_ploa , A0)(&lhs0[j]), ptmp0), \
cj.pmul(EIGEN_CAT(ei_ploa , A13)(&lhs1[j]), ptmp1)), \
ei_padd(cj.pmul(EIGEN_CAT(ei_ploa , A2)(&lhs2[j]), ptmp2), \
cj.pmul(EIGEN_CAT(ei_ploa , A13)(&lhs3[j]), ptmp3)) )))
ei_padd(pcj.pmul(EIGEN_CAT(ei_ploa , A0)<LhsPacket>(&lhs0[j]), ptmp0), \
pcj.pmul(EIGEN_CAT(ei_ploa , A13)<LhsPacket>(&lhs1[j]), ptmp1)), \
ei_padd(pcj.pmul(EIGEN_CAT(ei_ploa , A2)<LhsPacket>(&lhs2[j]), ptmp2), \
pcj.pmul(EIGEN_CAT(ei_ploa , A13)<LhsPacket>(&lhs3[j]), ptmp3)) )))
ei_conj_helper<ConjugateLhs,ConjugateRhs> cj;
ei_conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
ei_conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
if(ConjugateRhs)
alpha = ei_conj(alpha);
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned };
const Index columnsAtOnce = 4;
const Index peels = 2;
const Index PacketAlignedMask = PacketSize-1;
const Index PeelAlignedMask = PacketSize*peels-1;
const Index LhsPacketAlignedMask = LhsPacketSize-1;
const Index ResPacketAlignedMask = ResPacketSize-1;
const Index PeelAlignedMask = ResPacketSize*peels-1;
const Index size = rows;
// How many coeffs of the result do we have to skip to be aligned.
// Here we assume data are at least aligned on the base scalar type.
Index alignedStart = ei_first_aligned(res,size);
Index alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0;
Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
const Index alignmentStep = PacketSize>1 ? (PacketSize - lhsStride % PacketSize) & PacketAlignedMask : 0;
const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
Index alignmentPattern = alignmentStep==0 ? AllAligned
: alignmentStep==(PacketSize/2) ? EvenAligned
: alignmentStep==(LhsPacketSize/2) ? EvenAligned
: FirstAligned;
// we cannot assume the first element is aligned because of sub-matrices
@@ -84,19 +112,19 @@ void ei_cache_friendly_product_colmajor_times_vector(
// find how many columns we have to skip to be aligned with the result (if possible)
Index skipColumns = 0;
// if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
if( (size_t(lhs)%sizeof(RealScalar)) || (size_t(res)%sizeof(RealScalar)) )
if( (size_t(lhs)%sizeof(LhsScalar)) || (size_t(res)%sizeof(ResScalar)) )
{
alignedSize = 0;
alignedStart = 0;
}
else if (PacketSize>1)
else if (LhsPacketSize>1)
{
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || size<PacketSize);
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize);
while (skipColumns<PacketSize &&
alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%PacketSize))
while (skipColumns<LhsPacketSize &&
alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%LhsPacketSize))
++skipColumns;
if (skipColumns==PacketSize)
if (skipColumns==LhsPacketSize)
{
// nothing can be aligned, no need to skip any column
alignmentPattern = NoneAligned;
@@ -104,30 +132,38 @@ void ei_cache_friendly_product_colmajor_times_vector(
}
else
{
skipColumns = std::min(skipColumns,rhs.size());
skipColumns = std::min(skipColumns,cols);
// note that the skipped columns are processed later.
}
ei_internal_assert( (alignmentPattern==NoneAligned)
|| (skipColumns + columnsAtOnce >= rhs.size())
|| PacketSize > size
|| (size_t(lhs+alignedStart+lhsStride*skipColumns)%sizeof(Packet))==0);
|| (skipColumns + columnsAtOnce >= cols)
|| LhsPacketSize > size
|| (size_t(lhs+alignedStart+lhsStride*skipColumns)%sizeof(LhsPacket))==0);
}
else if(Vectorizable)
{
alignedStart = 0;
alignedSize = size;
alignmentPattern = AllAligned;
}
Index offset1 = (FirstAligned && alignmentStep==1?3:1);
Index offset3 = (FirstAligned && alignmentStep==1?1:3);
Index columnBound = ((rhs.size()-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
{
Packet ptmp0 = ei_pset1(alpha*rhs[i]), ptmp1 = ei_pset1(alpha*rhs[i+offset1]),
ptmp2 = ei_pset1(alpha*rhs[i+2]), ptmp3 = ei_pset1(alpha*rhs[i+offset3]);
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]),
ptmp1 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset1)*rhsIncr]),
ptmp2 = ei_pset1<RhsPacket>(alpha*rhs[(i+2)*rhsIncr]),
ptmp3 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset3)*rhsIncr]);
// this helps a lot in generating better binary code
const Scalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
*lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride;
const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
*lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride;
if (PacketSize>1)
if (Vectorizable)
{
/* explicit vectorization */
// process initial unaligned coeffs
@@ -144,51 +180,52 @@ void ei_cache_friendly_product_colmajor_times_vector(
switch(alignmentPattern)
{
case AllAligned:
for (Index j = alignedStart; j<alignedSize; j+=PacketSize)
for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
_EIGEN_ACCUMULATE_PACKETS(d,d,d);
break;
case EvenAligned:
for (Index j = alignedStart; j<alignedSize; j+=PacketSize)
for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
_EIGEN_ACCUMULATE_PACKETS(d,du,d);
break;
case FirstAligned:
if(peels>1)
{
Packet A00, A01, A02, A03, A10, A11, A12, A13;
LhsPacket A00, A01, A02, A03, A10, A11, A12, A13;
ResPacket T0, T1;
A01 = ei_pload(&lhs1[alignedStart-1]);
A02 = ei_pload(&lhs2[alignedStart-2]);
A03 = ei_pload(&lhs3[alignedStart-3]);
A01 = ei_pload<LhsPacket>(&lhs1[alignedStart-1]);
A02 = ei_pload<LhsPacket>(&lhs2[alignedStart-2]);
A03 = ei_pload<LhsPacket>(&lhs3[alignedStart-3]);
for (Index j = alignedStart; j<peeledSize; j+=peels*PacketSize)
for (Index j = alignedStart; j<peeledSize; j+=peels*ResPacketSize)
{
A11 = ei_pload(&lhs1[j-1+PacketSize]); ei_palign<1>(A01,A11);
A12 = ei_pload(&lhs2[j-2+PacketSize]); ei_palign<2>(A02,A12);
A13 = ei_pload(&lhs3[j-3+PacketSize]); ei_palign<3>(A03,A13);
A11 = ei_pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); ei_palign<1>(A01,A11);
A12 = ei_pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); ei_palign<2>(A02,A12);
A13 = ei_pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); ei_palign<3>(A03,A13);
A00 = ei_pload (&lhs0[j]);
A10 = ei_pload (&lhs0[j+PacketSize]);
A00 = cj.pmadd(A00, ptmp0, ei_pload(&res[j]));
A10 = cj.pmadd(A10, ptmp0, ei_pload(&res[j+PacketSize]));
A00 = ei_pload<LhsPacket>(&lhs0[j]);
A10 = ei_pload<LhsPacket>(&lhs0[j+LhsPacketSize]);
T0 = pcj.pmadd(A00, ptmp0, ei_pload<ResPacket>(&res[j]));
T1 = pcj.pmadd(A10, ptmp0, ei_pload<ResPacket>(&res[j+ResPacketSize]));
A00 = cj.pmadd(A01, ptmp1, A00);
A01 = ei_pload(&lhs1[j-1+2*PacketSize]); ei_palign<1>(A11,A01);
A00 = cj.pmadd(A02, ptmp2, A00);
A02 = ei_pload(&lhs2[j-2+2*PacketSize]); ei_palign<2>(A12,A02);
A00 = cj.pmadd(A03, ptmp3, A00);
ei_pstore(&res[j],A00);
A03 = ei_pload(&lhs3[j-3+2*PacketSize]); ei_palign<3>(A13,A03);
A10 = cj.pmadd(A11, ptmp1, A10);
A10 = cj.pmadd(A12, ptmp2, A10);
A10 = cj.pmadd(A13, ptmp3, A10);
ei_pstore(&res[j+PacketSize],A10);
T0 = pcj.pmadd(A01, ptmp1, T0);
A01 = ei_pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); ei_palign<1>(A11,A01);
T0 = pcj.pmadd(A02, ptmp2, T0);
A02 = ei_pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); ei_palign<2>(A12,A02);
T0 = pcj.pmadd(A03, ptmp3, T0);
ei_pstore(&res[j],T0);
A03 = ei_pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); ei_palign<3>(A13,A03);
T1 = pcj.pmadd(A11, ptmp1, T1);
T1 = pcj.pmadd(A12, ptmp2, T1);
T1 = pcj.pmadd(A13, ptmp3, T1);
ei_pstore(&res[j+ResPacketSize],T1);
}
}
for (Index j = peeledSize; j<alignedSize; j+=PacketSize)
for (Index j = peeledSize; j<alignedSize; j+=ResPacketSize)
_EIGEN_ACCUMULATE_PACKETS(d,du,du);
break;
default:
for (Index j = alignedStart; j<alignedSize; j+=PacketSize)
for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
_EIGEN_ACCUMULATE_PACKETS(du,du,du);
break;
}
@@ -206,34 +243,33 @@ void ei_cache_friendly_product_colmajor_times_vector(
}
// process remaining first and last columns (at most columnsAtOnce-1)
Index end = rhs.size();
Index end = cols;
Index start = columnBound;
do
{
for (Index i=start; i<end; ++i)
for (Index k=start; k<end; ++k)
{
Packet ptmp0 = ei_pset1(alpha*rhs[i]);
const Scalar* lhs0 = lhs + i*lhsStride;
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[k*rhsIncr]);
const LhsScalar* lhs0 = lhs + k*lhsStride;
if (PacketSize>1)
if (Vectorizable)
{
/* explicit vectorization */
// process first unaligned result's coeffs
for (Index j=0; j<alignedStart; ++j)
res[j] += cj.pmul(lhs0[j], ei_pfirst(ptmp0));
// process aligned result's coeffs
if ((size_t(lhs0+alignedStart)%sizeof(Packet))==0)
for (Index j = alignedStart;j<alignedSize;j+=PacketSize)
ei_pstore(&res[j], cj.pmadd(ei_pload(&lhs0[j]), ptmp0, ei_pload(&res[j])));
if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0)
for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
ei_pstore(&res[i], pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[i]), ptmp0, ei_pload<ResPacket>(&res[i])));
else
for (Index j = alignedStart;j<alignedSize;j+=PacketSize)
ei_pstore(&res[j], cj.pmadd(ei_ploadu(&lhs0[j]), ptmp0, ei_pload(&res[j])));
for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
ei_pstore(&res[i], pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[i]), ptmp0, ei_pload<ResPacket>(&res[i])));
}
// process remaining scalars (or all if no explicit vectorization)
for (Index j=alignedSize; j<size; ++j)
res[j] += cj.pmul(lhs0[j], ei_pfirst(ptmp0));
for (Index i=alignedSize; i<size; ++i)
res[i] += cj.pmul(lhs0[i], ei_pfirst(ptmp0));
}
if (skipColumns)
{
@@ -243,73 +279,104 @@ void ei_cache_friendly_product_colmajor_times_vector(
}
else
break;
} while(PacketSize>1);
} while(Vectorizable);
#undef _EIGEN_ACCUMULATE_PACKETS
}
};
// TODO add peeling to mask unaligned load/stores
template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index, typename ResType>
static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsSize,
ResType& res,
Scalar alpha)
/* Optimized row-major matrix * vector product:
* This algorithm processes 4 rows at once, which both reduces
* the number of loads/stores of the result by a factor of 4 and reduces
* the instruction dependency. Moreover, we know that all bands have the
* same alignment pattern.
*
* Mixing type logic:
* - alpha is always a complex (or converted to a complex)
* - no vectorization
*/
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct ei_general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
{
typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
Vectorizable = ei_packet_traits<LhsScalar>::Vectorizable && ei_packet_traits<RhsScalar>::Vectorizable
&& int(ei_packet_traits<LhsScalar>::size)==int(ei_packet_traits<RhsScalar>::size),
LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1,
RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1,
ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1
};
typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket;
typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket;
typedef typename ei_packet_traits<ResScalar>::type _ResPacket;
typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
EIGEN_DONT_INLINE static void run(
Index rows, Index cols,
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsIncr,
ResScalar* res, Index resIncr,
ResScalar alpha)
{
EIGEN_UNUSED_VARIABLE(rhsIncr);
ei_internal_assert(rhsIncr==1);
#ifdef _EIGEN_ACCUMULATE_PACKETS
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
#endif
#define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) {\
Packet b = ei_pload(&rhs[j]); \
ptmp0 = cj.pmadd(EIGEN_CAT(ei_ploa,A0) (&lhs0[j]), b, ptmp0); \
ptmp1 = cj.pmadd(EIGEN_CAT(ei_ploa,A13)(&lhs1[j]), b, ptmp1); \
ptmp2 = cj.pmadd(EIGEN_CAT(ei_ploa,A2) (&lhs2[j]), b, ptmp2); \
ptmp3 = cj.pmadd(EIGEN_CAT(ei_ploa,A13)(&lhs3[j]), b, ptmp3); }
RhsPacket b = ei_pload<RhsPacket>(&rhs[j]); \
ptmp0 = pcj.pmadd(EIGEN_CAT(ei_ploa,A0) <LhsPacket>(&lhs0[j]), b, ptmp0); \
ptmp1 = pcj.pmadd(EIGEN_CAT(ei_ploa,A13)<LhsPacket>(&lhs1[j]), b, ptmp1); \
ptmp2 = pcj.pmadd(EIGEN_CAT(ei_ploa,A2) <LhsPacket>(&lhs2[j]), b, ptmp2); \
ptmp3 = pcj.pmadd(EIGEN_CAT(ei_ploa,A13)<LhsPacket>(&lhs3[j]), b, ptmp3); }
ei_conj_helper<ConjugateLhs,ConjugateRhs> cj;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
ei_conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
ei_conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 };
const Index rowsAtOnce = 4;
const Index peels = 2;
const Index PacketAlignedMask = PacketSize-1;
const Index PeelAlignedMask = PacketSize*peels-1;
const Index size = rhsSize;
const Index RhsPacketAlignedMask = RhsPacketSize-1;
const Index LhsPacketAlignedMask = LhsPacketSize-1;
const Index PeelAlignedMask = RhsPacketSize*peels-1;
const Index depth = cols;
// How many coeffs of the result do we have to skip to be aligned.
// Here we assume data are at least aligned on the base scalar type
// if that's not the case then vectorization is discarded, see below.
Index alignedStart = ei_first_aligned(rhs, size);
Index alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0;
Index alignedStart = ei_first_aligned(rhs, depth);
Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
const Index alignmentStep = PacketSize>1 ? (PacketSize - lhsStride % PacketSize) & PacketAlignedMask : 0;
const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
Index alignmentPattern = alignmentStep==0 ? AllAligned
: alignmentStep==(PacketSize/2) ? EvenAligned
: FirstAligned;
: alignmentStep==(LhsPacketSize/2) ? EvenAligned
: FirstAligned;
// we cannot assume the first element is aligned because of sub-matrices
const Index lhsAlignmentOffset = ei_first_aligned(lhs,size);
const Index lhsAlignmentOffset = ei_first_aligned(lhs,depth);
// find how many rows do we have to skip to be aligned with rhs (if possible)
Index skipRows = 0;
// if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
if( (size_t(lhs)%sizeof(RealScalar)) || (size_t(rhs)%sizeof(RealScalar)) )
if( (sizeof(LhsScalar)!=sizeof(RhsScalar)) || (size_t(lhs)%sizeof(LhsScalar)) || (size_t(rhs)%sizeof(RhsScalar)) )
{
alignedSize = 0;
alignedStart = 0;
}
else if (PacketSize>1)
else if (LhsPacketSize>1)
{
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || size<PacketSize);
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize);
while (skipRows<PacketSize &&
alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%PacketSize))
while (skipRows<LhsPacketSize &&
alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%LhsPacketSize))
++skipRows;
if (skipRows==PacketSize)
if (skipRows==LhsPacketSize)
{
// nothing can be aligned, no need to skip any row
alignmentPattern = NoneAligned;
@@ -317,38 +384,46 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
}
else
{
skipRows = std::min(skipRows,Index(res.size()));
skipRows = std::min(skipRows,Index(rows));
// note that the skipped rows are processed later.
}
ei_internal_assert( alignmentPattern==NoneAligned
|| PacketSize==1
|| (skipRows + rowsAtOnce >= res.size())
|| PacketSize > rhsSize
|| (size_t(lhs+alignedStart+lhsStride*skipRows)%sizeof(Packet))==0);
|| LhsPacketSize==1
|| (skipRows + rowsAtOnce >= rows)
|| LhsPacketSize > depth
|| (size_t(lhs+alignedStart+lhsStride*skipRows)%sizeof(LhsPacket))==0);
}
else if(Vectorizable)
{
alignedStart = 0;
alignedSize = depth;
alignmentPattern = AllAligned;
}
Index offset1 = (FirstAligned && alignmentStep==1?3:1);
Index offset3 = (FirstAligned && alignmentStep==1?1:3);
Index rowBound = ((res.size()-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
{
Scalar tmp0 = Scalar(0), tmp1 = Scalar(0), tmp2 = Scalar(0), tmp3 = Scalar(0);
EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0);
ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
// this helps the compiler generating good binary code
const Scalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
*lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride;
const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
*lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride;
if (PacketSize>1)
if (Vectorizable)
{
/* explicit vectorization */
Packet ptmp0 = ei_pset1(Scalar(0)), ptmp1 = ei_pset1(Scalar(0)), ptmp2 = ei_pset1(Scalar(0)), ptmp3 = ei_pset1(Scalar(0));
ResPacket ptmp0 = ei_pset1<ResPacket>(ResScalar(0)), ptmp1 = ei_pset1<ResPacket>(ResScalar(0)),
ptmp2 = ei_pset1<ResPacket>(ResScalar(0)), ptmp3 = ei_pset1<ResPacket>(ResScalar(0));
// process initial unaligned coeffs
// FIXME this loop get vectorized by the compiler !
for (Index j=0; j<alignedStart; ++j)
{
Scalar b = rhs[j];
RhsScalar b = rhs[j];
tmp0 += cj.pmul(lhs0[j],b); tmp1 += cj.pmul(lhs1[j],b);
tmp2 += cj.pmul(lhs2[j],b); tmp3 += cj.pmul(lhs3[j],b);
}
@@ -358,11 +433,11 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
switch(alignmentPattern)
{
case AllAligned:
for (Index j = alignedStart; j<alignedSize; j+=PacketSize)
for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
_EIGEN_ACCUMULATE_PACKETS(d,d,d);
break;
case EvenAligned:
for (Index j = alignedStart; j<alignedSize; j+=PacketSize)
for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
_EIGEN_ACCUMULATE_PACKETS(d,du,d);
break;
case FirstAligned:
@@ -374,38 +449,38 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
* overlapping the desired unaligned packet. This is *much* more efficient
* than basic unaligned loads.
*/
Packet A01, A02, A03, b, A11, A12, A13;
A01 = ei_pload(&lhs1[alignedStart-1]);
A02 = ei_pload(&lhs2[alignedStart-2]);
A03 = ei_pload(&lhs3[alignedStart-3]);
LhsPacket A01, A02, A03, A11, A12, A13;
A01 = ei_pload<LhsPacket>(&lhs1[alignedStart-1]);
A02 = ei_pload<LhsPacket>(&lhs2[alignedStart-2]);
A03 = ei_pload<LhsPacket>(&lhs3[alignedStart-3]);
for (Index j = alignedStart; j<peeledSize; j+=peels*PacketSize)
for (Index j = alignedStart; j<peeledSize; j+=peels*RhsPacketSize)
{
b = ei_pload(&rhs[j]);
A11 = ei_pload(&lhs1[j-1+PacketSize]); ei_palign<1>(A01,A11);
A12 = ei_pload(&lhs2[j-2+PacketSize]); ei_palign<2>(A02,A12);
A13 = ei_pload(&lhs3[j-3+PacketSize]); ei_palign<3>(A03,A13);
RhsPacket b = ei_pload<RhsPacket>(&rhs[j]);
A11 = ei_pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); ei_palign<1>(A01,A11);
A12 = ei_pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); ei_palign<2>(A02,A12);
A13 = ei_pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); ei_palign<3>(A03,A13);
ptmp0 = cj.pmadd(ei_pload (&lhs0[j]), b, ptmp0);
ptmp1 = cj.pmadd(A01, b, ptmp1);
A01 = ei_pload(&lhs1[j-1+2*PacketSize]); ei_palign<1>(A11,A01);
ptmp2 = cj.pmadd(A02, b, ptmp2);
A02 = ei_pload(&lhs2[j-2+2*PacketSize]); ei_palign<2>(A12,A02);
ptmp3 = cj.pmadd(A03, b, ptmp3);
A03 = ei_pload(&lhs3[j-3+2*PacketSize]); ei_palign<3>(A13,A03);
ptmp0 = pcj.pmadd(ei_pload<LhsPacket>(&lhs0[j]), b, ptmp0);
ptmp1 = pcj.pmadd(A01, b, ptmp1);
A01 = ei_pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); ei_palign<1>(A11,A01);
ptmp2 = pcj.pmadd(A02, b, ptmp2);
A02 = ei_pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); ei_palign<2>(A12,A02);
ptmp3 = pcj.pmadd(A03, b, ptmp3);
A03 = ei_pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); ei_palign<3>(A13,A03);
b = ei_pload(&rhs[j+PacketSize]);
ptmp0 = cj.pmadd(ei_pload (&lhs0[j+PacketSize]), b, ptmp0);
ptmp1 = cj.pmadd(A11, b, ptmp1);
ptmp2 = cj.pmadd(A12, b, ptmp2);
ptmp3 = cj.pmadd(A13, b, ptmp3);
b = ei_pload<RhsPacket>(&rhs[j+RhsPacketSize]);
ptmp0 = pcj.pmadd(ei_pload<LhsPacket>(&lhs0[j+LhsPacketSize]), b, ptmp0);
ptmp1 = pcj.pmadd(A11, b, ptmp1);
ptmp2 = pcj.pmadd(A12, b, ptmp2);
ptmp3 = pcj.pmadd(A13, b, ptmp3);
}
}
for (Index j = peeledSize; j<alignedSize; j+=PacketSize)
for (Index j = peeledSize; j<alignedSize; j+=RhsPacketSize)
_EIGEN_ACCUMULATE_PACKETS(d,du,du);
break;
default:
for (Index j = alignedStart; j<alignedSize; j+=PacketSize)
for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
_EIGEN_ACCUMULATE_PACKETS(du,du,du);
break;
}
@@ -418,25 +493,28 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
// process remaining coeffs (or all if no explicit vectorization)
// FIXME this loop get vectorized by the compiler !
for (Index j=alignedSize; j<size; ++j)
for (Index j=alignedSize; j<depth; ++j)
{
Scalar b = rhs[j];
RhsScalar b = rhs[j];
tmp0 += cj.pmul(lhs0[j],b); tmp1 += cj.pmul(lhs1[j],b);
tmp2 += cj.pmul(lhs2[j],b); tmp3 += cj.pmul(lhs3[j],b);
}
res[i] += alpha*tmp0; res[i+offset1] += alpha*tmp1; res[i+2] += alpha*tmp2; res[i+offset3] += alpha*tmp3;
res[i*resIncr] += alpha*tmp0;
res[(i+offset1)*resIncr] += alpha*tmp1;
res[(i+2)*resIncr] += alpha*tmp2;
res[(i+offset3)*resIncr] += alpha*tmp3;
}
// process remaining first and last rows (at most rowsAtOnce-1)
Index end = res.size();
Index end = rows;
Index start = rowBound;
do
{
for (Index i=start; i<end; ++i)
{
Scalar tmp0 = Scalar(0);
Packet ptmp0 = ei_pset1(tmp0);
const Scalar* lhs0 = lhs + i*lhsStride;
EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0);
ResPacket ptmp0 = ei_pset1<ResPacket>(tmp0);
const LhsScalar* lhs0 = lhs + i*lhsStride;
// process first unaligned result's coeffs
// FIXME this loop get vectorized by the compiler !
for (Index j=0; j<alignedStart; ++j)
@@ -445,20 +523,20 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
if (alignedSize>alignedStart)
{
// process aligned rhs coeffs
if ((size_t(lhs0+alignedStart)%sizeof(Packet))==0)
for (Index j = alignedStart;j<alignedSize;j+=PacketSize)
ptmp0 = cj.pmadd(ei_pload(&lhs0[j]), ei_pload(&rhs[j]), ptmp0);
if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0)
for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
ptmp0 = pcj.pmadd(ei_pload<LhsPacket>(&lhs0[j]), ei_pload<RhsPacket>(&rhs[j]), ptmp0);
else
for (Index j = alignedStart;j<alignedSize;j+=PacketSize)
ptmp0 = cj.pmadd(ei_ploadu(&lhs0[j]), ei_pload(&rhs[j]), ptmp0);
for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
ptmp0 = pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[j]), ei_pload<RhsPacket>(&rhs[j]), ptmp0);
tmp0 += ei_predux(ptmp0);
}
// process remaining scalars
// FIXME this loop get vectorized by the compiler !
for (Index j=alignedSize; j<size; ++j)
for (Index j=alignedSize; j<depth; ++j)
tmp0 += cj.pmul(lhs0[j], rhs[j]);
res[i] += alpha*tmp0;
res[i*resIncr] += alpha*tmp0;
}
if (skipRows)
{
@@ -468,9 +546,10 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
}
else
break;
} while(PacketSize>1);
} while(Vectorizable);
#undef _EIGEN_ACCUMULATE_PACKETS
}
};
#endif // EIGEN_GENERAL_MATRIX_VECTOR_H

View File

@@ -26,10 +26,9 @@
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
// pack a selfadjoint block diagonal for use with the gebp_kernel
template<typename Scalar, typename Index, int mr, int StorageOrder>
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
struct ei_symm_pack_lhs
{
enum { PacketSize = ei_packet_traits<Scalar>::size };
template<int BlockRows> inline
void pack(Scalar* blockA, const ei_const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
{
@@ -59,16 +58,16 @@ struct ei_symm_pack_lhs
{
ei_const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
Index count = 0;
Index peeled_mc = (rows/mr)*mr;
for(Index i=0; i<peeled_mc; i+=mr)
Index peeled_mc = (rows/Pack1)*Pack1;
for(Index i=0; i<peeled_mc; i+=Pack1)
{
pack<mr>(blockA, lhs, cols, i, count);
pack<Pack1>(blockA, lhs, cols, i, count);
}
if(rows-peeled_mc>=PacketSize)
if(rows-peeled_mc>=Pack2)
{
pack<PacketSize>(blockA, lhs, cols, peeled_mc, count);
peeled_mc += PacketSize;
pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
peeled_mc += Pack2;
}
// do the same with mr==1
@@ -89,7 +88,7 @@ template<typename Scalar, typename Index, int nr, int StorageOrder>
struct ei_symm_pack_rhs
{
enum { PacketSize = ei_packet_traits<Scalar>::size };
void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Scalar alpha, Index rows, Index cols, Index k2)
void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
{
Index end_k = k2 + rows;
Index count = 0;
@@ -101,12 +100,12 @@ struct ei_symm_pack_rhs
{
for(Index k=k2; k<end_k; k++)
{
blockB[count+0] = alpha*rhs(k,j2+0);
blockB[count+1] = alpha*rhs(k,j2+1);
blockB[count+0] = rhs(k,j2+0);
blockB[count+1] = rhs(k,j2+1);
if (nr==4)
{
blockB[count+2] = alpha*rhs(k,j2+2);
blockB[count+3] = alpha*rhs(k,j2+3);
blockB[count+2] = rhs(k,j2+2);
blockB[count+3] = rhs(k,j2+3);
}
count += nr;
}
@@ -119,12 +118,12 @@ struct ei_symm_pack_rhs
// transpose
for(Index k=k2; k<j2; k++)
{
blockB[count+0] = alpha*ei_conj(rhs(j2+0,k));
blockB[count+1] = alpha*ei_conj(rhs(j2+1,k));
blockB[count+0] = ei_conj(rhs(j2+0,k));
blockB[count+1] = ei_conj(rhs(j2+1,k));
if (nr==4)
{
blockB[count+2] = alpha*ei_conj(rhs(j2+2,k));
blockB[count+3] = alpha*ei_conj(rhs(j2+3,k));
blockB[count+2] = ei_conj(rhs(j2+2,k));
blockB[count+3] = ei_conj(rhs(j2+3,k));
}
count += nr;
}
@@ -134,25 +133,25 @@ struct ei_symm_pack_rhs
{
// normal
for (Index w=0 ; w<h; ++w)
blockB[count+w] = alpha*rhs(k,j2+w);
blockB[count+w] = rhs(k,j2+w);
blockB[count+h] = alpha*rhs(k,k);
blockB[count+h] = ei_real(rhs(k,k));
// transpose
for (Index w=h+1 ; w<nr; ++w)
blockB[count+w] = alpha*ei_conj(rhs(j2+w,k));
blockB[count+w] = ei_conj(rhs(j2+w,k));
count += nr;
++h;
}
// normal
for(Index k=j2+nr; k<end_k; k++)
{
blockB[count+0] = alpha*rhs(k,j2+0);
blockB[count+1] = alpha*rhs(k,j2+1);
blockB[count+0] = rhs(k,j2+0);
blockB[count+1] = rhs(k,j2+1);
if (nr==4)
{
blockB[count+2] = alpha*rhs(k,j2+2);
blockB[count+3] = alpha*rhs(k,j2+3);
blockB[count+2] = rhs(k,j2+2);
blockB[count+3] = rhs(k,j2+3);
}
count += nr;
}
@@ -163,12 +162,12 @@ struct ei_symm_pack_rhs
{
for(Index k=k2; k<end_k; k++)
{
blockB[count+0] = alpha*ei_conj(rhs(j2+0,k));
blockB[count+1] = alpha*ei_conj(rhs(j2+1,k));
blockB[count+0] = ei_conj(rhs(j2+0,k));
blockB[count+1] = ei_conj(rhs(j2+1,k));
if (nr==4)
{
blockB[count+2] = alpha*ei_conj(rhs(j2+2,k));
blockB[count+3] = alpha*ei_conj(rhs(j2+3,k));
blockB[count+2] = ei_conj(rhs(j2+2,k));
blockB[count+3] = ei_conj(rhs(j2+3,k));
}
count += nr;
}
@@ -181,13 +180,13 @@ struct ei_symm_pack_rhs
Index half = std::min(end_k,j2);
for(Index k=k2; k<half; k++)
{
blockB[count] = alpha*ei_conj(rhs(j2,k));
blockB[count] = ei_conj(rhs(j2,k));
count += 1;
}
if(half==j2 && half<k2+rows)
{
blockB[count] = alpha*ei_real(rhs(j2,j2));
blockB[count] = ei_real(rhs(j2,j2));
count += 1;
}
else
@@ -196,7 +195,7 @@ struct ei_symm_pack_rhs
// normal
for(Index k=half+1; k<k2+rows; k++)
{
blockB[count] = alpha*rhs(k,j2);
blockB[count] = rhs(k,j2);
count += 1;
}
}
@@ -253,12 +252,9 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
if (ConjugateRhs)
alpha = ei_conj(alpha);
typedef ei_gebp_traits<Scalar,Scalar> Traits;
typedef ei_product_blocking_traits<Scalar> Blocking;
Index kc = size; // cache block size along the K direction
Index kc = size; // cache block size along the K direction
Index mc = rows; // cache block size along the M direction
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
@@ -266,14 +262,15 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
kc = std::min(kc,mc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
Scalar* blockB = allocatedBlockB + sizeW;
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp_kernel;
ei_symm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder> pack_lhs;
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr,RhsStorageOrder> pack_rhs;
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
ei_symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
for(Index k2=0; k2<size; k2+=kc)
{
@@ -282,7 +279,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
// we have selected one row panel of rhs and one column panel of lhs
// pack rhs's panel into a sequential chunk of memory
// and expand each coeff to a constant packet for further reuse
pack_rhs(blockB, &rhs(k2,0), rhsStride, alpha, actual_kc, cols);
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
// the select lhs's panel has to be split in three different parts:
// 1 - the transposed panel above the diagonal block => transposed packed copy
@@ -294,7 +291,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
// transposed packed copy
pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
// the block diagonal
{
@@ -302,16 +299,16 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
// symmetric packed copy
pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols);
gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
for(Index i2=k2+kc; i2<size; i2+=mc)
{
const Index actual_mc = std::min(i2+mc,size)-i2;
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder,false>()
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
}
@@ -338,10 +335,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
if (ConjugateRhs)
alpha = ei_conj(alpha);
typedef ei_product_blocking_traits<Scalar> Blocking;
typedef ei_gebp_traits<Scalar,Scalar> Traits;
Index kc = size; // cache block size along the K direction
Index mc = rows; // cache block size along the M direction
@@ -349,19 +343,20 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
Scalar* blockB = allocatedBlockB + sizeW;
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp_kernel;
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder> pack_lhs;
ei_symm_pack_rhs<Scalar, Index, Blocking::nr,RhsStorageOrder> pack_rhs;
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
ei_symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
for(Index k2=0; k2<size; k2+=kc)
{
const Index actual_kc = std::min(k2+kc,size)-k2;
pack_rhs(blockB, _rhs, rhsStride, alpha, actual_kc, cols, k2);
pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
// => GEPP
for(Index i2=0; i2<rows; i2+=mc)
@@ -369,7 +364,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
const Index actual_mc = std::min(i2+mc,rows)-i2;
pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
}

View File

@@ -46,8 +46,11 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
FirstTriangular = IsRowMajor == IsLower
};
ei_conj_helper<NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0;
ei_conj_helper<NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
ei_conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0;
ei_conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
ei_conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0;
ei_conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;
Scalar cjAlpha = ConjugateRhs ? ei_conj(alpha) : alpha;
@@ -74,14 +77,14 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
Scalar t0 = cjAlpha * rhs[j];
Packet ptmp0 = ei_pset1(t0);
Packet ptmp0 = ei_pset1<Packet>(t0);
Scalar t1 = cjAlpha * rhs[j+1];
Packet ptmp1 = ei_pset1(t1);
Packet ptmp1 = ei_pset1<Packet>(t1);
Scalar t2 = 0;
Packet ptmp2 = ei_pset1(t2);
Packet ptmp2 = ei_pset1<Packet>(t2);
Scalar t3 = 0;
Packet ptmp3 = ei_pset1(t3);
Packet ptmp3 = ei_pset1<Packet>(t3);
size_t starti = FirstTriangular ? 0 : j+2;
size_t endi = FirstTriangular ? j : size;
@@ -116,14 +119,14 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
Scalar* EIGEN_RESTRICT resIt = res + alignedStart;
for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize)
{
Packet A0i = ei_ploadu(a0It); a0It += PacketSize;
Packet A1i = ei_ploadu(a1It); a1It += PacketSize;
Packet Bi = ei_ploadu(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases
Packet Xi = ei_pload (resIt);
Packet A0i = ei_ploadu<Packet>(a0It); a0It += PacketSize;
Packet A1i = ei_ploadu<Packet>(a1It); a1It += PacketSize;
Packet Bi = ei_ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases
Packet Xi = ei_pload <Packet>(resIt);
Xi = cj0.pmadd(A0i,ptmp0, cj0.pmadd(A1i,ptmp1,Xi));
ptmp2 = cj1.pmadd(A0i, Bi, ptmp2);
ptmp3 = cj1.pmadd(A1i, Bi, ptmp3);
Xi = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi));
ptmp2 = pcj1.pmadd(A0i, Bi, ptmp2);
ptmp3 = pcj1.pmadd(A1i, Bi, ptmp3);
ei_pstore(resIt,Xi); resIt += PacketSize;
}
for (size_t i=alignedEnd; i<endi; i++)
@@ -198,5 +201,47 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
}
};
// Traits for the SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
// specialization: nothing is added here, everything is inherited from the
// traits of the corresponding ProductBase.
template<typename Lhs, typename Rhs, int RhsMode>
struct ei_traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> >
: ei_traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> >
{};
// Partial specialization of SelfadjointProductMatrix for products where the
// self-adjoint mode is carried by the right-hand side (RhsMode) and the
// left-hand side has mode 0.
// NOTE(review): the `true` / `false` template arguments presumably mean
// "lhs is a vector" / "rhs is not a vector" -- confirm against the primary
// template declaration, which is not visible here.
//
// The product is evaluated by handing the operands to
// ei_product_selfadjoint_vector in swapped roles (see scaleAndAddTo).
template<typename Lhs, typename Rhs, int RhsMode>
struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
: public ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs >
{
EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
enum {
// Keep only the Upper/Lower bits of the rhs mode.
RhsUpLo = RhsMode&(Upper|Lower)
};
// Stores the two operands by forwarding them to the ProductBase constructor.
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
// Calls the selfadjoint-matrix * vector kernel on the transposed problem,
// with dst as the result buffer and alpha (times any scalar factors
// extracted from the operands) as the scale factor.
//
// Preconditions (asserted):
//  - dst has as many rows as the lhs and as many columns as the rhs;
//  - dst has an inner stride of 1 (other strides are "not implemented yet").
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
// Extract the actual operands through the BLAS traits helpers.
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
// Fold the scalar factors extracted from both operands into alpha.
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
ei_assert(dst.innerStride()==1 && "not implemented yet");
// transpose the product
// The rhs is passed in the kernel's "lhs" slot and the lhs in its "rhs"
// slot. To match, the storage-order argument is flipped relative to the
// rhs's RowMajorBit, the Upper/Lower argument is flipped relative to RhsUpLo,
// and the two NeedToConjugate flags are passed in swapped order.
ei_product_selfadjoint_vector<Scalar, Index, (ei_traits<_ActualRhsType>::Flags&RowMajorBit) ? ColMajor : RowMajor, int(RhsUpLo)==Upper ? Lower : Upper,
bool(RhsBlasTraits::NeedToConjugate), bool(LhsBlasTraits::NeedToConjugate)>
(
rhs.rows(), // size
&rhs.coeff(0,0), rhs.outerStride(), // lhs info
&lhs.coeff(0), lhs.innerStride(), // rhs info
&dst.coeffRef(0), // result info
actualAlpha // scale factor
);
}
};
#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H

View File

@@ -32,7 +32,7 @@
**********************************************************************/
// forward declarations (defined at the end of this file)
template<typename Scalar, typename Index, int mr, int nr, typename Conj, int UpLo>
template<typename Scalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
struct ei_sybb_kernel;
/* Optimized selfadjoint product (_SYRK) */
@@ -65,38 +65,42 @@ struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpL
{
ei_const_blas_data_mapper<Scalar, Index, MatStorageOrder> mat(_mat,matStride);
if(AAT)
alpha = ei_conj(alpha);
// if(AAT)
// alpha = ei_conj(alpha);
typedef ei_product_blocking_traits<Scalar> Blocking;
typedef ei_gebp_traits<Scalar,Scalar> Traits;
Index kc = depth; // cache block size along the K direction
Index mc = size; // cache block size along the M direction
Index nc = size; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
// !!! mc must be a multiple of nr:
if(mc>Blocking::nr)
mc = (mc/Blocking::nr)*Blocking::nr;
if(mc>Traits::nr)
mc = (mc/Traits::nr)*Traits::nr;
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*size;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*size;
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
Scalar* blockB = allocatedBlockB + sizeW;
// note that the actual rhs is the transpose/adjoint of mat
typedef ei_conj_helper<NumTraits<Scalar>::IsComplex && !AAT, NumTraits<Scalar>::IsComplex && AAT> Conj;
enum {
ConjLhs = NumTraits<Scalar>::IsComplex && !AAT,
ConjRhs = NumTraits<Scalar>::IsComplex && AAT
};
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, Conj> gebp_kernel;
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr,MatStorageOrder==RowMajor ? ColMajor : RowMajor> pack_rhs;
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,MatStorageOrder, false> pack_lhs;
ei_sybb_kernel<Scalar, Index, Blocking::mr, Blocking::nr, Conj, UpLo> sybb;
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs> gebp_kernel;
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,MatStorageOrder==RowMajor ? ColMajor : RowMajor> pack_rhs;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, MatStorageOrder, false> pack_lhs;
ei_sybb_kernel<Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs, UpLo> sybb;
for(Index k2=0; k2<depth; k2+=kc)
{
const Index actual_kc = std::min(k2+kc,depth)-k2;
// note that the actual rhs is the transpose/adjoint of mat
pack_rhs(blockB, &mat(0,k2), matStride, alpha, actual_kc, size);
pack_rhs(blockB, &mat(0,k2), matStride, actual_kc, size);
for(Index i2=0; i2<size; i2+=mc)
{
@@ -109,15 +113,15 @@ struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpL
// 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel
// 3 - after the diagonal => processed with gebp or skipped
if (UpLo==Lower)
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, std::min(size,i2),
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, std::min(size,i2), alpha,
-1, -1, 0, 0, allocatedBlockB);
sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, allocatedBlockB);
sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB);
if (UpLo==Upper)
{
Index j2 = i2+actual_mc;
gebp_kernel(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, std::max(Index(0),size-j2),
gebp_kernel(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, std::max(Index(0), size-j2), alpha,
-1, -1, 0, 0, allocatedBlockB);
}
}
@@ -163,16 +167,16 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
// while the selfadjoint block overlapping the diagonal is evaluated into a
// small temporary buffer which is then accumulated into the result using a
// triangular traversal.
template<typename Scalar, typename Index, int mr, int nr, typename Conj, int UpLo>
template<typename Scalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
struct ei_sybb_kernel
{
enum {
PacketSize = ei_packet_traits<Scalar>::size,
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
};
void operator()(Scalar* res, Index resStride, const Scalar* blockA, const Scalar* blockB, Index size, Index depth, Scalar* workspace)
void operator()(Scalar* res, Index resStride, const Scalar* blockA, const Scalar* blockB, Index size, Index depth, Scalar alpha, Scalar* workspace)
{
ei_gebp_kernel<Scalar, Index, mr, nr, Conj> gebp_kernel;
ei_gebp_kernel<Scalar, Scalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
Matrix<Scalar,BlockSize,BlockSize,ColMajor> buffer;
// let's process the block per panel of actual_mc x BlockSize,
@@ -183,14 +187,15 @@ struct ei_sybb_kernel
const Scalar* actual_b = blockB+j*depth;
if(UpLo==Upper)
gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, -1, -1, 0, 0, workspace);
gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha,
-1, -1, 0, 0, workspace);
// selfadjoint micro block
{
Index i = j;
buffer.setZero();
// 1 - apply the kernel on the temporary buffer
gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize,
gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha,
-1, -1, 0, 0, workspace);
// 2 - triangular accumulation
for(Index j1=0; j1<actualBlockSize; ++j1)
@@ -205,7 +210,7 @@ struct ei_sybb_kernel
if(UpLo==Lower)
{
Index i = j+actualBlockSize;
gebp_kernel(res+j*resStride+i, resStride, blockA+depth*i, actual_b, size-i, depth, actualBlockSize,
gebp_kernel(res+j*resStride+i, resStride, blockA+depth*i, actual_b, size-i, depth, actualBlockSize, alpha,
-1, -1, 0, 0, workspace);
}
}

View File

@@ -75,7 +75,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
Scalar alpha)
{
ei_product_triangular_matrix_matrix<Scalar, Index,
(Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),
(Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
(!LhsIsTriangular),
RhsStorageOrder==RowMajor ? ColMajor : RowMajor,
ConjugateRhs,
@@ -105,13 +105,11 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
if (ConjugateRhs)
alpha = ei_conj(alpha);
typedef ei_product_blocking_traits<Scalar> Blocking;
typedef ei_gebp_traits<Scalar,Scalar> Traits;
enum {
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Blocking::mr,Blocking::nr),
IsLower = (Mode&Lower) == Lower
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
IsLower = (Mode&Lower) == Lower,
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
};
Index kc = depth; // cache block size along the K direction
@@ -120,18 +118,21 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
// Scalar* allocatedBlockB = new Scalar[sizeB];
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
Scalar* blockB = allocatedBlockB + sizeW;
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
triangularBuffer.setZero();
triangularBuffer.diagonal().setOnes();
if((Mode&ZeroDiag)==ZeroDiag)
triangularBuffer.diagonal().setZero();
else
triangularBuffer.diagonal().setOnes();
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp_kernel;
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder> pack_lhs;
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr,RhsStorageOrder> pack_rhs;
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
for(Index k2=IsLower ? depth : 0;
IsLower ? k2>0 : k2<depth;
@@ -147,7 +148,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
k2 = k2+actual_kc-kc;
}
pack_rhs(blockB, &rhs(actual_k2,0), rhsStride, alpha, actual_kc, cols);
pack_rhs(blockB, &rhs(actual_k2,0), rhsStride, actual_kc, cols);
// the selected lhs's panel has to be split in three different parts:
// 1 - the part which is above the diagonal block => skip it
@@ -169,14 +170,14 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
// To this end we do an extra triangular copy to a small temporary buffer
for (Index k=0;k<actualPanelWidth;++k)
{
if (!(Mode&UnitDiag))
if (SetDiag)
triangularBuffer.coeffRef(k,k) = lhs(startBlock+k,startBlock+k);
for (Index i=IsLower ? k+1 : 0; IsLower ? i<actualPanelWidth : i<k; ++i)
triangularBuffer.coeffRef(i,k) = lhs(startBlock+i,startBlock+k);
}
pack_lhs(blockA, triangularBuffer.data(), triangularBuffer.outerStride(), actualPanelWidth, actualPanelWidth);
gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols,
gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols, alpha,
actualPanelWidth, actual_kc, 0, blockBOffset);
// GEBP with remaining micro panel
@@ -186,7 +187,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget);
gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols,
gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha,
actualPanelWidth, actual_kc, 0, blockBOffset);
}
}
@@ -198,10 +199,10 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
for(Index i2=start; i2<end; i2+=mc)
{
const Index actual_mc = std::min(i2+mc,end)-i2;
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder,false>()
ei_gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
}
}
@@ -231,13 +232,11 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
if (ConjugateRhs)
alpha = ei_conj(alpha);
typedef ei_product_blocking_traits<Scalar> Blocking;
typedef ei_gebp_traits<Scalar,Scalar> Traits;
enum {
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Blocking::mr,Blocking::nr),
IsLower = (Mode&Lower) == Lower
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
IsLower = (Mode&Lower) == Lower,
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
};
Index kc = depth; // cache block size along the K direction
@@ -246,18 +245,22 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar,sizeB);
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
Scalar* blockB = allocatedBlockB + sizeW;
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
triangularBuffer.setZero();
triangularBuffer.diagonal().setOnes();
if((Mode&ZeroDiag)==ZeroDiag)
triangularBuffer.diagonal().setZero();
else
triangularBuffer.diagonal().setOnes();
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp_kernel;
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder> pack_lhs;
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr,RhsStorageOrder> pack_rhs;
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr,RhsStorageOrder,true> pack_rhs_panel;
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
for(Index k2=IsLower ? 0 : depth;
IsLower ? k2<depth : k2>0;
@@ -280,7 +283,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Scalar* geb = blockB+ts*ts;
pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, alpha, actual_kc, rs);
pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, actual_kc, rs);
// pack the triangular part of the rhs padding the unrolled blocks with zeros
if(ts>0)
@@ -293,21 +296,21 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
// general part
pack_rhs_panel(blockB+j2*actual_kc,
&rhs(actual_k2+panelOffset, actual_j2), rhsStride, alpha,
&rhs(actual_k2+panelOffset, actual_j2), rhsStride,
panelLength, actualPanelWidth,
actual_kc, panelOffset);
// append the triangular part via a temporary buffer
for (Index j=0;j<actualPanelWidth;++j)
{
if (!(Mode&UnitDiag))
if (SetDiag)
triangularBuffer.coeffRef(j,j) = rhs(actual_j2+j,actual_j2+j);
for (Index k=IsLower ? j+1 : 0; IsLower ? k<actualPanelWidth : k<j; ++k)
triangularBuffer.coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j);
}
pack_rhs_panel(blockB+j2*actual_kc,
triangularBuffer.data(), triangularBuffer.outerStride(), alpha,
triangularBuffer.data(), triangularBuffer.outerStride(),
actualPanelWidth, actualPanelWidth,
actual_kc, j2);
}
@@ -330,6 +333,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
gebp_kernel(res+i2+(actual_k2+j2)*resStride, resStride,
blockA, blockB+j2*actual_kc,
actual_mc, panelLength, actualPanelWidth,
alpha,
actual_kc, actual_kc, // strides
blockOffset, blockOffset,// offsets
allocatedBlockB); // workspace
@@ -337,6 +341,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
}
gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride,
blockA, geb, actual_mc, actual_kc, rs,
alpha,
-1, -1, 0, 0, allocatedBlockB);
}
}

View File

@@ -76,12 +76,11 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co
if (r>0)
{
Index s = IsLower ? pi+actualPanelWidth : 0;
ei_cache_friendly_product_colmajor_times_vector<ConjLhs,ConjRhs>(
r,
ei_general_matrix_vector_product<Index,Scalar,ColMajor,ConjLhs,Scalar,ConjRhs>::run(
r, actualPanelWidth,
&(lhs.const_cast_derived().coeffRef(s,pi)), lhs.outerStride(),
rhs.segment(pi, actualPanelWidth),
&(res.coeffRef(s)),
alpha);
&rhs.coeff(pi), rhs.innerStride(),
&res.coeffRef(s), res.innerStride(), alpha);
}
}
}
@@ -119,11 +118,11 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co
if (r>0)
{
Index s = IsLower ? 0 : pi + actualPanelWidth;
Block<Result,Dynamic,1> target(res,pi,0,actualPanelWidth,1);
ei_cache_friendly_product_rowmajor_times_vector<ConjLhs,ConjRhs>(
ei_general_matrix_vector_product<Index,Scalar,RowMajor,ConjLhs,Scalar,ConjRhs>::run(
actualPanelWidth, r,
&(lhs.const_cast_derived().coeffRef(pi,s)), lhs.outerStride(),
&(rhs.const_cast_derived().coeffRef(s)), r,
target, alpha);
&(rhs.const_cast_derived().coeffRef(s)), 1,
&res.coeffRef(pi,0), res.innerStride(), alpha);
}
}
}

View File

@@ -57,9 +57,9 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora
ei_const_blas_data_mapper<Scalar, Index, TriStorageOrder> tri(_tri,triStride);
ei_blas_data_mapper<Scalar, Index, ColMajor> other(_other,otherStride);
typedef ei_product_blocking_traits<Scalar> Blocking;
typedef ei_gebp_traits<Scalar,Scalar> Traits;
enum {
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Blocking::mr,Blocking::nr),
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
IsLower = (Mode&Lower) == Lower
};
@@ -69,14 +69,15 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
Scalar* blockB = allocatedBlockB + sizeW;
ei_conj_if<Conjugate> conj;
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<Conjugate,false> > gebp_kernel;
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,TriStorageOrder> pack_lhs;
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr, ColMajor, true> pack_rhs;
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
ei_gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs;
for(Index k2=IsLower ? 0 : size;
IsLower ? k2<size : k2>0;
@@ -140,7 +141,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora
Index blockBOffset = IsLower ? k1 : lengthTarget;
// update the respective rows of B from other
pack_rhs(blockB, _other+startBlock, otherStride, -1, actualPanelWidth, cols, actual_kc, blockBOffset);
pack_rhs(blockB, _other+startBlock, otherStride, actualPanelWidth, cols, actual_kc, blockBOffset);
// GEBP
if (lengthTarget>0)
@@ -149,7 +150,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora
pack_lhs(blockA, &tri(startTarget,startBlock), triStride, actualPanelWidth, lengthTarget);
gebp_kernel(_other+startTarget, otherStride, blockA, blockB, lengthTarget, actualPanelWidth, cols,
gebp_kernel(_other+startTarget, otherStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, Scalar(-1),
actualPanelWidth, actual_kc, 0, blockBOffset);
}
}
@@ -166,7 +167,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora
{
pack_lhs(blockA, &tri(i2, IsLower ? k2 : k2-kc), triStride, actual_kc, actual_mc);
gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols);
gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1));
}
}
}
@@ -191,15 +192,15 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
ei_const_blas_data_mapper<Scalar, Index, TriStorageOrder> rhs(_tri,triStride);
ei_blas_data_mapper<Scalar, Index, ColMajor> lhs(_other,otherStride);
typedef ei_product_blocking_traits<Scalar> Blocking;
typedef ei_gebp_traits<Scalar,Scalar> Traits;
enum {
RhsStorageOrder = TriStorageOrder,
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Blocking::mr,Blocking::nr),
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
IsLower = (Mode&Lower) == Lower
};
// Index kc = std::min<Index>(Blocking::Max_kc/4,size); // cache block size along the K direction
// Index mc = std::min<Index>(Blocking::Max_mc,size); // cache block size along the M direction
// Index kc = std::min<Index>(Traits::Max_kc/4,size); // cache block size along the K direction
// Index mc = std::min<Index>(Traits::Max_mc,size); // cache block size along the M direction
// check that !!!!
Index kc = size; // cache block size along the K direction
Index mc = size; // cache block size along the M direction
@@ -207,15 +208,16 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*size;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*size;
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
Scalar* blockB = allocatedBlockB + sizeW;
ei_conj_if<Conjugate> conj;
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<false,Conjugate> > gebp_kernel;
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr,RhsStorageOrder> pack_rhs;
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr,RhsStorageOrder,true> pack_rhs_panel;
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr, ColMajor, false, true> pack_lhs_panel;
ei_gebp_kernel<Scalar,Scalar, Index, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel;
for(Index k2=IsLower ? size : 0;
IsLower ? k2>0 : k2<size;
@@ -228,7 +230,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
Index rs = IsLower ? actual_k2 : size - actual_k2 - actual_kc;
Scalar* geb = blockB+actual_kc*actual_kc;
if (rs>0) pack_rhs(geb, &rhs(actual_k2,startPanel), triStride, -1, actual_kc, rs);
if (rs>0) pack_rhs(geb, &rhs(actual_k2,startPanel), triStride, actual_kc, rs);
// triangular packing (we only pack the panels off the diagonal,
// neglecting the blocks overlapping the diagonal
@@ -242,7 +244,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
if (panelLength>0)
pack_rhs_panel(blockB+j2*actual_kc,
&rhs(actual_k2+panelOffset, actual_j2), triStride, -1,
&rhs(actual_k2+panelOffset, actual_j2), triStride,
panelLength, actualPanelWidth,
actual_kc, panelOffset);
}
@@ -273,6 +275,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
gebp_kernel(&lhs(i2,absolute_j2), otherStride,
blockA, blockB+j2*actual_kc,
actual_mc, panelLength, actualPanelWidth,
Scalar(-1),
actual_kc, actual_kc, // strides
panelOffset, panelOffset, // offsets
allocatedBlockB); // workspace
@@ -305,7 +308,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
if (rs>0)
gebp_kernel(_other+i2+startPanel*otherStride, otherStride, blockA, geb,
actual_mc, actual_kc, rs,
actual_mc, actual_kc, rs, Scalar(-1),
-1, -1, 0, 0, allocatedBlockB);
}
}

View File

@@ -29,70 +29,98 @@
// implement and control fast level 2 and level 3 BLAS-like routines.
// forward declarations
template<typename Scalar, typename Index, int mr, int nr, typename Conj>
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false>
struct ei_gebp_kernel;
template<typename Scalar, typename Index, int nr, int StorageOrder, bool PanelMode=false>
template<typename Scalar, typename Index, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
struct ei_gemm_pack_rhs;
template<typename Scalar, typename Index, int mr, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
struct ei_gemm_pack_lhs;
template<
typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs,
typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder>
struct ei_general_matrix_matrix_product;
template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index, typename RhsType>
static void ei_cache_friendly_product_colmajor_times_vector(
Index size, const Scalar* lhs, Index lhsStride, const RhsType& rhs, Scalar* res, Scalar alpha);
template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct ei_general_matrix_vector_product;
template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index, typename ResType>
static void ei_cache_friendly_product_rowmajor_times_vector(
const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsSize, ResType& res, Scalar alpha);
// Provides scalar/packet-wise product and product with accumulation
// with optional conjugation of the arguments.
template<bool ConjLhs, bool ConjRhs> struct ei_conj_helper;
template<bool Conjugate> struct ei_conj_if;
template<> struct ei_conj_helper<false,false>
{
template<> struct ei_conj_if<true> {
template<typename T>
EIGEN_STRONG_INLINE T pmadd(const T& x, const T& y, const T& c) const { return ei_pmadd(x,y,c); }
inline T operator()(const T& x) { return ei_conj(x); }
};
template<> struct ei_conj_if<false> {
template<typename T>
EIGEN_STRONG_INLINE T pmul(const T& x, const T& y) const { return ei_pmul(x,y); }
inline const T& operator()(const T& x) { return x; }
};
template<> struct ei_conj_helper<false,true>
template<typename Scalar> struct ei_conj_helper<Scalar,Scalar,false,false>
{
template<typename T> std::complex<T>
pmadd(const std::complex<T>& x, const std::complex<T>& y, const std::complex<T>& c) const
{ return c + pmul(x,y); }
template<typename T> std::complex<T> pmul(const std::complex<T>& x, const std::complex<T>& y) const
{ return std::complex<T>(ei_real(x)*ei_real(y) + ei_imag(x)*ei_imag(y), ei_imag(x)*ei_real(y) - ei_real(x)*ei_imag(y)); }
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return ei_pmadd(x,y,c); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return ei_pmul(x,y); }
};
template<> struct ei_conj_helper<true,false>
template<typename RealScalar> struct ei_conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, false,true>
{
template<typename T> std::complex<T>
pmadd(const std::complex<T>& x, const std::complex<T>& y, const std::complex<T>& c) const
typedef std::complex<RealScalar> Scalar;
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
{ return c + pmul(x,y); }
template<typename T> std::complex<T> pmul(const std::complex<T>& x, const std::complex<T>& y) const
{ return std::complex<T>(ei_real(x)*ei_real(y) + ei_imag(x)*ei_imag(y), ei_real(x)*ei_imag(y) - ei_imag(x)*ei_real(y)); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
{ return Scalar(ei_real(x)*ei_real(y) + ei_imag(x)*ei_imag(y), ei_imag(x)*ei_real(y) - ei_real(x)*ei_imag(y)); }
};
template<> struct ei_conj_helper<true,true>
template<typename RealScalar> struct ei_conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,false>
{
template<typename T> std::complex<T>
pmadd(const std::complex<T>& x, const std::complex<T>& y, const std::complex<T>& c) const
typedef std::complex<RealScalar> Scalar;
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
{ return c + pmul(x,y); }
template<typename T> std::complex<T> pmul(const std::complex<T>& x, const std::complex<T>& y) const
{ return std::complex<T>(ei_real(x)*ei_real(y) - ei_imag(x)*ei_imag(y), - ei_real(x)*ei_imag(y) - ei_imag(x)*ei_real(y)); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
{ return Scalar(ei_real(x)*ei_real(y) + ei_imag(x)*ei_imag(y), ei_real(x)*ei_imag(y) - ei_imag(x)*ei_real(y)); }
};
// Complex * complex product helper for the case where BOTH operands are
// conjugated: pmul(x,y) evaluates conj(x)*conj(y), and pmadd(x,y,c) adds that
// product to the accumulator c.
template<typename RealScalar> struct ei_conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,true>
{
  typedef std::complex<RealScalar> Scalar;

  // conj(x)*conj(y) == conj(x*y): the real part of x*y with a negated imaginary part.
  EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
  {
    return Scalar(ei_real(x)*ei_real(y) - ei_imag(x)*ei_imag(y),
                  - ei_real(x)*ei_imag(y) - ei_imag(x)*ei_real(y));
  }

  // Accumulating variant: returns c + conj(x)*conj(y).
  EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
  {
    const Scalar product = pmul(x,y);
    return c + product;
  }
};
// Mixed complex * real product helper. The complex left operand is passed
// through ei_conj_if<Conj> (so it is conjugated only when Conj is true); the
// real right operand is used as is.
template<typename RealScalar,bool Conj> struct ei_conj_helper<std::complex<RealScalar>, RealScalar, Conj,false>
{
  typedef std::complex<RealScalar> Scalar;

  // Returns (Conj ? conj(x) : x) * y.
  EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const RealScalar& y) const
  {
    ei_conj_if<Conj> maybe_conj;
    return maybe_conj(x)*y;
  }

  // Accumulating variant: adds the product above to c through ei_padd.
  EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const RealScalar& y, const Scalar& c) const
  {
    const Scalar product = pmul(x,y);
    return ei_padd(c, product);
  }
};
// Mixed real * complex product helper. The real left operand is used as is;
// the complex right operand is passed through ei_conj_if<Conj> (so it is
// conjugated only when Conj is true).
template<typename RealScalar,bool Conj> struct ei_conj_helper<RealScalar, std::complex<RealScalar>, false,Conj>
{
  typedef std::complex<RealScalar> Scalar;

  // Returns x * (Conj ? conj(y) : y).
  EIGEN_STRONG_INLINE Scalar pmul(const RealScalar& x, const Scalar& y) const
  {
    ei_conj_if<Conj> maybe_conj;
    return x*maybe_conj(y);
  }

  // Accumulating variant: adds the product above to c through ei_padd.
  EIGEN_STRONG_INLINE Scalar pmadd(const RealScalar& x, const Scalar& y, const Scalar& c) const
  {
    const Scalar product = pmul(x,y);
    return ei_padd(c, product);
  }
};
// Converts a factor of type From into type To.
// Generic case: relies on the implicit conversion From -> To.
template<typename From,typename To>
struct ei_get_factor
{
  EIGEN_STRONG_INLINE static To run(const From& x)
  {
    return x;
  }
};

// Case where the target type is the real type associated with Scalar
// (NumTraits<Scalar>::Real): the factor is obtained through ei_real(x).
template<typename Scalar>
struct ei_get_factor<Scalar,typename NumTraits<Scalar>::Real>
{
  EIGEN_STRONG_INLINE static typename NumTraits<Scalar>::Real run(const Scalar& x)
  {
    return ei_real(x);
  }
};
// Lightweight helper class to access matrix coefficients.
@@ -123,22 +151,6 @@ class ei_const_blas_data_mapper
Index m_stride;
};
// Defines various constant controlling register blocking for matrix-matrix algorithms.
template<typename Scalar>
struct ei_product_blocking_traits
{
typedef typename ei_packet_traits<Scalar>::type PacketType;
enum {
PacketSize = sizeof(PacketType)/sizeof(Scalar),
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
// register block size along the N direction (must be either 2 or 4)
nr = NumberOfRegisters/4,
// register block size along the M direction (currently, this one cannot be modified)
mr = 2 * PacketSize
};
};
/* Helper class to analyze the factors of a Product expression.
* In particular it allows to pop out operator-, scalar multiples,

View File

@@ -38,7 +38,7 @@ const int Dynamic = -1;
*/
const int Infinity = -1;
/** \defgroup flags flags
/** \defgroup flags Flags
* \ingroup Core_Module
*
* These are the possible bits which can be OR'ed to constitute the flags of a matrix or
@@ -139,6 +139,14 @@ const unsigned int DirectAccessBit = 0x20;
* means the first coefficient packet is guaranteed to be aligned */
const unsigned int AlignedBit = 0x40;
/** \ingroup flags
*
* Means the expression is writable. Note that DirectAccessBit implies LvalueBit.
* Internally, it is mainly used to enable the writable coeff accessors, and to make
* the read-only coeff accessors return by const reference.
*/
const unsigned int LvalueBit = 0x80;
const unsigned int NestByRefBit = 0x100;
// list of flags that are inherited by default
@@ -176,7 +184,9 @@ enum {
LinearVectorizedTraversal,
/** \internal Generic vectorization path using one vectorized loop per row/column with some
* scalar loops to handle the unaligned boundaries */
SliceVectorizedTraversal
SliceVectorizedTraversal,
/** \internal Special case to properly handle incompatible scalar types or other defective cases */
InvalidTraversal
};
enum {
@@ -199,15 +209,6 @@ enum {
OnTheRight = 2
};
// options for SVD decomposition
enum {
SkipU = 0x1,
SkipV = 0x2,
AtLeastAsManyRowsAsCols = 0x4,
AtLeastAsManyColsAsRows = 0x8,
Square = AtLeastAsManyRowsAsCols | AtLeastAsManyColsAsRows
};
/* the following could as well be written:
* enum NoChange_t { NoChange };
* but it feels dangerous to disambiguate overloaded functions on enum/integer types.
@@ -234,13 +235,19 @@ enum {
IsSparse
};
// Levels of coefficient access an expression can expose.
// Used as the Level template parameter of DenseCoeffsBase, whose default is
// derived from the expression flags: DirectAccessBit selects DirectAccessors,
// otherwise LvalueBit selects WriteAccessors, otherwise ReadOnlyAccessors.
enum AccessorLevels {
ReadOnlyAccessors, WriteAccessors, DirectAccessors
};
enum DecompositionOptions {
Pivoting = 0x01, // LDLT,
NoPivoting = 0x02, // LDLT,
ComputeU = 0x10, // SVD,
ComputeR = 0x20, // SVD,
ComputeFullU = 0x04, // SVD,
ComputeThinU = 0x08, // SVD,
ComputeFullV = 0x10, // SVD,
ComputeThinV = 0x20, // SVD,
EigenvaluesOnly = 0x40, // all eigen solvers
ComputeEigenvectors = 0x80, // all eigen solvers
ComputeEigenvectors = 0x80, // all eigen solvers
EigVecMask = EigenvaluesOnly | ComputeEigenvectors,
Ax_lBx = 0x100,
ABx_lx = 0x200,
@@ -248,6 +255,13 @@ enum DecompositionOptions {
GenEigMask = Ax_lBx | ABx_lx | BAx_lx
};
// Possible values for the QRPreconditioner template parameter of JacobiSVD
// (which defaults to ColPivHouseholderQRPreconditioner).
// NOTE(review): each value presumably names the QR decomposition applied as a
// preconditioning step (none, HouseholderQR, ColPivHouseholderQR,
// FullPivHouseholderQR) - confirm against the JacobiSVD implementation.
enum QRPreconditioners {
NoQRPreconditioner,
HouseholderQRPreconditioner,
ColPivHouseholderQRPreconditioner,
FullPivHouseholderQRPreconditioner
};
/** \brief Enum for reporting the status of a computation.
*/
enum ComputationInfo {

View File

@@ -36,7 +36,10 @@ template<typename Derived> struct ei_has_direct_access
template<typename Derived> struct EigenBase;
template<typename Derived> class DenseBase;
template<typename Derived, bool EnableDirectAccessAPI = ei_has_direct_access<Derived>::ret>
template<typename Derived,
AccessorLevels Level = (ei_traits<Derived>::Flags & DirectAccessBit) ? DirectAccessors
: (ei_traits<Derived>::Flags & LvalueBit) ? WriteAccessors
: ReadOnlyAccessors>
class DenseCoeffsBase;
template<typename _Scalar, int _Rows, int _Cols,
@@ -57,7 +60,7 @@ template<typename ExpressionType> class NestByValue;
template<typename ExpressionType> class ForceAlignedAccess;
template<typename ExpressionType> class SwapWrapper;
template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic,
template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false,
bool HasDirectAccess = ei_has_direct_access<XprType>::ret> class Block;
template<typename MatrixType, int Size=Dynamic> class VectorBlock;
@@ -67,7 +70,7 @@ template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
template<typename ViewOp, typename MatrixType> class CwiseUnaryView;
template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
template<typename BinOp, typename MatrixType> class SelfCwiseBinaryOp;
template<typename BinOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp;
template<typename Derived, typename Lhs, typename Rhs> class ProductBase;
template<typename Lhs, typename Rhs, int Mode> class GeneralProduct;
template<typename Lhs, typename Rhs, int NestingFlags> class CoeffBasedProduct;
@@ -106,9 +109,16 @@ template<typename Lhs, typename Rhs,
int ProductType = ei_product_type<Lhs,Rhs>::value>
struct ProductReturnType;
// this is a workaround for sun CC
template<typename Lhs, typename Rhs> struct LazyProductReturnType;
// Provides scalar/packet-wise product and product with accumulation
// with optional conjugation of the arguments.
template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRhs=false> struct ei_conj_helper;
template<typename Scalar> struct ei_scalar_sum_op;
template<typename Scalar> struct ei_scalar_difference_op;
template<typename Scalar> struct ei_scalar_product_op;
template<typename Scalar> struct ei_scalar_conj_product_op;
template<typename Scalar> struct ei_scalar_quotient_op;
template<typename Scalar> struct ei_scalar_opposite_op;
template<typename Scalar> struct ei_scalar_conjugate_op;
@@ -135,7 +145,8 @@ template<typename Scalar> struct ei_scalar_add_op;
template<typename Scalar> struct ei_scalar_constant_op;
template<typename Scalar> struct ei_scalar_identity_op;
template<typename Scalar1,typename Scalar2> struct ei_scalar_multiple2_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct ei_scalar_product_op;
template<typename LhsScalar,typename RhsScalar> struct ei_scalar_multiple2_op;
struct IOFormat;
@@ -158,8 +169,7 @@ template<typename MatrixType> struct ei_inverse_impl;
template<typename MatrixType> class HouseholderQR;
template<typename MatrixType> class ColPivHouseholderQR;
template<typename MatrixType> class FullPivHouseholderQR;
template<typename MatrixType> class SVD;
template<typename MatrixType, unsigned int Options = 0> class JacobiSVD;
template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD;
template<typename MatrixType, int UpLo = Lower> class LLT;
template<typename MatrixType, int UpLo = Lower> class LDLT;
template<typename VectorsType, typename CoeffsType, int Side=OnTheLeft> class HouseholderSequence;
@@ -172,7 +182,7 @@ template<typename Derived> class QuaternionBase;
template<typename Scalar> class Quaternion;
template<typename Scalar> class Rotation2D;
template<typename Scalar> class AngleAxis;
template<typename Scalar,int Dim,int Mode=Affine> class Transform;
template<typename Scalar,int Dim,int Mode> class Transform;
template <typename _Scalar, int _AmbientDim> class ParametrizedLine;
template <typename _Scalar, int _AmbientDim> class Hyperplane;
template<typename Scalar,int Dim> class Translation;

View File

@@ -27,7 +27,7 @@
#define EIGEN_MACROS_H
#define EIGEN_WORLD_VERSION 2
#define EIGEN_MAJOR_VERSION 91
#define EIGEN_MAJOR_VERSION 92
#define EIGEN_MINOR_VERSION 0
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
@@ -109,6 +109,19 @@
#define EIGEN_DEBUG_VAR(x) std::cerr << #x << " = " << x << std::endl;
// The block below is only compiled when EIGEN_PARSED_BY_DOXYGEN is defined;
// presumably it exists so that the \def documentation has a definition to attach to.
#ifdef EIGEN_PARSED_BY_DOXYGEN
/** \def EIGEN_NO_DEBUG
* \ingroup Core_Module
* \brief If defined, Eigen's assertions are disabled.
* \details Disabling run-time assertions improves the performance, but it is dangerous because the
* assertions guard against programming errors. By default, the EIGEN_NO_DEBUG macro is not defined and
* Eigen's run-time assertions are thus enabled. However, if the NDEBUG macro is defined (this is a
* standard C++ macro which disables all asserts), then the EIGEN_NO_DEBUG macro will also be defined, and
* so Eigen's assertions will also be disabled.
*/
#define EIGEN_NO_DEBUG
#endif
#ifdef NDEBUG
# ifndef EIGEN_NO_DEBUG
# define EIGEN_NO_DEBUG
@@ -147,6 +160,12 @@
#define EIGEN_ALWAYS_INLINE_ATTRIB
#endif
// EIGEN_FLATTEN_ATTRIB expands to GCC's `flatten` function attribute when
// EIGEN_GNUC_AT_LEAST(4,1) holds, and to nothing otherwise, so that uses of
// the macro still compile when the attribute is not available.
#if EIGEN_GNUC_AT_LEAST(4,1)
#define EIGEN_FLATTEN_ATTRIB __attribute__((flatten))
#else
#define EIGEN_FLATTEN_ATTRIB
#endif
// EIGEN_FORCE_INLINE means "inline as much as possible"
#if (defined _MSC_VER) || (defined __intel_compiler)
#define EIGEN_STRONG_INLINE __forceinline
@@ -343,7 +362,7 @@
#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,FUNCTOR) \
template<typename OtherDerived> \
inline const CwiseBinaryOp<FUNCTOR<Scalar>, Derived, OtherDerived> \
EIGEN_STRONG_INLINE const CwiseBinaryOp<FUNCTOR<Scalar>, Derived, OtherDerived> \
METHOD(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
{ \
return CwiseBinaryOp<FUNCTOR<Scalar>, Derived, OtherDerived>(derived(), other.derived()); \
@@ -353,10 +372,8 @@
#define EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS) \
CwiseBinaryOp< \
ei_scalar_product_op< \
typename ei_scalar_product_traits< \
typename ei_traits<LHS>::Scalar, \
typename ei_traits<RHS>::Scalar \
>::ReturnType \
>, \
LHS, \
RHS \

View File

@@ -146,7 +146,9 @@ inline void* ei_generic_aligned_realloc(void* ptr, size_t size, size_t old_size)
void* newptr = ei_aligned_malloc(size);
if (newptr == 0)
{
#ifdef EIGEN_HAS_ERRNO
errno = ENOMEM; // according to the standard
#endif
return 0;
}
@@ -315,7 +317,8 @@ template<typename T> inline T* ei_construct_elements_of_array(T *ptr, size_t siz
template<typename T> inline void ei_destruct_elements_of_array(T *ptr, size_t size)
{
// always destruct an array starting from the end.
while(size) ptr[--size].~T();
if(ptr)
while(size) ptr[--size].~T();
}
/*****************************************************************************
@@ -495,6 +498,7 @@ inline static Index ei_first_aligned(const Scalar* array, Index size)
/****************************************************************************/
/** \class aligned_allocator
* \ingroup Core_Module
*
* \brief STL compatible allocator to use with 16 byte aligned types
*
@@ -588,17 +592,17 @@ public:
//---------- Cache sizes ----------
#if defined(__GNUC__)
# if defined(__PIC__) && defined(__i386__)
# if defined(__PIC__) && defined(__i386__)
# define EIGEN_CPUID(abcd,func,id) \
__asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
# elif !defined(__arm__) && !defined(__powerpc__)
# elif !defined(__arm__) && !defined(__powerpc__)
# define EIGEN_CPUID(abcd,func,id) \
__asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
# endif
# endif
#elif defined(_MSC_VER)
#if (_MSC_VER > 1500) /* newer than MSVC++ 9.0 */ || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729) /* MSVC++ 9.0 with SP1*/
# if (_MSC_VER > 1500) /* newer than MSVC++ 9.0 */ || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729) /* MSVC++ 9.0 with SP1*/
# define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
#endif
# endif
#endif
#ifdef EIGEN_CPUID

View File

@@ -205,10 +205,10 @@ template<typename T> struct ei_scalar_product_traits<std::complex<T>, T>
};
// FIXME quick workaround around current limitation of ei_result_of
template<typename Scalar, typename ArgType0, typename ArgType1>
struct ei_result_of<ei_scalar_product_op<Scalar>(ArgType0,ArgType1)> {
typedef typename ei_scalar_product_traits<typename ei_cleantype<ArgType0>::type, typename ei_cleantype<ArgType1>::type>::ReturnType type;
};
// template<typename Scalar, typename ArgType0, typename ArgType1>
// struct ei_result_of<ei_scalar_product_op<Scalar>(ArgType0,ArgType1)> {
// typedef typename ei_scalar_product_traits<typename ei_cleantype<ArgType0>::type, typename ei_cleantype<ArgType1>::type>::ReturnType type;
// };
template<typename T> struct ei_is_diagonal
{ enum { ret = false }; };
@@ -222,16 +222,4 @@ template<typename T> struct ei_is_diagonal<DiagonalWrapper<T> >
template<typename T, int S> struct ei_is_diagonal<DiagonalMatrix<T,S> >
{ enum { ret = true }; };
template<bool Conjugate> struct ei_conj_if;
template<> struct ei_conj_if<true> {
template<typename T>
inline T operator()(const T& x) { return ei_conj(x); }
};
template<> struct ei_conj_if<false> {
template<typename T>
inline const T& operator()(const T& x) { return x; }
};
#endif // EIGEN_META_H

View File

@@ -60,7 +60,9 @@
YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES,
THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE,
THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE,
THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE,
YOU_MADE_A_PROGRAMMING_MISTAKE,
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT,
EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE,
YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR,
YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR,
@@ -85,7 +87,10 @@
YOU_ALREADY_SPECIFIED_THIS_STRIDE,
INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION,
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD,
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1,
THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS,
YOU_CANNOT_MIX_ARRAYS_AND_MATRICES,
YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION
};
};
@@ -95,12 +100,12 @@
#ifdef _MSC_VER
#define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
{Eigen::ei_static_assert<CONDITION ? true : false>::MSG;}
{Eigen::ei_static_assert<(CONDITION)>::MSG;}
#else
#define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
if (Eigen::ei_static_assert<CONDITION ? true : false>::MSG) {}
if (Eigen::ei_static_assert<(CONDITION)>::MSG) {}
#endif

View File

@@ -119,21 +119,11 @@ class ei_compute_matrix_flags
enum {
row_major_bit = Options&RowMajor ? RowMajorBit : 0,
is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,
#if EIGEN_ALIGN_STATICALLY
is_fixed_size_aligned
= (!is_dynamic_size_storage) && (((MaxCols*MaxRows) % ei_packet_traits<Scalar>::size) == 0),
#else
is_fixed_size_aligned = 0,
#endif
#if EIGEN_ALIGN
is_dynamic_size_aligned = is_dynamic_size_storage,
#else
is_dynamic_size_aligned = 0,
#endif
aligned_bit =
(
((Options&DontAlign)==0)
((Options&DontAlign)==0)
&& ei_packet_traits<Scalar>::Vectorizable
&& (
#if EIGEN_ALIGN_STATICALLY
((!is_dynamic_size_storage) && (((MaxCols*MaxRows) % ei_packet_traits<Scalar>::size) == 0))
@@ -151,11 +141,11 @@ class ei_compute_matrix_flags
)
) ? AlignedBit : 0,
packet_access_bit = ei_packet_traits<Scalar>::size > 1 && aligned_bit ? PacketAccessBit : 0
packet_access_bit = ei_packet_traits<Scalar>::Vectorizable && aligned_bit ? PacketAccessBit : 0
};
public:
enum { ret = LinearAccessBit | DirectAccessBit | NestByRefBit | packet_access_bit | row_major_bit | aligned_bit };
enum { ret = LinearAccessBit | LvalueBit | DirectAccessBit | NestByRefBit | packet_access_bit | row_major_bit | aligned_bit };
};
template<int _Rows, int _Cols> struct ei_size_at_compile_time
@@ -355,7 +345,7 @@ template<typename T, int n=1, typename PlainObject = typename ei_eval<T>::type>
template<unsigned int Flags> struct ei_are_flags_consistent
{
enum { ret = true };
enum { ret = EIGEN_IMPLIES(bool(Flags&DirectAccessBit), bool(Flags&LvalueBit)) };
};
template<typename Derived, typename XprKind = typename ei_traits<Derived>::XprKind>

View File

@@ -0,0 +1,6 @@
# Collect every header (*.h) of this directory and install it under
# Eigen/src/Eigen2Support in the include directory, as part of the Devel component.
FILE(GLOB Eigen_Eigen2Support_SRCS "*.h")
INSTALL(FILES
${Eigen_Eigen2Support_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Eigen2Support COMPONENT Devel
)

View File

@@ -291,8 +291,8 @@ void ComplexEigenSolver<MatrixType>::doComputeEigenvectors(RealScalar matrixnorm
ComplexScalar z = m_schur.matrixT().coeff(i,i) - m_schur.matrixT().coeff(k,k);
if(z==ComplexScalar(0))
{
// If the i-th and k-th eigenvalue are equal, then z equals 0.
// Use a small value instead, to prevent division by zero.
// If the i-th and k-th eigenvalue are equal, then z equals 0.
// Use a small value instead, to prevent division by zero.
ei_real_ref(z) = NumTraits<RealScalar>::epsilon() * matrixnorm;
}
m_matX.coeffRef(i,k) = m_matX.coeff(i,k) / z;

View File

@@ -130,7 +130,7 @@ template<typename _MatrixType> class HessenbergDecomposition
{
if(matrix.rows()<2)
{
m_isInitialized = true;
m_isInitialized = true;
return;
}
m_hCoeffs.resize(matrix.rows()-1,1);
@@ -160,7 +160,7 @@ template<typename _MatrixType> class HessenbergDecomposition
m_matrix = matrix;
if(matrix.rows()<2)
{
m_isInitialized = true;
m_isInitialized = true;
return *this;
}
m_hCoeffs.resize(matrix.rows()-1,1);
@@ -360,7 +360,7 @@ template<typename MatrixType> struct HessenbergDecompositionMatrixHReturnType
result = m_hess.packedMatrix();
Index n = result.rows();
if (n>2)
result.bottomLeftCorner(n-2, n-2).template triangularView<Lower>().setZero();
result.bottomLeftCorner(n-2, n-2).template triangularView<Lower>().setZero();
}
Index rows() const { return m_hess.packedMatrix().rows(); }

View File

@@ -384,7 +384,9 @@ void ei_tridiagonalization_inplace(MatrixType& matA, CoeffVectorType& hCoeffs)
}
// forward declaration, implementation at the end of this file
template<typename MatrixType, int Size=MatrixType::ColsAtCompileTime>
// Size defaults to the compile-time number of columns and IsComplex to the
// NumTraits flag of the scalar type, so that specializations can be selected
// on the matrix size and on whether the scalar is real or complex.
template<typename MatrixType,
int Size=MatrixType::ColsAtCompileTime,
bool IsComplex=NumTraits<typename MatrixType::Scalar>::IsComplex>
struct ei_tridiagonalization_inplace_selector;
/** \brief Performs a full tridiagonalization in place
@@ -431,15 +433,15 @@ template<typename MatrixType, typename DiagonalType, typename SubDiagonalType>
void ei_tridiagonalization_inplace(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, bool extractQ)
{
typedef typename MatrixType::Index Index;
Index n = mat.rows();
ei_assert(mat.cols()==n && diag.size()==n && subdiag.size()==n-1);
//Index n = mat.rows();
ei_assert(mat.cols()==mat.rows() && diag.size()==mat.rows() && subdiag.size()==mat.rows()-1);
ei_tridiagonalization_inplace_selector<MatrixType>::run(mat, diag, subdiag, extractQ);
}
/** \internal
* General full tridiagonalization
*/
template<typename MatrixType, int Size>
template<typename MatrixType, int Size, bool IsComplex>
struct ei_tridiagonalization_inplace_selector
{
typedef typename Tridiagonalization<MatrixType>::CoeffVectorType CoeffVectorType;
@@ -458,11 +460,11 @@ struct ei_tridiagonalization_inplace_selector
};
/** \internal
* Specialization for 3x3 matrices.
* Specialization for 3x3 real matrices.
* Especially useful for plane fitting.
*/
template<typename MatrixType>
struct ei_tridiagonalization_inplace_selector<MatrixType,3>
struct ei_tridiagonalization_inplace_selector<MatrixType,3,false>
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
@@ -470,14 +472,14 @@ struct ei_tridiagonalization_inplace_selector<MatrixType,3>
template<typename DiagonalType, typename SubDiagonalType>
static void run(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, bool extractQ)
{
diag[0] = ei_real(mat(0,0));
diag[0] = mat(0,0);
RealScalar v1norm2 = ei_abs2(mat(2,0));
if (ei_isMuchSmallerThan(v1norm2, RealScalar(1)))
if(v1norm2 == RealScalar(0))
{
diag[1] = ei_real(mat(1,1));
diag[2] = ei_real(mat(2,2));
subdiag[0] = ei_real(mat(1,0));
subdiag[1] = ei_real(mat(2,1));
diag[1] = mat(1,1);
diag[2] = mat(2,2);
subdiag[0] = mat(1,0);
subdiag[1] = mat(2,1);
if (extractQ)
mat.setIdentity();
}
@@ -485,18 +487,18 @@ struct ei_tridiagonalization_inplace_selector<MatrixType,3>
{
RealScalar beta = ei_sqrt(ei_abs2(mat(1,0)) + v1norm2);
RealScalar invBeta = RealScalar(1)/beta;
Scalar m01 = ei_conj(mat(1,0)) * invBeta;
Scalar m02 = ei_conj(mat(2,0)) * invBeta;
Scalar q = RealScalar(2)*m01*ei_conj(mat(2,1)) + m02*(mat(2,2) - mat(1,1));
diag[1] = ei_real(mat(1,1) + m02*q);
diag[2] = ei_real(mat(2,2) - m02*q);
Scalar m01 = mat(1,0) * invBeta;
Scalar m02 = mat(2,0) * invBeta;
Scalar q = RealScalar(2)*m01*mat(2,1) + m02*(mat(2,2) - mat(1,1));
diag[1] = mat(1,1) + m02*q;
diag[2] = mat(2,2) - m02*q;
subdiag[0] = beta;
subdiag[1] = ei_real(ei_conj(mat(2,1)) - m01 * q);
subdiag[1] = mat(2,1) - m01 * q;
if (extractQ)
{
mat << 1, 0, 0,
0, m01, m02,
0, m02, -m01;
0, m01, m02,
0, m02, -m01;
}
}
}
@@ -505,8 +507,8 @@ struct ei_tridiagonalization_inplace_selector<MatrixType,3>
/** \internal
* Trivial specialization for 1x1 matrices
*/
template<typename MatrixType>
struct ei_tridiagonalization_inplace_selector<MatrixType,1>
template<typename MatrixType, bool IsComplex>
struct ei_tridiagonalization_inplace_selector<MatrixType,1,IsComplex>
{
typedef typename MatrixType::Scalar Scalar;

View File

@@ -142,6 +142,8 @@ public:
m_angle = Scalar(other.angle());
}
/** \returns the identity rotation, represented as a zero angle about the X axis.
* The angle is built as Scalar(0) so that no implicit int-to-Scalar conversion is needed. */
inline static const AngleAxis Identity() { return AngleAxis(Scalar(0), Vector3::UnitX()); }
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
* determined by \a prec.
*

View File

@@ -135,7 +135,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
*
* \return an expression of the equivalent homogeneous vector
*
* \vectoronly
* \only_for_vectors
*
* Example: \include MatrixBase_homogeneous.cpp
* Output: \verbinclude MatrixBase_homogeneous.out
@@ -143,7 +143,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
* \sa class Homogeneous
*/
template<typename Derived>
inline const typename MatrixBase<Derived>::HomogeneousReturnType
inline typename MatrixBase<Derived>::HomogeneousReturnType
MatrixBase<Derived>::homogeneous() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
@@ -159,7 +159,7 @@ MatrixBase<Derived>::homogeneous() const
*
* \sa MatrixBase::homogeneous() */
template<typename ExpressionType, int Direction>
inline const Homogeneous<ExpressionType,Direction>
inline Homogeneous<ExpressionType,Direction>
VectorwiseOp<ExpressionType,Direction>::homogeneous() const
{
return _expression();
@@ -174,7 +174,7 @@ VectorwiseOp<ExpressionType,Direction>::homogeneous() const
*
* \sa VectorwiseOp::hnormalized() */
template<typename Derived>
inline const typename MatrixBase<Derived>::HNormalizedReturnType
inline typename MatrixBase<Derived>::HNormalizedReturnType
MatrixBase<Derived>::hnormalized() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
@@ -192,7 +192,7 @@ MatrixBase<Derived>::hnormalized() const
*
* \sa MatrixBase::hnormalized() */
template<typename ExpressionType, int Direction>
inline const typename VectorwiseOp<ExpressionType,Direction>::HNormalizedReturnType
inline typename VectorwiseOp<ExpressionType,Direction>::HNormalizedReturnType
VectorwiseOp<ExpressionType,Direction>::hnormalized() const
{
return HNormalized_Block(_expression(),0,0,

View File

@@ -228,7 +228,7 @@ public:
* or a more generic Affine transformation. The default is Affine.
* Other kind of transformations are not supported.
*/
inline Hyperplane& transform(const Transform<Scalar,AmbientDimAtCompileTime>& t,
inline Hyperplane& transform(const Transform<Scalar,AmbientDimAtCompileTime,Affine>& t,
TransformTraits traits = Affine)
{
transform(t.linear(), traits);

View File

@@ -54,7 +54,7 @@ MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
template< int Arch,typename VectorLhs,typename VectorRhs,
typename Scalar = typename VectorLhs::Scalar,
int Vectorizable = (VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit>
bool Vectorizable = (VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit>
struct ei_cross3_impl {
inline static typename ei_plain_matrix_type<VectorLhs>::type
run(const VectorLhs& lhs, const VectorRhs& rhs)

View File

@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2009 Mathieu Gautier <mathieu.gautier@cea.fr>
//
// Eigen is free software; you can redistribute it and/or
@@ -180,6 +180,10 @@ public:
return typename ei_cast_return_type<Derived,Quaternion<NewScalarType> >::type(
coeffs().template cast<NewScalarType>());
}
#ifdef EIGEN_QUATERNIONBASE_PLUGIN
# include EIGEN_QUATERNIONBASE_PLUGIN
#endif
};
/***************************************************************************
@@ -277,19 +281,6 @@ typedef Quaternion<double> Quaterniond;
* Specialization of Map<Quaternion<Scalar>>
***************************************************************************/
/** \class Map<Quaternion>
*
*
* \brief Expression of a quaternion from a memory buffer
*
* \param _Scalar the type of the Quaternion coefficients
* \param PacketAccess see class Map
*
* This is a specialization of class Map for Quaternion. This class allows to view
* a 4 scalar memory buffer as an Eigen's Quaternion object.
*
* \sa class Map, class Quaternion, class QuaternionBase
*/
template<typename _Scalar, int _PacketAccess>
struct ei_traits<Map<Quaternion<_Scalar>, _PacketAccess> >:
ei_traits<Quaternion<_Scalar> >
@@ -301,6 +292,16 @@ ei_traits<Quaternion<_Scalar> >
};
};
/** \brief Expression of a quaternion from a memory buffer
*
* \param _Scalar the type of the Quaternion coefficients
* \param PacketAccess see class Map
*
* This is a specialization of class Map for Quaternion. This class allows to view
* a 4 scalar memory buffer as an Eigen's Quaternion object.
*
* \sa class Map, class Quaternion, class QuaternionBase
*/
template<typename _Scalar, int PacketAccess>
class Map<Quaternion<_Scalar>, PacketAccess >
: public QuaternionBase<Map<Quaternion<_Scalar>, PacketAccess> >
@@ -398,7 +399,8 @@ QuaternionBase<Derived>::_transformVector(Vector3 v) const
// It appears to be much faster than the common algorithm found
// in the literature (30 versus 39 flops). It also requires two
// Vector3 as temporaries.
Vector3 uv = Scalar(2) * this->vec().cross(v);
Vector3 uv = this->vec().cross(v);
uv += uv;
return v + this->w() * uv + this->vec().cross(uv);
}
@@ -513,7 +515,7 @@ inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Deri
{
c = std::max<Scalar>(c,-1);
Matrix<Scalar,2,3> m; m << v0.transpose(), v1.transpose();
JacobiSVD<Matrix<Scalar,2,3> > svd(m);
JacobiSVD<Matrix<Scalar,2,3> > svd(m, ComputeFullV);
Vector3 axis = svd.matrixV().col(2);
Scalar w2 = (Scalar(1)+c)*Scalar(0.5);

View File

@@ -117,6 +117,8 @@ public:
m_angle = Scalar(other.angle());
}
/** \returns a Rotation2D constructed from the angle 0, i.e. the identity rotation */
inline static Rotation2D Identity() { return Rotation2D(0); }
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
* determined by \a prec.
*

View File

@@ -55,12 +55,17 @@ class RotationBase
/** \returns an equivalent rotation matrix */
inline RotationMatrixType toRotationMatrix() const { return derived().toRotationMatrix(); }
/** \returns an equivalent rotation matrix
* This function is provided to conform to the naming scheme of the Transform class;
* it forwards to the derived class' toRotationMatrix().
*/
inline RotationMatrixType matrix() const { return derived().toRotationMatrix(); }
/** \returns the inverse rotation */
inline Derived inverse() const { return derived().inverse(); }
/** \returns the concatenation of the rotation \c *this with a translation \a t */
inline Transform<Scalar,Dim> operator*(const Translation<Scalar,Dim>& t) const
{ return toRotationMatrix() * t; }
// The rotation is first converted to an Isometry transform, so the result is
// produced by the Transform * Translation product.
inline Transform<Scalar,Dim,Isometry> operator*(const Translation<Scalar,Dim>& t) const
{ return Transform<Scalar,Dim,Isometry>(*this) * t; }
/** \returns the concatenation of the rotation \c *this with a uniform scaling \a s */
inline RotationMatrixType operator*(const UniformScaling<Scalar>& s) const
@@ -82,6 +87,14 @@ class RotationBase
inline RotationMatrixType operator*(const EigenBase<OtherDerived>& l, const Derived& r)
{ return l.derived() * r.toRotationMatrix(); }
/** \returns the concatenation of a scaling \a l with the rotation \a r
*
* The result is an Affine transform: \a r is converted to a Transform whose
* linear part is then updated in place with \a l applied on the left.
*/
friend inline Transform<Scalar,Dim,Affine> operator*(const DiagonalMatrix<Scalar,Dim>& l, const Derived& r)
{
// start from the rotation alone
Transform<Scalar,Dim,Affine> res(r);
// then apply the scaling on the left of the linear part
res.linear().applyOnTheLeft(l);
return res;
}
/** \returns the concatenation of the rotation \c *this with a transformation \a t */
template<int Mode>
inline Transform<Scalar,Dim,Mode> operator*(const Transform<Scalar,Dim,Mode>& t) const
@@ -102,6 +115,18 @@ struct ei_rotation_base_generic_product_selector<RotationDerived,MatrixType,fals
{ return r.toRotationMatrix() * m; }
};
// Specialization of the product selector for a rotation times a DiagonalMatrix:
// the product is returned as an Affine Transform.
template<typename RotationDerived, typename Scalar, int Dim, int MaxDim>
struct ei_rotation_base_generic_product_selector< RotationDerived, DiagonalMatrix<Scalar,Dim,MaxDim>, false >
{
typedef Transform<Scalar,Dim,Affine> ReturnType;
inline static ReturnType run(const RotationDerived& r, const DiagonalMatrix<Scalar,Dim,MaxDim>& m)
{
// build the transform from the rotation, then multiply its linear part
// on the right by the diagonal matrix
ReturnType res(r);
res.linear() *= m;
return res;
}
};
template<typename RotationDerived,typename OtherVectorType>
struct ei_rotation_base_generic_product_selector<RotationDerived,OtherVectorType,true>
{

View File

@@ -69,7 +69,7 @@ public:
/** Concatenates a uniform scaling and a translation */
template<int Dim>
inline Transform<Scalar,Dim> operator* (const Translation<Scalar,Dim>& t) const;
inline Transform<Scalar,Dim,Affine> operator* (const Translation<Scalar,Dim>& t) const;
/** Concatenates a uniform scaling and an affine transformation */
template<int Dim, int Mode>
@@ -115,7 +115,7 @@ public:
/** Concatenates a linear transformation matrix and a uniform scaling */
// NOTE this operator is defined in MatrixBase and not as a friend function
// of UniformScaling to fix an internal crash of Intel's ICC
template<typename Derived> const typename MatrixBase<Derived>::ScalarMultipleReturnType
template<typename Derived> typename MatrixBase<Derived>::ScalarMultipleReturnType
MatrixBase<Derived>::operator*(const UniformScaling<Scalar>& s) const
{ return derived() * s.factor(); }
@@ -158,10 +158,10 @@ typedef DiagonalMatrix<double,3> AlignedScaling3d;
template<typename Scalar>
template<int Dim>
inline Transform<Scalar,Dim>
inline Transform<Scalar,Dim,Affine>
UniformScaling<Scalar>::operator* (const Translation<Scalar,Dim>& t) const
{
Transform<Scalar,Dim> res;
Transform<Scalar,Dim,Affine> res;
res.matrix().setZero();
res.linear().diagonal().fill(factor());
res.translation() = factor() * t.vector();

View File

@@ -3,6 +3,7 @@
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -26,18 +27,22 @@
#ifndef EIGEN_TRANSFORM_H
#define EIGEN_TRANSFORM_H
// Note that we have to pass Dim and HDim because it is not allowed to use a template
// parameter to define a template specialization. To be more precise, in the following
// specializations, it is not allowed to use Dim+1 instead of HDim.
template< typename Other,
int Mode,
int Dim,
int HDim,
int OtherRows=Other::RowsAtCompileTime,
int OtherCols=Other::ColsAtCompileTime>
struct ei_transform_right_product_impl;
/** \internal
* Exposes the compile-time constants Dim, HDim and Mode of a Transform type,
* together with the flag IsProjective, which is true exactly when Mode==Projective.
*/
template<typename Transform>
struct ei_transform_traits
{
enum
{
Dim = Transform::Dim,
HDim = Transform::HDim,
Mode = Transform::Mode,
IsProjective = (Mode==Projective)
};
};
template<typename TransformType> struct ei_transform_take_affine_part;
// Forward declaration. IsProjective defaults to the flag that
// ei_transform_traits computes for TransformType.
template< typename TransformType,
typename MatrixType,
bool IsProjective = ei_transform_traits<TransformType>::IsProjective>
struct ei_transform_right_product_impl;
template< typename Other,
int Mode,
@@ -47,7 +52,12 @@ template< typename Other,
int OtherCols=Other::ColsAtCompileTime>
struct ei_transform_left_product_impl;
template<typename Lhs,typename Rhs> struct ei_transform_transform_product_impl;
// Forward declaration. AnyProjective must be true as soon as either operand of
// the product is projective, hence both Lhs and Rhs are inspected.
template< typename Lhs,
typename Rhs,
bool AnyProjective =
ei_transform_traits<Lhs>::IsProjective ||
ei_transform_traits<Rhs>::IsProjective>
struct ei_transform_transform_product_impl;
template< typename Other,
int Mode,
@@ -57,6 +67,8 @@ template< typename Other,
int OtherCols=Other::ColsAtCompileTime>
struct ei_transform_construct_from_matrix;
template<typename TransformType> struct ei_transform_take_affine_part;
/** \geometry_module \ingroup Geometry_Module
*
* \class Transform
@@ -71,7 +83,7 @@ struct ei_transform_construct_from_matrix;
* This is the default.
* - AffineCompact: the transformation is stored as a (Dim)x(Dim+1) matrix.
* - Projective: the transformation is stored as a (Dim+1)^2 matrix
* whithout any assumption.
* without any assumption.
*
* The homography is internally represented and stored by a matrix which
* is available through the matrix() method. To understand the behavior of
@@ -80,19 +92,19 @@ struct ei_transform_construct_from_matrix;
*
* \code v' = T * v \endcode
*
* Thefore, an affine transformation matrix M is shaped like this:
* Therefore, an affine transformation matrix M is shaped like this:
*
* \f$ \left( \begin{array}{cc}
* linear & translation\\
* 0 ... 0 & 1
* \end{array} \right) \f$
*
* Note that for a provective transformation the last row can be anything,
* and then the interpretation of different parts might be sighlty different.
* Note that for a projective transformation the last row can be anything,
* and then the interpretation of different parts might be slightly different.
*
* However, unlike a plain matrix, the Transform class provides many features
* simplifying both its assembly and usage. In particular, it can be composed
* with any other transformations (Transform,Trnaslation,RotationBase,Matrix)
* with any other transformations (Transform,Translation,RotationBase,Matrix)
* and can be directly used to transform implicit homogeneous vectors. All these
* operations are handled via the operator*. For the composition of transformations,
* its principle consists to first convert the right/left hand sides of the product
@@ -139,17 +151,17 @@ struct ei_transform_construct_from_matrix;
* 1 & ... & 1
* \end{array} \right) \f$
*
* The concatenation of a Tranform object with any kind of other transformation
* The concatenation of a Transform object with any kind of other transformation
* always returns a Transform object.
*
* A little execption to the "as pure matrix product" rule is the case of the
* A little exception to the "as pure matrix product" rule is the case of the
* transformation of non homogeneous vectors by an affine transformation. In
* that case the last matrix row can be ignored, and the product returns non
* homogeneous vectors.
*
* Since, for instance, a Dim x Dim matrix is interpreted as a linear transformation,
* it is not possible to directly transform Dim vectors stored in a Dim x Dim matrix.
* The solution is either to use a Dim x Dynamic matrix or explicitely request a
* The solution is either to use a Dim x Dynamic matrix or explicitly request a
* vector transformation by making the vector homogeneous:
* \code
* m' = T * m.colwise().homogeneous();
@@ -202,7 +214,7 @@ protected:
public:
/** Default constructor without initialization of the meaningfull coefficients.
/** Default constructor without initialization of the meaningful coefficients.
* If Mode==Affine, then the last row is set to [0 ... 0 1] */
inline Transform()
{
@@ -243,9 +255,41 @@ public:
template<int OtherMode>
inline Transform(const Transform<Scalar,Dim,OtherMode>& other)
{
ei_assert(OtherMode!=Projective && "You cannot directly assign a projective transform to an affine one.");
typedef typename Transform<Scalar,Dim,OtherMode>::MatrixType OtherMatrixType;
ei_transform_construct_from_matrix<OtherMatrixType,Mode,Dim,HDim>::run(this, other.matrix());
// prevent conversions as:
// Affine | AffineCompact | Isometry = Projective
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(OtherMode==int(Projective), Mode==int(Projective)),
YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION)
// prevent conversions as:
// Isometry = Affine | AffineCompact
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(OtherMode==int(Affine)||OtherMode==int(AffineCompact), Mode!=int(Isometry)),
YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION)
enum { ModeIsAffineCompact = Mode == int(AffineCompact),
OtherModeIsAffineCompact = OtherMode == int(AffineCompact)
};
if(ModeIsAffineCompact == OtherModeIsAffineCompact)
{
// We need the block expression because the code is compiled for all
// combinations of transformations and will trigger a compile time error
// if one tries to assign the matrices directly
m_matrix.template block<Dim,Dim+1>(0,0) = other.matrix().template block<Dim,Dim+1>(0,0);
makeAffine();
}
else if(OtherModeIsAffineCompact)
{
typedef typename Transform<Scalar,Dim,OtherMode>::MatrixType OtherMatrixType;
ei_transform_construct_from_matrix<OtherMatrixType,Mode,Dim,HDim>::run(this, other.matrix());
}
else
{
// here we know that Mode == AffineCompact and OtherMode != AffineCompact.
// if OtherMode were Projective, the static assert above would already have caught it.
// So the only possibility is that OtherMode == Affine
linear() = other.linear();
translation() = other.translation();
}
}
template<typename OtherDerived>
@@ -271,10 +315,10 @@ public:
#endif
/** shortcut for m_matrix(row,col);
* \sa MatrixBase::operaror(Index,Index) const */
* \sa MatrixBase::operator(Index,Index) const */
inline Scalar operator() (Index row, Index col) const { return m_matrix(row,col); }
/** shortcut for m_matrix(row,col);
* \sa MatrixBase::operaror(Index,Index) */
* \sa MatrixBase::operator(Index,Index) */
inline Scalar& operator() (Index row, Index col) { return m_matrix(row,col); }
/** \returns a read-only expression of the transformation matrix */
@@ -310,9 +354,9 @@ public:
*/
// note: this function is defined here because some compilers cannot find the respective declaration
template<typename OtherDerived>
inline const typename ei_transform_right_product_impl<OtherDerived,Mode,_Dim,_Dim+1>::ResultType
EIGEN_STRONG_INLINE const typename ei_transform_right_product_impl<Transform, OtherDerived>::ResultType
operator * (const EigenBase<OtherDerived> &other) const
{ return ei_transform_right_product_impl<OtherDerived,Mode,Dim,HDim>::run(*this,other.derived()); }
{ return ei_transform_right_product_impl<Transform, OtherDerived>::run(*this,other.derived()); }
/** \returns the product expression of a transformation matrix \a a times a transform \a b
*
@@ -323,23 +367,54 @@ public:
*/
template<typename OtherDerived> friend
inline const typename ei_transform_left_product_impl<OtherDerived,Mode,_Dim,_Dim+1>::ResultType
operator * (const EigenBase<OtherDerived> &a, const Transform &b)
operator * (const EigenBase<OtherDerived> &a, const Transform &b)
{ return ei_transform_left_product_impl<OtherDerived,Mode,Dim,HDim>::run(a.derived(),b); }
/** \returns The product expression of a transform \a a times a diagonal matrix \a b
*
* The rhs diagonal matrix is interpreted as an affine scaling transformation. The
* product results in a Transform of the same type (mode) as the lhs, unless the lhs
* mode is Isometry. In that case, the returned transform is an Affine one.
*/
friend inline const Transform<Scalar,Dim,((Mode==(int)Isometry)?Affine:(int)Mode)>
operator * (const Transform &a, const DiagonalMatrix<Scalar,Dim> &b)
{
// copy the lhs into the result mode, then multiply its linear part on the right by b
Transform<Scalar,Dim,((Mode==(int)Isometry)?Affine:(int)Mode)> res(a);
res.linear() *= b;
return res;
}
/** \returns The product expression of a diagonal matrix \a a times a transform \a b
*
* The lhs diagonal matrix is interpreted as an affine scaling transformation. The
* product results in a Transform of the same type (mode) as the rhs, unless the rhs
* mode is Isometry. In that case, the returned transform is an Affine one.
*/
friend inline const Transform<Scalar,Dim,((Mode==(int)Isometry)?Affine:(int)Mode)>
operator * (const DiagonalMatrix<Scalar,Dim> &a, const Transform &b)
{
Transform<Scalar,Dim,((Mode==(int)Isometry)?Affine:(int)Mode)> res;
// both the linear part and the translation of b are multiplied on the left by a
res.linear().noalias() = a*b.linear();
res.translation().noalias() = a*b.translation();
// the last row of b is copied as is, except for AffineCompact
// (documented in this file as a (Dim)x(Dim+1) matrix)
if (Mode!=int(AffineCompact))
res.matrix().row(Dim) = b.matrix().row(Dim);
return res;
}
/** Computes the product \c *this * \a other and assigns it back to \c *this.
* \returns whatever that assignment returns (declared here as a reference to a Transform)
*/
template<typename OtherDerived>
inline Transform& operator*=(const EigenBase<OtherDerived>& other) { return *this = *this * other; }
/** Contatenates two transformations */
/** Concatenates two transformations */
inline const Transform operator * (const Transform& other) const
{
  // both operands have the same Transform type; the product itself is delegated to the impl helper
  typedef ei_transform_transform_product_impl<Transform,Transform> ProductImpl;
  return ProductImpl::run(*this,other);
}
/** Contatenates two different transformations */
/** Concatenates two different transformations */
template<int OtherMode>
inline const typename ei_transform_transform_product_impl<
Transform,Transform<Scalar,Dim,OtherMode> >::ResultType
operator * (const Transform<Scalar,Dim,OtherMode>& other) const
Transform,Transform<Scalar,Dim,OtherMode> >::ResultType
operator * (const Transform<Scalar,Dim,OtherMode>& other) const
{
return ei_transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode> >::run(*this,other);
}
@@ -388,6 +463,8 @@ public:
inline Transform& operator*=(const UniformScaling<Scalar>& s) { return scale(s.factor()); }
inline Transform operator*(const UniformScaling<Scalar>& s) const;
inline Transform& operator*=(const DiagonalMatrix<Scalar,Dim>& s) { linear() *= s; return *this; }
template<typename Derived>
inline Transform& operator=(const RotationBase<Derived,Dim>& r);
template<typename Derived>
@@ -477,15 +554,6 @@ public:
};
/** \ingroup Geometry_Module */
typedef Transform<float,2> Transform2f;
/** \ingroup Geometry_Module */
typedef Transform<float,3> Transform3f;
/** \ingroup Geometry_Module */
typedef Transform<double,2> Transform2d;
/** \ingroup Geometry_Module */
typedef Transform<double,3> Transform3d;
/** \ingroup Geometry_Module */
typedef Transform<float,2,Isometry> Isometry2f;
/** \ingroup Geometry_Module */
@@ -496,13 +564,13 @@ typedef Transform<double,2,Isometry> Isometry2d;
typedef Transform<double,3,Isometry> Isometry3d;
/** \ingroup Geometry_Module */
typedef Transform<float,2> Affine2f;
typedef Transform<float,2,Affine> Affine2f;
/** \ingroup Geometry_Module */
typedef Transform<float,3> Affine3f;
typedef Transform<float,3,Affine> Affine3f;
/** \ingroup Geometry_Module */
typedef Transform<double,2> Affine2d;
typedef Transform<double,2,Affine> Affine2d;
/** \ingroup Geometry_Module */
typedef Transform<double,3> Affine3d;
typedef Transform<double,3,Affine> Affine3d;
/** \ingroup Geometry_Module */
typedef Transform<float,2,AffineCompact> AffineCompact2f;
@@ -548,7 +616,7 @@ Transform<Scalar,Dim,Mode>& Transform<Scalar,Dim,Mode>::operator=(const QMatrix&
m_matrix << other.m11(), other.m21(), other.dx(),
other.m12(), other.m22(), other.dy(),
0, 0, 1;
return *this;
return *this;
}
/** \returns a QMatrix from \c *this assuming the dimension is 2.
@@ -587,7 +655,7 @@ Transform<Scalar,Dim,Mode>& Transform<Scalar,Dim,Mode>::operator=(const QTransfo
m_matrix << other.m11(), other.m21(), other.dx(),
other.m12(), other.m22(), other.dy(),
other.m13(), other.m23(), other.m33();
return *this;
return *this;
}
/** \returns a QTransform from \c *this assuming the dimension is 2.
@@ -618,6 +686,7 @@ Transform<Scalar,Dim,Mode>&
Transform<Scalar,Dim,Mode>::scale(const MatrixBase<OtherDerived> &other)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
linearExt().noalias() = (linearExt() * other.asDiagonal());
return *this;
}
@@ -629,6 +698,7 @@ Transform<Scalar,Dim,Mode>::scale(const MatrixBase<OtherDerived> &other)
// Multiplies linearExt() by the scalar s in place and returns *this.
template<typename Scalar, int Dim, int Mode>
inline Transform<Scalar,Dim,Mode>& Transform<Scalar,Dim,Mode>::scale(Scalar s)
{
// compile-time rejection of this method when Mode is Isometry
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
linearExt() *= s;
return *this;
}
@@ -643,6 +713,7 @@ Transform<Scalar,Dim,Mode>&
Transform<Scalar,Dim,Mode>::prescale(const MatrixBase<OtherDerived> &other)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
m_matrix.template block<Dim,HDim>(0,0).noalias() = (other.asDiagonal() * m_matrix.template block<Dim,HDim>(0,0));
return *this;
}
@@ -654,6 +725,7 @@ Transform<Scalar,Dim,Mode>::prescale(const MatrixBase<OtherDerived> &other)
// Multiplies the top Dim rows of the stored matrix by the scalar s in place and returns *this.
template<typename Scalar, int Dim, int Mode>
inline Transform<Scalar,Dim,Mode>& Transform<Scalar,Dim,Mode>::prescale(Scalar s)
{
// compile-time rejection of this method when Mode is Isometry
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
// topRows<Dim>() covers every column of those rows, so the last column is scaled as well
m_matrix.template topRows<Dim>() *= s;
return *this;
}
@@ -742,6 +814,7 @@ Transform<Scalar,Dim,Mode>&
Transform<Scalar,Dim,Mode>::shear(Scalar sx, Scalar sy)
{
EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
VectorType tmp = linear().col(0)*sy + linear().col(1);
linear() << linear().col(0) + linear().col(1)*sx, tmp;
return *this;
@@ -757,6 +830,7 @@ Transform<Scalar,Dim,Mode>&
Transform<Scalar,Dim,Mode>::preshear(Scalar sx, Scalar sy)
{
EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
m_matrix.template block<Dim,HDim>(0,0) = LinearMatrixType(1, sx, sy, 1) * m_matrix.template block<Dim,HDim>(0,0);
return *this;
}
@@ -854,7 +928,18 @@ template<typename Scalar, int Dim, int Mode>
template<typename RotationMatrixType, typename ScalingMatrixType>
void Transform<Scalar,Dim,Mode>::computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const
{
linear().svd().computeRotationScaling(rotation, scaling);
JacobiSVD<LinearMatrixType> svd(linear(), ComputeFullU | ComputeFullV);
Scalar x = (svd.matrixU() * svd.matrixV().adjoint()).determinant(); // so x has absolute value 1
VectorType sv(svd.singularValues());
sv.coeffRef(0) *= x;
if(scaling) scaling->lazyAssign(svd.matrixV() * sv.asDiagonal() * svd.matrixV().adjoint());
if(rotation)
{
LinearMatrixType m(svd.matrixU());
m.col(0) /= x;
rotation->lazyAssign(m * svd.matrixV().adjoint());
}
}
/** decomposes the linear part of the transformation as a product rotation x scaling, the scaling being
@@ -872,7 +957,18 @@ template<typename Scalar, int Dim, int Mode>
template<typename ScalingMatrixType, typename RotationMatrixType>
void Transform<Scalar,Dim,Mode>::computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const
{
linear().svd().computeScalingRotation(scaling, rotation);
JacobiSVD<LinearMatrixType> svd(linear(), ComputeFullU | ComputeFullV);
Scalar x = (svd.matrixU() * svd.matrixV().adjoint()).determinant(); // so x has absolute value 1
VectorType sv(svd.singularValues());
sv.coeffRef(0) *= x;
if(scaling) scaling->lazyAssign(svd.matrixU() * sv.asDiagonal() * svd.matrixU().adjoint());
if(rotation)
{
LinearMatrixType m(svd.matrixU());
m.col(0) /= x;
rotation->lazyAssign(m * svd.matrixV().adjoint());
}
}
/** Convenient method to set \c *this from a position, orientation and scale
@@ -914,7 +1010,7 @@ struct ei_projective_transform_inverse<TransformType, Projective>
* \returns the inverse transformation according to some given knowledge
* on \c *this.
*
* \param traits allows to optimize the inversion process when the transformation
* \param hint allows to optimize the inversion process when the transformation
* is known to be not a general transformation. The possible values are:
* - Projective if the transformation is not necessarily affine, i.e., if the
* last row is not guaranteed to be [0 ... 0 1]
@@ -954,11 +1050,7 @@ Transform<Scalar,Dim,Mode>::inverse(TransformTraits hint) const
// translation and remaining parts
res.matrix().template topRightCorner<Dim,1>()
= - res.matrix().template topLeftCorner<Dim,Dim>() * translation();
if(int(Mode)!=int(AffineCompact))
{
res.matrix().template block<1,Dim>(Dim,0).setZero();
res.matrix().coeffRef(Dim,Dim) = 1;
}
res.makeAffine(); // we do need this, because in the beginning res is uninitialized
}
return res;
}
@@ -1022,161 +1114,70 @@ struct ei_transform_construct_from_matrix<Other, AffineCompact,Dim,HDim, HDim,HD
{ transform->matrix() = other.template block<Dim,HDim>(0,0); }
};
/*********************************************************
*** Specializations of operator* with a EigenBase ***
*********************************************************/
/**********************************************************
*** Specializations of operator* with rhs EigenBase ***
**********************************************************/
// ei_general_product_return_type is a generalization of ProductReturnType, for all types (including e.g. DiagonalBase...),
// instead of being restricted to MatrixBase.
template<typename Lhs, typename Rhs> struct ei_general_product_return_type;
template<typename D1, typename D2> struct ei_general_product_return_type<MatrixBase<D1>, MatrixBase<D2> >
: ProductReturnType<D1,D2> {};
template<typename Lhs, typename D2> struct ei_general_product_return_type<Lhs, MatrixBase<D2> >
{ typedef D2 Type; };
template<typename D1, typename Rhs> struct ei_general_product_return_type<MatrixBase<D1>, Rhs >
{ typedef D1 Type; };
// Projective * set of homogeneous column vectors
template<typename Other, int Dim, int HDim>
struct ei_transform_right_product_impl<Other,Projective, Dim,HDim, HDim, Dynamic>
template<int LhsMode,int RhsMode>
struct ei_transform_product_result
{
typedef Transform<typename Other::Scalar,Dim,Projective> TransformType;
typedef typename TransformType::MatrixType MatrixType;
typedef typename ProductReturnType<MatrixType,Other>::Type ResultType;
static ResultType run(const TransformType& tr, const Other& other)
{ return tr.matrix() * other; }
enum
{
Mode =
(LhsMode == (int)Projective || RhsMode == (int)Projective ) ? Projective :
(LhsMode == (int)Affine || RhsMode == (int)Affine ) ? Affine :
(LhsMode == (int)AffineCompact || RhsMode == (int)AffineCompact ) ? AffineCompact :
(LhsMode == (int)Isometry || RhsMode == (int)Isometry ) ? Isometry : Projective
};
};
// Projective * homogeneous column vector
template<typename Other, int Dim, int HDim>
struct ei_transform_right_product_impl<Other,Projective, Dim,HDim, HDim, 1>
template< typename TransformType, typename MatrixType >
struct ei_transform_right_product_impl< TransformType, MatrixType, true >
{
typedef Transform<typename Other::Scalar,Dim,Projective> TransformType;
typedef typename TransformType::MatrixType MatrixType;
typedef typename ProductReturnType<MatrixType,Other>::Type ResultType;
static ResultType run(const TransformType& tr, const Other& other)
{ return tr.matrix() * other; }
};
typedef typename MatrixType::PlainObject ResultType;
// Projective * column vector
template<typename Other, int Dim, int HDim>
struct ei_transform_right_product_impl<Other,Projective, Dim,HDim, Dim, 1>
{
typedef Transform<typename Other::Scalar,Dim,Projective> TransformType;
typedef Matrix<typename Other::Scalar,HDim,1> ResultType;
static ResultType run(const TransformType& tr, const Other& other)
{ return tr.matrix().template block<HDim,Dim>(0,0) * other + tr.matrix().col(Dim); }
};
// Affine * column vector
template<typename Other, int Mode, int Dim, int HDim>
struct ei_transform_right_product_impl<Other,Mode, Dim,HDim, Dim,1>
{
typedef Transform<typename Other::Scalar,Dim,Mode> TransformType;
typedef Matrix<typename Other::Scalar,Dim,1> ResultType;
static ResultType run(const TransformType& tr, const Other& other)
{ return tr.linear() * other + tr.translation(); }
};
// Affine * set of column vectors
// FIXME use a ReturnByValue to remove the temporary
template<typename Other, int Mode, int Dim, int HDim>
struct ei_transform_right_product_impl<Other,Mode, Dim,HDim, Dim,Dynamic>
{
typedef Transform<typename Other::Scalar,Dim,Mode> TransformType;
typedef Matrix<typename Other::Scalar,Dim,Dynamic> ResultType;
static ResultType run(const TransformType& tr, const Other& other)
{ return (tr.linear() * other).colwise() + tr.translation(); }
};
// Affine * homogeneous column vector
// FIXME added for backward compatibility, but I'm not sure we should keep it
template<typename Other, int Mode, int Dim, int HDim>
struct ei_transform_right_product_impl<Other,Mode, Dim,HDim, HDim,1>
{
typedef Transform<typename Other::Scalar,Dim,Mode> TransformType;
typedef Matrix<typename Other::Scalar,HDim,1> ResultType;
static ResultType run(const TransformType& tr, const Other& other)
{ return tr.matrix() * other; }
};
template<typename Other, int Dim, int HDim>
struct ei_transform_right_product_impl<Other,AffineCompact, Dim,HDim, HDim,1>
{
typedef Transform<typename Other::Scalar,Dim,AffineCompact> TransformType;
typedef Matrix<typename Other::Scalar,HDim,1> ResultType;
static ResultType run(const TransformType& tr, const Other& other)
EIGEN_STRONG_INLINE static ResultType run(const TransformType& T, const MatrixType& other)
{
ResultType res;
res.template head<HDim>() = tr.matrix() * other;
res.coeffRef(Dim) = other.coeff(Dim);
return T.matrix() * other;
}
};
// T * linear matrix => T
template<typename Other, int Mode, int Dim, int HDim>
struct ei_transform_right_product_impl<Other,Mode, Dim,HDim, Dim,Dim>
template< typename TransformType, typename MatrixType >
struct ei_transform_right_product_impl< TransformType, MatrixType, false >
{
typedef Transform<typename Other::Scalar,Dim,Mode> TransformType;
typedef typename TransformType::MatrixType MatrixType;
typedef TransformType ResultType;
static ResultType run(const TransformType& tr, const Other& other)
enum {
Dim = TransformType::Dim,
HDim = TransformType::HDim,
OtherRows = MatrixType::RowsAtCompileTime,
OtherCols = MatrixType::ColsAtCompileTime
};
typedef typename MatrixType::PlainObject ResultType;
EIGEN_STRONG_INLINE static ResultType run(const TransformType& T, const MatrixType& other)
{
TransformType res;
res.matrix().col(Dim) = tr.matrix().col(Dim);
res.linearExt().noalias() = (tr.linearExt() * other);
if(Mode==Affine)
res.matrix().row(Dim).template head<Dim>() = tr.matrix().row(Dim).template head<Dim>();
return res;
}
};
// T * affine matrix => T
template<typename Other, int Mode, int Dim, int HDim>
struct ei_transform_right_product_impl<Other,Mode, Dim,HDim, Dim,HDim>
{
typedef Transform<typename Other::Scalar,Dim,Mode> TransformType;
typedef typename TransformType::MatrixType MatrixType;
typedef TransformType ResultType;
static ResultType run(const TransformType& tr, const Other& other)
{
TransformType res;
enum { Rows = Mode==Projective ? HDim : Dim };
res.matrix().template block<Rows,HDim>(0,0).noalias() = (tr.linearExt() * other);
res.translationExt() += tr.translationExt();
if(Mode!=Affine)
res.makeAffine();
return res;
}
};
// T * generic matrix => Projective
template<typename Other, int Mode, int Dim, int HDim>
struct ei_transform_right_product_impl<Other,Mode, Dim,HDim, HDim,HDim>
{
typedef Transform<typename Other::Scalar,Dim,Mode> TransformType;
typedef typename TransformType::MatrixType MatrixType;
typedef Transform<typename Other::Scalar,Dim,Projective> ResultType;
static ResultType run(const TransformType& tr, const Other& other)
{ return ResultType(tr.matrix() * other); }
};
// AffineCompact * generic matrix => Projective
template<typename Other, int Dim, int HDim>
struct ei_transform_right_product_impl<Other,AffineCompact, Dim,HDim, HDim,HDim>
{
typedef Transform<typename Other::Scalar,Dim,AffineCompact> TransformType;
typedef Transform<typename Other::Scalar,Dim,Projective> ResultType;
static ResultType run(const TransformType& tr, const Other& other)
{
ResultType res;
res.affine().noalias() = tr.matrix() * other;
res.makeAffine();
EIGEN_STATIC_ASSERT(OtherRows==Dim || OtherRows==HDim, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES);
typedef Block<ResultType, Dim, OtherCols> TopLeftLhs;
typedef Block<MatrixType, Dim, OtherCols> TopLeftRhs;
ResultType res(other.rows(),other.cols());
TopLeftLhs(res, 0, 0, Dim, other.cols()) =
( T.linear() * TopLeftRhs(other, 0, 0, Dim, other.cols()) ).colwise() +
T.translation();
// we need to take .rows() because OtherRows might be Dim or HDim
if (OtherRows==HDim)
res.row(other.rows()) = other.row(other.rows());
return res;
}
};
/**********************************************************
*** Specializations of operator* with lhs EigenBase ***
**********************************************************/
// generic HDim x HDim matrix * T => Projective
template<typename Other,int Mode, int Dim, int HDim>
@@ -1247,7 +1248,7 @@ struct ei_transform_left_product_impl<Other,Mode,Dim,HDim, Dim,Dim>
static ResultType run(const Other& other, const TransformType& tr)
{
TransformType res;
if(Mode!=AffineCompact)
if(Mode!=int(AffineCompact))
res.matrix().row(Dim) = tr.matrix().row(Dim);
res.matrix().template topRows<Dim>().noalias()
= other * tr.matrix().template topRows<Dim>();
@@ -1259,52 +1260,32 @@ struct ei_transform_left_product_impl<Other,Mode,Dim,HDim, Dim,Dim>
*** Specializations of operator* with another Transform ***
**********************************************************/
template<typename Scalar, int Dim, int Mode>
struct ei_transform_transform_product_impl<Transform<Scalar,Dim,Mode>,Transform<Scalar,Dim,Mode> >
template<typename Scalar, int Dim, int LhsMode, int RhsMode>
struct ei_transform_transform_product_impl<Transform<Scalar,Dim,LhsMode>,Transform<Scalar,Dim,RhsMode>,false >
{
typedef Transform<Scalar,Dim,Mode> TransformType;
typedef TransformType ResultType;
static ResultType run(const TransformType& lhs, const TransformType& rhs)
enum { ResultMode = ei_transform_product_result<LhsMode,RhsMode>::Mode };
typedef Transform<Scalar,Dim,LhsMode> Lhs;
typedef Transform<Scalar,Dim,RhsMode> Rhs;
typedef Transform<Scalar,Dim,ResultMode> ResultType;
static ResultType run(const Lhs& lhs, const Rhs& rhs)
{
return ResultType(lhs.matrix() * rhs.matrix());
}
};
template<typename Scalar, int Dim>
struct ei_transform_transform_product_impl<Transform<Scalar,Dim,AffineCompact>,Transform<Scalar,Dim,AffineCompact> >
{
typedef Transform<Scalar,Dim,AffineCompact> TransformType;
typedef TransformType ResultType;
static ResultType run(const TransformType& lhs, const TransformType& rhs)
{
return ei_transform_right_product_impl<typename TransformType::MatrixType,
AffineCompact,Dim,Dim+1>::run(lhs,rhs.matrix());
ResultType res;
res.linear() = lhs.linear() * rhs.linear();
res.translation() = lhs.linear() * rhs.translation() + lhs.translation();
res.makeAffine();
return res;
}
};
template<typename Scalar, int Dim, int LhsMode, int RhsMode>
struct ei_transform_transform_product_impl<Transform<Scalar,Dim,LhsMode>,Transform<Scalar,Dim,RhsMode> >
struct ei_transform_transform_product_impl<Transform<Scalar,Dim,LhsMode>,Transform<Scalar,Dim,RhsMode>,true >
{
typedef Transform<Scalar,Dim,LhsMode> Lhs;
typedef Transform<Scalar,Dim,RhsMode> Rhs;
typedef typename ei_transform_right_product_impl<typename Rhs::MatrixType,
LhsMode,Dim,Dim+1>::ResultType ResultType;
typedef Transform<Scalar,Dim,Projective> ResultType;
static ResultType run(const Lhs& lhs, const Rhs& rhs)
{
return ei_transform_right_product_impl<typename Rhs::MatrixType,LhsMode,Dim,Dim+1>::run(lhs,rhs.matrix());
}
};
template<typename Scalar, int Dim>
struct ei_transform_transform_product_impl<Transform<Scalar,Dim,AffineCompact>,
Transform<Scalar,Dim,Affine> >
{
typedef Transform<Scalar,Dim,AffineCompact> Lhs;
typedef Transform<Scalar,Dim,Affine> Rhs;
typedef Transform<Scalar,Dim,AffineCompact> ResultType;
static ResultType run(const Lhs& lhs, const Rhs& rhs)
{
return ResultType(lhs.matrix() * rhs.matrix());
return ResultType( lhs.matrix() * rhs.matrix() );
}
};

View File

@@ -53,7 +53,7 @@ public:
/** corresponding linear transformation matrix type */
typedef Matrix<Scalar,Dim,Dim> LinearMatrixType;
/** corresponding affine transformation type */
typedef Transform<Scalar,Dim> AffineTransformType;
typedef Transform<Scalar,Dim,Affine> AffineTransformType;
protected:
@@ -98,6 +98,9 @@ public:
/** \returns a read-only reference to the stored coefficient vector */
const VectorType& vector() const { return m_coeffs; }
/** \returns a writable reference to the stored coefficient vector */
VectorType& vector() { return m_coeffs; }
/** \returns a read-only reference to the stored coefficient vector (same data as vector()) */
const VectorType& translation() const { return m_coeffs; }
/** \returns a writable reference to the stored coefficient vector (same data as vector()) */
VectorType& translation() { return m_coeffs; }
/** Concatenates two translations: the result holds the sum of both coefficient vectors */
inline Translation operator* (const Translation& other) const
{ return Translation(m_coeffs + other.m_coeffs); }
@@ -128,7 +131,7 @@ public:
return res;
}
/** Concatenates a translation and an affine transformation */
/** Concatenates a translation and a transformation */
template<int Mode>
inline Transform<Scalar,Dim,Mode> operator* (const Transform<Scalar,Dim,Mode>& t) const
{
@@ -150,6 +153,8 @@ public:
return *this;
}
/** \returns a Translation constructed from the zero vector VectorType::Zero() */
static const Translation Identity() { return Translation(VectorType::Zero()); }
/** \returns \c *this with scalar type casted to \a NewScalarType
*
* Note that if \a NewScalarType is equal to the current scalar type of \c *this

View File

@@ -141,7 +141,7 @@ umeyama(const MatrixBase<Derived>& src, const MatrixBase<OtherDerived>& dst, boo
// Eq. (38)
const MatrixType sigma = one_over_n * dst_demean * src_demean.transpose();
SVD<MatrixType> svd(sigma);
JacobiSVD<MatrixType> svd(sigma, ComputeFullU | ComputeFullV);
// Initialize the resulting transformation with an identity matrix...
TransformationMatrixType Rt = TransformationMatrixType::Identity(m+1,m+1);

View File

@@ -2,7 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com>
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -54,8 +54,8 @@ struct ei_cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
inline static typename ei_plain_matrix_type<VectorLhs>::type
run(const VectorLhs& lhs, const VectorRhs& rhs)
{
__m128 a = lhs.coeffs().packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
__m128 b = rhs.coeffs().packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
__m128 a = lhs.template packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
__m128 b = rhs.template packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
__m128 mul1=_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,3),ei_vec4f_swizzle1(b,2,0,1,3));
__m128 mul2=_mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,3),ei_vec4f_swizzle1(b,1,2,0,3));
typename ei_plain_matrix_type<VectorLhs>::type res;
@@ -64,4 +64,60 @@ struct ei_cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
}
};
// Product of two quaternions of doubles using 2-wide double packets (Packet2d).
// This specialization is selected for Architecture::SSE, scalar type double and the
// Aligned tag; the coefficients of _b are loaded with aligned packet reads.
// NOTE(review): the variable names and formulas below assume the coefficient
// storage order is x, y, z, w -- confirm against the Quaternion class.
template<class Derived, class OtherDerived>
struct ei_quat_product<Architecture::SSE, Derived, OtherDerived, double, Aligned>
{
// Returns the quaternion product _a * _b as a Quaternion<double>.
inline static Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
{
// Sign mask used by the non-SSE3 fallback: _mm_set_epi32 takes its arguments from the
// highest to the lowest 32-bit lane, so 0x80000000 lands on the sign bit of the low
// double; xor-ing a packet with this mask negates its first element only.
const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
Quaternion<double> res;
const double* a = _a.coeffs().data();
// the four coefficients of _b as two packets: elements {0,1} and {2,3}
Packet2d b_xy = _b.coeffs().template packet<Aligned>(0);
Packet2d b_zw = _b.coeffs().template packet<Aligned>(2);
// each coefficient of _a broadcast into both lanes of a packet
Packet2d a_xx = ei_pset1<Packet2d>(a[0]);
Packet2d a_yy = ei_pset1<Packet2d>(a[1]);
Packet2d a_zz = ei_pset1<Packet2d>(a[2]);
Packet2d a_ww = ei_pset1<Packet2d>(a[3]);
// two temporaries:
Packet2d t1, t2;
/*
* t1 = ww*xy + yy*zw
* t2 = zz*xy - xx*zw
* res.xy = t1 +/- swap(t2)
*/
t1 = ei_padd(ei_pmul(a_ww, b_xy), ei_pmul(a_yy, b_zw));
t2 = ei_psub(ei_pmul(a_zz, b_xy), ei_pmul(a_xx, b_zw));
// Both branches compute (t1[0] - t2[1], t1[1] + t2[0]) and store it starting at res.x().
// The packet store writes two doubles, so it presumably relies on y being stored
// right after x -- TODO confirm.
#ifdef __SSE3__
// mask is only needed by the fallback branch; silence the unused-variable warning
EIGEN_UNUSED_VARIABLE(mask)
ei_pstore(&res.x(), _mm_addsub_pd(t1, ei_preverse(t2)));
#else
ei_pstore(&res.x(), ei_padd(t1, ei_pxor(mask,ei_preverse(t2))));
#endif
/*
* t1 = ww*zw - yy*xy
* t2 = zz*zw + xx*xy
* res.zw = t1 -/+ swap(t2) = swap( swap(t1) +/- t2)
*/
t1 = ei_psub(ei_pmul(a_ww, b_zw), ei_pmul(a_yy, b_xy));
t2 = ei_padd(ei_pmul(a_zz, b_zw), ei_pmul(a_xx, b_xy));
// Both branches compute (t1[0] + t2[1], t1[1] - t2[0]) and store it starting at res.z()
// (same contiguity assumption as above, here for z and w).
#ifdef __SSE3__
EIGEN_UNUSED_VARIABLE(mask)
ei_pstore(&res.z(), ei_preverse(_mm_addsub_pd(ei_preverse(t1), t2)));
#else
ei_pstore(&res.z(), ei_psub(t1, ei_pxor(mask,ei_preverse(t2))));
#endif
return res;
}
};
#endif // EIGEN_GEOMETRY_SSE_H

Some files were not shown because too many files have changed in this diff Show More