Compare commits

..

681 Commits

Author SHA1 Message Date
Antonio Sanchez
bdcab83e28 Get docs to build 2025-10-17 21:34:55 -07:00
Antonio Sanchez
5c9addf4a2 Add CI for docs 2025-10-16 22:01:28 -07:00
Thomas Capricelli
05bd58e9b4 simplify/uniformize eigen_gen_docs 2013-10-18 13:19:14 +02:00
Thomas Capricelli
69f583a866 uniformize piwik code among branches 2013-10-11 20:43:29 +02:00
Thomas Capricelli
49016cbe4b fix a weird typo I committed in ae76c97704
(Nov 10th, 2009)
2013-10-11 20:42:41 +02:00
Gael Guennebaud
43f054dbb4 Added tag 3.0.7 for changeset 2a965155af 2013-08-01 11:36:26 +02:00
Gael Guennebaud
2a965155af bump to 3.0.7 2013-08-01 11:36:16 +02:00
Gael Guennebaud
5ce83aeb6b Fix traits of Map<Quaternion>, and respectively extend the unit tests
(transplanted from 392ffce3b9
)
2013-01-20 10:21:54 +01:00
Gael Guennebaud
41070aad7b Some minor documentation fixes in Quaternion
(transplanted from fb89b66229
)
2013-01-20 10:20:39 +01:00
Christoph Hertzberg
27f6fd3a50 Fix bug #507: Mark variable as unused in NDEBUG case 2012-12-20 11:21:47 +01:00
Christoph Hertzberg
45ae9a069c Fix bug #531: Empty line in <table> made doxygen render it as paragraphs 2012-12-17 16:13:42 +01:00
Gael Guennebaud
bdd80ebe1c Added tag 3.0.6 for changeset 06773276cd 2012-07-09 18:35:34 +02:00
Gael Guennebaud
06773276cd bump to 3.0.6 2012-07-09 18:35:20 +02:00
Gael Guennebaud
c8271df0ec Fix kdBVH unit test
(transplanted from cb64e587c5
)
2012-06-04 22:01:06 +02:00
Gael Guennebaud
9e84d135db fix warning 2012-07-09 13:23:44 +02:00
Gael Guennebaud
8d2f7ae94b fix implicit scalar conversion
(transplanted from 139c91bf30
)
2012-06-28 13:12:49 +02:00
Gael Guennebaud
a1a0cccd4e fix bug #478: RealSchur failed on a zero matrix.
(transplanted from b96b429aa2
)
2012-06-20 10:08:32 +02:00
Gael Guennebaud
45e1bb5ea5 fix geometry tutorial about scalings.
(transplanted from 1727373706
)
2012-06-18 22:07:13 +02:00
Gael Guennebaud
d0c374f1ed fix bug #477: warning with gcc 4.7
(transplanted from c8346abcdd
)
2012-06-20 09:54:52 +02:00
Thomas Capricelli
f231560ec2 backport typo fix from 37d367a231 2012-06-18 12:35:44 +02:00
Gael Guennebaud
cea814b90d fix bug #475: .exp() now returns +inf when overflow occurs (SSE)
(transplanted from a3e700db72
)
2012-06-14 10:38:39 +02:00
Gael Guennebaud
15b1558483 Fix bug #466: race condition detected by helgrind in manage_caching_sizes.
After all, the solution based on threadprivate is not that costly.
(transplanted from f2849fac20
)
2012-06-08 17:29:02 +02:00
Gael Guennebaud
bfe9b35152 fix ambiguous calls in the functors by prefixing function calls with internal::
(transplanted from 7e36d32b32
)
2012-06-08 09:53:50 +02:00
williami
6d4f7f76ce Fixed RVCT 3.1 compiler errors.
(transplanted from fc5f21903b
)
2012-06-04 10:21:16 -05:00
Thomas Capricelli
b4c4490587 backport fix from main branch (rev 8f47246475
)
2012-05-01 17:42:30 +02:00
Jitse Niesen
6af80a23a5 Add parentheses to silence clang warning (bug #451). 2012-04-29 16:37:43 +01:00
Jitse Niesen
f1f70ceb84 Fix infinite recursion in ProductBase::coeff() (bug #447)
Triggered by product of dynamic-size 1 x n and n x 1 matrices.
Also, add regression test.
(transplanted from 77a5a2b28cb89bca74bdf5936dafb306af6be162)
2012-04-18 15:16:05 +01:00
Gael Guennebaud
ea1ac035ce fix compilation of "somedensematrix.llt().matrixL().transpose()" (missing constness on the return types)
(transplanted from b0cf95619e
)
2012-04-10 15:40:36 +02:00
Gael Guennebaud
360a79d6f8 Replicate now makes use of the cost model to evaluate its nested expression
(transplanted from 311c5b87a3
)
2012-04-06 00:22:13 +02:00
Thomas Capricelli
057254381d uniformize eigen_gen_docs between branches / cleaning 2012-04-03 14:25:36 +02:00
Gael Guennebaud
cafd34fa91 fix bug #362 and add missing specialization for affine-compact * projective
(transplanted from 48f0bbb586
)
2012-03-30 23:22:29 +02:00
Gael Guennebaud
deeffdb245 update CDash server address 2012-03-30 00:38:32 +02:00
Gael Guennebaud
10295de37b s/__SSE3__/EIGEN_VECTORIZE_SSE3
(transplanted from f0a1652113
)
2012-03-21 23:50:43 +01:00
Gael Guennebaud
c31b70fcfd workaround stupid gcc 4.7 warning
(transplanted from daad446d5d
)
2012-03-22 00:01:03 +01:00
Gael Guennebaud
b55585a93d declare Block::m_outerStride as Index (instead of int)
(transplanted from d7da6f63a8
)
2012-03-09 13:54:22 +01:00
Gael Guennebaud
ae32b89b12 update tag for 3.0.5 (hope that's fine) 2012-02-10 21:17:31 +01:00
Gael Guennebaud
0007cc3dd7 fix linking issue with manage_caching_sizes_second_if_negative 2012-02-10 20:52:25 +01:00
Gael Guennebaud
2bde6013c9 Added tag 3.0.5 for changeset 7b9d54ba58 2012-02-10 19:53:33 +01:00
Gael Guennebaud
7b9d54ba58 bump 2012-02-10 19:53:09 +01:00
Gael Guennebaud
457e4b2493 fix bug #417: Map should be nested by value, not by reference
(transplanted from 8dd3ae282d
)
2012-02-09 15:25:42 +01:00
Tim Holy
f54cc2284e Add a tutorial page on the Map class, and add a section to FunctionsTakingEigenTypes about multiple-argument functions and the pitfalls when using Map/Expression types.
(transplanted from 44b19b432c
)
2012-02-08 22:11:12 +01:00
Gael Guennebaud
503cf43556 fix bug #415: wrong return in Rotation2D::operator*=
(transplanted from 5bb34fd14c
)
2012-02-08 21:50:51 +01:00
Jitse Niesen
b9e2b4f6f5 Document that JacobiSVD also handles complex matrices.
Thanks to 'Jazzdude' for noting this on IRC.
(transplanted from ed244e9c1a
)
2012-01-26 13:16:50 +00:00
Gael Guennebaud
2c2b7f4173 fix bug #410: fix a possible out of range access in EigenSolver
(transplanted from a108216af1
)
2012-01-25 19:02:31 +01:00
Gael Guennebaud
fd52daae87 fix bug #406: Using OpenMP and Eigen causes infinite loop/deadlock 2012-01-25 17:42:22 +01:00
Jitse Niesen
61ad84fd4d Make sure that now-fixed assert is not triggered.
(transplanted from 0e1e0a2a58
)
2012-01-19 14:30:44 +00:00
Keir Mierle
0fa2b394ce Fix broken asserts revealed by Clang. 2012-01-18 15:03:27 -08:00
Jitse Niesen
bc0fc5d21e Correct description of rankUpdate() in quick reference guide.
Thanks to Sameer Agarwal for pointing out this mistake.
2012-01-09 12:57:11 +00:00
Keir Mierle
45bcad41b4 Fix out-of-range int constant in 4x4 inverse. 2012-01-05 23:15:09 -08:00
Gael Guennebaud
28bbc4bf47 fix bug #398, the quaternion returned by slerp was not always normalized,
add a proper unit test for slerp
(transplanted from 8171adb7ff
)
2011-12-23 22:39:32 +01:00
Jitse Niesen
05f45cfecd Remove asserts that eigenvalue computation has converged (bug #354).
(transplanted from 1e7712771e
)
2011-12-12 17:17:38 +00:00
Sebastian Lipponer
01e13a273e Fix MSVC integer overflow warning
(transplanted from fff25a4b46
)
2011-12-09 10:39:10 +00:00
Thomas Capricelli
5437ab95fd eigen_gen_docs: dont try to update permissions on server 2011-12-06 15:53:53 +01:00
Benoit Jacob
a45de92246 Added tag 3.0.4 for changeset 1d68e47a23 2011-12-06 08:15:17 -05:00
Benoit Jacob
1d68e47a23 bump 2011-12-06 08:15:10 -05:00
Gael Guennebaud
41b0fd733f fix QuaternionBase::cast.
It did not work with clang, and I'm unsure how it worked for gcc/msvc since QuaternionBase was introduced
(transplanted from 84cf1b5b1d
)
2011-12-05 14:13:59 +01:00
Gael Guennebaud
228920fad7 fix bug #373: compilation error with clang 2.9 when exceptions are disabled (cannot reproduce with clang 3.0 or 3.1)
(transplanted from 59576014a9
)
2011-12-05 09:44:25 +01:00
Gael Guennebaud
dcb36e3d49 fix alignment computation in Block and MapBase such that aligned means aligned on 16 bytes and nothing else 2011-11-28 13:43:10 +01:00
Marc Glisse
11a31f2eba bug #383 - another c++11-user-defined-literal fix 2011-11-27 15:27:25 -05:00
Marc Glisse
874d4e9f30 bug #383 - EIGEN_ASM_COMMENT broken in C++11
this is due to the new user-defined literals syntax.
2011-11-26 17:55:18 -05:00
Jitse Niesen
99d8e5de2b Install eigen3.pc in default directory if pkgconfig not found (bug #358).
(transplanted from 63dcdb65fd
)
2011-11-22 17:30:35 +00:00
Benoit Jacob
a52ab9c089 Alignment fixes:
* Fix AlignedBit computation for Plain Objects
 * use it for the conditional alignment of operator new
 * only overload new in PlainObjectBase, don't overload again in Matrix and Array
2011-11-22 09:04:31 -05:00
Gael Guennebaud
9ed342a30e stop fill pivoting LU only if the pivot is exactly 0
(transplanted from f278a3eaba
)
2011-11-22 09:18:54 +01:00
Jitse Niesen
0ef41ec958 Put docs for unsupported modules in right place (bug #372).
Doxygen was confused by the unsupported modules being partly in the doc/
directory, instead of completely in unsupported/doc/ . Thus, the link to
the unsupported modules on the server did not work (I think this manifested
itself after doxygen was upgraded on the server).
(transplanted from changeset 7898281b2b
)
2011-11-14 13:51:32 +00:00
Marton Danoczy
7438c2d3ce Patches to support ARM NEON with Clang 3.0 and LLVM-GCC 2011-11-04 16:37:10 +01:00
Benoit Jacob
7764885d04 Refactor force-inlining macros and use EIGEN_ALWAYS_INLINE to force inlining of the integer overflow helpers, whose non-inlining caused major performance problems, see the mailing list thread 'Significant perf regression probably due to bug #363 patches' 2011-11-06 16:27:41 -05:00
Gael Guennebaud
6021b5c467 Automatically produce a tgz archive of the documentation.
(transplanted from cdd3e85060
)
2011-11-05 21:59:36 +01:00
Jitse Niesen
1ab1f7b125 Allow for more iterations in SelfAdjointEigenSolver (bug #354).
Add an assert to guard against using eigenvalues that have not converged.
Add call to info() in tutorial example to cover non-convergence.
2011-11-02 14:18:20 +00:00
Benoit Jacob
411b4a1b1d bug #369 - Quaternion alignment is broken
The problem was two-fold:
 * missing aligned operator new
 * Flags were mis-computed, the Aligned constant was misused
2011-10-31 09:23:41 -04:00
Benoit Jacob
8f7fb19907 bug #363 - check for integer overflow in size computations 2011-10-16 16:12:19 -04:00
Jitse Niesen
074755a27c Added tag 3.0.3 for changeset 37725a72db 2011-10-06 20:35:53 +01:00
Jitse Niesen
37725a72db Bump version to 3.0.3 2011-10-06 20:35:36 +01:00
Jitse Niesen
0d1f7ed252 Workaround for mysterious error C2082 in MSVC.
Also, get rid of some "conversion from int to bool" warnings.
2011-10-02 22:23:02 +01:00
Gael Guennebaud
bef5ada15a fix eigen2 support test compilation with ICC 2011-09-28 17:52:06 +02:00
Jitse Niesen
bababb5bd6 Convert tabs to spaces. 2011-09-27 15:47:04 +01:00
Jitse Niesen
9d0fcacc72 Fix bug #286: Infinite loop in JacobiSVD with denormals 2011-09-27 14:25:02 +01:00
Gael Guennebaud
1f974f33d8 some std GNU header files undefined min/max and don't like like either 2011-09-20 01:47:21 +02:00
Jitse Niesen
f698fbed62 Typo in geometry tutorial. 2011-09-19 21:57:26 +01:00
Jitse Niesen
db08fb676b Bug fix for matrix1 * matrix2 * scalar1 * scalar2.
See report on http://forum.kde.org/viewtopic.php?f=74&t=96947 .
2011-09-19 15:15:12 +01:00
Michael Schmidt
3a0d0df82d Protecting remaining min/max usages with parentheses 2011-09-18 16:25:54 +02:00
Jitse Niesen
af34da6438 Fix LDLT::solve() if matrix singular but solution exists (bug #241).
Clarify this in docs and add regression test.
2011-09-11 06:30:53 +01:00
Trevor Wennblom
9c92d70f1d resolve pkgconfig destination - #338
(transplanted from 6b31aa4bd1
)
2011-08-30 19:15:16 -05:00
Jitse Niesen
b6fc4cfe2a Update docs of PlainObjectBase::Map(); fixes bug #335.
Also fix some typos.
2011-09-03 15:18:21 +01:00
Gael Guennebaud
467b7b9263 fix bug #337: mess with min/max in eigen2 support 2011-08-28 22:17:11 +02:00
Gael Guennebaud
48fdb50ae3 Added tag 3.0.2 for changeset a65053d80b 2011-08-26 14:56:38 +02:00
Gael Guennebaud
a65053d80b bump to 3.0.2 2011-08-26 14:56:26 +02:00
root
adcb220db3 fix linking issue with msvc 2011-08-26 15:22:48 +02:00
Gael Guennebaud
b21f9c3573 fix bug #330: Index to int conversion warning
(transplanted from 8414be739b
)
2011-08-23 11:02:10 +02:00
Gael Guennebaud
fe228fc50b mv the mpreal copy in its own folder
(transplanted from ea4a1960f0
)
2011-08-19 15:08:29 +02:00
Gael Guennebaud
4ab20b4cae update to latest mpreal and fix a min/max issue in mpreal.h
(transplanted from 79ad55a901
)
2011-08-19 15:03:45 +02:00
Gael Guennebaud
5d5cf478ab oops EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION now performs full specialization,
no need for the typename keywords
(transplanted from b3f5fbbd9a
)
2011-08-22 10:48:04 +02:00
Gael Guennebaud
55149df4e8 fix bug #262: Compilation error of stdvector_overload test with GCC 4.6
Now our aligned allocator is automatically activated only when the user
did not specify an allocator (or specified the default std::allocator).
(transplanted from b85c89c313
)
2011-08-22 10:12:10 +02:00
Gael Guennebaud
b2d10249b4 fix linking issue
(transplanted from ca7d3dca79
)
2011-08-12 22:38:53 +02:00
Thomas Capricelli
bdf0b0c47e fix a bug where some rotations were not initialized
They actually were in the original minpack code, this is a bug introduced
by our migration.
Reported on #322 and
http://forum.kde.org/viewtopic.php?f=74&t=96197#p201158
2011-08-04 05:02:47 +02:00
Thomas Capricelli
ea7923c6f9 wa2 was computed twice because of a confusion between changesets
746c787a76
 and ee0e39284c
.
Reported on forum:
http://forum.kde.org/viewtopic.php?f=74&t=96197#p201158
2011-08-04 03:25:29 +02:00
Gael Guennebaud
49b6e9143e protect calls to min and max with parentheses to make Eigen compatible with default windows.h 2011-07-21 11:19:36 +02:00
Gael Guennebaud
f096553344 fix bug #320 (pretty gdb printer on mingw)
(transplanted from d4bd8bddb5
)
2011-07-20 11:15:42 +02:00
Gael Guennebaud
433b353013 fix bug #316 - SelfAdjointEigenSolver::compute does not handle matrices of size (1,1) correctly
(transplanted from 5fdebc2fa5
)
2011-07-09 07:15:14 +02:00
Thomas Capricelli
3cb088c39f fix few warnings reported by clang 2011-07-07 22:19:43 +02:00
Gael Guennebaud
a99ea69b32 fix constness of intersection methods (bug #309)
(transplanted from c98cd5e564
)
2011-06-27 13:15:01 +02:00
Thomas Capricelli
d03bbcbcbc fix typo in doc for ParametrizedLine 2011-06-23 00:34:30 +02:00
Tim Holy
fae2aa3fd9 Relatively straightforward changes to wording of documentation, focusing particularly on the sparse and (to a lesser extent) geometry pages.
(transplanted from 16a2d896bc
)
2011-06-20 22:47:58 -05:00
Tim Holy
13a17d968f A first tiny test commit: fix a spelling error in the documentation.
(transplanted from 4a95badf74
)
2011-06-19 14:39:19 -05:00
Gael Guennebaud
135ba535a4 fix documentation of norm
(transplanted from a55c27a15f
)
2011-06-18 08:30:34 +02:00
Gael Guennebaud
bbbf0559fe remove the use of non standard long long
(transplanted from 40287d2fd9
)
2011-06-14 10:56:47 +02:00
Gael Guennebaud
c91fed1eec fix aligned_allocator::allocate interface
(transplanted from f82b3ea241
)
2011-06-14 08:50:25 +02:00
Thomas Capricelli
f59b08f3bd fix typo in constant name 2011-06-12 23:53:46 +02:00
Gael Guennebaud
9155002901 fix compilation with MinGW
(transplanted from 5bc4abc45e
)
2011-06-01 12:16:21 +02:00
Gael Guennebaud
46f4bd9ed4 fix aligned_stack_memory_handler for null pointers
(transplanted from 6441e8727b
)
2011-04-21 09:00:55 +02:00
Gael Guennebaud
ebad34db21 Added tag 3.0.1 for changeset c0f867ed10 2011-05-30 15:23:33 +02:00
Gael Guennebaud
c0f867ed10 bump to 3.0.1 2011-05-30 15:15:37 +02:00
Gael Guennebaud
d225bbe534 do not directly call std::ceil
(transplanted from 9464745385
)
2011-05-28 16:46:38 +02:00
Jitse Niesen
a6f8da7c48 Fix typo ('using namespace' instead of 'using').
(transplanted from d23845c4cc
)
2011-05-26 09:52:36 +01:00
Gael Guennebaud
33efb8ed62 Simplify the use of custom scalar types, the rule is to never directly call a standard math function using std:: but rather put a using std::foo before and simply call foo:
using std::max;
max(a,b);
(transplanted from 87ac09daa8
)
2011-05-25 08:41:45 +02:00
Gael Guennebaud
63e5cf525f work around an ICE with ICC 12 2011-05-29 11:23:31 +02:00
Gael Guennebaud
3cd1641dac fix bug #278: geometry tutorial 2011-05-28 22:12:15 +02:00
Gael Guennebaud
4fe4ab8fc0 finish to fix bug #270: we have to use EIGEN_ALIGN_STATICALLY and not EIGEN_DONT_ALIGN_STATICALLY...
(transplanted from 7b46d7ed0f
)
2011-05-28 11:38:53 +02:00
Gael Guennebaud
d7d76bf4ca bug #225: add a unit test for memory leak
(transplanted from 5541bcb769
)
2011-05-23 14:20:49 +02:00
Gael Guennebaud
cf76a50a34 bug #271: fix copy/paste mistakes in doc 2011-05-23 13:39:26 +02:00
Gael Guennebaud
ee46ae9ba7 clean a bit previous patch (ctor vs static_cast and a few bits)
(transplanted from da644fb0c3e0b7fcda03ba27a02061c084809b9f)
2011-05-23 13:34:04 +02:00
David H. Bailey
b3c3627c72 fix implicit scalar conversions (needed to support fancy scalar types, see bug #276)
(transplanted from d61f1eae804a5dc4924f167c00fbde31c1bef7ea)
2011-05-23 11:20:13 +02:00
Gael Guennebaud
e3a521be6b backport 7209d6a126
(fix gemv_static_vector_if on architectures that cannot align on the stack (e.g., ARM NEON))
2011-05-21 22:19:12 +02:00
Gael Guennebaud
4c7d57490c clean several other assertion checking tests
(transplanted from 96464f8563
)
2011-05-20 09:59:15 +02:00
Gael Guennebaud
fe21e084b4 fix vectorization_logic when EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT
(transplanted from 501bc602ec
)
2011-05-19 21:52:40 +02:00
Gael Guennebaud
282fd7a2da NEON: fix plset
(transplanted from f2837aebc4
)
2011-05-18 21:12:08 +02:00
Gael Guennebaud
7d28c618a0 add unit test for plset
(transplanted from 8170ef0b2d
)
2011-05-18 21:11:03 +02:00
Gael Guennebaud
f07fca2c80 NEON: disable unaligned assertion checking for non vectorized types
(transplanted from 7f2a88c91f
)
2011-05-18 14:11:40 +02:00
Gael Guennebaud
99ab2411e5 NEON: fix ploaddup
(transplanted from 85c137ccd4
)
2011-05-18 08:15:47 +02:00
Gael Guennebaud
ffefe1bd2e fix trmm for some unusual trapezoidal cases (a dense set of columns or rows is zero)
(transplanted from 568478ffe5
)
2011-03-28 17:41:46 +02:00
Gael Guennebaud
55574053d0 fix bug #267: alloca is not aligned on arm
(transplanted from 179d42bb2b
)
2011-05-17 21:30:12 +02:00
Gael Guennebaud
ffee1d1c87 fix 228 (ei_aligned_stack_delete does not exist anymore)
(transplanted from 5fda8cdfb3
)
2011-03-21 21:59:42 +01:00
Gael Guennebaud
adf5992767 port sparse LLT/LDLT to new stack allocation API
(transplanted from 535a61ede8
)
2011-03-20 17:10:43 +01:00
Gael Guennebaud
19e7c672bb clean a bit the stack allocation mechanism
(transplanted from b8ecda5c66
)
2011-03-19 10:27:47 +01:00
Gael Guennebaud
99a6178e6a test the new stack allocation mechanism
(transplanted from bbb4b35dfc
)
2011-03-19 08:51:38 +01:00
Gael Guennebaud
c3342b0bb4 fix memory leak when a custom scalar throw an exception
(transplanted from 290205dfc0
)
2011-03-19 01:06:50 +01:00
John Tytgat
84c8b6d5c5 fix bug #260: broken Qt support for Transform 2011-05-11 22:31:36 +02:00
Jitse Niesen
18a8034348 Get rid of wrong "subscript above bounds" warning (bug #149). 2011-05-07 18:44:11 +01:00
Gael Guennebaud
697e1656ce add missing .data() members to MatrixWrapper and ArrayWrapper
(transplanted from fb76452cbc
)
2011-05-06 21:15:05 +02:00
Gael Guennebaud
c2a23c3e24 fix compilation on ARM NEON (missing AlignedOnScalar)
(transplanted from 97b6d26f5b
)
2011-05-06 09:03:48 +02:00
Thomas Capricelli
6d0e3154d7 better fix for gcc 4.6.0 / ptrdiff_t, as suggested by Benoit 2011-05-05 18:48:40 +02:00
Thomas Capricelli
7b122ed158 backport of a18a1be42d
Fix compilation with gcc-4.6.0, patch provided by Anton Gladky <gladky.anton@gmail.com>,
working on debian packaging.
2011-05-05 00:48:13 +02:00
Jitse Niesen
d9232a96aa Bail out if preprocessor symbol Success is defined (bug #253). 2011-05-04 14:28:01 +01:00
Jitse Niesen
4ecf67f5e4 Backport of a96c849c20
: Document enums in Constants.h (bug #248).
2011-05-03 17:18:10 +01:00
Gael Guennebaud
860d66c0f1 fix bug #258: asin/acos copy paste mistake
(transplanted from 1947da39ab
)
2011-05-02 13:26:44 +02:00
Mathieu Gautier
ba3aafa85f Quaternion : add Flags on Quaternion's traits with the LvalueBit set if needed
Quaternion : change PacketAccess to IsAligned to mimic other traits
test : add a test and 4 failtest on Map<const Quaternion> based on Eigen::Map ones
(transplanted from 2b5868ee7e71398e35d495d447b02e0be54f53da)
2011-04-12 14:49:50 +02:00
Thomas Capricelli
b478521ecd eigen_gen_docs : be nice with the server : dont use -j3 2011-04-19 17:41:23 +02:00
Thomas Capricelli
e8fa6dde01 adapt eigen_gen_docs for the 3.0 branch. Also, create the 'build' dir if
not present.
2011-04-19 17:36:56 +02:00
Gael Guennebaud
134b83c310 fix bug #250: compilation error with gcc 4.6 (STL header files no longer include cstddef)
(transplanted from e87f653924
)
2011-04-19 16:34:25 +02:00
Gael Guennebaud
b0e810fb3f fix bug #242: vectorization was wrongly enabled on MSVC 2005
(transplanted from 67d50f539b
)
2011-04-19 15:25:00 +02:00
Eamon Nerbonne
dee686f762 WIN32 isn't defined ?? but _WIN32 is. 2011-04-19 14:37:04 +02:00
Jitse Niesen
90cacfa610 Make MapBase(PointerType) constructor explicit (fixes bug #251).
Backport of changeset 0b40b36d10
.
2011-04-19 12:56:41 +01:00
Benoit Jacob
de21678aab fix unaligned-array-assert link 2011-04-18 06:35:54 -04:00
Jitse Niesen
a700d3c506 Backport of c9b5531d6c
: Normalize eigenvectors (bug #249).
2011-04-15 17:41:12 +01:00
Jitse Niesen
fc4684fe97 Backport of 70d5837e00
: Correct typo in QuickReference doc.
2011-04-01 16:59:45 +01:00
Adam Szalkowski
c088ee78c8 fix bug #239: the essential part was left uninitialized in some cases
(transplanted from 969e92261d
)
2011-03-31 09:54:52 +02:00
Jitse Niesen
e53539435d Backport of changeset c6ad2deead
. Fixes bug #232.
2011-03-24 10:45:24 +00:00
Benoit Jacob
1e8b834ceb fix typos 2011-03-21 06:45:57 -04:00
Benoit Jacob
3c510db6bf Added tag 3.0.0 for changeset 72ffb63165 2011-03-19 11:43:21 -04:00
Gael Guennebaud
72ffb63165 fix compilation for old but not so old versions of glew 2011-03-18 10:26:21 +01:00
Benoit Jacob
67e24b85a4 bump 2011-03-18 05:13:34 -04:00
Gael Guennebaud
2359486129 disable testing of aligned members when aligned static allocation is not enabled (e.g., for gcc 3.4) 2011-03-15 09:53:23 +01:00
Gael Guennebaud
dd2e4be741 fix array_for_matrix unit test 2011-03-15 09:42:22 +01:00
Benoit Jacob
c5ef8f9027 Added tag 3.0-rc1 for changeset 4931a719f4 2011-03-14 14:10:12 -04:00
Benoit Jacob
4931a719f4 bump 2011-03-14 14:10:05 -04:00
Jitse Niesen
27f34269d5 Document EIGEN_DEFAULT_DENSE_INDEX_TYPE.
Also, expand description of EIGEN_DONT_ALIGN.
2011-03-11 11:15:44 +00:00
Jitse Niesen
e7d2376688 Change int to Index in equalsIdentity().
This fixes compilation errors in nullary test on 64-bits machines.
2011-03-11 11:06:13 +00:00
Benoit Jacob
dc36efbb8f fix bug #219: Map Flags AlignedBit was miscomputed, didn't account for EIGEN_ALIGN 2011-03-10 10:17:17 -05:00
Benoit Jacob
9a47fb289b add test for EIGEN_DONT_ALIGN and EIGEN_DONT_ALIGN_STATICALLY, cf recent bugs (214 etc) and changeset 56818d907e 2011-03-10 09:44:59 -05:00
Jitse Niesen
151e3294cf Fix equalsIdentity() for rectangular matrices. 2011-03-10 13:49:06 +00:00
Oliver Ruepp
5d1263e7c5 bug #37: fix resizing when the destination sparse matrix is row major 2011-03-08 16:37:59 +01:00
Gael Guennebaud
c6c6c34909 repeat nullary tests, and fix some tests 2011-03-07 16:41:59 +01:00
Jitse Niesen
931edea57d Tweak geo_quaternion test to squash intermittent failures. 2011-03-07 11:42:55 +00:00
Benoit Jacob
bfcad536e8 * bug #206: correctly forward computationOptions and work towards avoiding mallocs after preallocation, with unit test.
* added EIGEN_RUNTIME_NO_MALLOC and new set_is_malloc_allowed() function to implement that test
2011-03-06 20:59:25 -05:00
Benoit Jacob
b464fc19bc try to fix a ICC 11.1 compiler error (bug #217) 2011-03-06 19:27:31 -05:00
Benoit Jacob
c541d0a62e disable ICC 12 warning 279 - controlling expression is constant 2011-03-06 19:06:44 -05:00
Benoit Jacob
b43d92a5a2 The Eigen2 intrusive std::vector hack really can't be supported in eigen3 (bug #215) 2011-03-04 10:24:41 -05:00
Benoit Jacob
56818d907e Make EIGEN_ALIGN16 always align to fix crashes with EIGEN_DONT_ALIGN_STATICALLY. New macro EIGEN_USER_ALIGN16 had the old behavior i.e. honors user preference. 2011-03-04 09:57:49 -05:00
Sameer Sheorey
e9868f438b Changed debug/gdb/printers.py to correctly display variable sized matrices.
There is no python error now.
2011-03-02 10:47:54 -06:00
Gael Guennebaud
4f0909b5f0 fix bug #212 (installation of Eigen2Support/Geometry) 2011-03-04 14:16:58 +01:00
Jitse Niesen
6cac61ca3e Copy fix of unit test when GSL is enabled to eigen2 test suite. 2011-03-04 11:04:07 +00:00
Jitse Niesen
1180ede36d Escape hash character in docs as required by doxygen. 2011-03-03 15:19:11 +00:00
Jitse Niesen
99fa279ed1 Use copy_bool() workaround in Eigen2 test suite.
See bug #89 and changeset 59596efdf7
.
2011-03-03 14:17:23 +00:00
Jitse Niesen
dbab12d6b0 Fix bug #205: eigen2_adjoint_5 test fails. 2011-03-02 22:00:48 +00:00
Gael Guennebaud
dc727d86f1 extend unit tests of Transform * MatrixBase and Transform * Homogeneous 2011-03-02 19:34:39 +01:00
Gael Guennebaud
5cec29162b fix compilation in the case of 1D Transform 2011-03-02 19:29:55 +01:00
Gael Guennebaud
703c8a0cc6 fix compilation when mixing CompactAffine with Homogeneous objects 2011-03-02 19:27:13 +01:00
Gael Guennebaud
d30f0c0953 fix transform * matrix products: in particular it now truly considers the rhs as a set of (homogeneous) points and does not neglect the homogeneous coordinates in the case of affine transform 2011-03-02 19:26:38 +01:00
Gael Guennebaud
adacacb285 fix bug #204: limit integer values to numbers which are representable using float 2011-03-02 14:24:26 +01:00
Gael Guennebaud
c8e1b679fa re-enable fast pset1-pstore by introducing a new higher level pstore1 function 2011-03-02 10:55:44 +01:00
Gael Guennebaud
951e238430 now fixing "unsupported" "legacy" code... 2011-03-01 16:45:46 +01:00
Benoit Jacob
9c5c8d8916 Added tag 3.0-beta4 for changeset 77fc6a9914 2011-02-28 00:55:59 -05:00
Benoit Jacob
77fc6a9914 bump 2011-02-28 00:55:52 -05:00
Benoit Jacob
eef03525b8 fix bug #203: revert to using _mm_set1_p[sd] 2011-02-28 00:04:05 -05:00
Benoit Jacob
31621ff0ef relax condition in matrix_exponential test for clang 2011-02-27 23:25:14 -05:00
Benoit Jacob
0b44893b4e fix umeyama test 2011-02-27 23:20:45 -05:00
Benoit Jacob
8cad73072e fix stable_norm test: the |small| value was 0 on clang with complex<float>. 2011-02-27 22:35:49 -05:00
Benoit Jacob
9be2712bf7 remove now-useless comments 2011-02-27 22:35:17 -05:00
Benoit Jacob
0612768c1c fix bug #201: Clang too has intrinsics bugs preventing us to use custom unaligned loads 2011-02-27 21:59:07 -05:00
Benoit Jacob
32025a2510 disable BVH test on Clang++. Looks like there's a good reason why BVH is unsupported. It seems to have a very weird usage pattern, relying on an externally defined bounding_box function in a naive way. 2011-02-27 21:37:34 -05:00
Benoit Jacob
771e64200f fix compilation of unit tests with clang 2011-02-27 20:33:58 -05:00
Benoit Jacob
4846c76d9d shut up a stupid clang 2.8 warning 2011-02-27 20:18:03 -05:00
Benoit Jacob
afc9efca15 fix compilation with clang 2.8 2011-02-27 20:17:47 -05:00
Benoit Jacob
ea7d872181 documentation fixes 2011-02-27 17:43:10 -05:00
Benoit Jacob
b6299c974f add option to build in 32bit mode 2011-02-27 17:27:23 -05:00
Benoit Jacob
b3544ce2ae bug #195 - fix this once and for all: just never use _mm_load_sd on gcc/i386, it generates redundant x87 ops 2011-02-27 17:26:59 -05:00
Jitse Niesen
a8f5ef9388 Document (non)sorting of eigenvalues.
Also, update docs for (Generalized)SelfAdjointEigenSolver to reflect that these
two classes were split apart.
2011-02-27 14:06:55 +00:00
Jitse Niesen
58abf0eb98 Use absolute error to test sum in which cancellation may occur. 2011-02-25 08:56:37 +00:00
Gael Guennebaud
ef73265987 to ease debugging let's catch invalid template options in Transform 2011-02-25 09:03:24 +01:00
Gael Guennebaud
4fbd78d993 fix compilation with gcc 3.4 2011-02-25 09:02:15 +01:00
Benoit Jacob
5dfae4524b fix bug #195: fast unaligned load for integer using _mm_load_sd failed when the value is interpreted as a NaN 2011-02-24 10:31:57 -05:00
Hauke Heibel
2064c59878 Improved docs of PlainObjectBase::conservativeResize methods. 2011-02-24 15:48:41 +01:00
Gael Guennebaud
bb9a465c5a fix AltiVec ploaddup 2011-02-24 00:23:50 +03:00
Gael Guennebaud
28d17c5390 bounds the range of random integers for AltiVec 2011-02-24 00:22:53 +03:00
Gael Guennebaud
4bfe38eda2 extend testing of ploaddup 2011-02-24 00:22:10 +03:00
Gael Guennebaud
23aae0d63e fix pset1 for complex 2011-02-23 21:24:47 +03:00
Gael Guennebaud
0dfea7fce4 improve packetmath unit test 2011-02-23 21:24:26 +03:00
Gael Guennebaud
c121e6f390 implement ploaddup for complex and SSE/NEON even though they are not used in practice 2011-02-23 16:31:42 +01:00
Gael Guennebaud
955c099eb5 implement ploaddup for altivec and add respective unit test 2011-02-23 18:20:55 +03:00
Gael Guennebaud
a00aaf7f7e fix overflow in packetmath unit test 2011-02-23 17:57:18 +03:00
Gael Guennebaud
6e01780541 fix a couple of issues with pcplxflip 2011-02-23 17:51:40 +03:00
Gael Guennebaud
939f0327b6 mention reverse and replicate in the quick ref 2011-02-23 15:31:16 +01:00
Gael Guennebaud
78e1a62c54 implement pcplxflip for altivec 2011-02-23 14:20:58 +01:00
Gael Guennebaud
59eeb67187 add unit test for pcplxflip 2011-02-23 14:20:33 +01:00
Gael Guennebaud
b8374aec00 implement workarounds for MSVC IDEs and the Experimental target 2011-02-23 11:53:20 +01:00
Gael Guennebaud
7dc18b20bb same for neon 2011-02-23 09:41:55 +01:00
Gael Guennebaud
32e7dae776 Altivec: fix infinite loop (ei_ -> internal:: change) 2011-02-23 09:41:02 +01:00
Gael Guennebaud
9ab503903e suppress unused warning 2011-02-23 09:32:55 +01:00
Gael Guennebaud
14b164b00e do not try to use Eigen's blas/lapack if they cannot be compiled 2011-02-23 09:25:32 +01:00
Gael Guennebaud
c78b5fd9aa fix no newline warning 2011-02-23 09:23:11 +01:00
Gael Guennebaud
2fb5567e08 add missing AlignedOnScalar 2011-02-22 21:25:47 +01:00
Benoit Jacob
3df134dec2 fix icc warning #68 2011-02-22 10:11:03 -05:00
Benoit Jacob
c58a2ff03a add EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS non-default option. Use it in our own CMakeLists. also add a include-guard-like mechanism to prevent doing unmatched #pragma warning push/pop. 2011-02-22 10:05:41 -05:00
Benoit Jacob
9e1127619c merge 2011-02-22 09:33:01 -05:00
Benoit Jacob
720767ae40 ICC 12 / linux only defined __INTEL_COMPILER, not __intel_compiler 2011-02-22 09:32:39 -05:00
Benoit Jacob
d8e97aee89 shut up stupid ICC warnings 2011-02-22 09:31:22 -05:00
Benoit Jacob
625814464e fix legitimate ICC 12 warning 2011-02-22 09:30:54 -05:00
Gael Guennebaud
39b27fb656 altivec compilation fix 2011-02-22 15:26:28 +01:00
Benoit Jacob
25579df2d4 'fix' a couple of clang -Wconstant-logical-operand warnings (still not convinced about the pertinence of that warning) 2011-02-22 08:54:55 -05:00
Benoit Jacob
3884308da7 __attribute__((flatten)) seems to be recognized by neither clang nor icc despite these compilers defining __GNUC__. 2011-02-22 08:40:37 -05:00
Gael Guennebaud
68631e28d4 also test non_projective_only with row major transformations 2011-02-22 14:26:32 +01:00
Benoit Jacob
39d3bc2394 fix bug #190: directly pass Transform Options to Matrix, allowing to use RowMajor. Fix issues in Transform with non-default Options. 2011-02-22 08:14:38 -05:00
Gael Guennebaud
659c97ee49 gcc 4.4 also defines float32_t as a special type 2011-02-22 10:04:09 +01:00
Gael Guennebaud
769eeac35e disable output compression since this feature seems to be broken 2011-02-21 21:19:38 +01:00
Gael Guennebaud
51da67f211 more compilation fixes for altivec 2011-02-21 20:36:20 +01:00
Gael Guennebaud
05545d0197 fix compilation 2011-02-21 17:47:31 +01:00
Gael Guennebaud
8bee573a78 workaround ICC aggressive optimization 2011-02-21 16:17:58 +01:00
Gael Guennebaud
fb1a29fed5 fix ICE and warning with gcc 4.2.4 2011-02-21 16:11:18 +01:00
Gael Guennebaud
e129e985c3 link to blas/lapack only when needed, and use the static versions to hopefully workaround weird linking issues to gfortranbegin (see jitse dashboard) 2011-02-21 15:48:37 +01:00
Gael Guennebaud
2d5ea82807 fix bug #176 (workaround a too aggressive optimization made by ICC) 2011-02-21 11:00:07 +01:00
Gael Guennebaud
3c00e3da03 enable some tests that have been commented out 2011-02-18 18:08:58 +01:00
Gael Guennebaud
434817164e fix umfpack with complexes 2011-02-18 18:07:59 +01:00
Gael Guennebaud
2c1ac23c62 remove unused code 2011-02-18 17:54:48 +01:00
Gael Guennebaud
a0e5b00280 forgot that one, again 2011-02-18 17:50:36 +01:00
Gael Guennebaud
6456b74a89 merge 2011-02-18 17:40:31 +01:00
Gael Guennebaud
86ca05b324 remove largeEps in adjoint unit test and use a more accurate test_isApproxWithRef test. 2011-02-18 17:39:04 +01:00
Gael Guennebaud
8f8c67b8bd fix bug #186 (in 32 bits mode, gcc 4.3 messed up with pfirst for complex<float>) 2011-02-18 15:47:17 +01:00
Benoit Jacob
aa966ca319 fix bug #187: stable norm test was quite broken 2011-02-18 09:46:49 -05:00
Gael Guennebaud
f7cd63b964 fix bug #189 (issue with fortran conventions to return COMPLEX values) 2011-02-18 15:11:31 +01:00
Gael Guennebaud
69cecc45e5 extend mapstride unit test to test unaligned configurations 2011-02-18 14:41:40 +01:00
Gael Guennebaud
abce49ea21 fix a segfault in "slice vectorization" when the destination might not be aligned on a scalar (complex<double>) 2011-02-18 14:20:36 +01:00
Gael Guennebaud
d271ad38ce back to brute force linking to sparse libraries (fix cmake when these libs are not found) 2011-02-18 11:35:45 +01:00
Gael Guennebaud
3e2314dd67 forgot to include this file in previous commit (needed for lapack) 2011-02-18 11:32:39 +01:00
Gael Guennebaud
444c1bc55b now cholmod, umfpack, and superlu uses our own BLAS and LAPACK libs 2011-02-18 11:26:31 +01:00
Gael Guennebaud
390724b4b6 add lapack interface to real symmetric eigenvalue dec and enable building of the lapack shared library 2011-02-18 11:25:04 +01:00
Gael Guennebaud
d8ca948148 it is now up to user of these Find* module to find and link to BLAS and/or LAPACK 2011-02-18 11:23:27 +01:00
Gael Guennebaud
3345ea0ddd clean a bit SuperLU declarations 2011-02-18 10:23:32 +01:00
Gael Guennebaud
9195a224f3 fix division by zero if the matrix is exactly zero 2011-02-17 19:39:57 +01:00
Gael Guennebaud
b8ef48c46d for consistency forward declare tan, asin, acos functors 2011-02-17 18:23:04 +01:00
Gael Guennebaud
a53a7d6e6a use C linkage for umfpack (might fix some linking issues) 2011-02-17 18:19:28 +01:00
Gael Guennebaud
eda59ffc1b mention std::ptr_fun in the quickref guide 2011-02-17 18:07:21 +01:00
Gael Guennebaud
6f86c12339 typo 2011-02-17 17:48:16 +01:00
Gael Guennebaud
aea630a98a factorize implementation of standard real unary math functions, and add acos, asin 2011-02-17 17:37:11 +01:00
Gael Guennebaud
2ba55e90db make check now tests everything - also rm the EigenTesting cmake sub-project 2011-02-17 16:58:18 +01:00
Benoit Jacob
d0b8ce8f2a fix unused var warning 2011-02-17 09:41:17 -05:00
Gael Guennebaud
1c4e85ac7e forgot to include this file in one pretty old commit (missing EXCLUDE_FROM_ALL) 2011-02-17 15:33:35 +01:00
Jitse Niesen
78fa34e8ff Add blas tests for buildtests target. 2011-02-17 13:53:20 +00:00
Benoit Jacob
8fb27fad36 remove #include <iostream> at the wrong place 2011-02-17 07:47:05 -05:00
Jitse Niesen
be224d93f4 Include necessary header files when working around bug #89.
Fixes bug #188.
2011-02-17 11:51:48 +00:00
Benoit Jacob
11402edfd3 with old gcc (bug #89), only include iostream in debug mode 2011-02-16 12:01:47 -05:00
Gael Guennebaud
fe8a710a21 properly report OpenGL as a disabled backend 2011-02-16 18:01:06 +01:00
Gael Guennebaud
03d86ea736 fix installation of unsupported modules 2011-02-16 17:59:35 +01:00
Benoit Jacob
13a5582835 undo debugging change 2011-02-16 09:18:48 -05:00
Benoit Jacob
59596efdf7 Fix bug #89: on GCC <= 4.3, use a custom assert implementation to work around a compiler bug 2011-02-16 08:50:19 -05:00
Jitse Niesen
6db8fa7d04 Replace unset() by set() with no value specified; this does the same.
unset() was introduced in CMake 2.6.3 but we require only 2.6.2.
2011-02-16 10:16:47 +00:00
Gael Guennebaud
2f15f74218 CTEST_CUSTOM_* parameters have to be put in a CTestCustom.cmake file which itself has to be in the build directory 2011-02-15 12:39:45 +01:00
Gael Guennebaud
578d6f7ced now ctest does compile the test even though they are not in the "all" target 2011-02-15 11:40:43 +01:00
Gael Guennebaud
a1d7e9051e fix bug #184 (warning) 2011-02-14 15:41:00 +01:00
Gael Guennebaud
8e0a42350d fix stupid warning (bug #185) 2011-02-14 15:33:26 +01:00
Hauke Heibel
ac465a0891 Improve the Transform interface in order to prevent T.rotation() = R from compiling. 2011-02-14 12:00:47 +01:00
Jitse Niesen
211e1f8044 Improve documentation of plugins. 2011-02-13 22:50:57 +00:00
Benoit Jacob
d09b94e2ad Added tag 3.0-beta3 for changeset 58986ac832 2011-02-12 18:57:10 -05:00
Benoit Jacob
58986ac832 bump 2011-02-12 18:57:04 -05:00
Jitse Niesen
8bca23bbec Mention comma initializer can be used to concatenate vectors
(inspired by a question on IRC)
2011-02-12 23:17:31 +00:00
Hauke Heibel
1a6597b8e4 MSVC does not like using uninitialized SSE variables, so we have to pass all zeros. 2011-02-12 21:29:16 +01:00
Hauke Heibel
509ca63543 Merge 2011-02-12 18:50:53 +01:00
Hauke Heibel
beb03032b7 Disabled warning regarding the use of uninitialized variables on MSVC. 2011-02-12 18:48:57 +01:00
Jitse Niesen
9ac68e40a0 Write topic page for storage orders. 2011-02-12 17:43:29 +00:00
Hauke Heibel
7015aa00a9 Added configuration file for the 'eol' extension. 2011-02-12 18:38:56 +01:00
Gael Guennebaud
9d2bf35a05 implement optimized ploadu for MSVC10: this also fix bad code generation in gebp_kernel :) 2011-02-12 16:40:09 +01:00
Gael Guennebaud
ec7409b16e since gebp_kernel handled the scaling by alpha it used too many packets, this patch fix that. 2011-02-12 14:17:52 +01:00
Benoit Jacob
f7e4602a40 doc fixes 2011-02-11 09:55:54 -05:00
Hauke Heibel
bf79a3199c Reduced error traces when mixing matrices with different scalar types. 2011-02-11 09:41:48 +01:00
Gael Guennebaud
fe70113fab fix Transform documentation regarding Mode 2011-02-10 18:58:37 +01:00
Benoit Jacob
f3b81302cd fix typo 2011-02-10 11:06:01 -05:00
Benoit Jacob
57b22204db document the eigen2 support stages 2011-02-10 10:55:22 -05:00
Benoit Jacob
6a5a13e394 The pfirst hack is needed also on msvc 2010 as it gets completely nuts, even though it doesnt segfault as msvc 2008 did 2011-02-09 15:13:23 -05:00
Benoit Jacob
63626bb966 remove debug #error 2011-02-09 14:37:52 -05:00
Benoit Jacob
85f9fab003 back out changeset efdf2e4056
. It turns out that the SSE3 header is always included, even without any SSE enabled, so it was making us wrongly use SSE3 paths. Backing this out fixes msvc related crashes, at least bug #165.
2011-02-09 14:01:26 -05:00
Gael Guennebaud
d6c4ca4845 fix redundancy 2011-02-09 13:44:05 +01:00
Gael Guennebaud
c0d5131435 workaround gcc 4.2.1 ICE (fix bug #145) 2011-02-09 13:04:35 +01:00
Gael Guennebaud
40526e24b4 fix memory leak (when conservatively resizing vectors of dynamically allocated scalar types such as bignums) 2011-02-07 19:52:16 +01:00
Benoit Jacob
ba9f6a2c3b now random<integer types> spans over 0..RAND_MAX, or -RAND_MAX/2..RAND_MAX/2 for signed types, or the most significant bits for smaller integer types. 2011-02-07 10:55:41 -05:00
Benoit Jacob
3386a946f8 fix unit tests for integer types in preparation for next changeset making random<int> span over a much bigger range 2011-02-07 10:54:50 -05:00
Benoit Jacob
68a2e04a96 fix fuzzy compares for integer types, using a selector 2011-02-07 10:53:17 -05:00
Gael Guennebaud
c5c8efa575 workaround gcc 4.2 and 4.3 compilation issue with NEON 2011-02-07 16:41:21 +01:00
Benoit Jacob
9105e62d0a introduce EIGEN_MAKING_DOCS to tell whether we're compiling the docs examples 2011-02-06 12:51:42 -05:00
Benoit Jacob
02ee26a3a5 fix build of class Block examples 2011-02-06 12:43:01 -05:00
Benoit Jacob
182ed9ba6c merge 2011-02-06 11:57:31 -05:00
Benoit Jacob
bc6625ab87 fix const correctness in Diagonal::coeffRef (fix found by failtests) 2011-02-06 11:57:04 -05:00
Benoit Jacob
dab4e583cb fix EIGEN_STATIC_ASSERT_LVALUE (fix found by failtests) 2011-02-06 11:56:33 -05:00
Benoit Jacob
80500b693c add more failtests 2011-02-06 11:55:51 -05:00
Hauke Heibel
d975b82105 Removed internal::as_argument. This fixes the alignment issues of bug #165. 2011-02-06 17:33:04 +01:00
Hauke Heibel
7ea6ac79a3 Exposed failtests publicly. 2011-02-06 13:43:08 +01:00
Gael Guennebaud
ea99880760 fix under- and overflow 2011-02-06 08:23:10 +01:00
Benoit Jacob
9ce08b352f add more failtests 2011-02-06 01:44:51 -05:00
Benoit Jacob
9b13e9aece failtest: a new cmake-based test suite for testing stuff that should fail to build. This first batch imports some const correctness checks from bug #54. 2011-02-05 18:57:29 -05:00
Hauke Heibel
8aee724274 Made MatrixBase::BasisReturnType const. 2011-02-05 15:53:17 +01:00
Hauke Heibel
6c3dc0d243 Fix Diagonal related const correctness issues. 2011-02-05 14:19:53 +01:00
Hauke Heibel
e20f1a44bb Fixed hidden const correctness issue. 2011-02-05 13:52:18 +01:00
Jitse Niesen
e2d46eac42 Remove all references to EIGEN_TUNE_CPU_CACHE_SIZE.
This macro is no longer used as of revision 0212eec23f
.
2011-02-04 22:33:53 +01:00
Thomas Capricelli
0b555a4a3d fix misc warnings 2011-02-04 13:55:12 +01:00
Thomas Capricelli
0ed604583f turnaround for a compiler bug in gcc 3.4.6 2011-02-04 12:09:30 +01:00
Gael Guennebaud
aee4e950d3 extend ctest script for SSSE3 and above 2011-02-03 18:04:43 +01:00
Gael Guennebaud
5887a086cf fix SSE3 issue (infinite loop after the ei_ => internal change) - this fix bug #174 2011-02-03 17:55:24 +01:00
Gael Guennebaud
1526de96a0 fix compilation with MSVC 2011-02-03 17:23:33 +01:00
Benoit Jacob
4489c56c9e add Map static methods taking Strides, add test checking for compilation errors 2011-02-03 10:05:45 -05:00
Gael Guennebaud
2e2614b0fd fix MSVC8 compilation 2011-02-03 15:40:48 +01:00
Gael Guennebaud
2f71277105 add global tan function 2011-02-03 14:45:21 +01:00
Jason Newton
d028262e06 add tan function in Array world 2011-02-03 14:34:40 +01:00
Gael Guennebaud
1eae6d0fb9 an even more stable procedure 2011-02-03 11:25:34 +01:00
Gael Guennebaud
5beb2f4f0d slightly more stable eigen vector computation 2011-02-03 10:31:45 +01:00
Gael Guennebaud
a617d7f2ad fix compilation with MSVC2005 (strange, stupid fixes for MSVC9 confuse MSVC8....) 2011-02-02 17:47:48 +01:00
Gael Guennebaud
52e0a44034 implement GBMV 2011-02-02 11:39:13 +01:00
Gael Guennebaud
d5f6819761 split BandMatrix to a base and a wrapper class 2011-02-02 11:38:08 +01:00
Gael Guennebaud
8915d5bd22 fix 168 : now TriangularView::solve returns by value making TriangularView::solveInPlace less important.
Also fix the very outdated documentation of this function.
2011-02-01 17:21:20 +01:00
Gael Guennebaud
59af20b390 extend nomalloc test 2011-02-01 16:46:35 +01:00
Gael Guennebaud
ffc8386fdb mark the packet access methods as internal 2011-02-01 16:14:53 +01:00
Gael Guennebaud
a486d5590a implement optimized path for selfadjoint rank 1 update (safe regarding dynamic alloc) 2011-02-01 15:49:10 +01:00
Benoit Jacob
3eb74cf9fc forgot hg add 2011-02-01 07:51:55 -05:00
Gael Guennebaud
fa32ce0fc5 fix alignment issue 2011-02-01 13:51:56 +01:00
Benoit Jacob
2d09b11a97 relax Matrix/Array(Index) ctors to allow size 0, add test. 2011-02-01 07:46:02 -05:00
Gael Guennebaud
faa1284c12 fix compilation of snippets 2011-02-01 13:28:14 +01:00
Gael Guennebaud
4cb9d0f943 notify the creation of manual temporaries 2011-02-01 11:41:52 +01:00
Gael Guennebaud
c60818fca8 fix trmv regarding strided vectors and static allocation of temporaries 2011-02-01 11:38:46 +01:00
Gael Guennebaud
0fdd01fe24 operator(int) and the likes are not only fine for linear storage 2011-02-01 11:09:02 +01:00
Gael Guennebaud
f4a7679904 fix packing criterion 2011-02-01 10:41:12 +01:00
Gael Guennebaud
f46ace61d3 fix dynamic allocation for fixed size objects in matrix-vector product 2011-01-31 21:30:27 +01:00
Benoit Jacob
5ca407de54 update .hgignore 2011-01-31 09:21:31 -05:00
Benoit Jacob
dc22ae101f kill stage 15, it's useless 2011-01-31 09:18:49 -05:00
Benoit Jacob
df06f0be31 eigen2 support: pass remaining 2 tests 2011-01-31 08:55:38 -05:00
Benoit Jacob
7032ec80ae eigen2support: disable sparse tests, and do not require to define YES_I_KNOW_NOT_STABLE 2011-01-31 08:44:49 -05:00
Benoit Jacob
374deaed5f make eigen2 eigensolver test pass 2011-01-31 08:36:14 -05:00
Gael Guennebaud
e2642ed620 clean the script to generate the plots 2011-01-31 12:45:18 +01:00
Gael Guennebaud
3874e6a72b include cblas.h header file to ease configuration 2011-01-31 11:02:59 +01:00
Gael Guennebaud
476cb4c65c fix name collision 2011-01-31 10:54:21 +01:00
Gael Guennebaud
9a73bfeb85 add GOTO2 and clean a bit the cmake macros 2011-01-31 10:45:03 +01:00
Gael Guennebaud
6e67d15795 now gemv supports strides 2011-01-30 08:17:46 +01:00
Hauke Heibel
157a5040d5 Added the /bigobj flag in order to enable compilation with MSVC when EIGEN_SPLIT_LARGE_TESTS is not set. 2011-01-29 14:35:24 +01:00
Benoit Jacob
a1f5ea8954 make eigen2 cholesky test pass 2011-01-28 13:04:23 -05:00
Benoit Jacob
e001db2a15 fix bug in triangular matrix-vector product found by eigen2 tests! 2011-01-28 13:04:11 -05:00
Gael Guennebaud
852077fbc9 still test fftw even if the binary for long double is not available 2011-01-28 16:54:01 +01:00
Gael Guennebaud
c478e0039e disable broken determinant for complexes and SuperLU 2011-01-28 16:30:21 +01:00
Benoit Jacob
6f2ba1f52b typo reported by Don Lorenzo 2011-01-28 10:00:34 -05:00
Gael Guennebaud
817d86cbaf really fix permute_symm_to_symm for sparse complex matrix 2011-01-28 15:51:55 +01:00
Gael Guennebaud
6ec660ca7e fix crash in autodiff 2011-01-28 15:30:33 +01:00
Gael Guennebaud
af712e80e6 fix bug #73: weird compilation error in HouseholderSequence where double and float were mixed. Hopefully this also solves bug #91... 2011-01-28 12:35:32 +01:00
Gael Guennebaud
d76ed18a9f rm useless ctor 2011-01-28 11:25:11 +01:00
Gael Guennebaud
1731a432e7 fix BTL cholesky action and output errors if the factorization failed 2011-01-28 11:24:18 +01:00
Gael Guennebaud
837f1ae59c fix compilation with old gcc 2011-01-28 11:23:02 +01:00
Gael Guennebaud
ddfd288dc9 start nightly builds at 00:00:00 UTC 2011-01-28 10:33:02 +01:00
Gael Guennebaud
42d512d33c fix compilation with gcc 4.2 and older 2011-01-28 10:26:05 +01:00
Gael Guennebaud
97801e5e0e Eigen/Eigen should not include Sparse until it is API stable 2011-01-28 10:04:02 +01:00
Gael Guennebaud
736d00ab87 typo 2011-01-28 09:57:35 +01:00
Gael Guennebaud
162d29e696 fix compilation of sparse module with ICC 2011-01-28 09:55:32 +01:00
Thomas Capricelli
22db1a6e82 fix fftw test 2011-01-27 18:25:41 +01:00
Benoit Jacob
b2b8c6a89c dot() now always uses eigen3 convention, even in eigen2 support mode, even stage 10. Didn't have a choice as lots of eigen code is using it. 2011-01-27 12:04:26 -05:00
Gael Guennebaud
e761ba68f7 merge 2011-01-27 18:03:13 +01:00
Gael Guennebaud
3d8e179aa2 fix MaxCols in ComplexEigenSolver which was causing memory allocation instead of static allocation in the nomalloc test. Uncomment commented parts of the nomalloc test since now matrix-matrix products are safe. 2011-01-27 18:02:49 +01:00
Gael Guennebaud
32124bc64a EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET must be defined to use Eigen/Sparse 2011-01-27 17:36:58 +01:00
Benoit Jacob
52fed69baa add test for geometry with eigen2_ prefixes. fix that stuff. 2011-01-27 11:21:38 -05:00
Gael Guennebaud
955e096277 add an Options template parameter to Hyperplane and ParametrizedLine 2011-01-27 17:17:06 +01:00
Hauke Heibel
d5e81d866a Added regression tests for bug #148. 2011-01-27 16:37:06 +01:00
Benoit Jacob
fd400ffffb reverse order of testing for eigen2 support stages. Higher stages now have priority. So if your whole project builds with say stage 10, you can manually enable stage 20 for selected files. 2011-01-27 10:34:44 -05:00
Benoit Jacob
b69b6a9db2 add Threshold API to FullPivHouseholderQR 2011-01-27 10:17:52 -05:00
Gael Guennebaud
a954a0fbd5 Add an Options template parameter to Transform to enable/disable alignment 2011-01-27 16:07:33 +01:00
Jakob Schwendner
e3306953ef test case for unaligned quaternion 2011-01-27 09:14:30 -05:00
Christoph Hertzberg
0aa752fc4f add quaternion Options, add unaligned possibility 2011-01-27 09:14:22 -05:00
Gael Guennebaud
9ccd16609c fix twisted selfadjoint to selfadjoint (conjugation issue) 2011-01-27 14:39:01 +01:00
Gael Guennebaud
f5d0f115b4 EigenSolver is now in the Eigenvalues modules, not QR !
: Enter commit message.  Lines beginning with 'HG:' are removed.
2011-01-27 13:56:03 +01:00
Gael Guennebaud
255f2a1379 fix various compilations issues 2011-01-27 13:51:39 +01:00
Gael Guennebaud
999678c3f0 fix mixingtypes unit test 2011-01-27 13:51:17 +01:00
Eamon Nerbonne
40998f5e86 fix const-related compiler error on MSC. 2011-01-27 07:43:07 -05:00
Gael Guennebaud
5f03cbd44f fix many missing const in return types 2011-01-27 12:12:24 +01:00
Gael Guennebaud
e8d6a5ca87 fix cross product for complexes and add support for mixed real-complex cross products 2011-01-27 11:33:37 +01:00
Gael Guennebaud
0bfb78c824 allow mixed complex-real and real-complex dot products 2011-01-27 09:59:19 +01:00
Benoit Jacob
fe3bb545e0 allow matrix[index] in EIGEN2_SUPPORT 2011-01-26 20:22:33 -05:00
Gael Guennebaud
c90d0c363b improve automatic handling of gotoblas and atlas 2011-01-26 19:39:10 +01:00
Gael Guennebaud
0e8a532f87 always link to gfortran for gotoblas, it seems to be harmless for 1.x but needed for 2.x 2011-01-26 19:16:06 +01:00
Gael Guennebaud
240bfdd142 finish the move to Eigen3 in BTL, and let's use our own FindEigen3.cmake script 2011-01-26 19:12:35 +01:00
Gael Guennebaud
86acb46518 pass to eigen3 ;) 2011-01-26 18:41:06 +01:00
Gael Guennebaud
faeae169dd fix compilation 2011-01-26 17:58:17 +01:00
Gael Guennebaud
210a280daf update FindMKL to match the default installation behavior of MKL 11 2011-01-26 17:58:01 +01:00
Gael Guennebaud
1eb85b4cf1 allow the possibility to automatically call or not the ctors on a per scalar type basis, and disable automatic initialization of std::complex<> 2011-01-26 17:56:49 +01:00
Gael Guennebaud
4783748953 do not include reference lapack files if they are not there 2011-01-26 17:10:05 +01:00
Benoit Jacob
162cb8ff42 import back LeastSquares into eigen2support. Pass most of eigen2's 'regression' test, except for regression_4 which is about complex numbers. 2011-01-26 11:05:41 -05:00
Gael Guennebaud
98285ba81c merge 2011-01-26 16:36:07 +01:00
Gael Guennebaud
7ef9d82b39 add a minimalistic lapack wrapper 2011-01-26 16:34:45 +01:00
Gael Guennebaud
15ef62ca43 extend PermutationMatrix and Transpositions to support arbitrary integer types and to support the Map/Wrapper model via base and derived classes 2011-01-26 16:33:23 +01:00
Benoit Jacob
76c630d185 eigen2 support: import SVD back, pass SVD tests 2011-01-26 10:33:03 -05:00
Benoit Jacob
313eea8f10 fix the remainder of bug #159 2011-01-26 10:01:18 -05:00
Benoit Jacob
f88ca0ac79 fix the eigen3 part of bug #159 - build issue with selfadjointview 2011-01-26 09:49:06 -05:00
Benoit Jacob
9a5ded3e1d fix bug #160 - forgot hg add 2011-01-25 21:31:27 -05:00
Benoit Jacob
c350f6f12c fix bug #161 2011-01-25 21:28:20 -05:00
Benoit Jacob
39536d44da fix build 2011-01-25 21:24:31 -05:00
Benoit Jacob
1d98cc5e5d eigen2 support: implement part<SelfAdjoint>, mimic eigen2 behavior braindeadness-for-braindeadness 2011-01-25 21:22:04 -05:00
Benoit Jacob
4fbadfd230 merge 2011-01-25 11:19:54 -05:00
Benoit Jacob
07e3ef4f38 eigen2: pass QR decomposition and hyperplane tests 2011-01-25 11:19:26 -05:00
Gael Guennebaud
6896cab5b9 one more const missing 2011-01-25 16:52:40 +01:00
Gael Guennebaud
28d6e84150 fix compilation after recent const change in return types 2011-01-25 16:33:02 +01:00
Benoit Jacob
b1d6a9945c eigen2: pass the inverse test 2011-01-25 10:05:29 -05:00
Benoit Jacob
09d1923f61 eigen2: pass lu test 2011-01-25 10:02:36 -05:00
Benoit Jacob
3e2469f951 eigen2: split tests 2011-01-25 09:02:59 -05:00
Benoit Jacob
b04591fbb4 disable eigen2_first_aligned test, it's completely internal stuff 2011-01-25 08:38:22 -05:00
Benoit Jacob
acd2c82655 fix eigen2_bug_132 test 2011-01-25 08:37:32 -05:00
Benoit Jacob
8acd43bbdb let eigen2 tests use the same ei_add_test macro, which required to prefix them with eigen2_ ; rename buildtests_eigen2 to eigen2_buildtests, etc. 2011-01-25 08:37:18 -05:00
Benoit Jacob
dcfb58f529 eigen2: fix USING_PART_OF_NAMESPACE_EIGEN 2011-01-25 08:03:12 -05:00
Gael Guennebaud
84448b058c fix USING_PART_OF_NAMESPACE_EIGEN to export ei_ prefixed math functions 2011-01-25 09:35:49 +01:00
Gael Guennebaud
7dd4aaba9f fix missing const qualifier in cwiseEqual 2011-01-24 18:49:18 +01:00
Benoit Jacob
bd12ac4ffc import eigen2 Geometry module into Eigen2Support.
fix build of geometry tests
2011-01-24 11:21:58 -05:00
Benoit Jacob
5bfde30e48 fix compilation of array tests 2011-01-24 09:38:50 -05:00
Benoit Jacob
9089488210 fix compilation of Eigen/Geometry with EIGEN2_SUPPORT: was including non-existent header 2011-01-24 08:59:47 -05:00
Benoit Jacob
c3a4f6b5c5 const-qualify template parameters representing const arguments to expressions.
needed to fix docs compile issue.
2011-01-24 08:27:06 -05:00
Benoit Jacob
5331fa3033 fix compilation of LU class example 2011-01-24 07:41:47 -05:00
Benoit Jacob
1dabd133cc pass eigen2's triangular test 2011-01-23 21:53:28 -05:00
Benoit Jacob
5c82fd7f40 Move part() to EIGEN2_SUPPORT (had been deprecated for a long time) 2011-01-23 18:49:36 -05:00
Benoit Jacob
1cf4996d3c make eigen2 visitor test pass 2011-01-23 18:34:30 -05:00
Benoit Jacob
8df5bca979 rename build stages to multiples of 10; old stage 2 becomes stage 15, while stage 20 generates errors (instead of warnings) on conflicting API. 2011-01-23 18:22:18 -05:00
Benoit Jacob
cc1f70abc3 make eigen2 dynalloc test pass (add to eigen2 support some internal stuff that some users may have been relying on) 2011-01-21 10:47:31 -05:00
Benoit Jacob
30de1651d3 relax Map const correctness in eigen2 support stages <= 3
introduce new 'strict' stage 4
2011-01-21 10:42:19 -05:00
Benoit Jacob
54dfcdf86e remove eigen2 vectorization_logic test, it's not an API test 2011-01-21 10:29:43 -05:00
Benoit Jacob
5be269db88 make eigen2 submatrices test pass 2011-01-21 10:24:59 -05:00
Benoit Jacob
cc2b7a5397 introduce the 3 stages of eigen2 support, writing to the mailing list about that in Eigen2 to Eigen3 Migration Path thread 2011-01-21 09:51:03 -05:00
Benoit Jacob
34d93686db lots more EIGEN2_SUPPORT fixes. Now several of the most important core tests build and succeed. 2011-01-20 10:36:32 -05:00
Benoit Jacob
66a2ffa9bd Completely disable Eigen/Array in Eigen3; completely enable in EIGEN2_SUPPORT. 2011-01-20 08:12:24 -05:00
Benoit Jacob
96f08213f7 big eigen2support fix, aimed at users who relied on internal eigen2 stuff: now we dont need customizations in test/eigen2/main.h anymore.
These tests already build:
eigen2_basicstuff
eigen2_adjoint
eigen2_linearstructure
eigen2_prec_inverse_4x4
2011-01-19 11:01:07 -05:00
Benoit Jacob
bf0cffa897 restore the behavior of defaulting to Release build type 2011-01-19 10:15:36 -05:00
Benoit Jacob
1f6bd2915d import eigen2 test suite. enable by defining EIGEN_TEST_EIGEN2
only test_prec_inverse4x4 is fixed at the moment. now need to go over all those tests.
2011-01-19 10:10:54 -05:00
Benoit Jacob
604afc9aca fix bug #155, const-related compilation error 2011-01-18 09:14:14 -05:00
Hauke Heibel
9b2546fea8 Added remaining const coeffRef accessors to Array- and MatrixWrapper. 2011-01-18 13:19:13 +01:00
Benoit Jacob
c7eaca50a0 __cpuidex is not (always) present in VS 2008 + SP1, it seems 2011-01-17 11:17:45 -05:00
hamelin.philippe
5e28f34005 Replace CMAKE_SOURCE_DIR with PROJECT_SOURCE_DIR to allow the cmake project to be included by a root project. 2011-01-17 09:59:40 -05:00
Gael Guennebaud
5010033d88 do not stop the factorization if one pivot is exactly 0, and return the
index of the first zero pivot if any
2011-01-17 11:11:22 +01:00
Gael Guennebaud
ef3e690a0c return the index of the first non positive diagonal entry (more useful than simply true or false) 2011-01-17 11:09:03 +01:00
Gael Guennebaud
8b6c1caa3e fix compilation of rowmajor sparse time diagonal 2011-01-14 20:29:55 +01:00
Thomas Capricelli
dcbf091e60 fix EIGEN_TEST_NOQT (reported by Philippe Hamelin) 2011-01-14 14:30:06 +01:00
Jose Luis Blanco
cbfab7204f Update of CPUID macros to fix segfaults in amd64 code. 2011-01-05 02:43:43 +01:00
Benoit Jacob
98f0274305 third pass of const-correctness fixes (bug #54), hopefully the last one... 2011-01-07 05:16:01 -05:00
Gael Guennebaud
c7baf07a3e add plugin mechanism to sparse objects 2011-01-07 15:53:02 +01:00
Jitse Niesen
9111d73017 Fix compilation error in HouseholderSequence introduced in my previous commit. 2011-01-07 13:46:23 +00:00
Romain Bossart
4abb772b52 Fix bug #38
* address of temporaries were passed to umfpack_zi_* functions. It is ok with g++-4.4 or 4.5, but not with the -std=c++0x in both versions. This patch makes it work for c++98 and c++0x versions
2011-01-07 10:27:22 +01:00
Jitse Niesen
2cc75f4922 Make HouseholderSequence::setTrans() protected (cf. bug #50).
Users can call .transpose() instead.
2011-01-06 11:30:19 +00:00
Manuel Yguel
934720c4ba Decrease the degree of the polynomials being tested to reduce time spent during the tests. 2011-01-05 19:49:13 +01:00
Hauke Heibel
4ba0ec5e0e Fixed #148 where a const-accessor for coefficients was missing in the MatrixWrapper. 2011-01-04 15:35:50 +01:00
Gael Guennebaud
d7e1eeaece fix compilation when defaulting to row major 2011-01-04 14:40:06 +01:00
Gael Guennebaud
3a4d56171d fix openglsupport unit test when defaulting to row major 2011-01-04 14:34:17 +01:00
Gael Guennebaud
64356a622d fix vectorization_logic unit test when defaulting to row major 2011-01-04 14:18:07 +01:00
Jitse Niesen
004488a31d Fix bug in symmetric rank-2 update for row-major matrices (bug #144). 2011-01-04 10:35:39 +00:00
Jitse Niesen
fb023b871f Const-correctness fix for gemv_selector<OnTheRight,ColMajor,true> (bug #144). 2011-01-04 10:35:10 +00:00
Benoit Jacob
fd4e366d7e fix severe perf bug: coeff-based matrix products were not considered aligned, typically preventing vectorization.
added unit test.
2011-01-02 12:07:39 -05:00
Jitse Niesen
47a9d2ed54 Document HouseholderSequence.
Incomplete: I did not explain the difference between OnTheLeft and OnTheRight,
and there is only one example.
2011-01-02 16:59:44 +00:00
Gael Guennebaud
583f963517 make the table fit within 80 characters 2011-01-01 12:02:55 +01:00
Gael Guennebaud
e7318148b5 an attempt to fix a compilation issue with -std=c++0x 2011-01-01 11:40:30 +01:00
Jose Luis Blanco
7feb644620 Switched "MESSAGE(" -> "MESSAGE(STATUS " in CMake script, since otherwise they may look like errors to the user. 2010-12-29 22:02:01 +01:00
Gael Guennebaud
902af035d3 merge 2010-12-31 17:26:48 +01:00
Gael Guennebaud
25efcdd042 fix sparse time dense product with a rowmajor lhs 2010-12-31 17:11:17 +01:00
David J. Luitz
11e253bc10 [Sparse] Added regression tests for the two bugfixes, the code passes all sparse_product tests 2010-12-30 15:16:23 +01:00
Benoit Jacob
13867c15cc fix compilation of code using e.g. Transpose<const Foo>::data() non-const-qualified. Same problem existed for coeffRef() and also in MapBase.h. 2010-12-30 07:47:51 -05:00
Benoit Jacob
26c2afd55a fix compile errors in Tridiagonalization and in doc examples 2010-12-30 04:52:20 -05:00
Benoit Jacob
dbd9c5fd50 fix HouseholderSequence API, bug #50:
* remove ctors taking more than 2 ints
 * rename actualVectors to length
 * add length/shift/trans accessors/mutators
2010-12-30 04:18:40 -05:00
Trevor Irons
e112ad8124 In QuickRefPage LinSpaced is improperly documented. 2010-12-29 10:08:41 -07:00
Jitse Niesen
d6a5ba5a08 Rename EIGEN_DENSESTORAGEBASE_PLUGIN to EIGEN_PLAINOBJECTBASE_PLUGIN. 2010-12-29 19:12:39 +00:00
Jose Luis Blanco
3ca31a8b74 fixed msvc9 build errors. 2010-12-29 19:42:01 +01:00
Jitse Niesen
d84b135ed3 Enable GSL tests (reverts part of changeset 6628534eb5
).
2010-12-29 17:45:18 +00:00
Jose Luis Blanco
97c54ad220 fix MSVC warnings, bug #143 2010-12-29 06:15:41 -05:00
Thomas Capricelli
7a29ae0b5c fix preprocessor checks for availability of cpuid 2010-12-28 13:46:39 +01:00
Jitse Niesen
657013c974 Mention ptr_fun in docs for .unaryExpr() 2010-12-27 16:35:25 +00:00
Jitse Niesen
265e1ef4ef Extend doc page on preprocessor directives. 2010-12-27 16:34:58 +00:00
Jitse Niesen
8db9acbc16 Move doxygen comments for EIGEN_NO_DEBUG from source to I14.
This reverts changeset 76fbe94279
. Benoit and I agree that my
approach there (to use doxygen comments) pollutes the code too much.
2010-12-27 15:07:11 +00:00
Jitse Niesen
840c4e1ab5 Move section on preprocessor directives from I00 to its own page. 2010-12-27 15:07:07 +00:00
Jitse Niesen
42a050dc68 Finish doc page on aliasing. 2010-12-27 15:06:55 +00:00
Benoit Jacob
dc3618a557 move BandMatrix and TridiagonalMatrix to the internal:: namespace 2010-12-25 17:17:10 -05:00
Benoit Jacob
8d2a10c5c1 more renaming to make this file matrix-or-array-agnostic 2010-12-25 17:04:36 -05:00
Benoit Jacob
e8768251db rename macro 2010-12-25 17:01:01 -05:00
Benoit Jacob
86d3711fb7 remove EIGEN_REF_TO_TEMPORARY, clarify docs 2010-12-25 16:45:25 -05:00
Benoit Jacob
75b7d98665 bug #54 - really fix const correctness except in Sparse 2010-12-22 17:45:37 -05:00
Hauke Heibel
3b6d97b51a Re-enabled the BLAS compilation on non-MSVC systems. 2010-12-17 10:52:57 +01:00
Hauke Heibel
5e46f7a499 Switched back to the old behaviour where EIGEN_SPLIT_LARGE_TESTS was ON per default on MSVC systems.
Without splitting these tests, some do not compile
2010-12-17 09:42:17 +01:00
Gael Guennebaud
a21d56b766 disable blas if C++ compiler is MSVC 2010-12-16 20:51:44 +01:00
Hauke Heibel
efdf2e4056 Added automatic SSE3/4.1/4.2 support for MSVC. 2010-12-16 20:08:22 +01:00
Hauke Heibel
b31e1246e1 Re-enabled the missing tests, again... 2010-12-16 19:07:23 +01:00
Hauke Heibel
83e3c4582f Improved the array unit test - internal::isApprox needs to use the same precision as VERIFY_IS_NOT_APPROX.
Removed debug code from test_isApprox.
2010-12-16 18:53:02 +01:00
Hauke Heibel
2d0dfe5d60 Uups - re-enabled subtests 1 to 5. 2010-12-16 17:36:10 +01:00
Hauke Heibel
f578dc7aff Fixed compound subtraction in ArrayBase where the assignment needs to be carried out on the derived type.
Added unit tests for map based component wise arithmetic.
2010-12-16 17:34:13 +01:00
Hauke Heibel
dbfb53e8ef Added unit test for matrix creation from const raw data. 2010-12-15 15:28:43 +01:00
Hauke Heibel
6f5c45ceff Fixed ctor from const raw data for Matrices and added the missing implementation for Arrays.
Fixed a warning regarding the conversion from int to bool in MapBase.
2010-12-15 15:19:51 +01:00
Gael Guennebaud
6a9a6bbc78 fix warning 2010-12-13 10:18:33 +01:00
Gael Guennebaud
68fe80861c Fix bug #133: remove the EIGEN_RESTRICT which was useless here anyway 2010-12-13 09:56:13 +01:00
Jitse Niesen
f2c18f2e37 merge 2010-12-12 21:24:24 +00:00
Jitse Niesen
4a5ebcd1ce Fix compilation of Tridiagonalization_diagonal example.
After changeset 0d63212257
, matrixT() is a real matrix even if the matrix
which is decomposed is complex.
2010-12-12 13:53:42 +00:00
Gael Guennebaud
c7f01157dd enforce compilation of blas unit tests when running ctest 2010-12-12 13:10:00 +01:00
Jitse Niesen
9cd4f67e7f Specify root namespace for fftw_plan from FFTW3 library.
After changeset 4716040703
 (the ei_ --> internal:: change), there are two symbols
called fftw_plan, one from the FFTW3 library and one from Eigen.
2010-12-12 11:44:30 +00:00
Konstantinos Margaritis
e05c79cbd8 Fixed NEON compilation errors, changed float-abi back to softfp (which is the most used right now).
Some complex tests appear to segfault, needs a more careful look.
2010-12-10 20:27:46 +02:00
Benoit Jacob
b11343e15c fix intermittent failure of schur_real test: there only is an iterative process if size>2 2010-12-10 02:10:03 -05:00
Benoit Jacob
74cc42b22f bug #54 - The big Map const-correctness changes 2010-12-10 02:09:58 -05:00
Gael Guennebaud
e736df3edd suppress stupid warning 2010-12-10 15:53:13 +01:00
Gael Guennebaud
79cc86f701 fix compilation 2010-12-10 13:52:47 +01:00
Gael Guennebaud
67c28570e3 fix compilation with ICC (template keyword on a non template method) 2010-12-10 10:05:52 +01:00
Gael Guennebaud
5bc21c25c5 fix ICE with gcc 3.4 and 4.0.1 2010-12-10 09:59:44 +01:00
Gael Guennebaud
bacd531862 fix bug #128 : tridiagonalization failed for 1x1 matrices 2010-12-09 19:56:20 +01:00
Gael Guennebaud
17de59278b simplification 2010-12-09 19:47:02 +01:00
Gael Guennebaud
147a63c4b5 compilation fix 2010-12-09 19:46:26 +01:00
Gael Guennebaud
0b32c5bdda fix compilation of sparse_basic for DynamicSparseMatrix 2010-12-09 19:39:15 +01:00
Benoit Jacob
aec0782719 fix the build of eigensolver_complex test.
it was calling the .value() method on an inner product, and that was blocked in bad zero-sized case.

fixed by adding the .value() method to DenseBase for all 1x1 expressions, and allowing coeff accessors in ProductBase for 1x1 expressions.
2010-12-09 03:47:35 -05:00
Benoit Jacob
1be6449f2e fix bug #127. our product selection logic was flawed in that it used the Max-sized to determine whether the size is 1.
+ test.
2010-12-09 02:38:07 -05:00
Benoit Jacob
819bcbed19 fix comment 2010-12-07 02:17:15 -05:00
Eamon Nerbonne
7a7ca99a31 [mq]: Mingw32 fix
intrin.h is not required nor supported by mingw32.  It is present (and supported) on mingw-w64 builds, even those for 32-bit systems, but here too it's not required on 32-bit systems.  So if we're on mingw, and it's 64-bit, then and only then is the intrin.h inclusion necessary.
2010-12-03 23:24:06 +01:00
Gael Guennebaud
c49c013c47 add main ei_* functions into Eigen2Support 2010-12-03 11:22:35 +01:00
Gael Guennebaud
14208eb478 add a word about the ei_ prefix change in Eigen2 -> Eigen3 doc page. 2010-12-03 10:54:16 +01:00
Hauke Heibel
a289065c73 Applied a fix to our std::vector specialization which prevents the usage of workaround_msvc_stl_support when T is not a class. 2010-12-02 12:33:15 +01:00
Benoit Jacob
59b944cb50 add is_const 2010-12-01 09:22:54 -05:00
Benoit Jacob
46387cc180 remove makeconst_return_type 2010-12-01 09:22:50 -05:00
Hauke Heibel
f0ba513f41 Fixed compilation of tridiagonalization related unit tests. 2010-11-27 15:41:46 +01:00
Hauke Heibel
3899857e08 Removed remove_const_on_value_type since the meaning is unclear and it is in fact unused.
Extended the meta unit tests.
2010-11-26 18:06:08 +01:00
Hauke Heibel
60a544c879 Added STL like (add|remove)_const. Fixed add_const_on_value_type for "const T* const". 2010-11-26 16:56:03 +01:00
Hauke Heibel
bf9d25ce58 Postfixed add_const and remove_const by _on_value_type to express the differences to the STL. 2010-11-26 16:30:45 +01:00
Benoit Jacob
139392488d dos2unix 2010-11-26 10:10:26 -05:00
Jitse Niesen
e868b6736a Merge. 2010-11-26 14:37:58 +00:00
Gael Guennebaud
d551e99644 make HessenbergDecompositionMatrixHReturnType internal 2010-11-26 15:39:01 +01:00
Gael Guennebaud
e06c6553e0 make TridiagonalizationMatrixTReturnType internal and only export a public MatrixTReturnType typedef 2010-11-26 15:36:29 +01:00
Gael Guennebaud
0d63212257 add a TridiagonalizationMatrixTReturnType class to make Tridiagonalization::matrixT() more efficient and future proof. 2010-11-26 15:31:47 +01:00
Jitse Niesen
9bad7c7edb Compilation fix in case EIGEN_DEBUG_ASSERTS is defined. 2010-11-26 14:21:57 +00:00
Gael Guennebaud
421b2b5ff7 fix a couple of issues with TridiagonalMatrix 2010-11-26 13:04:20 +01:00
Gael Guennebaud
d8b26cfeec s/id/p to avoid name clash 2010-11-26 08:36:16 +01:00
Gael Guennebaud
156a31b0e9 fully implement scalar_fuzzy_impl<bool> as, e.g., the missing isMuchSmallerThan is convenient to filter out false values. 2010-11-25 18:00:30 +01:00
Jitse Niesen
010ed9510b Remove parentheses for compatibility with cmake 2.6.2 2010-11-24 22:26:13 +00:00
Benoit Jacob
cd1225ef14 make example compile 2010-11-24 09:18:49 -05:00
Benoit Jacob
f84cbba52a minor fixes 2010-11-24 09:16:30 -05:00
Benoit Jacob
07f2406dc1 some dox tweaks 2010-11-24 08:23:17 -05:00
Gael Guennebaud
f1690fb9fa fix bug #122 : rank 2 update test and scalar multiple extraction were both wrong 2010-11-23 19:19:04 +01:00
Benoit Jacob
0ab9a0a2f7 make UpperBidiagonalization internal: don't want to support it, it's not used.
Keeping it because it tests BandMatrix.
2010-11-23 11:12:42 -05:00
Benoit Jacob
ee38dbf1e6 Rework nested<> to be cleaner, see bug #76. 2010-11-23 11:11:40 -05:00
Frederic Gosselin
4c5932f8f5 Improves the filter for hidden files in "Eigen" and "Eigen/src".
This generic solution prevents cmake from raising an error on .svn folders when the source folder is under subversion.
2010-11-22 10:47:07 -05:00
Gael Guennebaud
5a65d7970a now the full blas folder requires a fortran compiler 2010-11-22 19:07:29 +01:00
Gael Guennebaud
3976a66889 fix bug #120 : compilation issue of trsolve unit test 2010-11-22 18:59:56 +01:00
Gael Guennebaud
f5f288b741 split level 1 and 2 implementation files into smaller ones and fix a couple of numerical and tricky issues discovered by the lapack test suite 2010-11-22 18:49:12 +01:00
Gael Guennebaud
a6f483e86b import reference BLAS routines which are not already implemented in Eigen : modified givens rotations, and packed and banded storages 2010-11-22 18:05:09 +01:00
Gael Guennebaud
7213dd1e6b this product still badly read the imaginary part on the diagonal 2010-11-22 18:00:47 +01:00
Benoit Jacob
a3f214ade9 holy crap, i had disabled all static asserts in 71f023de3e 2010-11-22 08:21:30 -05:00
Gael Guennebaud
d8396a8da0 fix compilation of product_mmtr 2010-11-21 10:23:06 +01:00
Gael Guennebaud
fb6d9ca951 add missing non const data() method to MapBase 2010-11-21 10:17:25 +01:00
Gael Guennebaud
0020ea544a implement HEMV level2 blas routine 2010-11-21 10:09:33 +01:00
Gael Guennebaud
12bfe5e718 make sure our internal selfadjoint*vector product does not use the imaginary part of the diagonal entries 2010-11-21 10:08:48 +01:00
Gael Guennebaud
e88901daf4 implement SYMV level2 blas routines 2010-11-21 09:34:41 +01:00
Gael Guennebaud
1ac9124fac implements TRMV level 2 blas routine 2010-11-20 23:29:20 +01:00
Gael Guennebaud
d72a8f1e50 make trmv uses direct access 2010-11-20 22:42:24 +01:00
Gael Guennebaud
437dff80ee fix issue 114: workaround cmake enable_language bug 2010-11-20 12:01:17 +01:00
Gael Guennebaud
86474115f5 IBM XL C compiler supports __attribute__((aligned(n))) syntax 2010-11-19 17:33:51 +01:00
Gael Guennebaud
8ad1f64e0a some cleaning in blas level 2 2010-11-19 17:22:43 +01:00
Thomas Capricelli
94f59a92cb fix typo 2010-11-19 17:16:28 +01:00
Gael Guennebaud
ed1ecb24d2 implement GERC and GERU blas routines 2010-11-19 17:05:24 +01:00
Gael Guennebaud
458637f097 implement GER blas routine 2010-11-19 17:02:24 +01:00
Gael Guennebaud
68f8519327 implement HER and HER2 blas routines 2010-11-19 16:51:52 +01:00
Gael Guennebaud
5ce199b1dd update rank 2 update doc 2010-11-19 16:50:49 +01:00
Gael Guennebaud
f369b5a711 makes rank 2 update function conformant to BLAS HER2 2010-11-19 16:50:15 +01:00
Gael Guennebaud
e14f14642d implement SYR and SYR2 2010-11-19 16:09:25 +01:00
Gael Guennebaud
661ef6c127 add regression unit test 2010-11-19 15:38:37 +01:00
Gael Guennebaud
3f24dbf6f5 fix compilation of transform * scaling 2010-11-19 14:45:45 +01:00
Gael Guennebaud
3e99356b59 clean a bit AMD and SimplicialCholesky and add support for partly stored selfadjoint matrices 2010-11-18 10:30:52 +01:00
Gael Guennebaud
1618df55df Add support for sparse symmetric permutations 2010-11-18 10:28:39 +01:00
Gael Guennebaud
fb71b737e4 update blas lib wrt recent change of general_matrix_matrix_triangular_product 2010-11-16 19:19:33 +01:00
Jitse Niesen
e54c8d20cb Docs: aliasing and component-wise operations. 2010-11-16 17:28:59 +00:00
Gael Guennebaud
da05b6af0e fix some remaining issues with ei_ -> internal change 2010-11-16 15:54:48 +01:00
Gael Guennebaud
9a3ec637ff new feature: copy from a sparse selfadjoint view to a full sparse matrix 2010-11-15 14:14:05 +01:00
Gael Guennebaud
5a3a229550 fix return type of rightHouseholderSequence() 2010-11-15 11:11:22 +01:00
Jitse Niesen
cad73d9cdc Correct std::map fix (two commits ago); copy fix to aligned_allocator doc. 2010-11-12 12:06:24 +00:00
Thomas Capricelli
d64e68c8bc fix doc compilation 2010-11-12 11:33:09 +01:00
Jose Luis Blanco
9ba15cd63c Docs: correct declaration of aligned std::map in TopicStlContainers. 2010-11-12 10:05:41 +00:00
Gael Guennebaud
b4fa8261b1 properly use nested types 2010-11-10 19:06:20 +01:00
Gael Guennebaud
05ed9be639 prevent warning 2010-11-10 18:59:16 +01:00
Gael Guennebaud
2577ef90c0 generalize our internal rank K update routine to support more general A*B product while evaluating only one triangular part and make it available via, e.g.:
R.triangularView<Lower>() += s * A * B;
2010-11-10 18:58:55 +01:00
Gael Guennebaud
c810d14d4d add missing specialization 2010-11-09 12:03:20 +01:00
Gael Guennebaud
39477e697a extend unit test to cover previous bug 2010-11-05 14:37:42 +01:00
Gael Guennebaud
572b5585e3 fix Eigen's trsv for complexes 2010-11-05 14:36:34 +01:00
Gael Guennebaud
0e30c4ae3f blas level2: gemv and trsv are green 2010-11-05 14:14:50 +01:00
Gael Guennebaud
3fdea699b8 trsv: simplifications/cleaning 2010-11-05 12:54:32 +01:00
Gael Guennebaud
0e6c1170ab trsv: add support for inner-stride!=1, reduce code instantiation, move implementation to a new products/XX.h file 2010-11-05 12:43:14 +01:00
Gael Guennebaud
fe1353080e fix error handling of level 1 routines 2010-11-04 22:25:59 +01:00
Gael Guennebaud
15e8ad686c add a minimum degree ordering routine based on CSparse (LGPL) and a new built-in sparse cholesky decomposition 2010-11-04 09:58:22 +01:00
Gael Guennebaud
5a4f77716d fix bug #107: SelfAdjointEigenSolver and RowMajor (and add unit test) 2010-11-04 09:33:05 +01:00
Gael Guennebaud
20fcef9656 fixes related to ei_ -> internal change 2010-11-04 08:38:16 +01:00
Gael Guennebaud
62a51184d7 merge 2010-11-04 08:32:52 +01:00
Gael Guennebaud
fd88d721d2 implement proper error handling in level 3 routines 2010-11-03 22:03:12 +01:00
Gael Guennebaud
a8fb6b0ad3 improve detection of errors 2010-11-03 22:02:44 +01:00
Gael Guennebaud
1eea88bff7 fix matrix product bug with OpenMP 2010-11-03 16:12:37 +01:00
Gael Guennebaud
8d27f55eb3 rm auto normalization in favor of clamping 2010-11-03 15:32:40 +01:00
Hauke Heibel
d204ec491d Additional fix to force the compiler to use the correct pruning method. 2010-11-02 14:33:33 +01:00
Hauke Heibel
3a3f163e31 Fix bug #65.
In order to prevent compilation errors, the default functor "struct func" must not be defined inside the function scope. I just moved it into a private section of SparseMatrix.
2010-11-02 14:32:41 +01:00
Hauke Heibel
b3007db131 Added a comment on why is_arithmetic is used in DenseCoeffsBase. 2010-11-02 10:11:22 +01:00
Hauke Heibel
96e4a4b59c Fixed compilation due to lacking Transform definitions. 2010-11-01 16:53:39 +01:00
Gael Guennebaud
d2e257cb5d oops (rm commented code) 2010-11-01 09:40:33 +01:00
Gael Guennebaud
c7eda0d866 Let's be safe: enable auto normalization in quaternion to angle-axis code since a slight numerical issue may trigger NaN. The overhead is small and I doubt the perf of this function could be critical for any application! 2010-10-31 23:26:01 +01:00
Benoit Jacob
006c9a5105 implement VERIFY in a function so it doesn't get compiled thousands of times. 2010-10-29 10:27:20 -04:00
Benoit Jacob
7d441260db on test failure, abort instead of exit, so we can get a stack trace 2010-10-29 10:07:30 -04:00
Benoit Jacob
99ccb26cfe add eigen2support Transform typedefs, add Eigen2To3 section on Transform 2010-10-29 09:00:35 -04:00
Benoit Jacob
bd249d1121 fix bug #92 - we were doing stupid things when passing the list of libraries to link to. 2010-10-28 10:44:20 -04:00
Benoit Jacob
868f753d10 document LvalueBit better 2010-10-28 09:40:20 -04:00
Gael Guennebaud
1d4e80f09d generalize the prune function 2010-10-28 11:39:31 +02:00
Gael Guennebaud
02c8b6af82 fix sparse rankUpdate and triangularView iterator 2010-10-27 15:13:03 +02:00
Gael Guennebaud
241e5ee3e7 add the possibility to solve for sparse rhs with Cholmod 2010-10-27 14:31:23 +02:00
Hauke Heibel
5d4ff3f99c Fixed bug #95 by changing _M_IX64 to _M_X64 as proposed by Jan Schlicht. 2010-10-27 11:07:38 +02:00
Hauke Heibel
3efff8c69e Merge 2010-10-26 16:48:12 +02:00
Gael Guennebaud
f4a6a8e295 rm the useless SparseSolverBase class and provide more compile time traits 2010-10-26 16:47:47 +02:00
Hauke Heibel
c738cd56eb Renamed cleantype to remove_all since it is close to remove_{const|pointer|reference}. 2010-10-26 16:47:01 +02:00
Gael Guennebaud
2fbb9932b0 fix compilation (bad internal:: stuff) 2010-10-26 16:38:51 +02:00
Gael Guennebaud
5e95ee6662 fix compilation and unit test of adolc 2010-10-26 16:26:20 +02:00
Gael Guennebaud
92044fcc2b fix bug #94: add #include src/misc/Solve.h in SparseExtra 2010-10-26 15:51:06 +02:00
Gael Guennebaud
666c16cf63 add new API for Cholmod preserving the legacy one for now 2010-10-26 15:48:33 +02:00
Hauke Heibel
7bc8e3ac09 Initial fixes for bug #85.
Renamed meta_{true|false} to {true|false}_type, meta_if to conditional, is_same_type to is_same, un{ref|pointer|const} to remove_{reference|pointer|const} and makeconst to add_const.
Changed boolean type 'ret' member to 'value'.
Changed 'ret' members referring to types to 'type'.
Adapted all code occurrences.
2010-10-25 22:13:49 +02:00
Hauke Heibel
597b2745e1 Allow unset ${CMAKE_BUILD_TYPE} which is required for some targets and corresponding to using default values. 2010-10-25 18:49:39 +02:00
Benoit Jacob
724af13540 make polynomialsolver test compile faster 2010-10-25 10:15:22 -04:00
Benoit Jacob
a94f216487 error out on bad build type 2010-10-25 10:15:22 -04:00
Benoit Jacob
fdaa3f311a adapt mpreal to eigen3 mathfunctions system 2010-10-25 10:15:22 -04:00
Benoit Jacob
4716040703 bug #86 : use internal:: namespace instead of ei_ prefix 2010-10-25 10:15:22 -04:00
Benoit Jacob
ca85a1f6c5 remove build type tweaking 2010-10-23 10:00:43 -04:00
Jitse Niesen
dbdf7ee942 Use 'Release' as default when build type is not specified.
Otherwise, "cmake /path/to/eigen/" in an empty build directory, as specified
on the CMake page on the wiki, yields a fatal error.
2010-10-22 12:23:35 +01:00
Benoit Jacob
bfd46eacad don't change the build type, fatal error if bad build type 2010-10-21 08:55:48 -04:00
Hauke Heibel
969518f99d Improved I13_FunctionsTakingEigenTypes.dox.
Removed the r-value reference part and focused on EIGEN_REF_TO_TEMPORARY only.
2010-10-21 10:14:23 +02:00
Hauke Heibel
ba86d3ef65 Fixed bug #84. 2010-10-21 10:13:17 +02:00
Hauke Heibel
9bbaff6b41 Fixed the unit test splitting for MSVC. 2010-10-21 07:39:06 +02:00
Benoit Jacob
ee60fc2062 fix typo and rephrase sentence 2010-10-20 09:43:16 -04:00
Benoit Jacob
8c17fab8f5 renaming: ei_matrix_storage -> DenseStorage
DenseStorageBase  -> PlainObjectBase
2010-10-20 09:34:13 -04:00
Hauke Heibel
9cf748757e Improved the fixed size array display. 2010-10-20 11:56:29 +02:00
Benoit Jacob
e259f71477 rename PlanarRotation -> JacobiRotation 2010-10-19 21:56:26 -04:00
Benoit Jacob
9044c98cff work around stupid msvc error when constructing at compile time an expression
that involves a division by zero, even if the numeric type has floating point
2010-10-19 21:56:11 -04:00
Gael Guennebaud
e5073746f3 allows blocks of code to be larger than the page body (like tables) 2010-10-19 16:55:49 +02:00
Gael Guennebaud
e19c6b89f5 update the position of the owl 2010-10-19 16:07:04 +02:00
Gael Guennebaud
54814eb05b factorize CSS code, make use of the "manual" class when appropriate, clean the style of the big linear algebra table 2010-10-19 15:25:00 +02:00
Benoit Jacob
70f95ef80d increase css max-width 2010-10-19 09:40:23 -04:00
Benoit Jacob
b1604ea553 merge 2010-10-19 09:32:19 -04:00
Benoit Jacob
b8dfc62f3c specify max-width in em not px 2010-10-19 09:31:22 -04:00
Gael Guennebaud
6d8e7d68e4 factorize CSS code, make use of the "manual" class when appropriate, clean the style of the big linear algebra table 2010-10-19 15:25:00 +02:00
Benoit Jacob
9e3005d552 css update: max-width and margins 2010-10-19 09:18:06 -04:00
Benoit Jacob
9fa54d4cc9 move tables from class "tutorial_code" to "example"
also remove a align="center" in the Aliasing page -- it doesn't make sense to have 1 centered table page when all others are left aligned.
2010-10-19 08:42:49 -04:00
Gael Guennebaud
ca4bd5851c update style of the quick ref guide 2010-10-19 11:59:11 +02:00
Gael Guennebaud
f66fe2663f update CSS to doxygen 1.7.2, new CSS and cleaning of the tutorial 2010-10-19 11:40:49 +02:00
Hauke Heibel
9f8b6ad43e Fixed bug #79. 2010-10-19 09:43:54 +02:00
Benoit Jacob
3481f10e7a re-fix the broken msvc warning in JacobiSVD 2010-10-18 09:46:22 -04:00
Benoit Jacob
3404d5fb14 improvements in pages 5 and 7 of the tutorial. 2010-10-18 09:09:30 -04:00
Benoit Jacob
1c15a6d96f improvements in tutorial page 4 : block operations 2010-10-18 08:44:27 -04:00
Benoit Jacob
4b0fb968ea fixed table html 2010-10-18 07:23:48 -04:00
Benoit Jacob
597bb61c23 fix stupid msvc warning in jacobisvd 2010-10-18 06:54:11 -04:00
Benoit Jacob
6628534eb5 fix bug i just introduced in ei_add_test_internal 2010-10-17 11:47:59 -04:00
Benoit Jacob
19ae4362bd ah ok, we want to build this even without GSL.
so the bug is in FindGSL.cmake.
2010-10-17 11:31:58 -04:00
Benoit Jacob
4e3feb023d more unsupported/ CMake fixes 2010-10-17 11:21:10 -04:00
Benoit Jacob
1e3a035275 Fix general linking issue for tests linking to multiple libs, and explicitly link mpfr_real test to GMP. 2010-10-17 11:04:43 -04:00
Benoit Jacob
8356bc8d06 add jacobiSvd() method, update test & docs 2010-10-17 09:40:52 -04:00
Hauke Heibel
cd3a9d1ccb Fixed bug #74. 2010-10-17 12:33:47 +02:00
Hauke Heibel
c19b965730 Added stddeque unit test derived from the stdlist test. 2010-10-16 10:45:30 +02:00
Benoit Jacob
6f6400e488 Added tag 3.0-beta2 for changeset 3f79884f03 2010-10-15 09:46:45 -04:00
Gael Guennebaud
e85a3857f0 import BLAS test suite 2010-10-14 13:46:01 +02:00
Gael Guennebaud
0cae73d1eb add the prototype of all level2 functions 2010-10-08 23:31:57 +02:00
681 changed files with 84142 additions and 16247 deletions

41
.gitignore vendored Normal file
View File

@@ -0,0 +1,41 @@
qrc_*cxx
*.orig
*.pyc
*.diff
diff
*.save
save
*.old
*.gmo
*.qm
core
core.*
*.bak
*~
*.build*
*.moc.*
*.moc
ui_*
CMakeCache.txt
tags
.*.swp
activity.png
*.out
*.php*
*.log
*.orig
*.rej
log
patch
*.patch
a
a.*
lapack/testing
lapack/reference
.*project
.settings
Makefile
!ci/build.gitlab-ci.yml
!scripts/buildtests.in
!Eigen/Core
!Eigen/src/Core

28
.gitlab-ci.yml Normal file
View File

@@ -0,0 +1,28 @@
# This file is part of Eigen, a lightweight C++ template library
# for linear algebra.
#
# Copyright (C) 2023, The Eigen Authors
#
# This Source Code Form is subject to the terms of the Mozilla
# Public License v. 2.0. If a copy of the MPL was not distributed
# with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
stages:
- build
- deploy
variables:
# CMake build directory.
EIGEN_CI_BUILDDIR: .build
# Specify the CMake build target.
EIGEN_CI_BUILD_TARGET: ""
# If a test regex is specified, that will be selected.
# Otherwise, we will try a label if specified.
EIGEN_CI_CTEST_REGEX: ""
EIGEN_CI_CTEST_LABEL: ""
EIGEN_CI_CTEST_ARGS: ""
include:
- "/ci/common.gitlab-ci.yml"
- "/ci/build.linux.gitlab-ci.yml"
- "/ci/deploy.gitlab-ci.yml"

3
.hgeol Normal file
View File

@@ -0,0 +1,3 @@
[patterns]
**.* = native
eigen_autoexp_part.dat = CRLF

View File

@@ -23,5 +23,10 @@ tags
activity.png
*.out
*.php*
eigen_gen_credits.log
*.log
*.orig
*.rej
log
patch
a
a.*

View File

@@ -2,19 +2,32 @@ project(Eigen)
cmake_minimum_required(VERSION 2.6.2)
# guard against in-source builds
if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. (you may need to remove CMakeCache.txt ")
message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. You may need to remove CMakeCache.txt. ")
endif()
# guard against bad build-type strings
if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release")
endif()
string(TOLOWER "${CMAKE_BUILD_TYPE}" cmake_build_type_tolower)
if( NOT cmake_build_type_tolower STREQUAL "debug"
AND NOT cmake_build_type_tolower STREQUAL "release"
AND NOT cmake_build_type_tolower STREQUAL "relwithdebinfo")
message(FATAL_ERROR "Unknown build type \"${CMAKE_BUILD_TYPE}\". Allowed values are Debug, Release, RelWithDebInfo (case-insensitive).")
endif()
#############################################################################
# retrieve version information #
#############################################################################
# automatically parse the version number
file(READ "${CMAKE_SOURCE_DIR}/Eigen/src/Core/util/Macros.h" _eigen_version_header)
file(READ "${PROJECT_SOURCE_DIR}/Eigen/src/Core/util/Macros.h" _eigen_version_header)
string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen_world_version_match "${_eigen_version_header}")
set(EIGEN_WORLD_VERSION "${CMAKE_MATCH_1}")
string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen_major_version_match "${_eigen_version_header}")
@@ -81,13 +94,6 @@ endif(NOT WIN32)
set(CMAKE_INCLUDE_CURRENT_DIR ON)
string(TOLOWER "${CMAKE_BUILD_TYPE}" cmake_build_type_tolower)
if(cmake_build_type_tolower STREQUAL "debug")
set(CMAKE_BUILD_TYPE "Debug")
else()
set(CMAKE_BUILD_TYPE "Release")
endif()
option(EIGEN_SPLIT_LARGE_TESTS "Split large tests into smaller executables" ON)
option(EIGEN_DEFAULT_TO_ROW_MAJOR "Use row-major as default matrix storage order" OFF)
@@ -95,6 +101,8 @@ if(EIGEN_DEFAULT_TO_ROW_MAJOR)
add_definitions("-DEIGEN_DEFAULT_TO_ROW_MAJOR")
endif()
add_definitions("-DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS")
if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fexceptions -fno-check-new -fno-common -fstrict-aliasing")
set(CMAKE_CXX_FLAGS_DEBUG "-g3")
@@ -115,43 +123,43 @@ if(CMAKE_COMPILER_IS_GNUCXX)
option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
if(EIGEN_TEST_SSE2)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2")
message("Enabling SSE2 in tests/examples")
message(STATUS "Enabling SSE2 in tests/examples")
endif()
option(EIGEN_TEST_SSE3 "Enable/Disable SSE3 in tests/examples" OFF)
if(EIGEN_TEST_SSE3)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
message("Enabling SSE3 in tests/examples")
message(STATUS "Enabling SSE3 in tests/examples")
endif()
option(EIGEN_TEST_SSSE3 "Enable/Disable SSSE3 in tests/examples" OFF)
if(EIGEN_TEST_SSSE3)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mssse3")
message("Enabling SSSE3 in tests/examples")
message(STATUS "Enabling SSSE3 in tests/examples")
endif()
option(EIGEN_TEST_SSE4_1 "Enable/Disable SSE4.1 in tests/examples" OFF)
if(EIGEN_TEST_SSE4_1)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
message("Enabling SSE4.1 in tests/examples")
message(STATUS "Enabling SSE4.1 in tests/examples")
endif()
option(EIGEN_TEST_SSE4_2 "Enable/Disable SSE4.2 in tests/examples" OFF)
if(EIGEN_TEST_SSE4_2)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")
message("Enabling SSE4.2 in tests/examples")
message(STATUS "Enabling SSE4.2 in tests/examples")
endif()
option(EIGEN_TEST_ALTIVEC "Enable/Disable AltiVec in tests/examples" OFF)
if(EIGEN_TEST_ALTIVEC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec")
message("Enabling AltiVec in tests/examples")
message(STATUS "Enabling AltiVec in tests/examples")
endif()
option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
if(EIGEN_TEST_NEON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=hard -mfpu=neon -mcpu=cortex-a8")
message("Enabling NEON in tests/examples")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp -mfpu=neon -mcpu=cortex-a8")
message(STATUS "Enabling NEON in tests/examples")
endif()
check_cxx_compiler_flag("-fopenmp" COMPILER_SUPPORT_OPENMP)
@@ -159,7 +167,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
option(EIGEN_TEST_OPENMP "Enable/Disable OpenMP in tests/examples" OFF)
if(EIGEN_TEST_OPENMP)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
message("Enabling OpenMP in tests/examples")
message(STATUS "Enabling OpenMP in tests/examples")
endif()
endif()
@@ -183,7 +191,7 @@ if(MSVC)
option(EIGEN_TEST_OPENMP "Enable/Disable OpenMP in tests/examples" OFF)
if(EIGEN_TEST_OPENMP)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp")
message("Enabling OpenMP in tests/examples")
message(STATUS "Enabling OpenMP in tests/examples")
endif()
endif()
@@ -193,32 +201,42 @@ if(MSVC)
# arch is not supported on 64 bit systems, SSE is enabled automatically.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:SSE2")
endif(NOT CMAKE_CL_64)
message("Enabling SSE2 in tests/examples")
message(STATUS "Enabling SSE2 in tests/examples")
endif(EIGEN_TEST_SSE2)
endif(MSVC)
option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF)
option(EIGEN_TEST_X87 "Force using X87 instructions. Implies no vectorization." OFF)
option(EIGEN_TEST_32BIT "Force generating 32bit code." OFF)
if(EIGEN_TEST_X87)
set(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION ON)
if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpmath=387")
message("Forcing use of x87 instructions in tests/examples")
message(STATUS "Forcing use of x87 instructions in tests/examples")
else()
message("EIGEN_TEST_X87 ignored on your compiler")
message(STATUS "EIGEN_TEST_X87 ignored on your compiler")
endif()
endif()
if(EIGEN_TEST_32BIT)
if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
message(STATUS "Forcing generation of 32-bit code in tests/examples")
else()
message(STATUS "EIGEN_TEST_32BIT ignored on your compiler")
endif()
endif()
if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
add_definitions(-DEIGEN_DONT_VECTORIZE=1)
message("Disabling vectorization in tests/examples")
message(STATUS "Disabling vectorization in tests/examples")
endif()
option(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT "Disable explicit alignment (hence vectorization) in tests/examples" OFF)
if(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT)
add_definitions(-DEIGEN_DONT_ALIGN=1)
message("Disabling alignment in tests/examples")
message(STATUS "Disabling alignment in tests/examples")
endif()
option(EIGEN_TEST_C++0x "Enables all C++0x features." OFF)
@@ -261,9 +279,21 @@ install(FILES
)
if(EIGEN_BUILD_PKGCONFIG)
SET(path_separator ":")
STRING(REPLACE ${path_separator} ";" pkg_config_libdir_search "$ENV{PKG_CONFIG_LIBDIR}")
message(STATUS "searching for 'pkgconfig' directory in PKG_CONFIG_LIBDIR ( $ENV{PKG_CONFIG_LIBDIR} ), ${CMAKE_INSTALL_PREFIX}/share, and ${CMAKE_INSTALL_PREFIX}/lib")
FIND_PATH(pkg_config_libdir pkgconfig ${pkg_config_libdir_search} ${CMAKE_INSTALL_PREFIX}/share ${CMAKE_INSTALL_PREFIX}/lib ${pkg_config_libdir_search})
if(pkg_config_libdir)
SET(pkg_config_install_dir ${pkg_config_libdir})
message(STATUS "found ${pkg_config_libdir}/pkgconfig" )
else(pkg_config_libdir)
SET(pkg_config_install_dir ${CMAKE_INSTALL_PREFIX}/share)
message(STATUS "pkgconfig not found; installing in ${pkg_config_install_dir}" )
endif(pkg_config_libdir)
configure_file(eigen3.pc.in eigen3.pc)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc
DESTINATION share/pkgconfig
DESTINATION ${pkg_config_install_dir}/pkgconfig
)
endif(EIGEN_BUILD_PKGCONFIG)
@@ -271,23 +301,66 @@ add_subdirectory(Eigen)
add_subdirectory(doc EXCLUDE_FROM_ALL)
add_custom_target(buildtests)
add_custom_target(check COMMAND "ctest")
add_dependencies(check buildtests)
# CMake/Ctest does not allow us to change the build command,
# so we have to workaround by directly editing the generated DartConfiguration.tcl file
# save CMAKE_MAKE_PROGRAM
set(CMAKE_MAKE_PROGRAM_SAVE ${CMAKE_MAKE_PROGRAM})
# and set a fake one
set(CMAKE_MAKE_PROGRAM "@EIGEN_MAKECOMMAND_PLACEHOLDER@")
include(CTest)
enable_testing() # must be called from the root CMakeLists, see man page
include(EigenTesting)
ei_init_testing()
# overwrite default DartConfiguration.tcl
# The workarounds are different for each version of the MSVC IDE
if(MSVC_IDE)
if(MSVC_VERSION EQUAL 1600) # MSVC 2010
set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} buildtests.vcxproj /p:Configuration=\${CTEST_CONFIGURATION_TYPE} \n # ")
else() # MSVC 2008 (TODO check MSVC 2005)
set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} /project buildtests")
endif()
else()
# for make and nmake
set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} buildtests")
endif()
configure_file(${CMAKE_BINARY_DIR}/DartConfiguration.tcl ${CMAKE_BINARY_DIR}/DartConfiguration.tcl)
# restore default CMAKE_MAKE_PROGRAM
set(CMAKE_MAKE_PROGRAM ${CMAKE_MAKE_PROGRAM_SAVE})
# un-set temporary variables so that it is like they never existed.
# CMake 2.6.3 introduces the more logical unset() syntax for this.
set(CMAKE_MAKE_PROGRAM_SAVE)
set(EIGEN_MAKECOMMAND_PLACEHOLDER)
configure_file(${CMAKE_SOURCE_DIR}/CTestCustom.cmake.in ${CMAKE_BINARY_DIR}/CTestCustom.cmake)
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest
else()
add_subdirectory(test EXCLUDE_FROM_ALL)
endif()
if(NOT MSVC)
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
add_subdirectory(blas)
add_subdirectory(lapack)
else()
add_subdirectory(blas EXCLUDE_FROM_ALL)
add_subdirectory(lapack EXCLUDE_FROM_ALL)
endif()
endif(NOT MSVC)
add_subdirectory(unsupported)
add_subdirectory(demos EXCLUDE_FROM_ALL)
add_subdirectory(blas EXCLUDE_FROM_ALL)
# must be after test and unsupported, for configuring buildtests.in
add_subdirectory(scripts EXCLUDE_FROM_ALL)
@@ -298,30 +371,35 @@ endif(EIGEN_BUILD_BTL)
ei_testing_print_summary()
message("")
message("Configured Eigen ${EIGEN_VERSION_NUMBER}")
message("")
message(STATUS "")
message(STATUS "Configured Eigen ${EIGEN_VERSION_NUMBER}")
message(STATUS "")
option(EIGEN_FAILTEST "Enable failtests." OFF)
if(EIGEN_FAILTEST)
add_subdirectory(failtest)
endif()
string(TOLOWER "${CMAKE_GENERATOR}" cmake_generator_tolower)
if(cmake_generator_tolower MATCHES "makefile")
message("Some things you can do now:")
message("--------------+----------------------------------------------------------------")
message("Command | Description")
message("--------------+----------------------------------------------------------------")
message("make install | Install to ${CMAKE_INSTALL_PREFIX}. To change that:")
message(" | cmake . -DCMAKE_INSTALL_PREFIX=yourpath")
message(" | Eigen headers will then be installed to:")
message(" | ${INCLUDE_INSTALL_DIR}")
message(" | To install Eigen headers to a separate location, do:")
message(" | cmake . -DEIGEN_INCLUDE_INSTALL_DIR=yourpath")
message("make doc | Generate the API documentation, requires Doxygen & LaTeX")
message("make check | Build and run the unit-tests. Read this page:")
message(" | http://eigen.tuxfamily.org/index.php?title=Tests")
message("make blas | Build BLAS library (not the same thing as Eigen)")
message("--------------+----------------------------------------------------------------")
message(STATUS "Some things you can do now:")
message(STATUS "--------------+--------------------------------------------------------------")
message(STATUS "Command | Description")
message(STATUS "--------------+--------------------------------------------------------------")
message(STATUS "make install | Install to ${CMAKE_INSTALL_PREFIX}. To change that:")
message(STATUS " | cmake . -DCMAKE_INSTALL_PREFIX=yourpath")
message(STATUS " | Eigen headers will then be installed to:")
message(STATUS " | ${INCLUDE_INSTALL_DIR}")
message(STATUS " | To install Eigen headers to a separate location, do:")
message(STATUS " | cmake . -DEIGEN_INCLUDE_INSTALL_DIR=yourpath")
message(STATUS "make doc | Generate the API documentation, requires Doxygen & LaTeX")
message(STATUS "make check | Build and run the unit-tests. Read this page:")
message(STATUS " | http://eigen.tuxfamily.org/index.php?title=Tests")
message(STATUS "make blas | Build BLAS library (not the same thing as Eigen)")
message(STATUS "--------------+--------------------------------------------------------------")
else()
message("To build/run the unit tests, read this page:")
message(" http://eigen.tuxfamily.org/index.php?title=Tests")
message(STATUS "To build/run the unit tests, read this page:")
message(STATUS " http://eigen.tuxfamily.org/index.php?title=Tests")
endif()
message("")
message(STATUS "")

View File

@@ -5,13 +5,9 @@
## ENABLE_TESTING()
## INCLUDE(CTest)
set(CTEST_PROJECT_NAME "Eigen")
set(CTEST_NIGHTLY_START_TIME "06:00:00 UTC")
set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC")
set(CTEST_DROP_METHOD "http")
set(CTEST_DROP_SITE "eigen.tuxfamily.org")
set(CTEST_DROP_SITE "manao.inria.fr")
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen")
set(CTEST_DROP_SITE_CDASH TRUE)
## A tribute to Dynamic!
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "33331")
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "33331")

4
CTestCustom.cmake.in Normal file
View File

@@ -0,0 +1,4 @@
## A tribute to Dynamic!
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "33331")
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "33331")

View File

@@ -1,14 +1,11 @@
#ifndef EIGEN_ARRAY_MODULE_H
#define EIGEN_ARRAY_MODULE_H
#ifdef _MSC_VER
#pragma message("The inclusion of Eigen/Array is deprecated. \
The array module is available as soon as Eigen/Core is included.")
#elif __GNUC__
#warning "The inclusion of Eigen/Array is deprecated. \
The array module is available as soon as Eigen/Core is included."
#endif
// include Core first to handle Eigen2 support macros
#include "Core"
#ifndef EIGEN2_SUPPORT
#error The Eigen/Array header does no longer exist in Eigen3. All that functionality has moved to Eigen/Core.
#endif
#endif // EIGEN_ARRAY_MODULE_H

View File

@@ -1,6 +1,12 @@
include(RegexUtils)
test_escape_string_as_regex()
file(GLOB Eigen_directory_files "*")
escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
foreach(f ${Eigen_directory_files})
if(NOT f MATCHES ".txt" AND NOT f MATCHES "${CMAKE_CURRENT_SOURCE_DIR}/src")
if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/src")
list(APPEND Eigen_directory_files_to_install ${f})
endif()
endforeach(f ${Eigen_directory_files})

View File

@@ -3,7 +3,7 @@
#include "Core"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
@@ -27,7 +27,7 @@ namespace Eigen {
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_CHOLESKY_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -2,7 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2007-2011 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -26,21 +26,13 @@
#ifndef EIGEN_CORE_H
#define EIGEN_CORE_H
#define EIGEN_NO_STATIC_ASSERT
// first thing Eigen does: prevent MSVC from committing suicide
#include "src/Core/util/DisableMSVCWarnings.h"
// first thing Eigen does: stop the compiler from committing suicide
#include "src/Core/util/DisableStupidWarnings.h"
// then include this file where all our macros are defined. It's really important to do it first because
// it's where we do all the alignment settings (platform detection and honoring the user's will if he
// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
#ifndef EIGEN_PARSED_BY_DOXYGEN
#include "src/Core/util/Macros.h"
#else
namespace Eigen { // for some reason Doxygen needs this namespace
#include "src/Core/util/Macros.h"
}
#endif
#include "src/Core/util/Macros.h"
// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into
// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks
@@ -59,16 +51,16 @@
#define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
#endif
#endif
#endif
// Remember that usage of defined() in a #define is undefined by the standard
#if (defined __SSE2__) && ( (!defined __GNUC__) || EIGEN_GNUC_AT_LEAST(4,2) )
#define EIGEN_SSE2_BUT_NOT_OLD_GCC
#else
// Remember that usage of defined() in a #define is undefined by the standard
#if (defined __SSE2__) && ( (!defined __GNUC__) || EIGEN_GNUC_AT_LEAST(4,2) )
#define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
#endif
#endif
#ifndef EIGEN_DONT_VECTORIZE
#if defined (EIGEN_SSE2_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
#if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
// Defines symbols for compile-time detection of which instructions are
// used.
@@ -95,28 +87,30 @@
#endif
// include files
#if (defined __GNUC__) && (defined __MINGW32__)
#include <intrin.h>
//including intrin.h works around a MINGW bug http://sourceforge.net/tracker/?func=detail&atid=102435&aid=2962480&group_id=2435
//in essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do). However,
//intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations with conflicting linkage. The linkage for intrinsics
//doesn't matter, but at that stage the compiler doesn't know; so, to avoid compile errors when windows.h is included after Eigen/Core,
//include intrin here.
#endif
#include <emmintrin.h>
#include <xmmintrin.h>
#ifdef EIGEN_VECTORIZE_SSE3
// This extern "C" works around a MINGW-w64 compilation issue
// https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354
// In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).
// However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations
// with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;
// so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too.
// Note that since these are C headers, the extern "C" is theoretically needed anyway.
extern "C" {
#include <emmintrin.h>
#include <xmmintrin.h>
#ifdef EIGEN_VECTORIZE_SSE3
#include <pmmintrin.h>
#endif
#ifdef EIGEN_VECTORIZE_SSSE3
#endif
#ifdef EIGEN_VECTORIZE_SSSE3
#include <tmmintrin.h>
#endif
#ifdef EIGEN_VECTORIZE_SSE4_1
#endif
#ifdef EIGEN_VECTORIZE_SSE4_1
#include <smmintrin.h>
#endif
#ifdef EIGEN_VECTORIZE_SSE4_2
#endif
#ifdef EIGEN_VECTORIZE_SSE4_2
#include <nmmintrin.h>
#endif
#endif
} // end extern "C"
#elif defined __ALTIVEC__
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_ALTIVEC
@@ -142,13 +136,14 @@
#endif
// MSVC for windows mobile does not have the errno.h file
#if !(defined(_MSC_VER) && defined(_WIN32_WCE))
#if !(defined(_MSC_VER) && defined(_WIN32_WCE)) && !defined(__ARMCC_VERSION)
#define EIGEN_HAS_ERRNO
#endif
#ifdef EIGEN_HAS_ERRNO
#include <cerrno>
#endif
#include <cstddef>
#include <cstdlib>
#include <cmath>
#include <complex>
@@ -158,20 +153,21 @@
#include <cstring>
#include <string>
#include <limits>
#include <climits> // for CHAR_BIT
// for min/max:
#include <algorithm>
// for outputting debug info
#ifdef EIGEN_DEBUG_ASSIGN
#include<iostream>
#include <iostream>
#endif
// required for __cpuid, needs to be included after cmath
#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_IX64))
#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64))
#include <intrin.h>
#endif
#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_NO_EXCEPTIONS)
#if defined(_CPPUNWIND) || defined(__EXCEPTIONS)
#define EIGEN_EXCEPTIONS
#endif
@@ -179,16 +175,10 @@
#include <new>
#endif
// this needs to be done after all possible windows C header includes and before any Eigen source includes
// (system C++ includes are supposed to be able to deal with this already):
// windows.h defines min and max macros which would make Eigen fail to compile.
#if defined(min) || defined(max)
#error The preprocessor symbols 'min' or 'max' are defined. If you are compiling on Windows, do #define NOMINMAX to prevent windows.h from defining these symbols.
#endif
// defined in bits/termios.h
#undef B0
/** \brief Namespace containing all symbols from the %Eigen library. */
namespace Eigen {
inline static const char *SimdInstructionSetsInUse(void) {
@@ -211,6 +201,32 @@ inline static const char *SimdInstructionSetsInUse(void) {
#endif
}
#define STAGE10_FULL_EIGEN2_API 10
#define STAGE20_RESOLVE_API_CONFLICTS 20
#define STAGE30_FULL_EIGEN3_API 30
#define STAGE40_FULL_EIGEN3_STRICTNESS 40
#define STAGE99_NO_EIGEN2_SUPPORT 99
#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS
#define EIGEN2_SUPPORT
#define EIGEN2_SUPPORT_STAGE STAGE40_FULL_EIGEN3_STRICTNESS
#elif defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API
#define EIGEN2_SUPPORT
#define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API
#elif defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS
#define EIGEN2_SUPPORT
#define EIGEN2_SUPPORT_STAGE STAGE20_RESOLVE_API_CONFLICTS
#elif defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API
#define EIGEN2_SUPPORT
#define EIGEN2_SUPPORT_STAGE STAGE10_FULL_EIGEN2_API
#elif defined EIGEN2_SUPPORT
// default to stage 30 (full Eigen3 API); that's what plain EIGEN2_SUPPORT has always meant
#define EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API
#define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API
#else
#define EIGEN2_SUPPORT_STAGE STAGE99_NO_EIGEN2_SUPPORT
#endif
#ifdef EIGEN2_SUPPORT
#undef minor
#endif
@@ -218,6 +234,8 @@ inline static const char *SimdInstructionSetsInUse(void) {
// we use size_t frequently and we'll never remember to prepend it with std:: every time just to
// ensure QNX/QCC support
using std::size_t;
// gcc 4.6.0 wants std:: for ptrdiff_t
using std::ptrdiff_t;
/** \defgroup Core_Module Core module
* This is the main module of Eigen providing dense matrix and vector support
@@ -266,13 +284,14 @@ using std::size_t;
#endif
#include "src/Core/util/BlasUtil.h"
#include "src/Core/MatrixStorage.h"
#include "src/Core/DenseStorage.h"
#include "src/Core/NestByValue.h"
#include "src/Core/ForceAlignedAccess.h"
#include "src/Core/ReturnByValue.h"
#include "src/Core/NoAlias.h"
#include "src/Core/DenseStorageBase.h"
#include "src/Core/PlainObjectBase.h"
#include "src/Core/Matrix.h"
#include "src/Core/Array.h"
#include "src/Core/CwiseBinaryOp.h"
#include "src/Core/CwiseUnaryOp.h"
#include "src/Core/CwiseNullaryOp.h"
@@ -308,6 +327,7 @@ using std::size_t;
#include "src/Core/products/GeneralBlockPanelKernel.h"
#include "src/Core/products/GeneralMatrixVector.h"
#include "src/Core/products/GeneralMatrixMatrix.h"
#include "src/Core/products/GeneralMatrixMatrixTriangular.h"
#include "src/Core/products/SelfadjointMatrixVector.h"
#include "src/Core/products/SelfadjointMatrixMatrix.h"
#include "src/Core/products/SelfadjointProduct.h"
@@ -315,6 +335,7 @@ using std::size_t;
#include "src/Core/products/TriangularMatrixVector.h"
#include "src/Core/products/TriangularMatrixMatrix.h"
#include "src/Core/products/TriangularSolverMatrix.h"
#include "src/Core/products/TriangularSolverVector.h"
#include "src/Core/BandMatrix.h"
#include "src/Core/BooleanRedux.h"
@@ -325,13 +346,12 @@ using std::size_t;
#include "src/Core/Reverse.h"
#include "src/Core/ArrayBase.h"
#include "src/Core/ArrayWrapper.h"
#include "src/Core/Array.h"
} // namespace Eigen
#include "src/Core/GlobalFunctions.h"
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#ifdef EIGEN2_SUPPORT
#include "Eigen2Support"

View File

@@ -1,2 +1,2 @@
#include "Dense"
#include "Sparse"
//#include "Sparse"

View File

@@ -29,7 +29,7 @@
#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header
#endif
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
@@ -43,6 +43,9 @@ namespace Eigen {
*
*/
#include "src/Eigen2Support/Macros.h"
#include "src/Eigen2Support/Memory.h"
#include "src/Eigen2Support/Meta.h"
#include "src/Eigen2Support/Lazy.h"
#include "src/Eigen2Support/Cwise.h"
#include "src/Eigen2Support/CwiseOperators.h"
@@ -50,11 +53,12 @@ namespace Eigen {
#include "src/Eigen2Support/Block.h"
#include "src/Eigen2Support/VectorBlock.h"
#include "src/Eigen2Support/Minor.h"
#include "src/Eigen2Support/MathFunctions.h"
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
// Eigen2 used to include iostream
#include<iostream>

View File

@@ -3,7 +3,7 @@
#include "Core"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
#include "Cholesky"
#include "Jacobi"
@@ -38,7 +38,7 @@ namespace Eigen {
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_EIGENVALUES_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -3,7 +3,7 @@
#include "Core"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
#include "SVD"
#include "LU"
@@ -33,27 +33,34 @@ namespace Eigen {
*/
#include "src/Geometry/OrthoMethods.h"
#include "src/Geometry/Homogeneous.h"
#include "src/Geometry/RotationBase.h"
#include "src/Geometry/Rotation2D.h"
#include "src/Geometry/Quaternion.h"
#include "src/Geometry/AngleAxis.h"
#include "src/Geometry/EulerAngles.h"
#include "src/Geometry/Transform.h"
#include "src/Geometry/Translation.h"
#include "src/Geometry/Scaling.h"
#include "src/Geometry/Hyperplane.h"
#include "src/Geometry/ParametrizedLine.h"
#include "src/Geometry/AlignedBox.h"
#include "src/Geometry/Umeyama.h"
#if defined EIGEN_VECTORIZE_SSE
#include "src/Geometry/arch/Geometry_SSE.h"
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
#include "src/Geometry/Homogeneous.h"
#include "src/Geometry/RotationBase.h"
#include "src/Geometry/Rotation2D.h"
#include "src/Geometry/Quaternion.h"
#include "src/Geometry/AngleAxis.h"
#include "src/Geometry/Transform.h"
#include "src/Geometry/Translation.h"
#include "src/Geometry/Scaling.h"
#include "src/Geometry/Hyperplane.h"
#include "src/Geometry/ParametrizedLine.h"
#include "src/Geometry/AlignedBox.h"
#include "src/Geometry/Umeyama.h"
#if defined EIGEN_VECTORIZE_SSE
#include "src/Geometry/arch/Geometry_SSE.h"
#endif
#endif
#ifdef EIGEN2_SUPPORT
#include "src/Eigen2Support/Geometry/All.h"
#endif
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_GEOMETRY_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -3,7 +3,7 @@
#include "Core"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
@@ -21,7 +21,7 @@ namespace Eigen {
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_HOUSEHOLDER_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -3,7 +3,7 @@
#include "Core"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
@@ -23,7 +23,7 @@ namespace Eigen {
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_JACOBI_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -3,7 +3,7 @@
#include "Core"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
@@ -30,9 +30,13 @@ namespace Eigen {
#include "src/LU/arch/Inverse_SSE.h"
#endif
#ifdef EIGEN2_SUPPORT
#include "src/Eigen2Support/LU.h"
#endif
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_LU_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */

36
Eigen/LeastSquares Normal file
View File

@@ -0,0 +1,36 @@
#ifndef EIGEN_REGRESSION_MODULE_H
#define EIGEN_REGRESSION_MODULE_H
#ifndef EIGEN2_SUPPORT
#error LeastSquares is only available in Eigen2 support mode (define EIGEN2_SUPPORT)
#endif
// exclude from normal eigen3-only documentation
#ifdef EIGEN2_SUPPORT
#include "Core"
#include "src/Core/util/DisableStupidWarnings.h"
#include "Eigenvalues"
#include "Geometry"
namespace Eigen {
/** \defgroup LeastSquares_Module LeastSquares module
* This module provides linear regression and related features.
*
* \code
* #include <Eigen/LeastSquares>
* \endcode
*/
#include "src/Eigen2Support/LeastSquares.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN2_SUPPORT
#endif // EIGEN_REGRESSION_MODULE_H

View File

@@ -3,7 +3,7 @@
#include "Core"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
#include "Cholesky"
#include "Jacobi"
@@ -29,13 +29,17 @@ namespace Eigen {
#include "src/QR/FullPivHouseholderQR.h"
#include "src/QR/ColPivHouseholderQR.h"
#ifdef EIGEN2_SUPPORT
#include "src/Eigen2Support/QR.h"
#endif
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
// FIXME for compatibility we include Eigenvalues here:
#ifdef EIGEN2_SUPPORT
#include "Eigenvalues"
#endif
#endif // EIGEN_QR_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -6,27 +6,27 @@
#if (!EIGEN_MALLOC_ALREADY_ALIGNED)
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
void *qMalloc(size_t size)
{
return Eigen::ei_aligned_malloc(size);
return Eigen::internal::aligned_malloc(size);
}
void qFree(void *ptr)
{
Eigen::ei_aligned_free(ptr);
Eigen::internal::aligned_free(ptr);
}
void *qRealloc(void *ptr, size_t size)
{
void* newPtr = Eigen::ei_aligned_malloc(size);
void* newPtr = Eigen::internal::aligned_malloc(size);
memcpy(newPtr, ptr, size);
Eigen::ei_aligned_free(ptr);
Eigen::internal::aligned_free(ptr);
return newPtr;
}
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif

View File

@@ -5,7 +5,7 @@
#include "Householder"
#include "Jacobi"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen {
@@ -13,9 +13,9 @@ namespace Eigen {
*
*
*
* This module provides SVD decomposition for (currently) real matrices.
* This module provides SVD decomposition for matrices (both real and complex).
* This decomposition is accessible via the following MatrixBase method:
* - MatrixBase::svd()
* - MatrixBase::jacobiSvd()
*
* \code
* #include <Eigen/SVD>
@@ -26,9 +26,13 @@ namespace Eigen {
#include "src/SVD/JacobiSVD.h"
#include "src/SVD/UpperBidiagonalization.h"
#ifdef EIGEN2_SUPPORT
#include "src/Eigen2Support/SVD.h"
#endif
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SVD_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -3,7 +3,7 @@
#include "Core"
#include "src/Core/util/DisableMSVCWarnings.h"
#include "src/Core/util/DisableStupidWarnings.h"
#include <vector>
#include <map>
@@ -11,6 +11,14 @@
#include <cstring>
#include <algorithm>
#ifdef EIGEN2_SUPPORT
#define EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET
#endif
#ifndef EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET
#error The sparse module API is not stable yet. To use it anyway, please define the EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET preprocessor token.
#endif
namespace Eigen {
/** \defgroup Sparse_Module Sparse module
@@ -55,7 +63,7 @@ struct Sparse {};
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SPARSE_MODULE_H

View File

@@ -1,6 +1,7 @@
file(GLOB Eigen_src_subdirectories "*")
escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
foreach(f ${Eigen_src_subdirectories})
if(NOT f MATCHES ".txt")
if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" )
add_subdirectory(${f})
endif()
endforeach()

View File

@@ -27,7 +27,9 @@
#ifndef EIGEN_LDLT_H
#define EIGEN_LDLT_H
namespace internal {
template<typename MatrixType, int UpLo> struct LDLT_Traits;
}
/** \ingroup cholesky_Module
*
@@ -74,7 +76,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;
typedef LDLT_Traits<MatrixType,UpLo> Traits;
typedef internal::LDLT_Traits<MatrixType,UpLo> Traits;
/** \brief Default Constructor.
*
@@ -108,14 +110,14 @@ template<typename _MatrixType, int _UpLo> class LDLT
/** \returns a view of the upper triangular matrix U */
inline typename Traits::MatrixU matrixU() const
{
ei_assert(m_isInitialized && "LDLT is not initialized.");
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return Traits::getU(m_matrix);
}
/** \returns a view of the lower triangular matrix L */
inline typename Traits::MatrixL matrixL() const
{
ei_assert(m_isInitialized && "LDLT is not initialized.");
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return Traits::getL(m_matrix);
}
@@ -123,47 +125,72 @@ template<typename _MatrixType, int _UpLo> class LDLT
*/
inline const TranspositionType& transpositionsP() const
{
ei_assert(m_isInitialized && "LDLT is not initialized.");
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_transpositions;
}
/** \returns the coefficients of the diagonal matrix D */
inline Diagonal<MatrixType,0> vectorD(void) const
inline Diagonal<const MatrixType> vectorD(void) const
{
ei_assert(m_isInitialized && "LDLT is not initialized.");
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_matrix.diagonal();
}
/** \returns true if the matrix is positive (semidefinite) */
inline bool isPositive(void) const
{
ei_assert(m_isInitialized && "LDLT is not initialized.");
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_sign == 1;
}
#ifdef EIGEN2_SUPPORT
/** Eigen2-compatibility spelling, only compiled when EIGEN2_SUPPORT is defined.
* \returns the same value as isPositive(); this method adds no check of its own.
*/
inline bool isPositiveDefinite() const
{
return isPositive();
}
#endif
/** \returns true if the matrix is negative (semidefinite) */
inline bool isNegative(void) const
{
ei_assert(m_isInitialized && "LDLT is not initialized.");
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_sign == -1;
}
/** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A.
*
* This function also supports in-place solves using the syntax <tt>x = decompositionObject.solve(x)</tt> .
*
* \note_about_checking_solutions
*
* \sa solveInPlace(), MatrixBase::ldlt()
* More precisely, this method solves \f$ A x = b \f$ using the decomposition \f$ A = P^T L D L^* P \f$
* by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$,
* \f$ L^* y_4 = y_3 \f$ and \f$ P x = y_4 \f$ in succession. If the matrix \f$ A \f$ is singular, then
* \f$ D \f$ will also be singular (all the other matrices are invertible). In that case, the
* least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function
* computes the least-square solution of \f$ A x = b \f$ if \f$ A \f$ is singular.
*
* \sa MatrixBase::ldlt()
*/
template<typename Rhs>
inline const ei_solve_retval<LDLT, Rhs>
inline const internal::solve_retval<LDLT, Rhs>
solve(const MatrixBase<Rhs>& b) const
{
ei_assert(m_isInitialized && "LDLT is not initialized.");
ei_assert(m_matrix.rows()==b.rows()
eigen_assert(m_isInitialized && "LDLT is not initialized.");
eigen_assert(m_matrix.rows()==b.rows()
&& "LDLT::solve(): invalid number of rows of the right hand side matrix b");
return ei_solve_retval<LDLT, Rhs>(*this, b.derived());
return internal::solve_retval<LDLT, Rhs>(*this, b.derived());
}
#ifdef EIGEN2_SUPPORT
/** Eigen2-compatibility overload, only compiled when EIGEN2_SUPPORT is defined.
*
* Assigns the result of the one-argument solve(b) to \c *result.
*
* \param b right-hand side, forwarded unchanged to solve(b)
* \param result destination; dereferenced without a null check, so it must point to a valid object
* \returns always \c true; the return value carries no information about the quality of the solution
*/
template<typename OtherDerived, typename ResultType>
bool solve(const MatrixBase<OtherDerived>& b, ResultType *result) const
{
*result = this->solve(b);
return true;
}
#endif
template<typename Derived>
bool solveInPlace(MatrixBase<Derived> &bAndX) const;
@@ -175,7 +202,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
*/
inline const MatrixType& matrixLDLT() const
{
ei_assert(m_isInitialized && "LDLT is not initialized.");
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_matrix;
}
@@ -199,9 +226,11 @@ template<typename _MatrixType, int _UpLo> class LDLT
bool m_isInitialized;
};
template<int UpLo> struct ei_ldlt_inplace;
namespace internal {
template<> struct ei_ldlt_inplace<Lower>
template<int UpLo> struct ldlt_inplace;
template<> struct ldlt_inplace<Lower>
{
template<typename MatrixType, typename TranspositionType, typename Workspace>
static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0)
@@ -209,14 +238,14 @@ template<> struct ei_ldlt_inplace<Lower>
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
typedef typename MatrixType::Index Index;
ei_assert(mat.rows()==mat.cols());
eigen_assert(mat.rows()==mat.cols());
const Index size = mat.rows();
if (size <= 1)
{
transpositions.setIdentity();
if(sign)
*sign = ei_real(mat.coeff(0,0))>0 ? 1:-1;
*sign = real(mat.coeff(0,0))>0 ? 1:-1;
return true;
}
@@ -234,10 +263,10 @@ template<> struct ei_ldlt_inplace<Lower>
// The biggest overall is the point of reference to which further diagonals
// are compared; if any diagonal is negligible compared
// to the largest overall, the algorithm bails.
cutoff = ei_abs(NumTraits<Scalar>::epsilon() * biggest_in_corner);
cutoff = abs(NumTraits<Scalar>::epsilon() * biggest_in_corner);
if(sign)
*sign = ei_real(mat.diagonal().coeff(index_of_biggest_in_corner)) > 0 ? 1 : -1;
*sign = real(mat.diagonal().coeff(index_of_biggest_in_corner)) > 0 ? 1 : -1;
}
// Finish early if the matrix is not full rank.
@@ -259,11 +288,11 @@ template<> struct ei_ldlt_inplace<Lower>
for(int i=k+1;i<index_of_biggest_in_corner;++i)
{
Scalar tmp = mat.coeffRef(i,k);
mat.coeffRef(i,k) = ei_conj(mat.coeffRef(index_of_biggest_in_corner,i));
mat.coeffRef(index_of_biggest_in_corner,i) = ei_conj(tmp);
mat.coeffRef(i,k) = conj(mat.coeffRef(index_of_biggest_in_corner,i));
mat.coeffRef(index_of_biggest_in_corner,i) = conj(tmp);
}
if(NumTraits<Scalar>::IsComplex)
mat.coeffRef(index_of_biggest_in_corner,k) = ei_conj(mat.coeff(index_of_biggest_in_corner,k));
mat.coeffRef(index_of_biggest_in_corner,k) = conj(mat.coeff(index_of_biggest_in_corner,k));
}
// partition the matrix:
@@ -282,7 +311,7 @@ template<> struct ei_ldlt_inplace<Lower>
if(rs>0)
A21.noalias() -= A20 * temp.head(k);
}
if((rs>0) && (ei_abs(mat.coeffRef(k,k)) > cutoff))
if((rs>0) && (abs(mat.coeffRef(k,k)) > cutoff))
A21 /= mat.coeffRef(k,k);
}
@@ -290,38 +319,40 @@ template<> struct ei_ldlt_inplace<Lower>
}
};
template<> struct ei_ldlt_inplace<Upper>
template<> struct ldlt_inplace<Upper>
{
template<typename MatrixType, typename TranspositionType, typename Workspace>
static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0)
{
Transpose<MatrixType> matt(mat);
return ei_ldlt_inplace<Lower>::unblocked(matt, transpositions, temp, sign);
return ldlt_inplace<Lower>::unblocked(matt, transpositions, temp, sign);
}
};
template<typename MatrixType> struct LDLT_Traits<MatrixType,Lower>
{
typedef TriangularView<MatrixType, UnitLower> MatrixL;
typedef TriangularView<typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;
typedef const TriangularView<const MatrixType, UnitLower> MatrixL;
typedef const TriangularView<const typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;
inline static MatrixL getL(const MatrixType& m) { return m; }
inline static MatrixU getU(const MatrixType& m) { return m.adjoint(); }
};
template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
{
typedef TriangularView<typename MatrixType::AdjointReturnType, UnitLower> MatrixL;
typedef TriangularView<MatrixType, UnitUpper> MatrixU;
typedef const TriangularView<const typename MatrixType::AdjointReturnType, UnitLower> MatrixL;
typedef const TriangularView<const MatrixType, UnitUpper> MatrixU;
inline static MatrixL getL(const MatrixType& m) { return m.adjoint(); }
inline static MatrixU getU(const MatrixType& m) { return m; }
};
} // end namespace internal
/** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \a a
*/
template<typename MatrixType, int _UpLo>
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
{
ei_assert(a.rows()==a.cols());
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
m_matrix = a;
@@ -330,22 +361,23 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
m_isInitialized = false;
m_temporary.resize(size);
ei_ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, &m_sign);
internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, &m_sign);
m_isInitialized = true;
return *this;
}
namespace internal {
template<typename _MatrixType, int _UpLo, typename Rhs>
struct ei_solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
: ei_solve_retval_base<LDLT<_MatrixType,_UpLo>, Rhs>
struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
: solve_retval_base<LDLT<_MatrixType,_UpLo>, Rhs>
{
typedef LDLT<_MatrixType,_UpLo> LDLTType;
EIGEN_MAKE_SOLVE_HELPERS(LDLTType,Rhs)
template<typename Dest> void evalTo(Dest& dst) const
{
ei_assert(rhs().rows() == dec().matrixLDLT().rows());
eigen_assert(rhs().rows() == dec().matrixLDLT().rows());
// dst = P b
dst = dec().transpositionsP() * rhs();
@@ -353,7 +385,21 @@ struct ei_solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
dec().matrixL().solveInPlace(dst);
// dst = D^-1 (L^-1 P b)
dst = dec().vectorD().asDiagonal().inverse() * dst;
// more precisely, use pseudo-inverse of D (see bug 241)
using std::abs;
using std::max;
typedef typename LDLTType::MatrixType MatrixType;
typedef typename LDLTType::Scalar Scalar;
typedef typename LDLTType::RealScalar RealScalar;
const Diagonal<const MatrixType> vectorD = dec().vectorD();
RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() * NumTraits<Scalar>::epsilon(),
RealScalar(1) / NumTraits<RealScalar>::highest()); // motivated by LAPACK's xGELSS
for (Index i = 0; i < vectorD.size(); ++i) {
if(abs(vectorD(i)) > tolerance)
dst.row(i) /= vectorD(i);
else
dst.row(i).setZero();
}
// dst = L^-T (D^-1 L^-1 P b)
dec().matrixU().solveInPlace(dst);
@@ -362,6 +408,7 @@ struct ei_solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
dst = dec().transpositionsP().transpose() * dst;
}
};
}
/** \internal use x = ldlt_object.solve(x);
*
@@ -380,9 +427,9 @@ template<typename MatrixType,int _UpLo>
template<typename Derived>
bool LDLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
{
ei_assert(m_isInitialized && "LDLT is not initialized.");
eigen_assert(m_isInitialized && "LDLT is not initialized.");
const Index size = m_matrix.rows();
ei_assert(size == bAndX.rows());
eigen_assert(size == bAndX.rows());
bAndX = this->solve(bAndX);
@@ -395,7 +442,7 @@ bool LDLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
template<typename MatrixType, int _UpLo>
MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
{
ei_assert(m_isInitialized && "LDLT is not initialized.");
eigen_assert(m_isInitialized && "LDLT is not initialized.");
const Index size = m_matrix.rows();
MatrixType res(size,size);

View File

@@ -25,7 +25,9 @@
#ifndef EIGEN_LLT_H
#define EIGEN_LLT_H
namespace internal{
template<typename MatrixType, int UpLo> struct LLT_Traits;
}
/** \ingroup cholesky_Module
*
@@ -68,12 +70,12 @@ template<typename _MatrixType, int _UpLo> class LLT
typedef typename MatrixType::Index Index;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
PacketSize = internal::packet_traits<Scalar>::size,
AlignmentMask = int(PacketSize)-1,
UpLo = _UpLo
};
typedef LLT_Traits<MatrixType,UpLo> Traits;
typedef internal::LLT_Traits<MatrixType,UpLo> Traits;
/**
* \brief Default Constructor.
@@ -102,14 +104,14 @@ template<typename _MatrixType, int _UpLo> class LLT
/** \returns a view of the upper triangular matrix U
*
* The view is produced by Traits::getU() applied to the stored matrix
* m_matrix. An assertion checks that the decomposition has been initialized.
*/
inline typename Traits::MatrixU matrixU() const
{
ei_assert(m_isInitialized && "LLT is not initialized.");
eigen_assert(m_isInitialized && "LLT is not initialized.");
return Traits::getU(m_matrix);
}
/** \returns a view of the lower triangular matrix L
*
* The view is produced by Traits::getL() applied to the stored matrix
* m_matrix. An assertion checks that the decomposition has been initialized.
*/
inline typename Traits::MatrixL matrixL() const
{
ei_assert(m_isInitialized && "LLT is not initialized.");
eigen_assert(m_isInitialized && "LLT is not initialized.");
return Traits::getL(m_matrix);
}
@@ -124,15 +126,26 @@ template<typename _MatrixType, int _UpLo> class LLT
* \sa solveInPlace(), MatrixBase::llt()
*/
template<typename Rhs>
inline const ei_solve_retval<LLT, Rhs>
inline const internal::solve_retval<LLT, Rhs>
solve(const MatrixBase<Rhs>& b) const
{
// Preconditions (checked by assertion): the decomposition is initialized and
// b has as many rows as the factored matrix.
// The function itself only builds and returns a solve_retval object from
// *this and b.derived().
// NOTE(review): presumably the actual solve runs when that object is
// evaluated -- confirm against solve_retval.
ei_assert(m_isInitialized && "LLT is not initialized.");
ei_assert(m_matrix.rows()==b.rows()
eigen_assert(m_isInitialized && "LLT is not initialized.");
eigen_assert(m_matrix.rows()==b.rows()
&& "LLT::solve(): invalid number of rows of the right hand side matrix b");
return ei_solve_retval<LLT, Rhs>(*this, b.derived());
return internal::solve_retval<LLT, Rhs>(*this, b.derived());
}
// The two members below are only compiled when EIGEN2_SUPPORT is defined.
#ifdef EIGEN2_SUPPORT
// Two-argument solve: stores this->solve(b) into *result and always
// returns true. \a result must point to a valid object; it is dereferenced
// unconditionally.
template<typename OtherDerived, typename ResultType>
bool solve(const MatrixBase<OtherDerived>& b, ResultType *result) const
{
*result = this->solve(b);
return true;
}
// Unconditionally returns true; it does not look at any state of the
// decomposition.
// NOTE(review): presumably kept for source compatibility only -- confirm
// whether callers should consult info() instead.
bool isPositiveDefinite() const { return true; }
#endif
template<typename Derived>
void solveInPlace(MatrixBase<Derived> &bAndX) const;
@@ -144,7 +157,7 @@ template<typename _MatrixType, int _UpLo> class LLT
*/
inline const MatrixType& matrixLLT() const
{
// Returns the stored matrix m_matrix by const reference (no copy is made
// here). An assertion checks that the decomposition has been initialized.
ei_assert(m_isInitialized && "LLT is not initialized.");
eigen_assert(m_isInitialized && "LLT is not initialized.");
return m_matrix;
}
@@ -158,7 +171,7 @@ template<typename _MatrixType, int _UpLo> class LLT
*/
ComputationInfo info() const
{
// Returns the stored status value m_info. An assertion checks that the
// decomposition has been initialized before the status is read.
ei_assert(m_isInitialized && "LLT is not initialized.");
eigen_assert(m_isInitialized && "LLT is not initialized.");
return m_info;
}
@@ -175,17 +188,20 @@ template<typename _MatrixType, int _UpLo> class LLT
ComputationInfo m_info;
};
template<int UpLo> struct ei_llt_inplace;
namespace internal {
template<> struct ei_llt_inplace<Lower>
template<int UpLo> struct llt_inplace;
template<> struct llt_inplace<Lower>
{
template<typename MatrixType>
static bool unblocked(MatrixType& mat)
static typename MatrixType::Index unblocked(MatrixType& mat)
{
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
typedef typename MatrixType::Index Index;
ei_assert(mat.rows()==mat.cols());
eigen_assert(mat.rows()==mat.cols());
const Index size = mat.rows();
for(Index k = 0; k < size; ++k)
{
@@ -195,29 +211,29 @@ template<> struct ei_llt_inplace<Lower>
Block<MatrixType,1,Dynamic> A10(mat,k,0,1,k);
Block<MatrixType,Dynamic,Dynamic> A20(mat,k+1,0,rs,k);
RealScalar x = ei_real(mat.coeff(k,k));
if (k>0) x -= mat.row(k).head(k).squaredNorm();
RealScalar x = real(mat.coeff(k,k));
if (k>0) x -= A10.squaredNorm();
if (x<=RealScalar(0))
return false;
mat.coeffRef(k,k) = x = ei_sqrt(x);
return k;
mat.coeffRef(k,k) = x = sqrt(x);
if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint();
if (rs>0) A21 *= RealScalar(1)/x;
}
return true;
return -1;
}
template<typename MatrixType>
static bool blocked(MatrixType& m)
static typename MatrixType::Index blocked(MatrixType& m)
{
typedef typename MatrixType::Index Index;
ei_assert(m.rows()==m.cols());
eigen_assert(m.rows()==m.cols());
Index size = m.rows();
if(size<32)
return unblocked(m);
Index blockSize = size/8;
blockSize = (blockSize/16)*16;
blockSize = std::min(std::max(blockSize,Index(8)), Index(128));
blockSize = (std::min)((std::max)(blockSize,Index(8)), Index(128));
for (Index k=0; k<size; k+=blockSize)
{
@@ -225,56 +241,59 @@ template<> struct ei_llt_inplace<Lower>
// A00 | - | -
// lu = A10 | A11 | -
// A20 | A21 | A22
Index bs = std::min(blockSize, size-k);
Index bs = (std::min)(blockSize, size-k);
Index rs = size - k - bs;
Block<MatrixType,Dynamic,Dynamic> A11(m,k, k, bs,bs);
Block<MatrixType,Dynamic,Dynamic> A21(m,k+bs,k, rs,bs);
Block<MatrixType,Dynamic,Dynamic> A22(m,k+bs,k+bs,rs,rs);
if(!unblocked(A11)) return false;
Index ret;
if((ret=unblocked(A11))>=0) return k+ret;
if(rs>0) A11.adjoint().template triangularView<Upper>().template solveInPlace<OnTheRight>(A21);
if(rs>0) A22.template selfadjointView<Lower>().rankUpdate(A21,-1); // bottleneck
}
return true;
return -1;
}
};
// Upper-triangular variant of the in-place factorisation helper.
// Neither function contains an algorithm of its own: each wraps the argument
// in a Transpose expression and forwards to the corresponding function of the
// <Lower> specialisation, returning whatever that call returns.
template<> struct ei_llt_inplace<Upper>
template<> struct llt_inplace<Upper>
{
// Unblocked factorisation, delegated to the Lower implementation on the
// transposed view of \a mat.
template<typename MatrixType>
static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat)
static EIGEN_STRONG_INLINE typename MatrixType::Index unblocked(MatrixType& mat)
{
Transpose<MatrixType> matt(mat);
return ei_llt_inplace<Lower>::unblocked(matt);
return llt_inplace<Lower>::unblocked(matt);
}
// Blocked factorisation, delegated to the Lower implementation on the
// transposed view of \a mat.
template<typename MatrixType>
static EIGEN_STRONG_INLINE bool blocked(MatrixType& mat)
static EIGEN_STRONG_INLINE typename MatrixType::Index blocked(MatrixType& mat)
{
Transpose<MatrixType> matt(mat);
return ei_llt_inplace<Lower>::blocked(matt);
return llt_inplace<Lower>::blocked(matt);
}
};
// Traits for the Lower case: L is a lower-triangular view of the matrix
// itself, U is an upper-triangular view of its adjoint.
template<typename MatrixType> struct LLT_Traits<MatrixType,Lower>
{
typedef TriangularView<MatrixType, Lower> MatrixL;
typedef TriangularView<typename MatrixType::AdjointReturnType, Upper> MatrixU;
typedef const TriangularView<const MatrixType, Lower> MatrixL;
typedef const TriangularView<const typename MatrixType::AdjointReturnType, Upper> MatrixU;
inline static MatrixL getL(const MatrixType& m) { return m; }
inline static MatrixU getU(const MatrixType& m) { return m.adjoint(); }
// Runs the blocked in-place factorisation on \a m and reports success as a
// bool. With the Index-returning llt_inplace, the comparison against -1
// converts its result into that bool.
// NOTE(review): -1 looks like the "no failing pivot" value -- confirm
// against llt_inplace<Lower>::blocked.
static bool inplace_decomposition(MatrixType& m)
{ return ei_llt_inplace<Lower>::blocked(m); }
{ return llt_inplace<Lower>::blocked(m)==-1; }
};
// Traits for the Upper case: U is an upper-triangular view of the matrix
// itself, L is a lower-triangular view of its adjoint.
template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
{
typedef TriangularView<typename MatrixType::AdjointReturnType, Lower> MatrixL;
typedef TriangularView<MatrixType, Upper> MatrixU;
typedef const TriangularView<const typename MatrixType::AdjointReturnType, Lower> MatrixL;
typedef const TriangularView<const MatrixType, Upper> MatrixU;
inline static MatrixL getL(const MatrixType& m) { return m.adjoint(); }
inline static MatrixU getU(const MatrixType& m) { return m; }
// Runs the blocked in-place factorisation on \a m and reports success as a
// bool. With the Index-returning llt_inplace, the comparison against -1
// converts its result into that bool.
// NOTE(review): -1 looks like the "no failing pivot" value -- confirm
// against llt_inplace<Upper>::blocked.
static bool inplace_decomposition(MatrixType& m)
{ return ei_llt_inplace<Upper>::blocked(m); }
{ return llt_inplace<Upper>::blocked(m)==-1; }
};
} // end namespace internal
/** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix
*
*
@@ -295,9 +314,10 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
return *this;
}
namespace internal {
template<typename _MatrixType, int UpLo, typename Rhs>
struct ei_solve_retval<LLT<_MatrixType, UpLo>, Rhs>
: ei_solve_retval_base<LLT<_MatrixType, UpLo>, Rhs>
struct solve_retval<LLT<_MatrixType, UpLo>, Rhs>
: solve_retval_base<LLT<_MatrixType, UpLo>, Rhs>
{
typedef LLT<_MatrixType,UpLo> LLTType;
EIGEN_MAKE_SOLVE_HELPERS(LLTType,Rhs)
@@ -308,6 +328,7 @@ struct ei_solve_retval<LLT<_MatrixType, UpLo>, Rhs>
dec().solveInPlace(dst);
}
};
}
/** \internal use x = llt_object.solve(x);
*
@@ -326,8 +347,8 @@ template<typename MatrixType, int _UpLo>
template<typename Derived>
void LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
{
// Preconditions (checked by assertion): the decomposition is initialized and
// bAndX has as many rows as the factored matrix.
ei_assert(m_isInitialized && "LLT is not initialized.");
ei_assert(m_matrix.rows()==bAndX.rows());
eigen_assert(m_isInitialized && "LLT is not initialized.");
eigen_assert(m_matrix.rows()==bAndX.rows());
// Two successive triangular solves, both overwriting bAndX: first with L,
// then with U.
matrixL().solveInPlace(bAndX);
matrixU().solveInPlace(bAndX);
}
@@ -338,7 +359,7 @@ void LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
template<typename MatrixType, int _UpLo>
MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
{
// Rebuilds a dense matrix as the product of L with the adjoint of L, using
// the factor returned by matrixL(). An assertion checks that the
// decomposition has been initialized.
ei_assert(m_isInitialized && "LLT is not initialized.");
eigen_assert(m_isInitialized && "LLT is not initialized.");
return matrixL() * matrixL().adjoint().toDenseMatrix();
}

View File

@@ -37,22 +37,27 @@
* API for the %Matrix class provides easy access to linear-algebra
* operations.
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
*
* \sa \ref TutorialArrayClass, \ref TopicClassHierarchy
*/
namespace internal {
// Traits of Array: everything is inherited from the traits of the Matrix type
// with the same template arguments, except that the expression kind is
// ArrayXpr and the expression base class is ArrayBase<Array<...> >.
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct ei_traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : ei_traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{
typedef ArrayXpr XprKind;
typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
};
}
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
class Array
: public DenseStorageBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
: public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{
public:
typedef DenseStorageBase<Array> Base;
typedef PlainObjectBase<Array> Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Array)
enum { Options = _Options };
@@ -60,13 +65,11 @@ class Array
protected:
template <typename Derived, typename OtherDerived, bool IsVector>
friend struct ei_conservative_resize_like_impl;
friend struct internal::conservative_resize_like_impl;
using Base::m_storage;
public:
enum { NeedsToAlign = (!(Options&DontAlign))
&& SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
using Base::base;
using Base::coeff;
@@ -126,8 +129,8 @@ class Array
#ifndef EIGEN_PARSED_BY_DOXYGEN
// FIXME is it still needed ??
/** \internal */
Array(ei_constructor_without_unaligned_array_assert)
: Base(ei_constructor_without_unaligned_array_assert())
Array(internal::constructor_without_unaligned_array_assert)
: Base(internal::constructor_without_unaligned_array_assert())
{
Base::_check_template_params();
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
@@ -145,8 +148,8 @@ class Array
{
Base::_check_template_params();
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Array)
ei_assert(dim > 0);
ei_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
eigen_assert(dim >= 0);
eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
}
@@ -228,7 +231,7 @@ class Array
* data pointers.
*/
// Forwards to _swap() with the derived expression of \a other.
// NOTE(review): the const-reference form takes \a other as const although a
// swap modifies it -- presumably so that temporaries can bind; confirm.
template<typename OtherDerived>
void swap(ArrayBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other)
void swap(ArrayBase<OtherDerived> const & other)
{ this->_swap(other.derived()); }
inline Index innerStride() const { return 1; }
@@ -241,7 +244,7 @@ class Array
private:
template<typename MatrixType, typename OtherDerived, bool SwapPointers>
friend struct ei_matrix_swap_impl;
friend struct internal::matrix_swap_impl;
};
/** \defgroup arraytypedefs Global array typedefs

View File

@@ -42,7 +42,10 @@ template<typename ExpressionType> class MatrixWrapper;
*
* This class is the base that is inherited by all array expression types.
*
* \param Derived is the derived type, e.g., an array or an expression type.
* \tparam Derived is the derived type, e.g., an array or an expression type.
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN.
*
* \sa class MatrixBase, \ref TopicClassHierarchy
*/
@@ -53,16 +56,16 @@ template<typename Derived> class ArrayBase
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** The base class for a given storage type. */
typedef ArrayBase StorageBaseType;
typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl;
using ei_special_scalar_op_base<Derived,typename ei_traits<Derived>::Scalar,
typename NumTraits<typename ei_traits<Derived>::Scalar>::Real>::operator*;
using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
typedef typename ei_traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index;
typedef typename ei_traits<Derived>::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename internal::traits<Derived>::Index Index;
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef DenseBase<Derived> Base;
@@ -91,6 +94,7 @@ template<typename Derived> class ArrayBase
using Base::operator/=;
typedef typename Base::CoeffReturnType CoeffReturnType;
#endif // not EIGEN_PARSED_BY_DOXYGEN
#ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -99,17 +103,17 @@ template<typename Derived> class ArrayBase
* reference to a matrix, not a matrix! It is however guaranteed that the return type of eval() is either
* PlainObject or const PlainObject&.
*/
typedef Array<typename ei_traits<Derived>::Scalar,
ei_traits<Derived>::RowsAtCompileTime,
ei_traits<Derived>::ColsAtCompileTime,
AutoAlign | (ei_traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
ei_traits<Derived>::MaxRowsAtCompileTime,
ei_traits<Derived>::MaxColsAtCompileTime
typedef Array<typename internal::traits<Derived>::Scalar,
internal::traits<Derived>::RowsAtCompileTime,
internal::traits<Derived>::ColsAtCompileTime,
AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
internal::traits<Derived>::MaxRowsAtCompileTime,
internal::traits<Derived>::MaxColsAtCompileTime
> PlainObject;
/** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ConstantReturnType;
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
#endif // not EIGEN_PARSED_BY_DOXYGEN
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase
@@ -129,7 +133,7 @@ template<typename Derived> class ArrayBase
*/
Derived& operator=(const ArrayBase& other)
{
// Same-type assignment: delegates to assign_selector<Derived,Derived>::run
// with both operands accessed through derived(), and returns its result.
return ei_assign_selector<Derived,Derived>::run(derived(), other.derived());
return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
}
Derived& operator+=(const Scalar& scalar)
@@ -169,10 +173,10 @@ template<typename Derived> class ArrayBase
template<typename OtherDerived> explicit ArrayBase(const ArrayBase<OtherDerived>&);
protected:
// mixing arrays and matrices is not legal:
// this overload exists only to reject "array += matrix". The static
// assertion's condition (sizeof(...)==-1) can never hold, so instantiating the
// overload fails to compile with YOU_CANNOT_MIX_ARRAYS_AND_MATRICES. The
// condition depends on OtherDerived, so it is only checked on instantiation.
template<typename OtherDerived> Derived& operator+=(const MatrixBase<OtherDerived>& mat)
template<typename OtherDerived> Derived& operator+=(const MatrixBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
// mixing arrays and matrices is not legal:
// this overload exists only to reject "array -= matrix". The static
// assertion's condition (sizeof(...)==-1) can never hold, so instantiating the
// overload fails to compile with YOU_CANNOT_MIX_ARRAYS_AND_MATRICES. The
// condition depends on OtherDerived, so it is only checked on instantiation.
template<typename OtherDerived> Derived& operator-=(const MatrixBase<OtherDerived>& mat)
template<typename OtherDerived> Derived& operator-=(const MatrixBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
};
@@ -185,8 +189,8 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
{
// Wraps *this (as Derived) in a SelfCwiseBinaryOp bound to the difference
// functor, assigns the right-hand side to that wrapper, and returns *this.
// NOTE(review): presumably the assignment through the wrapper applies the
// functor coefficient-wise in place -- confirm against SelfCwiseBinaryOp.
SelfCwiseBinaryOp<ei_scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other;
SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other.derived();
return derived();
}
@@ -199,7 +203,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
{
// Wraps *this (as Derived) in a SelfCwiseBinaryOp bound to the sum functor,
// assigns other.derived() to that wrapper, and returns *this.
// NOTE(review): presumably the assignment through the wrapper applies the
// functor coefficient-wise in place -- confirm against SelfCwiseBinaryOp.
SelfCwiseBinaryOp<ei_scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other.derived();
return derived();
}
@@ -213,7 +217,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
{
// Wraps *this (as Derived) in a SelfCwiseBinaryOp bound to the product
// functor, assigns other.derived() to that wrapper, and returns *this.
// NOTE(review): presumably the assignment through the wrapper applies the
// functor coefficient-wise in place -- confirm against SelfCwiseBinaryOp.
SelfCwiseBinaryOp<ei_scalar_product_op<Scalar>, Derived, OtherDerived> tmp(derived());
SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other.derived();
return derived();
}
@@ -227,7 +231,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
{
// Wraps *this (as Derived) in a SelfCwiseBinaryOp bound to the quotient
// functor, assigns other.derived() to that wrapper, and returns *this.
// NOTE(review): presumably the assignment through the wrapper applies the
// functor coefficient-wise in place -- confirm against SelfCwiseBinaryOp.
SelfCwiseBinaryOp<ei_scalar_quotient_op<Scalar>, Derived, OtherDerived> tmp(derived());
SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other.derived();
return derived();
}

View File

@@ -35,12 +35,15 @@
*
* \sa MatrixBase::array(), class MatrixWrapper
*/
namespace internal {
// Traits of ArrayWrapper: inherited from the traits of the wrapped
// expression's Nested type (with qualifiers stripped by the cleantype /
// remove_all helper); only the expression kind is overridden, to ArrayXpr.
template<typename ExpressionType>
struct ei_traits<ArrayWrapper<ExpressionType> >
: public ei_traits<typename ei_cleantype<typename ExpressionType::Nested>::type >
struct traits<ArrayWrapper<ExpressionType> >
: public traits<typename remove_all<typename ExpressionType::Nested>::type >
{
typedef ArrayXpr XprKind;
};
}
template<typename ExpressionType>
class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
@@ -50,7 +53,13 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)
typedef typename ei_nested<ExpressionType>::type NestedExpressionType;
// Scalar when ExpressionType is an lvalue (per internal::is_lvalue), const
// Scalar otherwise.
typedef typename internal::conditional<
internal::is_lvalue<ExpressionType>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
inline ArrayWrapper(const ExpressionType& matrix) : m_expression(matrix) {}
@@ -59,6 +68,9 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
inline Index outerStride() const { return m_expression.outerStride(); }
inline Index innerStride() const { return m_expression.innerStride(); }
// Both overloads forward to m_expression.data(). The non-const overload
// returns a pointer to ScalarWithConstIfNotLvalue; the const overload always
// returns a pointer to const Scalar.
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
inline const Scalar* data() const { return m_expression.data(); }
inline const CoeffReturnType coeff(Index row, Index col) const
{
return m_expression.coeff(row, col);
@@ -69,6 +81,11 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
return m_expression.const_cast_derived().coeffRef(row, col);
}
// Read-only reference to the coefficient at (row, col) of the wrapped
// expression. The call goes through const_cast_derived(), but the result is
// returned as a reference to const, so callers cannot write through it.
inline const Scalar& coeffRef(Index row, Index col) const
{
return m_expression.const_cast_derived().coeffRef(row, col);
}
inline const CoeffReturnType coeff(Index index) const
{
return m_expression.coeff(index);
@@ -79,6 +96,11 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
return m_expression.const_cast_derived().coeffRef(index);
}
// Read-only reference to the coefficient at linear position \a index of the
// wrapped expression. The call goes through const_cast_derived(), but the
// result is returned as a reference to const.
inline const Scalar& coeffRef(Index index) const
{
return m_expression.const_cast_derived().coeffRef(index);
}
template<int LoadMode>
inline const PacketScalar packet(Index row, Index col) const
{
@@ -121,12 +143,14 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
* \sa MatrixBase::matrix(), class ArrayWrapper
*/
namespace internal {
// Traits of MatrixWrapper: inherited from the traits of the wrapped
// expression's Nested type (with qualifiers stripped by the cleantype /
// remove_all helper); only the expression kind is overridden, to MatrixXpr.
template<typename ExpressionType>
struct ei_traits<MatrixWrapper<ExpressionType> >
: public ei_traits<typename ei_cleantype<typename ExpressionType::Nested>::type >
struct traits<MatrixWrapper<ExpressionType> >
: public traits<typename remove_all<typename ExpressionType::Nested>::type >
{
typedef MatrixXpr XprKind;
};
}
template<typename ExpressionType>
class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
@@ -136,7 +160,13 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper)
typedef typename ei_nested<ExpressionType>::type NestedExpressionType;
// Scalar when ExpressionType is an lvalue (per internal::is_lvalue), const
// Scalar otherwise.
typedef typename internal::conditional<
internal::is_lvalue<ExpressionType>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
inline MatrixWrapper(const ExpressionType& matrix) : m_expression(matrix) {}
@@ -145,6 +175,9 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
inline Index outerStride() const { return m_expression.outerStride(); }
inline Index innerStride() const { return m_expression.innerStride(); }
// Both overloads forward to m_expression.data(). The non-const overload
// returns a pointer to ScalarWithConstIfNotLvalue; the const overload always
// returns a pointer to const Scalar.
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
inline const Scalar* data() const { return m_expression.data(); }
inline const CoeffReturnType coeff(Index row, Index col) const
{
return m_expression.coeff(row, col);
@@ -155,6 +188,11 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
return m_expression.const_cast_derived().coeffRef(row, col);
}
// Read-only reference to the coefficient at (row, col) of the wrapped
// expression.
// NOTE(review): this overload calls derived() on the nested expression, while
// the other coeffRef overloads of this class call const_cast_derived() --
// confirm the asymmetry is intentional.
inline const Scalar& coeffRef(Index row, Index col) const
{
return m_expression.derived().coeffRef(row, col);
}
inline const CoeffReturnType coeff(Index index) const
{
return m_expression.coeff(index);
@@ -165,6 +203,11 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
return m_expression.const_cast_derived().coeffRef(index);
}
// Read-only reference to the coefficient at linear position \a index of the
// wrapped expression. The call goes through const_cast_derived(), but the
// result is returned as a reference to const.
inline const Scalar& coeffRef(Index index) const
{
return m_expression.const_cast_derived().coeffRef(index);
}
template<int LoadMode>
inline const PacketScalar packet(Index row, Index col) const
{

View File

@@ -27,19 +27,21 @@
#ifndef EIGEN_ASSIGN_H
#define EIGEN_ASSIGN_H
namespace internal {
/***************************************************************************
* Part 1 : the logic deciding a strategy for traversal and unrolling *
***************************************************************************/
template <typename Derived, typename OtherDerived>
struct ei_assign_traits
struct assign_traits
{
public:
enum {
DstIsAligned = Derived::Flags & AlignedBit,
DstHasDirectAccess = Derived::Flags & DirectAccessBit,
SrcIsAligned = OtherDerived::Flags & AlignedBit,
JointAlignment = DstIsAligned && SrcIsAligned ? Aligned : Unaligned
JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned
};
private:
@@ -51,7 +53,7 @@ private:
: int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
: int(Derived::MaxRowsAtCompileTime),
MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
PacketSize = ei_packet_traits<typename Derived::Scalar>::size
PacketSize = packet_traits<typename Derived::Scalar>::size
};
enum {
@@ -104,9 +106,9 @@ public:
: int(NoUnrolling)
)
: int(Traversal) == int(LinearVectorizedTraversal)
? ( int(MayUnrollCompletely) && int(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
: int(Traversal) == int(LinearTraversal)
? ( int(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) )
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) )
: int(NoUnrolling)
};
@@ -143,7 +145,7 @@ public:
************************/
template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_DefaultTraversal_CompleteUnrolling
struct assign_DefaultTraversal_CompleteUnrolling
{
enum {
outer = Index / Derived1::InnerSizeAtCompileTime,
@@ -153,28 +155,28 @@ struct ei_assign_DefaultTraversal_CompleteUnrolling
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
dst.copyCoeffByOuterInner(outer, inner, src);
ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
}
};
// Terminating case of the compile-time recursion: selected when the running
// index has reached Stop; run() does nothing.
template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
};
// Compile-time unrolled copy along the inner dimension for one fixed outer
// index: run() copies the coefficient at (outer, Index) from src to dst and
// then invokes the instantiation for Index+1. The recursion is ended by the
// <Stop, Stop> specialisation.
template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_DefaultTraversal_InnerUnrolling
struct assign_DefaultTraversal_InnerUnrolling
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
{
dst.copyCoeffByOuterInner(outer, Index, src);
ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
}
};
// Terminating case of the compile-time recursion: selected when the running
// index has reached Stop; run() does nothing.
template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
};
@@ -184,17 +186,17 @@ struct ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
***********************/
// Compile-time unrolled copy using a single linear index: run() copies the
// coefficient at position Index from src to dst and then invokes the
// instantiation for Index+1. The recursion is ended by the <Stop, Stop>
// specialisation.
template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_LinearTraversal_CompleteUnrolling
struct assign_LinearTraversal_CompleteUnrolling
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
dst.copyCoeff(Index, src);
ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
}
};
// Terminating case of the compile-time recursion: selected when the running
// index has reached Stop; run() does nothing.
template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
};
@@ -204,41 +206,41 @@ struct ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Sto
**************************/
// Compile-time unrolled packet copy over the whole expression.
// The flat Index is split into (outer, inner) by dividing / taking the
// remainder with Derived1::InnerSizeAtCompileTime. run() copies one packet at
// that position and then invokes the instantiation whose Index is advanced by
// the packet size of Derived1's scalar type. The recursion is ended by the
// <Stop, Stop> specialisation.
template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_innervec_CompleteUnrolling
struct assign_innervec_CompleteUnrolling
{
enum {
outer = Index / Derived1::InnerSizeAtCompileTime,
inner = Index % Derived1::InnerSizeAtCompileTime,
JointAlignment = ei_assign_traits<Derived1,Derived2>::JointAlignment
JointAlignment = assign_traits<Derived1,Derived2>::JointAlignment
};
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
// The copy is instantiated with the alignment arguments Aligned and
// JointAlignment (the latter taken from assign_traits).
dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src);
ei_assign_innervec_CompleteUnrolling<Derived1, Derived2,
Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
assign_innervec_CompleteUnrolling<Derived1, Derived2,
Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
}
};
// Terminating case of the compile-time recursion: selected when the running
// index has reached Stop; run() does nothing.
template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
};
// Compile-time unrolled packet copy along the inner dimension for one fixed
// outer index: run() copies one packet at (outer, Index), with both alignment
// template arguments set to Aligned, and then invokes the instantiation whose
// Index is advanced by the packet size of Derived1's scalar type. The
// recursion is ended by the <Stop, Stop> specialisation.
template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_innervec_InnerUnrolling
struct assign_innervec_InnerUnrolling
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
{
dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
ei_assign_innervec_InnerUnrolling<Derived1, Derived2,
Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, outer);
assign_innervec_InnerUnrolling<Derived1, Derived2,
Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, outer);
}
};
// Terminating case of the compile-time recursion: selected when the running
// index has reached Stop; run() does nothing.
template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
};
@@ -248,22 +250,22 @@ struct ei_assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
***************************************************************************/
// Declaration of the assignment implementation template. The Traversal and
// Unrolling parameters default to the values computed by assign_traits for
// the (Derived1, Derived2) pair; the specialisations are keyed on these two
// values.
template<typename Derived1, typename Derived2,
int Traversal = ei_assign_traits<Derived1, Derived2>::Traversal,
int Unrolling = ei_assign_traits<Derived1, Derived2>::Unrolling>
struct ei_assign_impl;
int Traversal = assign_traits<Derived1, Derived2>::Traversal,
int Unrolling = assign_traits<Derived1, Derived2>::Unrolling>
struct assign_impl;
/************************
*** Default traversal ***
************************/
template<typename Derived1, typename Derived2, int Unrolling>
struct ei_assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling>
struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling>
{
inline static void run(Derived1 &, const Derived2 &) { }
};
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
@@ -277,24 +279,24 @@ struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
};
// DefaultTraversal with CompleteUnrolling: the whole copy is expanded at
// compile time by the DefaultTraversal_CompleteUnrolling helper, from index 0
// up to Derived1::SizeAtCompileTime.
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
}
};
// DefaultTraversal with InnerUnrolling: a run-time loop over the outer index
// (0 .. dst.outerSize()-1); for each outer index the inner dimension is
// expanded at compile time from 0 up to Derived1::InnerSizeAtCompileTime.
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
{
typedef typename Derived1::Index Index;
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
::run(dst, src, outer);
}
};
@@ -304,7 +306,7 @@ struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
***********************/
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
@@ -316,11 +318,11 @@ struct ei_assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
};
// LinearTraversal with CompleteUnrolling: the whole copy is expanded at
// compile time by the LinearTraversal_CompleteUnrolling helper, from linear
// index 0 up to Derived1::SizeAtCompileTime.
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
}
};
@@ -330,14 +332,14 @@ struct ei_assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
**************************/
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
{
const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize();
const Index packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
const Index packetSize = packet_traits<typename Derived1::Scalar>::size;
for(Index outer = 0; outer < outerSize; ++outer)
for(Index inner = 0; inner < innerSize; inner+=packetSize)
dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, inner, src);
@@ -345,24 +347,24 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
};
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
}
};
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling>
{
typedef typename Derived1::Index Index;
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
ei_assign_innervec_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
assign_innervec_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
::run(dst, src, outer);
}
};
@@ -372,14 +374,14 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolli
***************************/
template <bool IsAligned = false>
struct ei_unaligned_assign_impl
struct unaligned_assign_impl
{
template <typename Derived, typename OtherDerived>
static EIGEN_STRONG_INLINE void run(const Derived&, OtherDerived&, typename Derived::Index, typename Derived::Index) {}
};
template <>
struct ei_unaligned_assign_impl<false>
struct unaligned_assign_impl<false>
{
// MSVC must not inline these functions. If it does, it fails to optimize the
// packet access path.
@@ -397,45 +399,45 @@ struct ei_unaligned_assign_impl<false>
};
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
{
typedef typename Derived1::Index Index;
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
const Index size = dst.size();
typedef ei_packet_traits<typename Derived1::Scalar> PacketTraits;
typedef packet_traits<typename Derived1::Scalar> PacketTraits;
enum {
packetSize = PacketTraits::size,
dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(ei_assign_traits<Derived1,Derived2>::DstIsAligned) ,
srcAlignment = ei_assign_traits<Derived1,Derived2>::JointAlignment
dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
};
const Index alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
: ei_first_aligned(&dst.coeffRef(0), size);
const Index alignedStart = assign_traits<Derived1,Derived2>::DstIsAligned ? 0
: first_aligned(&dst.coeffRef(0), size);
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
ei_unaligned_assign_impl<ei_assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);
unaligned_assign_impl<assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);
for(Index index = alignedStart; index < alignedEnd; index += packetSize)
{
dst.template copyPacket<Derived2, dstAlignment, srcAlignment>(index, src);
}
ei_unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
}
};
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
{
typedef typename Derived1::Index Index;
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
enum { size = Derived1::SizeAtCompileTime,
packetSize = ei_packet_traits<typename Derived1::Scalar>::size,
packetSize = packet_traits<typename Derived1::Scalar>::size,
alignedSize = (size/packetSize)*packetSize };
ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src);
ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src);
assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
}
};
@@ -444,24 +446,24 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnr
***************************/
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
{
typedef ei_packet_traits<typename Derived1::Scalar> PacketTraits;
typedef packet_traits<typename Derived1::Scalar> PacketTraits;
enum {
packetSize = PacketTraits::size,
alignable = PacketTraits::AlignedOnScalar,
dstAlignment = alignable ? Aligned : int(ei_assign_traits<Derived1,Derived2>::DstIsAligned) ,
srcAlignment = ei_assign_traits<Derived1,Derived2>::JointAlignment
dstAlignment = alignable ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
};
const Index packetAlignedMask = packetSize - 1;
const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize();
const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
Index alignedStart = ((!alignable) || ei_assign_traits<Derived1,Derived2>::DstIsAligned) ? 0
: ei_first_aligned(&dst.coeffRef(0,0), innerSize);
Index alignedStart = ((!alignable) || assign_traits<Derived1,Derived2>::DstIsAligned) ? 0
: first_aligned(&dst.coeffRef(0,0), innerSize);
for(Index outer = 0; outer < outerSize; ++outer)
{
@@ -472,7 +474,7 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
// do the vectorizable part of the assignment
for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
dst.template copyPacketByOuterInner<Derived2, Aligned, Unaligned>(outer, inner, src);
dst.template copyPacketByOuterInner<Derived2, dstAlignment, Unaligned>(outer, inner, src);
// do the non-vectorizable part of the assignment
for(Index inner = alignedEnd; inner<innerSize ; ++inner)
@@ -483,6 +485,8 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
}
};
} // end namespace internal
/***************************************************************************
* Part 4 : implementation of DenseBase methods
***************************************************************************/
@@ -493,26 +497,27 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
::lazyAssign(const DenseBase<OtherDerived>& other)
{
enum{
SameType = ei_is_same_type<typename Derived::Scalar,typename OtherDerived::Scalar>::ret
SameType = internal::is_same<typename Derived::Scalar,typename OtherDerived::Scalar>::value
};
EIGEN_STATIC_ASSERT_LVALUE(Derived)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
#ifdef EIGEN_DEBUG_ASSIGN
ei_assign_traits<Derived, OtherDerived>::debug();
internal::assign_traits<Derived, OtherDerived>::debug();
#endif
ei_assert(rows() == other.rows() && cols() == other.cols());
ei_assign_impl<Derived, OtherDerived, int(SameType) ? int(ei_assign_traits<Derived, OtherDerived>::Traversal)
: int(InvalidTraversal)>::run(derived(),other.derived());
eigen_assert(rows() == other.rows() && cols() == other.cols());
internal::assign_impl<Derived, OtherDerived, int(SameType) ? int(internal::assign_traits<Derived, OtherDerived>::Traversal)
: int(InvalidTraversal)>::run(derived(),other.derived());
#ifndef EIGEN_NO_DEBUG
checkTransposeAliasing(other.derived());
#endif
return derived();
}
namespace internal {
template<typename Derived, typename OtherDerived,
bool EvalBeforeAssigning = (int(OtherDerived::Flags) & EvalBeforeAssigningBit) != 0,
bool NeedToTranspose = Derived::IsVectorAtCompileTime
@@ -522,49 +527,51 @@ template<typename Derived, typename OtherDerived,
// revert to || as soon as not needed anymore.
(int(Derived::ColsAtCompileTime) == 1 && int(OtherDerived::RowsAtCompileTime) == 1))
&& int(Derived::SizeAtCompileTime) != 1>
struct ei_assign_selector;
struct assign_selector;
template<typename Derived, typename OtherDerived>
struct ei_assign_selector<Derived,OtherDerived,false,false> {
struct assign_selector<Derived,OtherDerived,false,false> {
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
};
template<typename Derived, typename OtherDerived>
struct ei_assign_selector<Derived,OtherDerived,true,false> {
struct assign_selector<Derived,OtherDerived,true,false> {
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
};
template<typename Derived, typename OtherDerived>
struct ei_assign_selector<Derived,OtherDerived,false,true> {
struct assign_selector<Derived,OtherDerived,false,true> {
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
};
template<typename Derived, typename OtherDerived>
struct ei_assign_selector<Derived,OtherDerived,true,true> {
struct assign_selector<Derived,OtherDerived,true,true> {
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
};
} // end namespace internal
template<typename Derived>
template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
{
return ei_assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
}
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
{
return ei_assign_selector<Derived,Derived>::run(derived(), other.derived());
return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
}
template<typename Derived>
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
{
return ei_assign_selector<Derived,Derived>::run(derived(), other.derived());
return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
}
template<typename Derived>
template <typename OtherDerived>
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
{
return ei_assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
}
template<typename Derived>

View File

@@ -25,112 +25,82 @@
#ifndef EIGEN_BANDMATRIX_H
#define EIGEN_BANDMATRIX_H
/**
* \class BandMatrix
* \ingroup Core_Module
*
* \brief Represents a rectangular matrix with a banded storage
*
* \param _Scalar Numeric type, i.e. float, double, int
* \param Rows Number of rows, or \b Dynamic
* \param Cols Number of columns, or \b Dynamic
* \param Supers Number of super diagonal
* \param Subs Number of sub diagonal
* \param _Options A combination of either \b RowMajor or \b ColMajor, and of \b SelfAdjoint
* The former controls storage order, and defaults to column-major. The latter controls
* whether the matrix represent a selfadjoint matrix in which case either Supers of Subs
* have to be null.
*
* \sa class TridiagonalMatrix
*/
template<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options>
struct ei_traits<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
{
typedef _Scalar Scalar;
typedef Dense StorageKind;
typedef DenseIndex Index;
enum {
CoeffReadCost = NumTraits<Scalar>::ReadCost,
RowsAtCompileTime = Rows,
ColsAtCompileTime = Cols,
MaxRowsAtCompileTime = Rows,
MaxColsAtCompileTime = Cols,
Flags = LvalueBit
};
};
namespace internal {
template<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options>
class BandMatrix : public EigenBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
template<typename Derived>
class BandMatrixBase : public EigenBase<Derived>
{
public:
enum {
Flags = ei_traits<BandMatrix>::Flags,
CoeffReadCost = ei_traits<BandMatrix>::CoeffReadCost,
RowsAtCompileTime = ei_traits<BandMatrix>::RowsAtCompileTime,
ColsAtCompileTime = ei_traits<BandMatrix>::ColsAtCompileTime,
MaxRowsAtCompileTime = ei_traits<BandMatrix>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = ei_traits<BandMatrix>::MaxColsAtCompileTime
Flags = internal::traits<Derived>::Flags,
CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
Supers = internal::traits<Derived>::Supers,
Subs = internal::traits<Derived>::Subs,
Options = internal::traits<Derived>::Options
};
typedef typename ei_traits<BandMatrix>::Scalar Scalar;
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> DenseMatrixType;
typedef typename DenseMatrixType::Index Index;
typedef typename internal::traits<Derived>::CoefficientsType CoefficientsType;
typedef EigenBase<Derived> Base;
protected:
enum {
DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic))
? 1 + Supers + Subs
: Dynamic,
SizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(Rows,Cols)
SizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime)
};
typedef Matrix<Scalar,DataRowsAtCompileTime,ColsAtCompileTime,Options&RowMajor?RowMajor:ColMajor> DataType;
public:
inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs)
: m_data(1+supers+subs,cols),
m_rows(rows), m_supers(supers), m_subs(subs)
{
//m_data.setConstant(666);
}
/** \returns the number of columns */
inline Index rows() const { return m_rows.value(); }
/** \returns the number of rows */
inline Index cols() const { return m_data.cols(); }
using Base::derived;
using Base::rows;
using Base::cols;
/** \returns the number of super diagonals */
inline Index supers() const { return m_supers.value(); }
inline Index supers() const { return derived().supers(); }
/** \returns the number of sub diagonals */
inline Index subs() const { return m_subs.value(); }
inline Index subs() const { return derived().subs(); }
/** \returns an expression of the underlying coefficient matrix */
inline const CoefficientsType& coeffs() const { return derived().coeffs(); }
/** \returns an expression of the underlying coefficient matrix */
inline CoefficientsType& coeffs() { return derived().coeffs(); }
/** \returns a vector expression of the \a i -th column,
* only the meaningful part is returned.
* \warning the internal storage must be column major. */
inline Block<DataType,Dynamic,1> col(Index i)
inline Block<CoefficientsType,Dynamic,1> col(Index i)
{
EIGEN_STATIC_ASSERT((Options&RowMajor)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
Index start = 0;
Index len = m_data.rows();
Index len = coeffs().rows();
if (i<=supers())
{
start = supers()-i;
len = std::min(rows(),std::max<Index>(0,m_data.rows() - (supers()-i)));
len = (std::min)(rows(),std::max<Index>(0,coeffs().rows() - (supers()-i)));
}
else if (i>=rows()-subs())
len = std::max<Index>(0,m_data.rows() - (i + 1 - rows() + subs()));
return Block<DataType,Dynamic,1>(m_data, start, i, len, 1);
len = std::max<Index>(0,coeffs().rows() - (i + 1 - rows() + subs()));
return Block<CoefficientsType,Dynamic,1>(coeffs(), start, i, len, 1);
}
/** \returns a vector expression of the main diagonal */
inline Block<DataType,1,SizeAtCompileTime> diagonal()
{ return Block<DataType,1,SizeAtCompileTime>(m_data,supers(),0,1,std::min(rows(),cols())); }
inline Block<CoefficientsType,1,SizeAtCompileTime> diagonal()
{ return Block<CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }
/** \returns a vector expression of the main diagonal (const version) */
inline const Block<DataType,1,SizeAtCompileTime> diagonal() const
{ return Block<DataType,1,SizeAtCompileTime>(m_data,supers(),0,1,std::min(rows(),cols())); }
inline const Block<const CoefficientsType,1,SizeAtCompileTime> diagonal() const
{ return Block<const CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }
template<int Index> struct DiagonalIntReturnType {
enum {
@@ -143,38 +113,38 @@ class BandMatrix : public EigenBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Opt
? EIGEN_SIZE_MIN_PREFER_DYNAMIC(ColsAtCompileTime, RowsAtCompileTime + ActualIndex)
: EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime - ActualIndex))
};
typedef Block<DataType,1, DiagonalSize> BuildType;
typedef typename ei_meta_if<Conjugate,
CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>,BuildType >,
BuildType>::ret Type;
typedef Block<CoefficientsType,1, DiagonalSize> BuildType;
typedef typename internal::conditional<Conjugate,
CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>,BuildType >,
BuildType>::type Type;
};
/** \returns a vector expression of the \a N -th sub or super diagonal */
template<int N> inline typename DiagonalIntReturnType<N>::Type diagonal()
{
return typename DiagonalIntReturnType<N>::BuildType(m_data, supers()-N, std::max(0,N), 1, diagonalLength(N));
return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));
}
/** \returns a vector expression of the \a N -th sub or super diagonal */
template<int N> inline const typename DiagonalIntReturnType<N>::Type diagonal() const
{
return typename DiagonalIntReturnType<N>::BuildType(m_data, supers()-N, std::max(0,N), 1, diagonalLength(N));
return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));
}
/** \returns a vector expression of the \a i -th sub or super diagonal */
inline Block<DataType,1,Dynamic> diagonal(Index i)
inline Block<CoefficientsType,1,Dynamic> diagonal(Index i)
{
ei_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers()));
return Block<DataType,1,Dynamic>(m_data, supers()-i, std::max<Index>(0,i), 1, diagonalLength(i));
eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers()));
return Block<CoefficientsType,1,Dynamic>(coeffs(), supers()-i, std::max<Index>(0,i), 1, diagonalLength(i));
}
/** \returns a vector expression of the \a i -th sub or super diagonal */
inline const Block<DataType,1,Dynamic> diagonal(Index i) const
inline const Block<const CoefficientsType,1,Dynamic> diagonal(Index i) const
{
ei_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers()));
return Block<DataType,1,Dynamic>(m_data, supers()-i, std::max<Index>(0,i), 1, diagonalLength(i));
eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers()));
return Block<const CoefficientsType,1,Dynamic>(coeffs(), supers()-i, std::max<Index>(0,i), 1, diagonalLength(i));
}
template<typename Dest> inline void evalTo(Dest& dst) const
{
dst.resize(rows(),cols());
@@ -196,19 +166,155 @@ class BandMatrix : public EigenBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Opt
protected:
inline Index diagonalLength(Index i) const
{ return i<0 ? std::min(cols(),rows()+i) : std::min(rows(),cols()-i); }
{ return i<0 ? (std::min)(cols(),rows()+i) : (std::min)(rows(),cols()-i); }
};
DataType m_data;
ei_variable_if_dynamic<Index, Rows> m_rows;
ei_variable_if_dynamic<Index, Supers> m_supers;
ei_variable_if_dynamic<Index, Subs> m_subs;
/**
* \class BandMatrix
* \ingroup Core_Module
*
* \brief Represents a rectangular matrix with a banded storage
*
* \param _Scalar Numeric type, i.e. float, double, int
* \param Rows Number of rows, or \b Dynamic
* \param Cols Number of columns, or \b Dynamic
* \param Supers Number of super diagonals
* \param Subs Number of sub diagonals
* \param _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
* The former controls \ref TopicStorageOrders "storage order", and defaults to
* column-major. The latter controls whether the matrix represents a selfadjoint
* matrix, in which case either Supers or Subs has to be null.
*
* \sa class TridiagonalMatrix
*/
// Compile-time traits of BandMatrix: exposes the template arguments under the
// names the band-matrix base class reads (sizes, Supers, Subs, Options) and
// selects the dense matrix type used to store the band coefficients.
template<typename _Scalar, int _Rows, int _Cols, int _Supers, int _Subs, int _Options>
struct traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
{
typedef _Scalar Scalar;
typedef Dense StorageKind;
typedef DenseIndex Index;
enum {
CoeffReadCost = NumTraits<Scalar>::ReadCost,
RowsAtCompileTime = _Rows,
ColsAtCompileTime = _Cols,
// No separate maximum sizes are tracked: the maxima equal the nominal sizes.
MaxRowsAtCompileTime = _Rows,
MaxColsAtCompileTime = _Cols,
Flags = LvalueBit,
Supers = _Supers,
Subs = _Subs,
Options = _Options,
// One storage row per stored diagonal (main diagonal + Supers + Subs);
// falls back to Dynamic as soon as either diagonal count is Dynamic.
DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic
};
// Dense coefficient storage: DataRowsAtCompileTime x ColsAtCompileTime, row-major
// only if the RowMajor bit is set in Options, column-major otherwise.
typedef Matrix<Scalar,DataRowsAtCompileTime,ColsAtCompileTime,Options&RowMajor?RowMajor:ColMajor> CoefficientsType;
};
// Band matrix that owns its coefficient storage (m_coeffs). The diagonal
// accessors are inherited from BandMatrixBase; this class only provides the
// storage and the size/bandwidth queries the base forwards to.
template<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options>
class BandMatrix : public BandMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
{
public:
typedef typename internal::traits<BandMatrix>::Scalar Scalar;
typedef typename internal::traits<BandMatrix>::Index Index;
typedef typename internal::traits<BandMatrix>::CoefficientsType CoefficientsType;
/** Constructs a band matrix of logical size \a rows x \a cols with \a supers super
* diagonals and \a subs sub diagonals. Every argument defaults to the matching
* template parameter.
*
* The coefficient storage is sized (1+supers+subs) x cols, i.e. one storage row per
* stored diagonal. The constructor body is empty, so no fill value is written here.
*/
inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs)
: m_coeffs(1+supers+subs,cols),
m_rows(rows), m_supers(supers), m_subs(subs)
{
}
/** \returns the number of rows */
inline Index rows() const { return m_rows.value(); }
/** \returns the number of columns (read from the coefficient storage; not stored separately) */
inline Index cols() const { return m_coeffs.cols(); }
/** \returns the number of super diagonals */
inline Index supers() const { return m_supers.value(); }
/** \returns the number of sub diagonals */
inline Index subs() const { return m_subs.value(); }
/** \returns a const reference to the underlying coefficient matrix */
inline const CoefficientsType& coeffs() const { return m_coeffs; }
/** \returns a writable reference to the underlying coefficient matrix */
inline CoefficientsType& coeffs() { return m_coeffs; }
protected:
// Owned band storage, (1+supers+subs) x cols as sized by the constructor.
CoefficientsType m_coeffs;
// Logical row count and bandwidths, each keyed on the matching template parameter.
internal::variable_if_dynamic<Index, Rows> m_rows;
internal::variable_if_dynamic<Index, Supers> m_supers;
internal::variable_if_dynamic<Index, Subs> m_subs;
};
// Forward declaration: the traits specialization below names BandMatrixWrapper
// before the class itself is defined.
template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
class BandMatrixWrapper;
// Compile-time traits of BandMatrixWrapper. Unlike an owning band matrix, the
// scalar, storage kind, index type and read cost are all taken from the wrapped
// coefficient expression type, and that type is used as-is for CoefficientsType.
template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
struct traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
{
typedef typename _CoefficientsType::Scalar Scalar;
typedef typename _CoefficientsType::StorageKind StorageKind;
typedef typename _CoefficientsType::Index Index;
enum {
CoeffReadCost = internal::traits<_CoefficientsType>::CoeffReadCost,
RowsAtCompileTime = _Rows,
ColsAtCompileTime = _Cols,
// No separate maximum sizes are tracked: the maxima equal the nominal sizes.
MaxRowsAtCompileTime = _Rows,
MaxColsAtCompileTime = _Cols,
Flags = LvalueBit,
Supers = _Supers,
Subs = _Subs,
Options = _Options,
// One storage row per stored diagonal (main diagonal + Supers + Subs);
// falls back to Dynamic as soon as either diagonal count is Dynamic.
DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic
};
// The wrapper does not choose a storage type of its own; it exposes the wrapped one.
typedef _CoefficientsType CoefficientsType;
};
// Band-matrix view over an existing coefficient object. The coefficients are
// held by const reference (see m_coeffs), so nothing is copied and the wrapped
// object must outlive the wrapper. Only a const coeffs() accessor is provided.
template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
{
public:
typedef typename internal::traits<BandMatrixWrapper>::Scalar Scalar;
typedef typename internal::traits<BandMatrixWrapper>::CoefficientsType CoefficientsType;
typedef typename internal::traits<BandMatrixWrapper>::Index Index;
/** Wraps \a coeffs as a band matrix with \a rows rows, \a supers super diagonals and
* \a subs sub diagonals; the size/bandwidth arguments default to the template parameters.
*
* \a cols is accepted for signature symmetry but is not stored: cols() always reports
* coeffs.cols(). The consistency check below is commented out, so mismatched
* dimensions are not detected here.
* NOTE(review): the disabled assertion suggests \a coeffs is expected to have
* supers+subs+1 rows and \a cols columns -- confirm against callers.
*/
inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs)
: m_coeffs(coeffs),
m_rows(rows), m_supers(supers), m_subs(subs)
{
EIGEN_UNUSED_VARIABLE(cols);
//internal::assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows());
}
/** \returns the number of rows */
inline Index rows() const { return m_rows.value(); }
/** \returns the number of columns (taken from the wrapped coefficients) */
inline Index cols() const { return m_coeffs.cols(); }
/** \returns the number of super diagonals */
inline Index supers() const { return m_supers.value(); }
/** \returns the number of sub diagonals */
inline Index subs() const { return m_subs.value(); }
/** \returns a const reference to the wrapped coefficient object */
inline const CoefficientsType& coeffs() const { return m_coeffs; }
protected:
// Non-owning reference to the caller's coefficients.
const CoefficientsType& m_coeffs;
// Logical row count and bandwidths, each keyed on the matching template parameter.
internal::variable_if_dynamic<Index, _Rows> m_rows;
internal::variable_if_dynamic<Index, _Supers> m_supers;
internal::variable_if_dynamic<Index, _Subs> m_subs;
};
/**
* \class TridiagonalMatrix
* \ingroup Core_Module
*
* \brief Represents a tridiagonal matrix
* \brief Represents a tridiagonal matrix with a compact banded storage
*
* \param _Scalar Numeric type, i.e. float, double, int
* \param Size Number of rows and cols, or \b Dynamic
@@ -219,10 +325,10 @@ class BandMatrix : public EigenBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Opt
template<typename Scalar, int Size, int Options>
class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor>
{
typedef BandMatrix<Scalar,Size,Size,1,Options&SelfAdjoint?0:1,Options|RowMajor> Base;
typedef BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor> Base;
typedef typename Base::Index Index;
public:
TridiagonalMatrix(Index size = Size) : Base(size,size,1,1) {}
TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {}
inline typename Base::template DiagonalIntReturnType<1>::Type super()
{ return Base::template diagonal<1>(); }
@@ -235,4 +341,6 @@ class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint
protected:
};
} // end namespace internal
#endif // EIGEN_BANDMATRIX_H

View File

@@ -58,61 +58,68 @@
*
* \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
*/
namespace internal {
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess>
struct ei_traits<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> > : ei_traits<XprType>
struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> > : traits<XprType>
{
typedef typename ei_traits<XprType>::Scalar Scalar;
typedef typename ei_traits<XprType>::StorageKind StorageKind;
typedef typename ei_traits<XprType>::XprKind XprKind;
typedef typename ei_nested<XprType>::type XprTypeNested;
typedef typename ei_unref<XprTypeNested>::type _XprTypeNested;
typedef typename traits<XprType>::Scalar Scalar;
typedef typename traits<XprType>::StorageKind StorageKind;
typedef typename traits<XprType>::XprKind XprKind;
typedef typename nested<XprType>::type XprTypeNested;
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
enum{
MatrixRows = ei_traits<XprType>::RowsAtCompileTime,
MatrixCols = ei_traits<XprType>::ColsAtCompileTime,
MatrixRows = traits<XprType>::RowsAtCompileTime,
MatrixCols = traits<XprType>::ColsAtCompileTime,
RowsAtCompileTime = MatrixRows == 0 ? 0 : BlockRows,
ColsAtCompileTime = MatrixCols == 0 ? 0 : BlockCols,
MaxRowsAtCompileTime = BlockRows==0 ? 0
: RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime)
: int(ei_traits<XprType>::MaxRowsAtCompileTime),
: int(traits<XprType>::MaxRowsAtCompileTime),
MaxColsAtCompileTime = BlockCols==0 ? 0
: ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
: int(ei_traits<XprType>::MaxColsAtCompileTime),
XprTypeIsRowMajor = (int(ei_traits<XprType>::Flags)&RowMajorBit) != 0,
: int(traits<XprType>::MaxColsAtCompileTime),
XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
: XprTypeIsRowMajor,
HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),
InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
InnerStrideAtCompileTime = HasSameStorageOrderAsXprType
? int(ei_inner_stride_at_compile_time<XprType>::ret)
: int(ei_outer_stride_at_compile_time<XprType>::ret),
? int(inner_stride_at_compile_time<XprType>::ret)
: int(outer_stride_at_compile_time<XprType>::ret),
OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
? int(ei_outer_stride_at_compile_time<XprType>::ret)
: int(ei_inner_stride_at_compile_time<XprType>::ret),
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % ei_packet_traits<Scalar>::size) == 0)
? int(outer_stride_at_compile_time<XprType>::ret)
: int(inner_stride_at_compile_time<XprType>::ret),
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && ((OuterStrideAtCompileTime % ei_packet_traits<Scalar>::size) == 0)) ? AlignedBit : 0,
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
Flags0 = ei_traits<XprType>::Flags & (HereditaryBits | MaskPacketAccessBit | LvalueBit | DirectAccessBit | MaskAlignedBit),
Flags1 = Flags0 | FlagsLinearAccessBit,
Flags = (Flags1 & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0)
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
Flags0 = traits<XprType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
DirectAccessBit |
MaskPacketAccessBit |
MaskAlignedBit),
Flags = Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit
};
};
}
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> class Block
: public ei_dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> >::type
: public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> >::type
{
public:
typedef typename ei_dense_xpr_base<Block>::type Base;
typedef typename internal::dense_xpr_base<Block>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Block)
class InnerIterator;
/** Column or Row constructor
*/
inline Block(const XprType& xpr, Index i)
inline Block(XprType& xpr, Index i)
: m_xpr(xpr),
// It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime,
// and it is a column if and only if BlockRows==XprType::RowsAtCompileTime and BlockCols==1,
@@ -123,33 +130,33 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
m_blockRows(BlockRows==1 ? 1 : xpr.rows()),
m_blockCols(BlockCols==1 ? 1 : xpr.cols())
{
ei_assert( (i>=0) && (
eigen_assert( (i>=0) && (
((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i<xpr.rows())
||((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && i<xpr.cols())));
}
/** Fixed-size constructor
*/
inline Block(const XprType& xpr, Index startRow, Index startCol)
inline Block(XprType& xpr, Index startRow, Index startCol)
: m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
m_blockRows(BlockRows), m_blockCols(BlockCols)
{
EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
ei_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows()
eigen_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows()
&& startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols());
}
/** Dynamic-size constructor
*/
inline Block(const XprType& xpr,
inline Block(XprType& xpr,
Index startRow, Index startCol,
Index blockRows, Index blockCols)
: m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
m_blockRows(blockRows), m_blockCols(blockCols)
{
ei_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
&& (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
ei_assert(startRow >= 0 && blockRows >= 0 && startRow + blockRows <= xpr.rows()
eigen_assert(startRow >= 0 && blockRows >= 0 && startRow + blockRows <= xpr.rows()
&& startCol >= 0 && blockCols >= 0 && startCol + blockCols <= xpr.cols());
}
@@ -160,16 +167,31 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
inline Scalar& coeffRef(Index row, Index col)
{
EIGEN_STATIC_ASSERT_LVALUE(XprType)
return m_xpr.const_cast_derived()
.coeffRef(row + m_startRow.value(), col + m_startCol.value());
}
inline const Scalar& coeffRef(Index row, Index col) const
{
return m_xpr.derived()
.coeffRef(row + m_startRow.value(), col + m_startCol.value());
}
EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
{
return m_xpr.coeff(row + m_startRow.value(), col + m_startCol.value());
}
inline Scalar& coeffRef(Index index)
{
EIGEN_STATIC_ASSERT_LVALUE(XprType)
return m_xpr.const_cast_derived()
.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
}
inline const Scalar& coeffRef(Index index) const
{
return m_xpr.const_cast_derived()
.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
@@ -223,10 +245,10 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
protected:
const typename XprType::Nested m_xpr;
const ei_variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
const ei_variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
const ei_variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;
const ei_variable_if_dynamic<Index, ColsAtCompileTime> m_blockCols;
const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
const internal::variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_blockCols;
};
/** \internal */
@@ -243,15 +265,15 @@ class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
/** Column or Row constructor
*/
inline Block(const XprType& xpr, Index i)
: Base(&xpr.const_cast_derived().coeffRef(
inline Block(XprType& xpr, Index i)
: Base(internal::const_cast_ptr(&xpr.coeffRef(
(BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0,
(BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0),
(BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)),
BlockRows==1 ? 1 : xpr.rows(),
BlockCols==1 ? 1 : xpr.cols()),
m_xpr(xpr)
{
ei_assert( (i>=0) && (
eigen_assert( (i>=0) && (
((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i<xpr.rows())
||((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && i<xpr.cols())));
init();
@@ -259,25 +281,25 @@ class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
/** Fixed-size constructor
*/
inline Block(const XprType& xpr, Index startRow, Index startCol)
: Base(&xpr.const_cast_derived().coeffRef(startRow,startCol)), m_xpr(xpr)
inline Block(XprType& xpr, Index startRow, Index startCol)
: Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol))), m_xpr(xpr)
{
ei_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows()
eigen_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows()
&& startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols());
init();
}
/** Dynamic-size constructor
*/
inline Block(const XprType& xpr,
inline Block(XprType& xpr,
Index startRow, Index startCol,
Index blockRows, Index blockCols)
: Base(&xpr.const_cast_derived().coeffRef(startRow,startCol), blockRows, blockCols),
: Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol)), blockRows, blockCols),
m_xpr(xpr)
{
ei_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
&& (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
ei_assert(startRow >= 0 && blockRows >= 0 && startRow + blockRows <= xpr.rows()
eigen_assert(startRow >= 0 && blockRows >= 0 && startRow + blockRows <= xpr.rows()
&& startCol >= 0 && blockCols >= 0 && startCol + blockCols <= xpr.cols());
init();
}
@@ -285,7 +307,7 @@ class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
/** \sa MapBase::innerStride() */
inline Index innerStride() const
{
return ei_traits<Block>::HasSameStorageOrderAsXprType
return internal::traits<Block>::HasSameStorageOrderAsXprType
? m_xpr.innerStride()
: m_xpr.outerStride();
}
@@ -304,7 +326,7 @@ class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal used by allowAligned() */
inline Block(const XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
inline Block(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
: Base(data, blockRows, blockCols), m_xpr(xpr)
{
init();
@@ -314,13 +336,13 @@ class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
protected:
void init()
{
m_outerStride = ei_traits<Block>::HasSameStorageOrderAsXprType
m_outerStride = internal::traits<Block>::HasSameStorageOrderAsXprType
? m_xpr.outerStride()
: m_xpr.innerStride();
}
const typename XprType::Nested m_xpr;
int m_outerStride;
Index m_outerStride;
};

View File

@@ -25,8 +25,10 @@
#ifndef EIGEN_ALLANDANY_H
#define EIGEN_ALLANDANY_H
namespace internal {
template<typename Derived, int UnrollCount>
struct ei_all_unroller
struct all_unroller
{
enum {
col = (UnrollCount-1) / Derived::RowsAtCompileTime,
@@ -35,24 +37,24 @@ struct ei_all_unroller
inline static bool run(const Derived &mat)
{
return ei_all_unroller<Derived, UnrollCount-1>::run(mat) && mat.coeff(row, col);
return all_unroller<Derived, UnrollCount-1>::run(mat) && mat.coeff(row, col);
}
};
template<typename Derived>
struct ei_all_unroller<Derived, 1>
struct all_unroller<Derived, 1>
{
inline static bool run(const Derived &mat) { return mat.coeff(0, 0); }
};
template<typename Derived>
struct ei_all_unroller<Derived, Dynamic>
struct all_unroller<Derived, Dynamic>
{
inline static bool run(const Derived &) { return false; }
};
template<typename Derived, int UnrollCount>
struct ei_any_unroller
struct any_unroller
{
enum {
col = (UnrollCount-1) / Derived::RowsAtCompileTime,
@@ -61,22 +63,24 @@ struct ei_any_unroller
inline static bool run(const Derived &mat)
{
return ei_any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col);
return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col);
}
};
template<typename Derived>
struct ei_any_unroller<Derived, 1>
struct any_unroller<Derived, 1>
{
inline static bool run(const Derived &mat) { return mat.coeff(0, 0); }
};
template<typename Derived>
struct ei_any_unroller<Derived, Dynamic>
struct any_unroller<Derived, Dynamic>
{
inline static bool run(const Derived &) { return false; }
};
} // end namespace internal
/** \returns true if all coefficients are true
*
* Example: \include MatrixBase_all.cpp
@@ -94,7 +98,7 @@ inline bool DenseBase<Derived>::all() const
&& SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
};
if(unroll)
return ei_all_unroller<Derived,
return internal::all_unroller<Derived,
unroll ? int(SizeAtCompileTime) : Dynamic
>::run(derived());
else
@@ -120,7 +124,7 @@ inline bool DenseBase<Derived>::any() const
&& SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
};
if(unroll)
return ei_any_unroller<Derived,
return internal::any_unroller<Derived,
unroll ? int(SizeAtCompileTime) : Dynamic
>::run(derived());
else

View File

@@ -64,12 +64,12 @@ struct CommaInitializer
m_row+=m_currentBlockRows;
m_col = 0;
m_currentBlockRows = 1;
ei_assert(m_row<m_xpr.rows()
eigen_assert(m_row<m_xpr.rows()
&& "Too many rows passed to comma initializer (operator<<)");
}
ei_assert(m_col<m_xpr.cols()
eigen_assert(m_col<m_xpr.cols()
&& "Too many coefficients passed to comma initializer (operator<<)");
ei_assert(m_currentBlockRows==1);
eigen_assert(m_currentBlockRows==1);
m_xpr.coeffRef(m_row, m_col++) = s;
return *this;
}
@@ -83,12 +83,12 @@ struct CommaInitializer
m_row+=m_currentBlockRows;
m_col = 0;
m_currentBlockRows = other.rows();
ei_assert(m_row+m_currentBlockRows<=m_xpr.rows()
eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows()
&& "Too many rows passed to comma initializer (operator<<)");
}
ei_assert(m_col<m_xpr.cols()
eigen_assert(m_col<m_xpr.cols()
&& "Too many coefficients passed to comma initializer (operator<<)");
ei_assert(m_currentBlockRows==other.rows());
eigen_assert(m_currentBlockRows==other.rows());
if (OtherDerived::SizeAtCompileTime != Dynamic)
m_xpr.template block<OtherDerived::RowsAtCompileTime != Dynamic ? OtherDerived::RowsAtCompileTime : 1,
OtherDerived::ColsAtCompileTime != Dynamic ? OtherDerived::ColsAtCompileTime : 1>
@@ -101,7 +101,7 @@ struct CommaInitializer
inline ~CommaInitializer()
{
ei_assert((m_row+m_currentBlockRows) == m_xpr.rows()
eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows()
&& m_col == m_xpr.cols()
&& "Too few coefficients passed to comma initializer (operator<<)");
}

View File

@@ -45,56 +45,59 @@
*
* \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
*/
namespace internal {
template<typename BinaryOp, typename Lhs, typename Rhs>
struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
{
// we must not inherit from ei_traits<Lhs> since it has
// we must not inherit from traits<Lhs> since it has
// the potential to cause problems with MSVC
typedef typename ei_cleantype<Lhs>::type Ancestor;
typedef typename ei_traits<Ancestor>::XprKind XprKind;
typedef typename remove_all<Lhs>::type Ancestor;
typedef typename traits<Ancestor>::XprKind XprKind;
enum {
RowsAtCompileTime = ei_traits<Ancestor>::RowsAtCompileTime,
ColsAtCompileTime = ei_traits<Ancestor>::ColsAtCompileTime,
MaxRowsAtCompileTime = ei_traits<Ancestor>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = ei_traits<Ancestor>::MaxColsAtCompileTime
RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,
ColsAtCompileTime = traits<Ancestor>::ColsAtCompileTime,
MaxRowsAtCompileTime = traits<Ancestor>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = traits<Ancestor>::MaxColsAtCompileTime
};
// even though we require Lhs and Rhs to have the same scalar type (see CwiseBinaryOp constructor),
// we still want to handle the case when the result type is different.
typedef typename ei_result_of<
typedef typename result_of<
BinaryOp(
typename Lhs::Scalar,
typename Rhs::Scalar
)
>::type Scalar;
typedef typename ei_promote_storage_type<typename ei_traits<Lhs>::StorageKind,
typename ei_traits<Rhs>::StorageKind>::ret StorageKind;
typedef typename ei_promote_index_type<typename ei_traits<Lhs>::Index,
typename ei_traits<Rhs>::Index>::type Index;
typedef typename promote_storage_type<typename traits<Lhs>::StorageKind,
typename traits<Rhs>::StorageKind>::ret StorageKind;
typedef typename promote_index_type<typename traits<Lhs>::Index,
typename traits<Rhs>::Index>::type Index;
typedef typename Lhs::Nested LhsNested;
typedef typename Rhs::Nested RhsNested;
typedef typename ei_unref<LhsNested>::type _LhsNested;
typedef typename ei_unref<RhsNested>::type _RhsNested;
typedef typename remove_reference<LhsNested>::type _LhsNested;
typedef typename remove_reference<RhsNested>::type _RhsNested;
enum {
LhsCoeffReadCost = _LhsNested::CoeffReadCost,
RhsCoeffReadCost = _RhsNested::CoeffReadCost,
LhsFlags = _LhsNested::Flags,
RhsFlags = _RhsNested::Flags,
SameType = ei_is_same_type<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::ret,
SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit),
Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
HereditaryBits
| (int(LhsFlags) & int(RhsFlags) &
( AlignedBit
| (StorageOrdersAgree ? LinearAccessBit : 0)
| (ei_functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
| (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
)
)
),
Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits<BinaryOp>::Cost
CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits<BinaryOp>::Cost
};
};
} // end namespace internal
// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor
// that would take two operands of different types. If there were such an example, then this check should be
@@ -104,33 +107,33 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
// add together a float matrix and a double matrix.
#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
EIGEN_STATIC_ASSERT((ei_functor_allows_mixing_real_and_complex<BINOP>::ret \
? int(ei_is_same_type<typename NumTraits<LHS>::Real, typename NumTraits<RHS>::Real>::ret) \
: int(ei_is_same_type<LHS, RHS>::ret)), \
EIGEN_STATIC_ASSERT((internal::functor_allows_mixing_real_and_complex<BINOP>::ret \
? int(internal::is_same<typename NumTraits<LHS>::Real, typename NumTraits<RHS>::Real>::value) \
: int(internal::is_same<LHS, RHS>::value)), \
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
class CwiseBinaryOpImpl;
template<typename BinaryOp, typename Lhs, typename Rhs>
class CwiseBinaryOp : ei_no_assignment_operator,
class CwiseBinaryOp : internal::no_assignment_operator,
public CwiseBinaryOpImpl<
BinaryOp, Lhs, Rhs,
typename ei_promote_storage_type<typename ei_traits<Lhs>::StorageKind,
typename ei_traits<Rhs>::StorageKind>::ret>
typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
typename internal::traits<Rhs>::StorageKind>::ret>
{
public:
typedef typename CwiseBinaryOpImpl<
BinaryOp, Lhs, Rhs,
typename ei_promote_storage_type<typename ei_traits<Lhs>::StorageKind,
typename ei_traits<Rhs>::StorageKind>::ret>::Base Base;
typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
typename internal::traits<Rhs>::StorageKind>::ret>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp)
typedef typename ei_nested<Lhs>::type LhsNested;
typedef typename ei_nested<Rhs>::type RhsNested;
typedef typename ei_unref<LhsNested>::type _LhsNested;
typedef typename ei_unref<RhsNested>::type _RhsNested;
typedef typename internal::nested<Lhs>::type LhsNested;
typedef typename internal::nested<Rhs>::type RhsNested;
typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& lhs, const Rhs& rhs, const BinaryOp& func = BinaryOp())
: m_lhs(lhs), m_rhs(rhs), m_functor(func)
@@ -138,19 +141,19 @@ class CwiseBinaryOp : ei_no_assignment_operator,
EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar);
// require the sizes to match
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs)
ei_assert(lhs.rows() == rhs.rows() && lhs.cols() == rhs.cols());
eigen_assert(lhs.rows() == rhs.rows() && lhs.cols() == rhs.cols());
}
EIGEN_STRONG_INLINE Index rows() const {
// return the fixed size type if available to enable compile time optimizations
if (ei_traits<typename ei_cleantype<LhsNested>::type>::RowsAtCompileTime==Dynamic)
if (internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic)
return m_rhs.rows();
else
return m_lhs.rows();
}
EIGEN_STRONG_INLINE Index cols() const {
// return the fixed size type if available to enable compile time optimizations
if (ei_traits<typename ei_cleantype<LhsNested>::type>::ColsAtCompileTime==Dynamic)
if (internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic)
return m_rhs.cols();
else
return m_lhs.cols();
@@ -171,12 +174,12 @@ class CwiseBinaryOp : ei_no_assignment_operator,
template<typename BinaryOp, typename Lhs, typename Rhs>
class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Dense>
: public ei_dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
: public internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
{
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> Derived;
public:
typedef typename ei_dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
typedef typename internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE( Derived )
EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
@@ -215,7 +218,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
{
SelfCwiseBinaryOp<ei_scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other.derived();
return derived();
}
@@ -229,7 +232,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
{
SelfCwiseBinaryOp<ei_scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other.derived();
return derived();
}

View File

@@ -42,32 +42,35 @@
*
* \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr()
*/
namespace internal {
template<typename NullaryOp, typename PlainObjectType>
struct ei_traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : ei_traits<PlainObjectType>
struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
{
enum {
Flags = (ei_traits<PlainObjectType>::Flags
Flags = (traits<PlainObjectType>::Flags
& ( HereditaryBits
| (ei_functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
| (ei_functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
| (ei_functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit),
CoeffReadCost = ei_functor_traits<NullaryOp>::Cost
| (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
| (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
| (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit),
CoeffReadCost = functor_traits<NullaryOp>::Cost
};
};
}
template<typename NullaryOp, typename PlainObjectType>
class CwiseNullaryOp : ei_no_assignment_operator,
public ei_dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type
class CwiseNullaryOp : internal::no_assignment_operator,
public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type
{
public:
typedef typename ei_dense_xpr_base<CwiseNullaryOp>::type Base;
typedef typename internal::dense_xpr_base<CwiseNullaryOp>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)
CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp())
: m_rows(rows), m_cols(cols), m_functor(func)
{
ei_assert(rows >= 0
eigen_assert(rows >= 0
&& (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols >= 0
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
@@ -99,8 +102,8 @@ class CwiseNullaryOp : ei_no_assignment_operator,
}
protected:
const ei_variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
const ei_variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
const NullaryOp m_functor;
};
@@ -185,7 +188,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
{
return DenseBase<Derived>::NullaryExpr(rows, cols, ei_scalar_constant_op<Scalar>(value));
return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_constant_op<Scalar>(value));
}
/** \returns an expression of a constant matrix of value \a value
@@ -207,7 +210,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Constant(Index size, const Scalar& value)
{
return DenseBase<Derived>::NullaryExpr(size, ei_scalar_constant_op<Scalar>(value));
return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value));
}
/** \returns an expression of a constant matrix of value \a value
@@ -224,7 +227,7 @@ EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Constant(const Scalar& value)
{
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
return DenseBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, ei_scalar_constant_op<Scalar>(value));
return DenseBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op<Scalar>(value));
}
/**
@@ -247,7 +250,7 @@ EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturn
DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return DenseBase<Derived>::NullaryExpr(size, ei_linspaced_op<Scalar,false>(low,high,size));
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,false>(low,high,size));
}
/**
@@ -260,7 +263,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, ei_linspaced_op<Scalar,false>(low,high,Derived::SizeAtCompileTime));
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,false>(low,high,Derived::SizeAtCompileTime));
}
/**
@@ -280,7 +283,7 @@ EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedRetu
DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return DenseBase<Derived>::NullaryExpr(size, ei_linspaced_op<Scalar,true>(low,high,size));
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,true>(low,high,size));
}
/**
@@ -293,7 +296,7 @@ DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, ei_linspaced_op<Scalar,true>(low,high,Derived::SizeAtCompileTime));
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,true>(low,high,Derived::SizeAtCompileTime));
}
/** \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
@@ -303,7 +306,7 @@ bool DenseBase<Derived>::isApproxToConstant
{
for(Index j = 0; j < cols(); ++j)
for(Index i = 0; i < rows(); ++i)
if(!ei_isApprox(this->coeff(i, j), value, prec))
if(!internal::isApprox(this->coeff(i, j), value, prec))
return false;
return true;
}
@@ -349,7 +352,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& value
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
DenseStorageBase<Derived>::setConstant(Index size, const Scalar& value)
PlainObjectBase<Derived>::setConstant(Index size, const Scalar& value)
{
resize(size);
return setConstant(value);
@@ -368,7 +371,7 @@ DenseStorageBase<Derived>::setConstant(Index size, const Scalar& value)
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
DenseStorageBase<Derived>::setConstant(Index rows, Index cols, const Scalar& value)
PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& value)
{
resize(rows, cols);
return setConstant(value);
@@ -390,7 +393,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index size, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return derived() = Derived::NullaryExpr(size, ei_linspaced_op<Scalar,false>(low,high,size));
return derived() = Derived::NullaryExpr(size, internal::linspaced_op<Scalar,false>(low,high,size));
}
// zero:
@@ -469,7 +472,7 @@ bool DenseBase<Derived>::isZero(RealScalar prec) const
{
for(Index j = 0; j < cols(); ++j)
for(Index i = 0; i < rows(); ++i)
if(!ei_isMuchSmallerThan(this->coeff(i, j), static_cast<Scalar>(1), prec))
if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<Scalar>(1), prec))
return false;
return true;
}
@@ -498,7 +501,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
DenseStorageBase<Derived>::setZero(Index size)
PlainObjectBase<Derived>::setZero(Index size)
{
resize(size);
return setConstant(Scalar(0));
@@ -516,7 +519,7 @@ DenseStorageBase<Derived>::setZero(Index size)
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
DenseStorageBase<Derived>::setZero(Index rows, Index cols)
PlainObjectBase<Derived>::setZero(Index rows, Index cols)
{
resize(rows, cols);
return setConstant(Scalar(0));
@@ -624,7 +627,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
DenseStorageBase<Derived>::setOnes(Index size)
PlainObjectBase<Derived>::setOnes(Index size)
{
resize(size);
return setConstant(Scalar(1));
@@ -642,7 +645,7 @@ DenseStorageBase<Derived>::setOnes(Index size)
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
DenseStorageBase<Derived>::setOnes(Index rows, Index cols)
PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
{
resize(rows, cols);
return setConstant(Scalar(1));
@@ -668,7 +671,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
MatrixBase<Derived>::Identity(Index rows, Index cols)
{
return DenseBase<Derived>::NullaryExpr(rows, cols, ei_scalar_identity_op<Scalar>());
return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());
}
/** \returns an expression of the identity matrix (not necessarily square).
@@ -686,7 +689,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
MatrixBase<Derived>::Identity()
{
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
return MatrixBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, ei_scalar_identity_op<Scalar>());
return MatrixBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_identity_op<Scalar>());
}
/** \returns true if *this is approximately equal to the identity matrix
@@ -708,12 +711,12 @@ bool MatrixBase<Derived>::isIdentity
{
if(i == j)
{
if(!ei_isApprox(this->coeff(i, j), static_cast<Scalar>(1), prec))
if(!internal::isApprox(this->coeff(i, j), static_cast<Scalar>(1), prec))
return false;
}
else
{
if(!ei_isMuchSmallerThan(this->coeff(i, j), static_cast<RealScalar>(1), prec))
if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<RealScalar>(1), prec))
return false;
}
}
@@ -721,8 +724,10 @@ bool MatrixBase<Derived>::isIdentity
return true;
}
namespace internal {
template<typename Derived, bool Big = (Derived::SizeAtCompileTime>=16)>
struct ei_setIdentity_impl
struct setIdentity_impl
{
static EIGEN_STRONG_INLINE Derived& run(Derived& m)
{
@@ -731,18 +736,20 @@ struct ei_setIdentity_impl
};
template<typename Derived>
struct ei_setIdentity_impl<Derived, true>
struct setIdentity_impl<Derived, true>
{
typedef typename Derived::Index Index;
static EIGEN_STRONG_INLINE Derived& run(Derived& m)
{
m.setZero();
const Index size = std::min(m.rows(), m.cols());
const Index size = (std::min)(m.rows(), m.cols());
for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
return m;
}
};
} // end namespace internal
/** Writes the identity expression (not necessarily square) into *this.
*
* Example: \include MatrixBase_setIdentity.cpp
@@ -753,7 +760,7 @@ struct ei_setIdentity_impl<Derived, true>
template<typename Derived>
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
{
return ei_setIdentity_impl<Derived>::run(derived());
return internal::setIdentity_impl<Derived>::run(derived());
}
/** \brief Resizes to the given size, and writes the identity expression (not necessarily square) into *this.

View File

@@ -45,33 +45,36 @@
*
* \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
*/
namespace internal {
template<typename UnaryOp, typename XprType>
struct ei_traits<CwiseUnaryOp<UnaryOp, XprType> >
: ei_traits<XprType>
struct traits<CwiseUnaryOp<UnaryOp, XprType> >
: traits<XprType>
{
typedef typename ei_result_of<
typedef typename result_of<
UnaryOp(typename XprType::Scalar)
>::type Scalar;
typedef typename XprType::Nested XprTypeNested;
typedef typename ei_unref<XprTypeNested>::type _XprTypeNested;
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
enum {
Flags = _XprTypeNested::Flags & (
HereditaryBits | LinearAccessBit | AlignedBit
| (ei_functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
CoeffReadCost = _XprTypeNested::CoeffReadCost + ei_functor_traits<UnaryOp>::Cost
| (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits<UnaryOp>::Cost
};
};
}
template<typename UnaryOp, typename XprType, typename StorageKind>
class CwiseUnaryOpImpl;
template<typename UnaryOp, typename XprType>
class CwiseUnaryOp : ei_no_assignment_operator,
public CwiseUnaryOpImpl<UnaryOp, XprType, typename ei_traits<XprType>::StorageKind>
class CwiseUnaryOp : internal::no_assignment_operator,
public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>
{
public:
typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename ei_traits<XprType>::StorageKind>::Base Base;
typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
@@ -84,11 +87,11 @@ class CwiseUnaryOp : ei_no_assignment_operator,
const UnaryOp& functor() const { return m_functor; }
/** \returns the nested expression */
const typename ei_cleantype<typename XprType::Nested>::type&
const typename internal::remove_all<typename XprType::Nested>::type&
nestedExpression() const { return m_xpr; }
/** \returns the nested expression */
typename ei_cleantype<typename XprType::Nested>::type&
typename internal::remove_all<typename XprType::Nested>::type&
nestedExpression() { return m_xpr.const_cast_derived(); }
protected:
@@ -100,12 +103,12 @@ class CwiseUnaryOp : ei_no_assignment_operator,
// It can be used for any expression types implementing the dense concept.
template<typename UnaryOp, typename XprType>
class CwiseUnaryOpImpl<UnaryOp,XprType,Dense>
: public ei_dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
: public internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
{
public:
typedef CwiseUnaryOp<UnaryOp, XprType> Derived;
typedef typename ei_dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
typedef typename internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const

View File

@@ -38,39 +38,42 @@
*
* \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
*/
namespace internal {
template<typename ViewOp, typename MatrixType>
struct ei_traits<CwiseUnaryView<ViewOp, MatrixType> >
: ei_traits<MatrixType>
struct traits<CwiseUnaryView<ViewOp, MatrixType> >
: traits<MatrixType>
{
typedef typename ei_result_of<
ViewOp(typename ei_traits<MatrixType>::Scalar)
typedef typename result_of<
ViewOp(typename traits<MatrixType>::Scalar)
>::type Scalar;
typedef typename MatrixType::Nested MatrixTypeNested;
typedef typename ei_cleantype<MatrixTypeNested>::type _MatrixTypeNested;
typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
enum {
Flags = (ei_traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)),
CoeffReadCost = ei_traits<_MatrixTypeNested>::CoeffReadCost + ei_functor_traits<ViewOp>::Cost,
MatrixTypeInnerStride = ei_inner_stride_at_compile_time<MatrixType>::ret,
Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)),
CoeffReadCost = traits<_MatrixTypeNested>::CoeffReadCost + functor_traits<ViewOp>::Cost,
MatrixTypeInnerStride = inner_stride_at_compile_time<MatrixType>::ret,
// need to cast the sizeof's from size_t to int explicitly, otherwise:
// "error: no integral type can represent all of the enumerator values"
InnerStrideAtCompileTime = MatrixTypeInnerStride == Dynamic
? int(Dynamic)
: int(MatrixTypeInnerStride)
* int(sizeof(typename ei_traits<MatrixType>::Scalar) / sizeof(Scalar)),
OuterStrideAtCompileTime = ei_outer_stride_at_compile_time<MatrixType>::ret
* int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)),
OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
};
};
}
template<typename ViewOp, typename MatrixType, typename StorageKind>
class CwiseUnaryViewImpl;
template<typename ViewOp, typename MatrixType>
class CwiseUnaryView : ei_no_assignment_operator,
public CwiseUnaryViewImpl<ViewOp, MatrixType, typename ei_traits<MatrixType>::StorageKind>
class CwiseUnaryView : internal::no_assignment_operator,
public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind>
{
public:
typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename ei_traits<MatrixType>::StorageKind>::Base Base;
typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp())
@@ -85,33 +88,33 @@ class CwiseUnaryView : ei_no_assignment_operator,
const ViewOp& functor() const { return m_functor; }
/** \returns the nested expression */
const typename ei_cleantype<typename MatrixType::Nested>::type&
const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const { return m_matrix; }
/** \returns the nested expression */
typename ei_cleantype<typename MatrixType::Nested>::type&
typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() { return m_matrix.const_cast_derived(); }
protected:
// FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC
const typename ei_nested<MatrixType>::type m_matrix;
const typename internal::nested<MatrixType>::type m_matrix;
ViewOp m_functor;
};
template<typename ViewOp, typename MatrixType>
class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
: public ei_dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type
: public internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type
{
public:
typedef CwiseUnaryView<ViewOp, MatrixType> Derived;
typedef typename ei_dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type Base;
typedef typename internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
inline Index innerStride() const
{
return derived().nestedExpression().innerStride() * sizeof(typename ei_traits<MatrixType>::Scalar) / sizeof(Scalar);
return derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
}
inline Index outerStride() const

View File

@@ -34,28 +34,37 @@
* This class is the base that is inherited by all dense objects (matrix, vector, arrays,
* and related expression types). The common Eigen API for dense objects is contained in this class.
*
* \param Derived is the derived type, e.g., a matrix type or an expression.
* \tparam Derived is the derived type, e.g., a matrix type or an expression.
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN.
*
* \sa \ref TopicClassHierarchy
*/
template<typename Derived> class DenseBase
#ifndef EIGEN_PARSED_BY_DOXYGEN
: public ei_special_scalar_op_base<Derived,typename ei_traits<Derived>::Scalar,
typename NumTraits<typename ei_traits<Derived>::Scalar>::Real>
: public internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>
#else
: public DenseCoeffsBase<Derived>
#endif // not EIGEN_PARSED_BY_DOXYGEN
{
public:
using ei_special_scalar_op_base<Derived,typename ei_traits<Derived>::Scalar,
typename NumTraits<typename ei_traits<Derived>::Scalar>::Real>::operator*;
using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
class InnerIterator;
typedef typename ei_traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index; /**< The type of indices */
typedef typename ei_traits<Derived>::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename internal::traits<Derived>::StorageKind StorageKind;
/** \brief The type of indices
* \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE.
* \sa \ref TopicPreprocessorDirectives.
*/
typedef typename internal::traits<Derived>::Index Index;
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef DenseCoeffsBase<Derived> Base;
@@ -93,26 +102,26 @@ template<typename Derived> class DenseBase
enum {
RowsAtCompileTime = ei_traits<Derived>::RowsAtCompileTime,
RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
/**< The number of rows at compile-time. This is just a copy of the value provided
* by the \a Derived type. If a value is not known at compile-time,
* it is set to the \a Dynamic constant.
* \sa MatrixBase::rows(), MatrixBase::cols(), ColsAtCompileTime, SizeAtCompileTime */
ColsAtCompileTime = ei_traits<Derived>::ColsAtCompileTime,
ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
/**< The number of columns at compile-time. This is just a copy of the value provided
* by the \a Derived type. If a value is not known at compile-time,
* it is set to the \a Dynamic constant.
* \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */
SizeAtCompileTime = (ei_size_at_compile_time<ei_traits<Derived>::RowsAtCompileTime,
ei_traits<Derived>::ColsAtCompileTime>::ret),
SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
internal::traits<Derived>::ColsAtCompileTime>::ret),
/**< This is equal to the number of coefficients, i.e. the number of
* rows times the number of columns, or to \a Dynamic if this is not
* known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
MaxRowsAtCompileTime = ei_traits<Derived>::MaxRowsAtCompileTime,
MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
/**< This value is equal to the maximum possible number of rows that this expression
* might have. If this expression might have an arbitrarily high number of rows,
* this value is set to \a Dynamic.
@@ -123,7 +132,7 @@ template<typename Derived> class DenseBase
* \sa RowsAtCompileTime, MaxColsAtCompileTime, MaxSizeAtCompileTime
*/
MaxColsAtCompileTime = ei_traits<Derived>::MaxColsAtCompileTime,
MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
/**< This value is equal to the maximum possible number of columns that this expression
* might have. If this expression might have an arbitrarily high number of columns,
* this value is set to \a Dynamic.
@@ -134,8 +143,8 @@ template<typename Derived> class DenseBase
* \sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime
*/
MaxSizeAtCompileTime = (ei_size_at_compile_time<ei_traits<Derived>::MaxRowsAtCompileTime,
ei_traits<Derived>::MaxColsAtCompileTime>::ret),
MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
internal::traits<Derived>::MaxColsAtCompileTime>::ret),
/**< This value is equal to the maximum possible number of coefficients that this expression
* might have. If this expression might have an arbitrarily high number of coefficients,
* this value is set to \a Dynamic.
@@ -146,32 +155,34 @@ template<typename Derived> class DenseBase
* \sa SizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime
*/
IsVectorAtCompileTime = ei_traits<Derived>::MaxRowsAtCompileTime == 1
|| ei_traits<Derived>::MaxColsAtCompileTime == 1,
IsVectorAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime == 1
|| internal::traits<Derived>::MaxColsAtCompileTime == 1,
/**< This is set to true if either the number of rows or the number of
* columns is known at compile-time to be equal to 1. Indeed, in that case,
* we are dealing with a column-vector (if there is only one column) or with
* a row-vector (if there is only one row). */
Flags = ei_traits<Derived>::Flags,
Flags = internal::traits<Derived>::Flags,
/**< This stores expression \ref flags flags which may or may not be inherited by new expressions
* constructed from this one. See the \ref flags "list of flags".
*/
IsRowMajor = int(Flags) & RowMajorBit, /**< True if this expression has row-major storage order. */
InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? SizeAtCompileTime
: int(IsRowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime)
: int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
CoeffReadCost = ei_traits<Derived>::CoeffReadCost,
CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
/**< This is a rough measure of how expensive it is to read one coefficient from
* this expression.
*/
InnerStrideAtCompileTime = ei_inner_stride_at_compile_time<Derived>::ret,
OuterStrideAtCompileTime = ei_outer_stride_at_compile_time<Derived>::ret
InnerStrideAtCompileTime = internal::inner_stride_at_compile_time<Derived>::ret,
OuterStrideAtCompileTime = internal::outer_stride_at_compile_time<Derived>::ret
};
enum { ThisConstantIsPrivateInPlainObjectBase };
/** \returns the number of nonzero coefficients which is in practice the number
* of stored coefficients. */
inline Index nonZeros() const { return size(); }
@@ -183,8 +194,8 @@ template<typename Derived> class DenseBase
/** \returns the outer size.
*
* \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension
* with respect to the storage order, i.e., the number of columns for a column-major matrix,
* and the number of rows for a row-major matrix. */
* with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a
* column-major matrix, and the number of rows for a row-major matrix. */
Index outerSize() const
{
return IsVectorAtCompileTime ? 1
@@ -194,8 +205,8 @@ template<typename Derived> class DenseBase
/** \returns the inner size.
*
* \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension
* with respect to the storage order, i.e., the number of rows for a column-major matrix,
* and the number of columns for a row-major matrix. */
* with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a
* column-major matrix, and the number of columns for a row-major matrix. */
Index innerSize() const
{
return IsVectorAtCompileTime ? this->size()
@@ -209,7 +220,7 @@ template<typename Derived> class DenseBase
void resize(Index size)
{
EIGEN_ONLY_USED_FOR_DEBUG(size);
ei_assert(size == this->size()
eigen_assert(size == this->size()
&& "DenseBase::resize() does not actually allow to resize.");
}
/** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are
@@ -220,20 +231,20 @@ template<typename Derived> class DenseBase
{
EIGEN_ONLY_USED_FOR_DEBUG(rows);
EIGEN_ONLY_USED_FOR_DEBUG(cols);
ei_assert(rows == this->rows() && cols == this->cols()
eigen_assert(rows == this->rows() && cols == this->cols()
&& "DenseBase::resize() does not actually allow to resize.");
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ConstantReturnType;
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */
typedef CwiseNullaryOp<ei_linspaced_op<Scalar,false>,Derived> SequentialLinSpacedReturnType;
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,false>,Derived> SequentialLinSpacedReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows random access. */
typedef CwiseNullaryOp<ei_linspaced_op<Scalar,true>,Derived> RandomAccessLinSpacedReturnType;
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,true>,Derived> RandomAccessLinSpacedReturnType;
/** \internal the return type of MatrixBase::eigenvalues() */
typedef Matrix<typename NumTraits<typename ei_traits<Derived>::Scalar>::Real, ei_traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
#endif // not EIGEN_PARSED_BY_DOXYGEN
@@ -273,7 +284,8 @@ template<typename Derived> class DenseBase
CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);
Eigen::Transpose<Derived> transpose();
const Eigen::Transpose<Derived> transpose() const;
typedef const Transpose<const Derived> ConstTransposeReturnType;
ConstTransposeReturnType transpose() const;
void transposeInPlace();
#ifndef EIGEN_NO_DEBUG
protected:
@@ -282,41 +294,29 @@ template<typename Derived> class DenseBase
public:
#endif
VectorBlock<Derived> segment(Index start, Index size);
const VectorBlock<Derived> segment(Index start, Index size) const;
typedef VectorBlock<Derived> SegmentReturnType;
typedef const VectorBlock<const Derived> ConstSegmentReturnType;
template<int Size> struct FixedSegmentReturnType { typedef VectorBlock<Derived, Size> Type; };
template<int Size> struct ConstFixedSegmentReturnType { typedef const VectorBlock<const Derived, Size> Type; };
// Note: The "DenseBase::" prefixes are added to help MSVC9 to match these declarations with the later implementations.
SegmentReturnType segment(Index start, Index size);
typename DenseBase::ConstSegmentReturnType segment(Index start, Index size) const;
VectorBlock<Derived> head(Index size);
const VectorBlock<Derived> head(Index size) const;
SegmentReturnType head(Index size);
typename DenseBase::ConstSegmentReturnType head(Index size) const;
VectorBlock<Derived> tail(Index size);
const VectorBlock<Derived> tail(Index size) const;
SegmentReturnType tail(Index size);
typename DenseBase::ConstSegmentReturnType tail(Index size) const;
template<int Size> VectorBlock<Derived,Size> head(void);
template<int Size> const VectorBlock<Derived,Size> head() const;
template<int Size> typename FixedSegmentReturnType<Size>::Type head();
template<int Size> typename ConstFixedSegmentReturnType<Size>::Type head() const;
template<int Size> VectorBlock<Derived,Size> tail();
template<int Size> const VectorBlock<Derived,Size> tail() const;
template<int Size> typename FixedSegmentReturnType<Size>::Type tail();
template<int Size> typename ConstFixedSegmentReturnType<Size>::Type tail() const;
template<int Size> VectorBlock<Derived,Size> segment(Index start);
template<int Size> const VectorBlock<Derived,Size> segment(Index start) const;
Diagonal<Derived,0> diagonal();
const Diagonal<Derived,0> diagonal() const;
template<int Index> Diagonal<Derived,Index> diagonal();
template<int Index> const Diagonal<Derived,Index> diagonal() const;
Diagonal<Derived, Dynamic> diagonal(Index index);
const Diagonal<Derived, Dynamic> diagonal(Index index) const;
template<unsigned int Mode> TriangularView<Derived, Mode> part();
template<unsigned int Mode> const TriangularView<Derived, Mode> part() const;
template<unsigned int Mode> TriangularView<Derived, Mode> triangularView();
template<unsigned int Mode> const TriangularView<Derived, Mode> triangularView() const;
template<unsigned int UpLo> SelfAdjointView<Derived, UpLo> selfadjointView();
template<unsigned int UpLo> const SelfAdjointView<Derived, UpLo> selfadjointView() const;
template<int Size> typename FixedSegmentReturnType<Size>::Type segment(Index start);
template<int Size> typename ConstFixedSegmentReturnType<Size>::Type segment(Index start) const;
static const ConstantReturnType
Constant(Index rows, Index cols, const Scalar& value);
@@ -381,22 +381,39 @@ template<typename Derived> class DenseBase
* Notice that in the case of a plain matrix or vector (not an expression) this function just returns
* a const reference, in order to avoid a useless copy.
*/
EIGEN_STRONG_INLINE const typename ei_eval<Derived>::type eval() const
EIGEN_STRONG_INLINE const typename internal::eval<Derived>::type eval() const
{
// Even though MSVC does not honor strong inlining when the return type
// is a dynamic matrix, we desperately need strong inlining for fixed
// size types on MSVC.
return typename ei_eval<Derived>::type(derived());
return typename internal::eval<Derived>::type(derived());
}
/** swaps *this with the expression \a other.
*
*/
template<typename OtherDerived>
void swap(DenseBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other);
void swap(const DenseBase<OtherDerived>& other,
int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase)
{
SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
}
/** swaps *this with the matrix or array \a other.
*
* \param other a plain object (taken by non-const reference) to exchange with.
*
* The work is forwarded to SwapWrapper: \c derived() is wrapped and the wrapper is then
* lazily assigned from \c other.derived(). NOTE(review): the actual exchange of
* coefficients is presumably performed inside SwapWrapper's assignment -- confirm there.
*/
template<typename OtherDerived>
void swap(PlainObjectBase<OtherDerived>& other)
{
SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
}
inline const NestByValue<Derived> nestByValue() const;
inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
inline ForceAlignedAccess<Derived> forceAlignedAccess();
template<bool Enable> inline const typename ei_meta_if<Enable,ForceAlignedAccess<Derived>,Derived&>::ret forceAlignedAccessIf() const;
template<bool Enable> inline typename ei_meta_if<Enable,ForceAlignedAccess<Derived>,Derived&>::ret forceAlignedAccessIf();
template<bool Enable> inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
Scalar sum() const;
Scalar mean() const;
@@ -404,17 +421,20 @@ template<typename Derived> class DenseBase
Scalar prod() const;
typename ei_traits<Derived>::Scalar minCoeff() const;
typename ei_traits<Derived>::Scalar maxCoeff() const;
typename internal::traits<Derived>::Scalar minCoeff() const;
typename internal::traits<Derived>::Scalar maxCoeff() const;
typename ei_traits<Derived>::Scalar minCoeff(Index* row, Index* col) const;
typename ei_traits<Derived>::Scalar maxCoeff(Index* row, Index* col) const;
typename ei_traits<Derived>::Scalar minCoeff(Index* index) const;
typename ei_traits<Derived>::Scalar maxCoeff(Index* index) const;
template<typename IndexType>
typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
template<typename IndexType>
typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;
template<typename IndexType>
typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;
template<typename IndexType>
typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;
template<typename BinaryOp>
typename ei_result_of<BinaryOp(typename ei_traits<Derived>::Scalar)>::type
typename internal::result_of<BinaryOp(typename internal::traits<Derived>::Scalar)>::type
redux(const BinaryOp& func) const;
template<typename Visitor>
@@ -422,20 +442,33 @@ template<typename Derived> class DenseBase
inline const WithFormat<Derived> format(const IOFormat& fmt) const;
/** \returns the unique coefficient of a 1x1 expression
*
* The size is checked twice: once through EIGEN_STATIC_ASSERT_SIZE_1x1 (presumably a
* compile-time check on \c Derived -- confirm against the macro's definition), and once
* at run time via eigen_assert on rows() and cols().
*/
CoeffReturnType value() const
{
EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
eigen_assert(this->rows() == 1 && this->cols() == 1);
// the single coefficient is the one at position (0,0)
return derived().coeff(0,0);
}
/////////// Array module ///////////
bool all(void) const;
bool any(void) const;
Index count() const;
const VectorwiseOp<Derived,Horizontal> rowwise() const;
VectorwiseOp<Derived,Horizontal> rowwise();
const VectorwiseOp<Derived,Vertical> colwise() const;
VectorwiseOp<Derived,Vertical> colwise();
typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType;
typedef VectorwiseOp<Derived, Vertical> ColwiseReturnType;
typedef const VectorwiseOp<const Derived, Vertical> ConstColwiseReturnType;
static const CwiseNullaryOp<ei_scalar_random_op<Scalar>,Derived> Random(Index rows, Index cols);
static const CwiseNullaryOp<ei_scalar_random_op<Scalar>,Derived> Random(Index size);
static const CwiseNullaryOp<ei_scalar_random_op<Scalar>,Derived> Random();
ConstRowwiseReturnType rowwise() const;
RowwiseReturnType rowwise();
ConstColwiseReturnType colwise() const;
ColwiseReturnType colwise();
static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index rows, Index cols);
static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index size);
static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random();
template<typename ThenDerived,typename ElseDerived>
const Select<Derived,ThenDerived,ElseDerived>
@@ -456,8 +489,10 @@ template<typename Derived> class DenseBase
const Replicate<Derived,RowFactor,ColFactor> replicate() const;
const Replicate<Derived,Dynamic,Dynamic> replicate(Index rowFacor,Index colFactor) const;
Eigen::Reverse<Derived, BothDirections> reverse();
const Eigen::Reverse<Derived, BothDirections> reverse() const;
typedef Reverse<Derived, BothDirections> ReverseReturnType;
typedef const Reverse<const Derived, BothDirections> ConstReverseReturnType;
ReverseReturnType reverse();
ConstReverseReturnType reverse() const;
void reverseInPlace();
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
@@ -482,7 +517,7 @@ template<typename Derived> class DenseBase
// disable the use of evalTo for dense objects with a nice compilation error
template<typename Dest> inline void evalTo(Dest& ) const
{
EIGEN_STATIC_ASSERT((ei_is_same_type<Dest,void>::ret),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
EIGEN_STATIC_ASSERT((internal::is_same<Dest,void>::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
}
protected:
@@ -493,8 +528,6 @@ template<typename Derived> class DenseBase
* Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down
*/
#ifdef EIGEN_INTERNAL_DEBUGGING
EIGEN_STATIC_ASSERT(ei_are_flags_consistent<Flags>::ret,
INVALID_MATRIXBASE_TEMPLATE_PARAMETERS)
EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor))
&& EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, int(!IsRowMajor))),
INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION)

View File

@@ -25,10 +25,17 @@
#ifndef EIGEN_DENSECOEFFSBASE_H
#define EIGEN_DENSECOEFFSBASE_H
namespace internal {
/** \internal
* Type selector: \c type is \c T itself when \c is_arithmetic<T>::value is true,
* and \c add_const_on_value_type<T>::type otherwise.
*/
template<typename T> struct add_const_on_value_type_if_arithmetic
{
typedef typename conditional<is_arithmetic<T>::value, T, typename add_const_on_value_type<T>::type>::type type;
};
}
/** \brief Base class providing read-only coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam ReadOnlyAccessors Constant indicating read-only access
* \tparam #ReadOnlyAccessors Constant indicating read-only access
*
* This class defines the \c operator() \c const function and friends, which can be used to read specific
* entries of a matrix or array.
@@ -40,15 +47,26 @@ template<typename Derived>
class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
{
public:
typedef typename ei_traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index;
typedef typename ei_traits<Derived>::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename ei_meta_if<bool(ei_traits<Derived>::Flags&LvalueBit),
const Scalar&,
typename ei_meta_if<ei_is_arithmetic<Scalar>::ret, Scalar, const Scalar>::ret
>::ret CoeffReturnType;
typedef typename ei_makeconst_return_type<typename ei_packet_traits<Scalar>::type>::type PacketReturnType;
typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename internal::traits<Derived>::Index Index;
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
// Explanation for this CoeffReturnType typedef.
// - This is the return type of the coeff() method.
// - The LvalueBit means exactly that we can offer a coeffRef() method, which means exactly that we can get references
// to coeffs, which means exactly that we can have coeff() return a const reference (as opposed to returning a value).
// - The is_arithmetic check is required since "const int", "const double", etc. will cause warnings on some systems
// while the declaration of "const T", where T is a non-arithmetic type, does not. Always returning "const Scalar&" is
// not possible, since the underlying expressions might not offer a valid address the reference could be referring to.
typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit),
const Scalar&,
typename internal::conditional<internal::is_arithmetic<Scalar>::value, Scalar, const Scalar>::type
>::type CoeffReturnType;
typedef typename internal::add_const_on_value_type_if_arithmetic<
typename internal::packet_traits<Scalar>::type
>::type PacketReturnType;
typedef EigenBase<Derived> Base;
using Base::rows;
@@ -88,7 +106,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
*/
EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
{
ei_internal_assert(row >= 0 && row < rows()
eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
return derived().coeff(row, col);
}
@@ -105,7 +123,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
*/
EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const
{
ei_assert(row >= 0 && row < rows()
eigen_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
return derived().coeff(row, col);
}
@@ -128,7 +146,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
EIGEN_STRONG_INLINE CoeffReturnType
coeff(Index index) const
{
ei_internal_assert(index >= 0 && index < size());
eigen_internal_assert(index >= 0 && index < size());
return derived().coeff(index);
}
@@ -144,9 +162,11 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
EIGEN_STRONG_INLINE CoeffReturnType
operator[](Index index) const
{
#ifndef EIGEN2_SUPPORT
EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
ei_assert(index >= 0 && index < size());
#endif
eigen_assert(index >= 0 && index < size());
return derived().coeff(index);
}
@@ -163,7 +183,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
EIGEN_STRONG_INLINE CoeffReturnType
operator()(Index index) const
{
ei_assert(index >= 0 && index < size());
eigen_assert(index >= 0 && index < size());
return derived().coeff(index);
}
@@ -187,11 +207,12 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
EIGEN_STRONG_INLINE CoeffReturnType
w() const { return (*this)[3]; }
/** \returns the packet of coefficients starting at the given row and column. It is your responsibility
/** \internal
* \returns the packet of coefficients starting at the given row and column. It is your responsibility
* to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit.
*
* The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
* The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
* the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
* starting at an address which is a multiple of the packet size.
*/
@@ -199,12 +220,13 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
template<int LoadMode>
EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const
{
ei_internal_assert(row >= 0 && row < rows()
eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
return derived().template packet<LoadMode>(row,col);
}
/** \internal */
template<int LoadMode>
EIGEN_STRONG_INLINE PacketReturnType packetByOuterInner(Index outer, Index inner) const
{
@@ -212,11 +234,12 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
colIndexByOuterInner(outer, inner));
}
/** \returns the packet of coefficients starting at the given index. It is your responsibility
/** \internal
* \returns the packet of coefficients starting at the given index. It is your responsibility
* to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit and the LinearAccessBit.
*
* The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
* The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
* the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
* starting at an address which is a multiple of the packet size.
*/
@@ -224,13 +247,13 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
template<int LoadMode>
EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
ei_internal_assert(index >= 0 && index < size());
eigen_internal_assert(index >= 0 && index < size());
return derived().template packet<LoadMode>(index);
}
protected:
// explanation: DenseBase is doing "using ..." on the methods from DenseCoeffsBase.
// But some methods are only available in the EnableDirectAccessAPI case.
// But some methods are only available in the DirectAccess case.
// So we add dummy methods here with these names, so that "using... " doesn't fail.
// It's not private so that the child class DenseBase can access them, and it's not public
// either since it's an implementation detail, so has to be protected.
@@ -252,7 +275,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
/** \brief Base class providing read/write coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam WriteAccessors Constant indicating read/write access
* \tparam #WriteAccessors Constant indicating read/write access
*
* This class defines the non-const \c operator() function and friends, which can be used to write specific
* entries of a matrix or array. This class inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which
@@ -267,10 +290,10 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
typedef typename ei_traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index;
typedef typename ei_traits<Derived>::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename internal::traits<Derived>::Index Index;
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
using Base::coeff;
@@ -303,7 +326,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
*/
EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
{
ei_internal_assert(row >= 0 && row < rows()
eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
return derived().coeffRef(row, col);
}
@@ -323,7 +346,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
EIGEN_STRONG_INLINE Scalar&
operator()(Index row, Index col)
{
ei_assert(row >= 0 && row < rows()
eigen_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
return derived().coeffRef(row, col);
}
@@ -347,7 +370,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
EIGEN_STRONG_INLINE Scalar&
coeffRef(Index index)
{
ei_internal_assert(index >= 0 && index < size());
eigen_internal_assert(index >= 0 && index < size());
return derived().coeffRef(index);
}
@@ -361,9 +384,11 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
EIGEN_STRONG_INLINE Scalar&
operator[](Index index)
{
#ifndef EIGEN2_SUPPORT
EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
ei_assert(index >= 0 && index < size());
#endif
eigen_assert(index >= 0 && index < size());
return derived().coeffRef(index);
}
@@ -379,7 +404,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
EIGEN_STRONG_INLINE Scalar&
operator()(Index index)
{
ei_assert(index >= 0 && index < size());
eigen_assert(index >= 0 && index < size());
return derived().coeffRef(index);
}
@@ -403,35 +428,38 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
EIGEN_STRONG_INLINE Scalar&
w() { return (*this)[3]; }
/** Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility
/** \internal
* Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility
* to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit.
*
* The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
* The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
* the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
* starting at an address which is a multiple of the packet size.
*/
template<int StoreMode>
EIGEN_STRONG_INLINE void writePacket
(Index row, Index col, const typename ei_packet_traits<Scalar>::type& x)
(Index row, Index col, const typename internal::packet_traits<Scalar>::type& x)
{
ei_internal_assert(row >= 0 && row < rows()
eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
derived().template writePacket<StoreMode>(row,col,x);
}
/** \internal */
template<int StoreMode>
EIGEN_STRONG_INLINE void writePacketByOuterInner
(Index outer, Index inner, const typename ei_packet_traits<Scalar>::type& x)
(Index outer, Index inner, const typename internal::packet_traits<Scalar>::type& x)
{
writePacket<StoreMode>(rowIndexByOuterInner(outer, inner),
colIndexByOuterInner(outer, inner),
x);
}
/** Stores the given packet of coefficients, at the given index in this expression. It is your responsibility
/** \internal
* Stores the given packet of coefficients, at the given index in this expression. It is your responsibility
* to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit and the LinearAccessBit.
*
@@ -439,12 +467,11 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
* the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
* starting at an address which is a multiple of the packet size.
*/
template<int StoreMode>
EIGEN_STRONG_INLINE void writePacket
(Index index, const typename ei_packet_traits<Scalar>::type& x)
(Index index, const typename internal::packet_traits<Scalar>::type& x)
{
ei_internal_assert(index >= 0 && index < size());
eigen_internal_assert(index >= 0 && index < size());
derived().template writePacket<StoreMode>(index,x);
}
@@ -461,7 +488,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
template<typename OtherDerived>
EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
{
ei_internal_assert(row >= 0 && row < rows()
eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
derived().coeffRef(row, col) = other.derived().coeff(row, col);
}
@@ -477,7 +504,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
template<typename OtherDerived>
EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
{
ei_internal_assert(index >= 0 && index < size());
eigen_internal_assert(index >= 0 && index < size());
derived().coeffRef(index) = other.derived().coeff(index);
}
@@ -502,7 +529,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
template<typename OtherDerived, int StoreMode, int LoadMode>
EIGEN_STRONG_INLINE void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
{
ei_internal_assert(row >= 0 && row < rows()
eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
derived().template writePacket<StoreMode>(row, col,
other.derived().template packet<LoadMode>(row, col));
@@ -519,11 +546,12 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
template<typename OtherDerived, int StoreMode, int LoadMode>
EIGEN_STRONG_INLINE void copyPacket(Index index, const DenseBase<OtherDerived>& other)
{
ei_internal_assert(index >= 0 && index < size());
eigen_internal_assert(index >= 0 && index < size());
derived().template writePacket<StoreMode>(index,
other.derived().template packet<LoadMode>(index));
}
/** \internal */
template<typename OtherDerived, int StoreMode, int LoadMode>
EIGEN_STRONG_INLINE void copyPacketByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
{
@@ -536,25 +564,25 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
};
/** \brief Base class providing direct coefficient access to matrices and arrays.
/** \brief Base class providing direct read-only coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam DirectAccessors Constant indicating direct access
* \tparam #DirectAccessors Constant indicating direct access
*
* This class defines functions to work with strides which can be used to access entries directly. This class
* inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries using
* inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which defines functions to access entries read-only using
* \c operator() .
*
* \sa \ref TopicClassHierarchy
*/
template<typename Derived>
class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, WriteAccessors>
class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
{
public:
typedef DenseCoeffsBase<Derived, WriteAccessors> Base;
typedef typename ei_traits<Derived>::Index Index;
typedef typename ei_traits<Derived>::Scalar Scalar;
typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
typedef typename internal::traits<Derived>::Index Index;
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
using Base::rows;
@@ -606,57 +634,132 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
}
};
/** \brief Base class providing direct read/write coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam #DirectWriteAccessors Constant indicating direct access
*
* This class defines functions to work with strides which can be used to access entries directly. This class
* inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries read/write using
* \c operator().
*
* \sa \ref TopicClassHierarchy
*/
template<typename Derived>
class DenseCoeffsBase<Derived, DirectWriteAccessors>
  : public DenseCoeffsBase<Derived, WriteAccessors>
{
  public:

    typedef DenseCoeffsBase<Derived, WriteAccessors> Base;
    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;

    // Re-expose the base members that the stride helpers below rely on.
    using Base::derived;
    using Base::rows;
    using Base::cols;
    using Base::size;

    /** \returns the pointer increment between two consecutive elements within a slice
      * in the inner direction. The value is obtained from the derived expression.
      *
      * \sa outerStride(), rowStride(), colStride()
      */
    Index innerStride() const
    {
      return derived().innerStride();
    }

    /** \returns the pointer increment between two consecutive inner slices (for example,
      * between two consecutive columns in a column-major matrix). The value is obtained
      * from the derived expression.
      *
      * \sa innerStride(), rowStride(), colStride()
      */
    Index outerStride() const
    {
      return derived().outerStride();
    }

    // FIXME shall we remove it ?
    // Yields innerStride() for compile-time vectors and outerStride() otherwise.
    Index stride() const
    {
      return Derived::IsVectorAtCompileTime ? innerStride()
                                            : outerStride();
    }

    /** \returns the pointer increment between two consecutive rows: the outer stride
      * for row-major expressions, the inner stride otherwise.
      *
      * \sa innerStride(), outerStride(), colStride()
      */
    Index rowStride() const
    {
      return Derived::IsRowMajor ? outerStride()
                                 : innerStride();
    }

    /** \returns the pointer increment between two consecutive columns: the inner stride
      * for row-major expressions, the outer stride otherwise.
      *
      * \sa innerStride(), outerStride(), rowStride()
      */
    Index colStride() const
    {
      return Derived::IsRowMajor ? innerStride()
                                 : outerStride();
    }
};
namespace internal {
template<typename Derived, bool JustReturnZero>
struct ei_first_aligned_impl
struct first_aligned_impl
{
inline static typename Derived::Index run(const Derived&)
{ return 0; }
};
template<typename Derived>
struct ei_first_aligned_impl<Derived, false>
struct first_aligned_impl<Derived, false>
{
inline static typename Derived::Index run(const Derived& m)
{
return ei_first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
return first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
}
};
/** \internal \returns the index of the first element of the array that is well aligned for vectorization.
*
* There is also the variant ei_first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more
* There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more
* documentation.
*/
template<typename Derived>
inline static typename Derived::Index ei_first_aligned(const Derived& m)
inline static typename Derived::Index first_aligned(const Derived& m)
{
return ei_first_aligned_impl
return first_aligned_impl
<Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
::run(m);
}
template<typename Derived, bool HasDirectAccess = ei_has_direct_access<Derived>::ret>
struct ei_inner_stride_at_compile_time
template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
struct inner_stride_at_compile_time
{
enum { ret = ei_traits<Derived>::InnerStrideAtCompileTime };
enum { ret = traits<Derived>::InnerStrideAtCompileTime };
};
template<typename Derived>
struct ei_inner_stride_at_compile_time<Derived, false>
struct inner_stride_at_compile_time<Derived, false>
{
enum { ret = 0 };
};
template<typename Derived, bool HasDirectAccess = ei_has_direct_access<Derived>::ret>
struct ei_outer_stride_at_compile_time
template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
struct outer_stride_at_compile_time
{
enum { ret = ei_traits<Derived>::OuterStrideAtCompileTime };
enum { ret = traits<Derived>::OuterStrideAtCompileTime };
};
template<typename Derived>
struct ei_outer_stride_at_compile_time<Derived, false>
struct outer_stride_at_compile_time<Derived, false>
{
enum { ret = 0 };
};
} // end namespace internal
#endif // EIGEN_DENSECOEFFSBASE_H

View File

@@ -0,0 +1,304 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_MATRIXSTORAGE_H
#define EIGEN_MATRIXSTORAGE_H
// EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN is pasted as a statement into the
// allocating DenseStorage constructors and resize() methods. It expands to
// nothing unless EIGEN_DENSE_STORAGE_CTOR_PLUGIN is defined, in which case it
// expands to that macro followed by a semicolon.
#ifndef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
#else
#define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN EIGEN_DENSE_STORAGE_CTOR_PLUGIN;
#endif
namespace internal {
// Empty tag type used to select constructor overloads. plain_array and
// DenseStorage each provide a constructor taking this tag; in the 16-byte
// aligned plain_array specialization that overload omits the
// EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT check performed by the default constructor.
struct constructor_without_unaligned_array_assert {};
/** \internal
  * Static array of \a Size elements of type \a T.
  *
  * The \a Alignment parameter defaults to 16 when \a MatrixOrArrayOptions does not
  * contain DontAlign and the total size Size*sizeof(T) is a multiple of 16 bytes;
  * it defaults to 0 otherwise. This primary template declares the array without any
  * alignment qualifier and performs no alignment check; the case Alignment==16 is
  * handled by a dedicated specialization.
  */
template <typename T, int Size, int MatrixOrArrayOptions,
          int Alignment = ( (MatrixOrArrayOptions&DontAlign)==0
                            && ((Size*sizeof(T))%16)==0 ) ? 16 : 0 >
struct plain_array
{
  T array[Size];

  // Tag overload, provided so that callers can use the same constructor syntax
  // for every plain_array variant; it does nothing here.
  plain_array(constructor_without_unaligned_array_assert) {}

  plain_array() {}
};
// EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) asserts that the address of the
// identifier `array` visible at the expansion site has none of the bits of
// `sizemask` set (e.g. 0xf checks the low four address bits).
//
// - When EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT is defined, it expands to nothing.
// - The expansion already ends with a semicolon, so call sites do not add one.
// - The argument is parenthesized so that a compound mask expression such as
//   `a | b` is applied to the address as a whole instead of being split by
//   operator precedence.
#ifdef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)
#else
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
eigen_assert((reinterpret_cast<size_t>(array) & (sizemask)) == 0 \
&& "this assertion is explained here: " \
"http://eigen.tuxfamily.org/dox-devel/TopicUnalignedArrayAssert.html" \
" **** READ THIS WEB PAGE !!! ****");
#endif
// Specialization selected when the Alignment parameter is 16: the array is
// declared with EIGEN_USER_ALIGN16 (presumably a 16-byte alignment attribute;
// the macro is defined outside this file section).
template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, 16>
{
  EIGEN_USER_ALIGN16 T array[Size];

  // Tag overload: constructs the array without running the alignment check.
  plain_array(constructor_without_unaligned_array_assert) {}

  // Default construction checks that the low four bits of the array address
  // are zero (the check can be compiled out, see EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT).
  plain_array()
  {
    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf)
  }
};
// Specialization for Size==0, whatever the options and alignment: a single
// element is declared instead of T array[0] (presumably because zero-length
// arrays are not valid C++). No alignment check is performed.
template <typename T, int MatrixOrArrayOptions, int Alignment>
struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
{
  EIGEN_USER_ALIGN16 T array[1];

  plain_array(constructor_without_unaligned_array_assert) {}

  plain_array() {}
};
} // end namespace internal
/** \internal
*
* \class DenseStorage
* \ingroup Core_Module
*
* \brief Stores the data of a matrix
*
* This class stores the data of fixed-size, dynamic-size or mixed matrices
* in a way as compact as possible.
*
* \sa Matrix
*/
template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage;
// Purely fixed-size matrix: the coefficients live in an in-object plain_array
// and both dimensions are the compile-time constants _Rows and _Cols, so no
// size is stored and the resizing functions are no-ops.
template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage
{
    internal::plain_array<T,Size,_Options> m_data;

  public:

    explicit DenseStorage() {}

    // Forwards the tag to plain_array so that its alignment assertion is skipped.
    DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert())
    {}

    // (size, rows, cols) are accepted for interface uniformity and ignored.
    DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}

    static DenseIndex rows() { return _Rows; }
    static DenseIndex cols() { return _Cols; }

    T *data() { return m_data.array; }
    const T *data() const { return m_data.array; }

    // Exchanges the two arrays through std::swap.
    void swap(DenseStorage& other)
    {
      std::swap(m_data,other.m_data);
    }

    void resize(DenseIndex,DenseIndex,DenseIndex) {}
    void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
};
// Null matrix (Size==0): there is no data member at all. data() returns a null
// pointer, and swapping or resizing does nothing.
template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0, _Rows, _Cols, _Options>
{
  public:

    explicit DenseStorage() {}
    DenseStorage(internal::constructor_without_unaligned_array_assert) {}
    // (size, rows, cols) are accepted for interface uniformity and ignored.
    DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}

    static DenseIndex rows() { return _Rows; }
    static DenseIndex cols() { return _Cols; }

    T *data() { return 0; }
    const T *data() const { return 0; }

    void swap(DenseStorage& ) {}

    void resize(DenseIndex,DenseIndex,DenseIndex) {}
    void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
};
// Dynamic-size matrix with fixed-size storage: the coefficients live in an
// in-object plain_array of Size elements, while the current number of rows and
// columns is tracked at run time. Resizing only updates these two counters.
template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
{
    internal::plain_array<T,Size,_Options> m_data;
    DenseIndex m_rows;
    DenseIndex m_cols;

  public:

    explicit DenseStorage()
      : m_rows(0), m_cols(0)
    {}

    // Forwards the tag to plain_array so that its alignment assertion is skipped.
    DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0)
    {}

    // The first argument (total size) is ignored.
    DenseStorage(DenseIndex, DenseIndex rows, DenseIndex cols)
      : m_rows(rows), m_cols(cols)
    {}

    DenseIndex rows() const { return m_rows; }
    DenseIndex cols() const { return m_cols; }

    T *data() { return m_data.array; }
    const T *data() const { return m_data.array; }

    // Exchanges the arrays and both dimensions.
    void swap(DenseStorage& other)
    {
      std::swap(m_data,other.m_data);
      std::swap(m_rows,other.m_rows);
      std::swap(m_cols,other.m_cols);
    }

    void resize(DenseIndex, DenseIndex rows, DenseIndex cols)
    {
      m_rows = rows;
      m_cols = cols;
    }

    void conservativeResize(DenseIndex, DenseIndex rows, DenseIndex cols)
    {
      m_rows = rows;
      m_cols = cols;
    }
};
// Dynamic-size matrix with fixed-size storage and fixed width: the number of
// columns is the compile-time constant _Cols and only the number of rows is
// tracked at run time. Resizing only updates the row counter.
template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Size, Dynamic, _Cols, _Options>
{
    internal::plain_array<T,Size,_Options> m_data;
    DenseIndex m_rows;

  public:

    explicit DenseStorage()
      : m_rows(0)
    {}

    // Forwards the tag to plain_array so that its alignment assertion is skipped.
    DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0)
    {}

    // Only the row count (second argument) is used.
    DenseStorage(DenseIndex, DenseIndex rows, DenseIndex)
      : m_rows(rows)
    {}

    DenseIndex rows() const { return m_rows; }
    DenseIndex cols() const { return _Cols; }

    T *data() { return m_data.array; }
    const T *data() const { return m_data.array; }

    // Exchanges the arrays and the row counts.
    void swap(DenseStorage& other)
    {
      std::swap(m_data,other.m_data);
      std::swap(m_rows,other.m_rows);
    }

    void resize(DenseIndex, DenseIndex rows, DenseIndex)
    {
      m_rows = rows;
    }

    void conservativeResize(DenseIndex, DenseIndex rows, DenseIndex)
    {
      m_rows = rows;
    }
};
// Dynamic-size matrix with fixed-size storage and fixed height: the number of
// rows is the compile-time constant _Rows and only the number of columns is
// tracked at run time. Resizing only updates the column counter.
template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Size, _Rows, Dynamic, _Options>
{
    internal::plain_array<T,Size,_Options> m_data;
    DenseIndex m_cols;

  public:

    explicit DenseStorage()
      : m_cols(0)
    {}

    // Forwards the tag to plain_array so that its alignment assertion is skipped.
    DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0)
    {}

    // Only the column count (third argument) is used.
    DenseStorage(DenseIndex, DenseIndex, DenseIndex cols)
      : m_cols(cols)
    {}

    DenseIndex rows() const { return _Rows; }
    DenseIndex cols() const { return m_cols; }

    T *data() { return m_data.array; }
    const T *data() const { return m_data.array; }

    // Exchanges the arrays and the column counts.
    void swap(DenseStorage& other)
    {
      std::swap(m_data,other.m_data);
      std::swap(m_cols,other.m_cols);
    }

    void resize(DenseIndex, DenseIndex, DenseIndex cols)
    {
      m_cols = cols;
    }

    void conservativeResize(DenseIndex, DenseIndex, DenseIndex cols)
    {
      m_cols = cols;
    }
};
// purely dynamic matrix.
template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynamic, _Options>
{
T *m_data;
DenseIndex m_rows;
DenseIndex m_cols;
public:
inline explicit DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
inline DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(0), m_rows(0), m_cols(0) {}
inline DenseStorage(DenseIndex size, DenseIndex rows, DenseIndex cols)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
inline void swap(DenseStorage& other)
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
inline DenseIndex rows(void) const {return m_rows;}
inline DenseIndex cols(void) const {return m_cols;}
inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex cols)
{
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
m_rows = rows;
m_cols = cols;
}
void resize(DenseIndex size, DenseIndex rows, DenseIndex cols)
{
if(size != m_rows*m_cols)
{
internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols);
if (size)
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
else
m_data = 0;
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
}
m_rows = rows;
m_cols = cols;
}
inline const T *data() const { return m_data; }
inline T *data() { return m_data; }
};
// matrix with dynamic width and fixed height (so that matrix has dynamic size).
template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Rows, Dynamic, _Options>
{
T *m_data;
DenseIndex m_cols;
public:
inline explicit DenseStorage() : m_data(0), m_cols(0) {}
inline DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
inline DenseStorage(DenseIndex size, DenseIndex, DenseIndex cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
inline static DenseIndex rows(void) {return _Rows;}
inline DenseIndex cols(void) const {return m_cols;}
inline void conservativeResize(DenseIndex size, DenseIndex, DenseIndex cols)
{
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
m_cols = cols;
}
EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex, DenseIndex cols)
{
if(size != _Rows*m_cols)
{
internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols);
if (size)
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
else
m_data = 0;
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
}
m_cols = cols;
}
inline const T *data() const { return m_data; }
inline T *data() { return m_data; }
};
// matrix with dynamic height and fixed width (so that matrix has dynamic size).
template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dynamic, _Cols, _Options>
{
T *m_data;
DenseIndex m_rows;
public:
inline explicit DenseStorage() : m_data(0), m_rows(0) {}
inline DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
inline DenseStorage(DenseIndex size, DenseIndex rows, DenseIndex) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
inline DenseIndex rows(void) const {return m_rows;}
inline static DenseIndex cols(void) {return _Cols;}
inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex)
{
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
m_rows = rows;
}
EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex rows, DenseIndex)
{
if(size != m_rows*_Cols)
{
internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows);
if (size)
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
else
m_data = 0;
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
}
m_rows = rows;
}
inline const T *data() const { return m_data; }
inline T *data() { return m_data; }
};
#endif // EIGEN_MATRIXSTORAGE_H

View File

@@ -43,12 +43,14 @@
*
* \sa MatrixBase::diagonal(), MatrixBase::diagonal(Index)
*/
namespace internal {
template<typename MatrixType, int DiagIndex>
struct ei_traits<Diagonal<MatrixType,DiagIndex> >
: ei_traits<MatrixType>
struct traits<Diagonal<MatrixType,DiagIndex> >
: traits<MatrixType>
{
typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
typedef typename MatrixType::StorageKind StorageKind;
enum {
AbsDiagIndex = DiagIndex<0 ? -DiagIndex : DiagIndex, // only used if DiagIndex != Dynamic
@@ -62,28 +64,30 @@ struct ei_traits<Diagonal<MatrixType,DiagIndex> >
MatrixType::MaxColsAtCompileTime)
: (EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::MaxRowsAtCompileTime, MatrixType::MaxColsAtCompileTime) - AbsDiagIndex),
MaxColsAtCompileTime = 1,
Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | LvalueBit | DirectAccessBit) & ~RowMajorBit,
MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit,
CoeffReadCost = _MatrixTypeNested::CoeffReadCost,
MatrixTypeOuterStride = ei_outer_stride_at_compile_time<MatrixType>::ret,
MatrixTypeOuterStride = outer_stride_at_compile_time<MatrixType>::ret,
InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1,
OuterStrideAtCompileTime = 0
};
};
}
template<typename MatrixType, int DiagIndex> class Diagonal
: public ei_dense_xpr_base< Diagonal<MatrixType,DiagIndex> >::type
: public internal::dense_xpr_base< Diagonal<MatrixType,DiagIndex> >::type
{
public:
typedef typename ei_dense_xpr_base<Diagonal>::type Base;
typedef typename internal::dense_xpr_base<Diagonal>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)
inline Diagonal(const MatrixType& matrix, Index index = DiagIndex) : m_matrix(matrix), m_index(index) {}
inline Diagonal(MatrixType& matrix, Index index = DiagIndex) : m_matrix(matrix), m_index(index) {}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
inline Index rows() const
{ return m_index.value()<0 ? std::min(m_matrix.cols(),m_matrix.rows()+m_index.value()) : std::min(m_matrix.rows(),m_matrix.cols()-m_index.value()); }
{ return m_index.value()<0 ? (std::min)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min)(m_matrix.rows(),m_matrix.cols()-m_index.value()); }
inline Index cols() const { return 1; }
@@ -98,6 +102,12 @@ template<typename MatrixType, int DiagIndex> class Diagonal
}
inline Scalar& coeffRef(Index row, Index)
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
}
inline const Scalar& coeffRef(Index row, Index) const
{
return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
}
@@ -108,6 +118,12 @@ template<typename MatrixType, int DiagIndex> class Diagonal
}
inline Scalar& coeffRef(Index index)
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return m_matrix.const_cast_derived().coeffRef(index+rowOffset(), index+colOffset());
}
inline const Scalar& coeffRef(Index index) const
{
return m_matrix.const_cast_derived().coeffRef(index+rowOffset(), index+colOffset());
}
@@ -119,7 +135,7 @@ template<typename MatrixType, int DiagIndex> class Diagonal
protected:
const typename MatrixType::Nested m_matrix;
const ei_variable_if_dynamic<Index, DiagIndex> m_index;
const internal::variable_if_dynamic<Index, DiagIndex> m_index;
private:
// some compilers may fail to optimize std::max etc in case of compile-time constants...
@@ -140,18 +156,18 @@ template<typename MatrixType, int DiagIndex> class Diagonal
*
* \sa class Diagonal */
template<typename Derived>
inline Diagonal<Derived, 0>
inline typename MatrixBase<Derived>::DiagonalReturnType
MatrixBase<Derived>::diagonal()
{
return Diagonal<Derived, 0>(derived());
return derived();
}
/** This is the const version of diagonal(). */
template<typename Derived>
inline const Diagonal<Derived, 0>
inline const typename MatrixBase<Derived>::ConstDiagonalReturnType
MatrixBase<Derived>::diagonal() const
{
return Diagonal<Derived, 0>(derived());
return ConstDiagonalReturnType(derived());
}
/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
@@ -166,18 +182,18 @@ MatrixBase<Derived>::diagonal() const
*
* \sa MatrixBase::diagonal(), class Diagonal */
template<typename Derived>
inline Diagonal<Derived, Dynamic>
inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Dynamic>::Type
MatrixBase<Derived>::diagonal(Index index)
{
return Diagonal<Derived, Dynamic>(derived(), index);
return typename DiagonalIndexReturnType<Dynamic>::Type(derived(), index);
}
/** This is the const version of diagonal(Index). */
template<typename Derived>
inline const Diagonal<Derived, Dynamic>
inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Dynamic>::Type
MatrixBase<Derived>::diagonal(Index index) const
{
return Diagonal<Derived, Dynamic>(derived(), index);
return typename ConstDiagonalIndexReturnType<Dynamic>::Type(derived(), index);
}
/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
@@ -192,20 +208,20 @@ MatrixBase<Derived>::diagonal(Index index) const
*
* \sa MatrixBase::diagonal(), class Diagonal */
template<typename Derived>
template<int DiagIndex>
inline Diagonal<Derived,DiagIndex>
template<int Index>
inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index>::Type
MatrixBase<Derived>::diagonal()
{
return Diagonal<Derived,DiagIndex>(derived());
return derived();
}
/** This is the const version of diagonal<int>(). */
template<typename Derived>
template<int DiagIndex>
inline const Diagonal<Derived,DiagIndex>
template<int Index>
inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index>::Type
MatrixBase<Derived>::diagonal() const
{
return Diagonal<Derived,DiagIndex>(derived());
return derived();
}
#endif // EIGEN_DIAGONAL_H

View File

@@ -31,10 +31,10 @@ template<typename Derived>
class DiagonalBase : public EigenBase<Derived>
{
public:
typedef typename ei_traits<Derived>::DiagonalVectorType DiagonalVectorType;
typedef typename internal::traits<Derived>::DiagonalVectorType DiagonalVectorType;
typedef typename DiagonalVectorType::Scalar Scalar;
typedef typename ei_traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index;
typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename internal::traits<Derived>::Index Index;
enum {
RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
@@ -46,6 +46,8 @@ class DiagonalBase : public EigenBase<Derived>
};
typedef Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, 0, MaxRowsAtCompileTime, MaxColsAtCompileTime> DenseMatrixType;
typedef DenseMatrixType DenseType;
typedef DiagonalMatrix<Scalar,DiagonalVectorType::SizeAtCompileTime,DiagonalVectorType::MaxSizeAtCompileTime> PlainObject;
inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
inline Derived& derived() { return *static_cast<Derived*>(this); }
@@ -70,11 +72,24 @@ class DiagonalBase : public EigenBase<Derived>
const DiagonalProduct<MatrixDerived, Derived, OnTheLeft>
operator*(const MatrixBase<MatrixDerived> &matrix) const;
inline const DiagonalWrapper<CwiseUnaryOp<ei_scalar_inverse_op<Scalar>, DiagonalVectorType> >
inline const DiagonalWrapper<CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
inverse() const
{
return diagonal().cwiseInverse();
}
#ifdef EIGEN2_SUPPORT
/** \returns the result of calling isApprox() on the diagonal vector of \c *this, with the
 * diagonal vector of \a other and \a precision as arguments.
 *
 * \param other the diagonal expression to compare against
 * \param precision tolerance forwarded unchanged to the underlying isApprox();
 *        defaults to NumTraits<Scalar>::dummy_precision()
 */
template<typename OtherDerived>
bool isApprox(const DiagonalBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
{
return diagonal().isApprox(other.diagonal(), precision);
}
/** \returns the result of calling isApprox() on the dense matrix obtained from
 * toDenseMatrix(), with the dense expression \a other and \a precision as arguments.
 *
 * \param other the dense matrix expression to compare against
 * \param precision tolerance forwarded unchanged to the underlying isApprox();
 *        defaults to NumTraits<Scalar>::dummy_precision()
 */
template<typename OtherDerived>
bool isApprox(const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
{
// The diagonal object is converted with toDenseMatrix() before the comparison.
return toDenseMatrix().isApprox(other, precision);
}
#endif
};
template<typename Derived>
@@ -98,9 +113,11 @@ void DiagonalBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
*
* \sa class DiagonalWrapper
*/
namespace internal {
template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime>
struct ei_traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
: ei_traits<Matrix<_Scalar,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
struct traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
: traits<Matrix<_Scalar,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
{
typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType;
typedef Dense StorageKind;
@@ -109,18 +126,18 @@ struct ei_traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime>
Flags = LvalueBit
};
};
}
template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime>
class DiagonalMatrix
: public DiagonalBase<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
{
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
typedef typename ei_traits<DiagonalMatrix>::DiagonalVectorType DiagonalVectorType;
typedef typename internal::traits<DiagonalMatrix>::DiagonalVectorType DiagonalVectorType;
typedef const DiagonalMatrix& Nested;
typedef _Scalar Scalar;
typedef typename ei_traits<DiagonalMatrix>::StorageKind StorageKind;
typedef typename ei_traits<DiagonalMatrix>::Index Index;
typedef typename internal::traits<DiagonalMatrix>::StorageKind StorageKind;
typedef typename internal::traits<DiagonalMatrix>::Index Index;
#endif
protected:
@@ -204,8 +221,10 @@ class DiagonalMatrix
*
* \sa class DiagonalMatrix, class DiagonalBase, MatrixBase::asDiagonal()
*/
namespace internal {
template<typename _DiagonalVectorType>
struct ei_traits<DiagonalWrapper<_DiagonalVectorType> >
struct traits<DiagonalWrapper<_DiagonalVectorType> >
{
typedef _DiagonalVectorType DiagonalVectorType;
typedef typename DiagonalVectorType::Scalar Scalar;
@@ -216,13 +235,14 @@ struct ei_traits<DiagonalWrapper<_DiagonalVectorType> >
ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
MaxRowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
MaxColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
Flags = ei_traits<DiagonalVectorType>::Flags & LvalueBit
Flags = traits<DiagonalVectorType>::Flags & LvalueBit
};
};
}
template<typename _DiagonalVectorType>
class DiagonalWrapper
: public DiagonalBase<DiagonalWrapper<_DiagonalVectorType> >, ei_no_assignment_operator
: public DiagonalBase<DiagonalWrapper<_DiagonalVectorType> >, internal::no_assignment_operator
{
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -250,7 +270,7 @@ class DiagonalWrapper
* \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal()
**/
template<typename Derived>
inline const DiagonalWrapper<Derived>
inline const DiagonalWrapper<const Derived>
MatrixBase<Derived>::asDiagonal() const
{
return derived();
@@ -265,21 +285,20 @@ MatrixBase<Derived>::asDiagonal() const
* \sa asDiagonal()
*/
template<typename Derived>
bool MatrixBase<Derived>::isDiagonal
(RealScalar prec) const
bool MatrixBase<Derived>::isDiagonal(RealScalar prec) const
{
if(cols() != rows()) return false;
RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1);
for(Index j = 0; j < cols(); ++j)
{
RealScalar absOnDiagonal = ei_abs(coeff(j,j));
RealScalar absOnDiagonal = internal::abs(coeff(j,j));
if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal;
}
for(Index j = 0; j < cols(); ++j)
for(Index i = 0; i < j; ++i)
{
if(!ei_isMuchSmallerThan(coeff(i, j), maxAbsOnDiagonal, prec)) return false;
if(!ei_isMuchSmallerThan(coeff(j, i), maxAbsOnDiagonal, prec)) return false;
if(!internal::isMuchSmallerThan(coeff(i, j), maxAbsOnDiagonal, prec)) return false;
if(!internal::isMuchSmallerThan(coeff(j, i), maxAbsOnDiagonal, prec)) return false;
}
return true;
}

View File

@@ -26,11 +26,12 @@
#ifndef EIGEN_DIAGONALPRODUCT_H
#define EIGEN_DIAGONALPRODUCT_H
namespace internal {
template<typename MatrixType, typename DiagonalType, int ProductOrder>
struct ei_traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
: ei_traits<MatrixType>
struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
: traits<MatrixType>
{
typedef typename ei_scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
enum {
RowsAtCompileTime = MatrixType::RowsAtCompileTime,
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
@@ -40,7 +41,7 @@ struct ei_traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
_StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor,
_PacketOnDiag = !((int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft)
||(int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)),
_SameTypes = ei_is_same_type<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ret,
_SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
// FIXME currently we need same types, but in the future the next rule should be the one
//_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::Flags)&PacketAccessBit))),
_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && ((!_PacketOnDiag) || (bool(int(DiagonalType::Flags)&PacketAccessBit))),
@@ -49,9 +50,10 @@ struct ei_traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
CoeffReadCost = NumTraits<Scalar>::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost
};
};
}
template<typename MatrixType, typename DiagonalType, int ProductOrder>
class DiagonalProduct : ei_no_assignment_operator,
class DiagonalProduct : internal::no_assignment_operator,
public MatrixBase<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
{
public:
@@ -62,7 +64,7 @@ class DiagonalProduct : ei_no_assignment_operator,
inline DiagonalProduct(const MatrixType& matrix, const DiagonalType& diagonal)
: m_matrix(matrix), m_diagonal(diagonal)
{
ei_assert(diagonal.diagonal().size() == (ProductOrder == OnTheLeft ? matrix.rows() : matrix.cols()));
eigen_assert(diagonal.diagonal().size() == (ProductOrder == OnTheLeft ? matrix.rows() : matrix.cols()));
}
inline Index rows() const { return m_matrix.rows(); }
@@ -81,27 +83,27 @@ class DiagonalProduct : ei_no_assignment_operator,
};
const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? row : col;
return packet_impl<LoadMode>(row,col,indexInDiagonalVector,typename ei_meta_if<
return packet_impl<LoadMode>(row,col,indexInDiagonalVector,typename internal::conditional<
((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft)
||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), ei_meta_true, ei_meta_false>::ret());
||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), internal::true_type, internal::false_type>::type());
}
protected:
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, ei_meta_true) const
EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const
{
return ei_pmul(m_matrix.template packet<LoadMode>(row, col),
ei_pset1<PacketScalar>(m_diagonal.diagonal().coeff(id)));
return internal::pmul(m_matrix.template packet<LoadMode>(row, col),
internal::pset1<PacketScalar>(m_diagonal.diagonal().coeff(id)));
}
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, ei_meta_false) const
EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const
{
enum {
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
DiagonalVectorPacketLoadMode = (LoadMode == Aligned && ((InnerSize%16) == 0)) ? Aligned : Unaligned
};
return ei_pmul(m_matrix.template packet<LoadMode>(row, col),
return internal::pmul(m_matrix.template packet<LoadMode>(row, col),
m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(id));
}

View File

@@ -25,6 +25,8 @@
#ifndef EIGEN_DOT_H
#define EIGEN_DOT_H
namespace internal {
// helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot
// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE
// looking at the static assertions. Thus this is a trick to get better compile errors.
@@ -37,23 +39,27 @@ template<typename T, typename U,
// revert to || as soon as not needed anymore.
(int(T::ColsAtCompileTime) == 1 && int(U::RowsAtCompileTime) == 1))
>
struct ei_dot_nocheck
struct dot_nocheck
{
static inline typename ei_traits<T>::Scalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{
return a.template binaryExpr<ei_scalar_conj_product_op<typename ei_traits<T>::Scalar> >(b).sum();
return a.template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
}
};
template<typename T, typename U>
struct ei_dot_nocheck<T, U, true>
struct dot_nocheck<T, U, true>
{
static inline typename ei_traits<T>::Scalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{
return a.transpose().template binaryExpr<ei_scalar_conj_product_op<typename ei_traits<T>::Scalar> >(b).sum();
return a.transpose().template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
}
};
} // end namespace internal
/** \returns the dot product of *this with other.
*
* \only_for_vectors
@@ -66,40 +72,72 @@ struct ei_dot_nocheck<T, U, true>
*/
template<typename Derived>
template<typename OtherDerived>
typename ei_traits<Derived>::Scalar
typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
EIGEN_STATIC_ASSERT((ei_is_same_type<Scalar, typename OtherDerived::Scalar>::ret),
typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
eigen_assert(size() == other.size());
return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);
}
#ifdef EIGEN2_SUPPORT
/** \returns the dot product of *this with other, with the Eigen2 convention that the dot product is linear in the first variable
* (conjugating the second variable). Of course this only makes a difference in the complex case.
*
* This method is only available in EIGEN2_SUPPORT mode.
*
* \only_for_vectors
*
* \sa dot()
*/
template<typename Derived>
template<typename OtherDerived>
typename internal::traits<Derived>::Scalar
MatrixBase<Derived>::eigen2_dot(const MatrixBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
ei_assert(size() == other.size());
eigen_assert(size() == other.size());
return ei_dot_nocheck<Derived,OtherDerived>::run(*this, other);
return internal::dot_nocheck<OtherDerived,Derived>::run(other,*this);
}
#endif
//---------- implementation of L2 norm and related functions ----------
/** \returns the squared \em l2 norm of *this, i.e., for vectors, the dot product of *this with itself.
/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the squared Frobenius norm.
* In both cases, it consists in the sum of the square of all the matrix entries.
* For vectors, this is also equal to the dot product of \c *this with itself.
*
* \sa dot(), norm()
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
{
return ei_real((*this).cwiseAbs2().sum());
return internal::real((*this).cwiseAbs2().sum());
}
/** \returns the \em l2 norm of *this, i.e., for vectors, the square root of the dot product of *this with itself.
/** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm.
* In both cases, it consists in the square root of the sum of the square of all the matrix entries.
* For vectors, this is also equal to the square root of the dot product of \c *this with itself.
*
* \sa dot(), squaredNorm()
*/
template<typename Derived>
inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
{
return ei_sqrt(squaredNorm());
return internal::sqrt(squaredNorm());
}
/** \returns an expression of the quotient of *this by its own norm.
@@ -112,8 +150,8 @@ template<typename Derived>
inline const typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::normalized() const
{
typedef typename ei_nested<Derived>::type Nested;
typedef typename ei_unref<Nested>::type _Nested;
typedef typename internal::nested<Derived>::type Nested;
typedef typename internal::remove_reference<Nested>::type _Nested;
_Nested n(derived());
return n / n.norm();
}
@@ -132,55 +170,59 @@ inline void MatrixBase<Derived>::normalize()
//---------- implementation of other norms ----------
namespace internal {
template<typename Derived, int p>
struct ei_lpNorm_selector
struct lpNorm_selector
{
typedef typename NumTraits<typename ei_traits<Derived>::Scalar>::Real RealScalar;
typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
inline static RealScalar run(const MatrixBase<Derived>& m)
{
return ei_pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
}
};
template<typename Derived>
struct ei_lpNorm_selector<Derived, 1>
struct lpNorm_selector<Derived, 1>
{
inline static typename NumTraits<typename ei_traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{
return m.cwiseAbs().sum();
}
};
template<typename Derived>
struct ei_lpNorm_selector<Derived, 2>
struct lpNorm_selector<Derived, 2>
{
inline static typename NumTraits<typename ei_traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{
return m.norm();
}
};
template<typename Derived>
struct ei_lpNorm_selector<Derived, Infinity>
struct lpNorm_selector<Derived, Infinity>
{
inline static typename NumTraits<typename ei_traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{
return m.cwiseAbs().maxCoeff();
}
};
} // end namespace internal
/** \returns the \f$ \ell^p \f$ norm of *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values
* of the coefficients of *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^p\infty \f$
* of the coefficients of *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$
* norm, that is the maximum of the absolute values of the coefficients of *this.
*
* \sa norm()
*/
template<typename Derived>
template<int p>
inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::lpNorm() const
{
return ei_lpNorm_selector<Derived, p>::run(*this);
return internal::lpNorm_selector<Derived, p>::run(*this);
}
//---------- implementation of isOrthogonal / isUnitary ----------
@@ -196,9 +238,9 @@ template<typename OtherDerived>
bool MatrixBase<Derived>::isOrthogonal
(const MatrixBase<OtherDerived>& other, RealScalar prec) const
{
typename ei_nested<Derived,2>::type nested(derived());
typename ei_nested<OtherDerived,2>::type otherNested(other.derived());
return ei_abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm();
typename internal::nested<Derived,2>::type nested(derived());
typename internal::nested<OtherDerived,2>::type otherNested(other.derived());
return internal::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm();
}
/** \returns true if *this is approximately a unitary matrix,
@@ -218,10 +260,10 @@ bool MatrixBase<Derived>::isUnitary(RealScalar prec) const
typename Derived::Nested nested(derived());
for(Index i = 0; i < cols(); ++i)
{
if(!ei_isApprox(nested.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
if(!internal::isApprox(nested.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
return false;
for(Index j = 0; j < i; ++j)
if(!ei_isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast<Scalar>(1), prec))
if(!internal::isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast<Scalar>(1), prec))
return false;
}
return true;

View File

@@ -39,10 +39,10 @@
*/
template<typename Derived> struct EigenBase
{
// typedef typename ei_plain_matrix_type<Derived>::type PlainObject;
// typedef typename internal::plain_matrix_type<Derived>::type PlainObject;
typedef typename ei_traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index;
typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename internal::traits<Derived>::Index Index;
/** \returns a reference to the derived object */
Derived& derived() { return *static_cast<Derived*>(this); }
@@ -51,6 +51,8 @@ template<typename Derived> struct EigenBase
inline Derived& const_cast_derived() const
{ return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
inline const Derived& const_derived() const
{ return *static_cast<const Derived*>(this); }
/** \returns the number of rows. \sa cols(), RowsAtCompileTime */
inline Index rows() const { return derived().rows(); }

View File

@@ -40,11 +40,14 @@
*
* \sa MatrixBase::flagged()
*/
namespace internal {
template<typename ExpressionType, unsigned int Added, unsigned int Removed>
struct ei_traits<Flagged<ExpressionType, Added, Removed> > : ei_traits<ExpressionType>
struct traits<Flagged<ExpressionType, Added, Removed> > : traits<ExpressionType>
{
enum { Flags = (ExpressionType::Flags | Added) & ~Removed };
};
}
template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged
: public MatrixBase<Flagged<ExpressionType, Added, Removed> >
@@ -52,9 +55,10 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
public:
typedef MatrixBase<Flagged> Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Flagged)
typedef typename ei_meta_if<ei_must_nest_by_value<ExpressionType>::ret,
ExpressionType, const ExpressionType&>::ret ExpressionTypeNested;
typedef typename internal::conditional<internal::must_nest_by_value<ExpressionType>::ret,
ExpressionType, const ExpressionType&>::type ExpressionTypeNested;
typedef typename ExpressionType::InnerIterator InnerIterator;
inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {}
@@ -64,21 +68,31 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
inline Index outerStride() const { return m_matrix.outerStride(); }
inline Index innerStride() const { return m_matrix.innerStride(); }
inline const Scalar coeff(Index row, Index col) const
inline CoeffReturnType coeff(Index row, Index col) const
{
return m_matrix.coeff(row, col);
}
inline CoeffReturnType coeff(Index index) const
{
return m_matrix.coeff(index);
}
inline const Scalar& coeffRef(Index row, Index col) const
{
return m_matrix.const_cast_derived().coeffRef(row, col);
}
inline const Scalar& coeffRef(Index index) const
{
return m_matrix.const_cast_derived().coeffRef(index);
}
inline Scalar& coeffRef(Index row, Index col)
{
return m_matrix.const_cast_derived().coeffRef(row, col);
}
inline const Scalar coeff(Index index) const
{
return m_matrix.coeff(index);
}
inline Scalar& coeffRef(Index index)
{
return m_matrix.const_cast_derived().coeffRef(index);

View File

@@ -37,16 +37,19 @@
*
* \sa MatrixBase::forceAlignedAccess()
*/
namespace internal {
template<typename ExpressionType>
struct ei_traits<ForceAlignedAccess<ExpressionType> > : public ei_traits<ExpressionType>
struct traits<ForceAlignedAccess<ExpressionType> > : public traits<ExpressionType>
{};
}
template<typename ExpressionType> class ForceAlignedAccess
: public ei_dense_xpr_base< ForceAlignedAccess<ExpressionType> >::type
: public internal::dense_xpr_base< ForceAlignedAccess<ExpressionType> >::type
{
public:
typedef typename ei_dense_xpr_base<ForceAlignedAccess>::type Base;
typedef typename internal::dense_xpr_base<ForceAlignedAccess>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess)
inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}
@@ -134,7 +137,7 @@ MatrixBase<Derived>::forceAlignedAccess()
*/
template<typename Derived>
template<bool Enable>
inline typename ei_makeconst<typename ei_meta_if<Enable,ForceAlignedAccess<Derived>,Derived&>::ret>::type
inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type
MatrixBase<Derived>::forceAlignedAccessIf() const
{
return derived();
@@ -145,7 +148,7 @@ MatrixBase<Derived>::forceAlignedAccessIf() const
*/
template<typename Derived>
template<bool Enable>
inline typename ei_meta_if<Enable,ForceAlignedAccess<Derived>,Derived&>::ret
inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type
MatrixBase<Derived>::forceAlignedAccessIf()
{
return derived();

File diff suppressed because it is too large Load Diff

View File

@@ -26,9 +26,68 @@
#ifndef EIGEN_FUZZY_H
#define EIGEN_FUZZY_H
// TODO support small integer types properly i.e. do exact compare on coeffs --- taking a HS norm is guaranteed to cause integer overflow.
namespace internal
{
/** \internal
 * Generic implementation of the fuzzy equality test, selected by default through the
 * \c is_integer template argument (taken from NumTraits<Scalar>::IsInteger).
 *
 * run() returns \c true when the sum of the squared absolute values of the coefficients of
 * (x - y) is at most \a prec * \a prec times the smaller of the two sums of squared absolute
 * values of \a x and \a y.
 */
template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
struct isApprox_selector
{
  static bool run(const Derived& x, const OtherDerived& y, typename Derived::RealScalar prec)
  {
    using std::min;

    // Each operand is read twice below, hence the nested<...,2> request.
    // NOTE(review): presumably this evaluates costly expressions into a temporary - confirm
    // against the definition of internal::nested.
    typedef typename internal::nested<Derived,2>::type LhsNested;
    typedef typename internal::nested<OtherDerived,2>::type RhsNested;
    const LhsNested lhs(x);
    const RhsNested rhs(y);

    // (min) is parenthesized, presumably so that a function-like min macro cannot expand here.
    return (lhs - rhs).cwiseAbs2().sum() <= prec * prec * (min)(lhs.cwiseAbs2().sum(), rhs.cwiseAbs2().sum());
  }
};
/** \internal
 * Specialization of isApprox_selector for \c is_integer == true.
 *
 * The precision argument is accepted but unused: run() returns the result of comparing
 * \a x and \a y with operator==, i.e. the comparison is exact rather than fuzzy.
 */
template<typename Derived, typename OtherDerived>
struct isApprox_selector<Derived, OtherDerived, true>
{
static bool run(const Derived& x, const OtherDerived& y, typename Derived::RealScalar)
{
// matrix() is applied to both operands before they are compared.
return x.matrix() == y.matrix();
}
};
/** \internal
 * Generic implementation of the "much smaller than another object" test, selected by default
 * through the \c is_integer template argument (taken from NumTraits<Scalar>::IsInteger).
 *
 * run() returns \c true when the sum of the squared absolute values of the coefficients of
 * \a lhs is at most abs2(\a prec) times the same sum computed on \a rhs. Both sides of the
 * inequality are squared quantities, so no square root is taken.
 */
template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
struct isMuchSmallerThan_object_selector
{
  typedef typename Derived::RealScalar RealScalar;

  static bool run(const Derived& lhs, const OtherDerived& rhs, RealScalar prec)
  {
    return lhs.cwiseAbs2().sum() <= abs2(prec) * rhs.cwiseAbs2().sum();
  }
};
/** \internal
 * Specialization of isMuchSmallerThan_object_selector for \c is_integer == true.
 *
 * Both the reference object and the precision are accepted but unused: run() returns the
 * result of comparing \a x with operator== against a zero object of the same dimensions.
 */
template<typename Derived, typename OtherDerived>
struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
{
static bool run(const Derived& x, const OtherDerived&, typename Derived::RealScalar)
{
// The zero object is built with the run-time dimensions of x.
return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
}
};
/** \internal
 * Generic implementation of the "much smaller than a scalar" test, selected by default
 * through the \c is_integer template argument (taken from NumTraits<Scalar>::IsInteger).
 *
 * run() returns \c true when the sum of the squared absolute values of the coefficients of
 * \a mat is at most abs2(\a prec * \a ref). Both sides of the inequality are squared
 * quantities, so no square root is taken.
 */
template<typename Derived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
struct isMuchSmallerThan_scalar_selector
{
  typedef typename Derived::RealScalar RealScalar;

  static bool run(const Derived& mat, const RealScalar& ref, RealScalar prec)
  {
    return mat.cwiseAbs2().sum() <= abs2(prec * ref);
  }
};
/** \internal
 * Specialization of isMuchSmallerThan_scalar_selector for \c is_integer == true.
 *
 * Both the reference scalar and the precision are accepted but unused: run() returns the
 * result of comparing \a x with operator== against a zero object of the same dimensions.
 */
template<typename Derived>
struct isMuchSmallerThan_scalar_selector<Derived, true>
{
static bool run(const Derived& x, const typename Derived::RealScalar&, typename Derived::RealScalar)
{
// The zero object is built with the run-time dimensions of x.
return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
}
};
} // end namespace internal
#ifndef EIGEN_LEGACY_COMPARES
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
* determined by \a prec.
@@ -42,10 +101,10 @@
* \note Because of the multiplicativeness of this comparison, one can't use this function
* to check whether \c *this is approximately equal to the zero matrix or vector.
* Indeed, \c isApprox(zero) returns false unless \c *this itself is exactly the zero matrix
* or vector. If you want to test whether \c *this is zero, use ei_isMuchSmallerThan(const
* or vector. If you want to test whether \c *this is zero, use internal::isMuchSmallerThan(const
* RealScalar&, RealScalar) instead.
*
* \sa ei_isMuchSmallerThan(const RealScalar&, RealScalar) const
* \sa internal::isMuchSmallerThan(const RealScalar&, RealScalar) const
*/
template<typename Derived>
template<typename OtherDerived>
@@ -54,12 +113,7 @@ bool DenseBase<Derived>::isApprox(
RealScalar prec
) const
{
const typename ei_nested<Derived,2>::type nested(derived());
const typename ei_nested<OtherDerived,2>::type otherNested(other.derived());
// std::cerr << typeid(Derived).name() << " => " << typeid(typename ei_nested<Derived,2>::type).name() << "\n";
// std::cerr << typeid(OtherDerived).name() << " => " << typeid(typename ei_nested<OtherDerived,2>::type).name() << "\n";
// return false;
return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * std::min(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
return internal::isApprox_selector<Derived, OtherDerived>::run(derived(), other.derived(), prec);
}
/** \returns \c true if the norm of \c *this is much smaller than \a other,
@@ -81,7 +135,7 @@ bool DenseBase<Derived>::isMuchSmallerThan(
RealScalar prec
) const
{
return derived().cwiseAbs2().sum() <= prec * prec * other * other;
return internal::isMuchSmallerThan_scalar_selector<Derived>::run(derived(), other, prec);
}
/** \returns \c true if the norm of \c *this is much smaller than the norm of \a other,
@@ -101,140 +155,7 @@ bool DenseBase<Derived>::isMuchSmallerThan(
RealScalar prec
) const
{
return derived().cwiseAbs2().sum() <= prec * prec * other.derived().cwiseAbs2().sum();
return internal::isMuchSmallerThan_object_selector<Derived, OtherDerived>::run(derived(), other.derived(), prec);
}
#else
template<typename Derived, typename OtherDerived=Derived, bool IsVector=Derived::IsVectorAtCompileTime>
struct ei_fuzzy_selector;
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
* determined by \a prec.
*
* \note The fuzzy compares are done multiplicatively. Two vectors \f$ v \f$ and \f$ w \f$
* are considered to be approximately equal within precision \f$ p \f$ if
* \f[ \Vert v - w \Vert \leqslant p\,\min(\Vert v\Vert, \Vert w\Vert). \f]
* For matrices, the comparison is done on all columns.
*
* \note Because of the multiplicativeness of this comparison, one can't use this function
* to check whether \c *this is approximately equal to the zero matrix or vector.
* Indeed, \c isApprox(zero) returns false unless \c *this itself is exactly the zero matrix
* or vector. If you want to test whether \c *this is zero, use ei_isMuchSmallerThan(const
* RealScalar&, RealScalar) instead.
*
* \sa ei_isMuchSmallerThan(const RealScalar&, RealScalar) const
*/
template<typename Derived>
template<typename OtherDerived>
bool DenseBase<Derived>::isApprox(
const DenseBase<OtherDerived>& other,
RealScalar prec
) const
{
return ei_fuzzy_selector<Derived,OtherDerived>::isApprox(derived(), other.derived(), prec);
}
/** \returns \c true if the norm of \c *this is much smaller than \a other,
* within the precision determined by \a prec.
*
* \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is
* considered to be much smaller than \f$ x \f$ within precision \f$ p \f$ if
* \f[ \Vert v \Vert \leqslant p\,\vert x\vert. \f]
* For matrices, the comparison is done on all columns.
*
* \sa isApprox(), isMuchSmallerThan(const DenseBase<OtherDerived>&, RealScalar) const
*/
template<typename Derived>
bool DenseBase<Derived>::isMuchSmallerThan(
const typename NumTraits<Scalar>::Real& other,
RealScalar prec
) const
{
return ei_fuzzy_selector<Derived>::isMuchSmallerThan(derived(), other, prec);
}
/** \returns \c true if the norm of \c *this is much smaller than the norm of \a other,
* within the precision determined by \a prec.
*
* \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is
* considered to be much smaller than a vector \f$ w \f$ within precision \f$ p \f$ if
* \f[ \Vert v \Vert \leqslant p\,\Vert w\Vert. \f]
* For matrices, the comparison is done on all columns.
*
* \sa isApprox(), isMuchSmallerThan(const RealScalar&, RealScalar) const
*/
template<typename Derived>
template<typename OtherDerived>
bool DenseBase<Derived>::isMuchSmallerThan(
const DenseBase<OtherDerived>& other,
RealScalar prec
) const
{
return ei_fuzzy_selector<Derived,OtherDerived>::isMuchSmallerThan(derived(), other.derived(), prec);
}
template<typename Derived, typename OtherDerived>
struct ei_fuzzy_selector<Derived,OtherDerived,true>
{
typedef typename Derived::RealScalar RealScalar;
static bool isApprox(const Derived& self, const OtherDerived& other, RealScalar prec)
{
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
ei_assert(self.size() == other.size());
return((self - other).squaredNorm() <= std::min(self.squaredNorm(), other.squaredNorm()) * prec * prec);
}
static bool isMuchSmallerThan(const Derived& self, const RealScalar& other, RealScalar prec)
{
return(self.squaredNorm() <= ei_abs2(other * prec));
}
static bool isMuchSmallerThan(const Derived& self, const OtherDerived& other, RealScalar prec)
{
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
ei_assert(self.size() == other.size());
return(self.squaredNorm() <= other.squaredNorm() * prec * prec);
}
};
/** \internal
  * Fuzzy-comparison kernels for the \c false specialization of ei_fuzzy_selector.
  * The comparison is carried out column by column and succeeds only if every
  * column passes; the two-operand overloads statically assert that both types
  * have the same matrix size.
  */
template<typename Derived, typename OtherDerived>
struct ei_fuzzy_selector<Derived,OtherDerived,false>
{
  typedef typename Derived::RealScalar RealScalar;
  typedef typename Derived::Index Index;

  /** \returns \c true if, for every column i,
    * |self.col(i) - other.col(i)|^2 <= min(|self.col(i)|^2, |other.col(i)|^2) * prec^2.
    * \a self and \a other must have the same dimensions (checked with ei_assert). */
  static bool isApprox(const Derived& self, const OtherDerived& other, RealScalar prec)
  {
    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
    ei_assert(self.rows() == other.rows() && self.cols() == other.cols());
    // Columns are read through the Nested types rather than the raw expressions
    // (presumably so that a costly expression is not re-evaluated per column -- TODO confirm).
    typename Derived::Nested nested(self);
    typename OtherDerived::Nested otherNested(other);
    for(Index i = 0; i < self.cols(); ++i)
    {
      // (std::min) is parenthesized so that a function-like min() macro
      // (e.g. the one defined by <windows.h>) cannot expand at this call site.
      if((nested.col(i) - otherNested.col(i)).squaredNorm()
          > (std::min)(nested.col(i).squaredNorm(), otherNested.col(i).squaredNorm()) * prec * prec)
        return false;
    }
    return true;
  }

  /** \returns \c true if every column c of \a self satisfies |c|^2 <= |other * prec|^2,
    * \a other being a real scalar. */
  static bool isMuchSmallerThan(const Derived& self, const RealScalar& other, RealScalar prec)
  {
    typename Derived::Nested nested(self);
    // The bound does not depend on the column, so compute it once outside the loop.
    const RealScalar threshold = ei_abs2(other * prec);
    for(Index i = 0; i < self.cols(); ++i)
    {
      if(nested.col(i).squaredNorm() > threshold)
        return false;
    }
    return true;
  }

  /** \returns \c true if, for every column i, |self.col(i)|^2 <= |other.col(i)|^2 * prec^2.
    * \a self and \a other must have the same dimensions (checked with ei_assert). */
  static bool isMuchSmallerThan(const Derived& self, const OtherDerived& other, RealScalar prec)
  {
    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
    ei_assert(self.rows() == other.rows() && self.cols() == other.cols());
    typename Derived::Nested nested(self);
    typename OtherDerived::Nested otherNested(other);
    for(Index i = 0; i < self.cols(); ++i)
    {
      if(nested.col(i).squaredNorm() > otherNested.col(i).squaredNorm() * prec * prec)
        return false;
    }
    return true;
  }
};
#endif
#endif // EIGEN_FUZZY_H

View File

@@ -26,6 +26,8 @@
#ifndef EIGEN_GENERIC_PACKET_MATH_H
#define EIGEN_GENERIC_PACKET_MATH_H
namespace internal {
/** \internal
* \file GenericPacketMath.h
*
@@ -50,7 +52,7 @@
#define EIGEN_DEBUG_UNALIGNED_STORE
#endif
struct ei_default_packet_traits
struct default_packet_traits
{
enum {
HasAdd = 1,
@@ -79,7 +81,7 @@ struct ei_default_packet_traits
};
};
template<typename T> struct ei_packet_traits : ei_default_packet_traits
template<typename T> struct packet_traits : default_packet_traits
{
typedef T type;
enum {
@@ -103,92 +105,92 @@ template<typename T> struct ei_packet_traits : ei_default_packet_traits
/** \internal \returns a + b (coeff-wise) */
template<typename Packet> inline Packet
ei_padd(const Packet& a,
padd(const Packet& a,
const Packet& b) { return a+b; }
/** \internal \returns a - b (coeff-wise) */
template<typename Packet> inline Packet
ei_psub(const Packet& a,
psub(const Packet& a,
const Packet& b) { return a-b; }
/** \internal \returns -a (coeff-wise) */
template<typename Packet> inline Packet
ei_pnegate(const Packet& a) { return -a; }
pnegate(const Packet& a) { return -a; }
/** \internal \returns conj(a) (coeff-wise) */
template<typename Packet> inline Packet
ei_pconj(const Packet& a) { return ei_conj(a); }
pconj(const Packet& a) { return conj(a); }
/** \internal \returns a * b (coeff-wise) */
template<typename Packet> inline Packet
ei_pmul(const Packet& a,
pmul(const Packet& a,
const Packet& b) { return a*b; }
/** \internal \returns a / b (coeff-wise) */
template<typename Packet> inline Packet
ei_pdiv(const Packet& a,
pdiv(const Packet& a,
const Packet& b) { return a/b; }
/** \internal \returns the min of \a a and \a b (coeff-wise) */
template<typename Packet> inline Packet
ei_pmin(const Packet& a,
const Packet& b) { return std::min(a, b); }
pmin(const Packet& a,
const Packet& b) { using std::min; return (min)(a, b); }
/** \internal \returns the max of \a a and \a b (coeff-wise) */
template<typename Packet> inline Packet
ei_pmax(const Packet& a,
const Packet& b) { return std::max(a, b); }
pmax(const Packet& a,
const Packet& b) { using std::max; return (max)(a, b); }
/** \internal \returns the absolute value of \a a */
template<typename Packet> inline Packet
ei_pabs(const Packet& a) { return ei_abs(a); }
pabs(const Packet& a) { return abs(a); }
/** \internal \returns the bitwise and of \a a and \a b */
template<typename Packet> inline Packet
ei_pand(const Packet& a, const Packet& b) { return a & b; }
pand(const Packet& a, const Packet& b) { return a & b; }
/** \internal \returns the bitwise or of \a a and \a b */
template<typename Packet> inline Packet
ei_por(const Packet& a, const Packet& b) { return a | b; }
por(const Packet& a, const Packet& b) { return a | b; }
/** \internal \returns the bitwise xor of \a a and \a b */
template<typename Packet> inline Packet
ei_pxor(const Packet& a, const Packet& b) { return a ^ b; }
pxor(const Packet& a, const Packet& b) { return a ^ b; }
/** \internal \returns the bitwise andnot of \a a and \a b */
template<typename Packet> inline Packet
ei_pandnot(const Packet& a, const Packet& b) { return a & (!b); }
pandnot(const Packet& a, const Packet& b) { return a & (!b); }
/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
template<typename Packet> inline Packet
ei_pload(const typename ei_unpacket_traits<Packet>::type* from) { return *from; }
pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
/** \internal \returns a packet version of \a *from, (un-aligned load) */
template<typename Packet> inline Packet
ei_ploadu(const typename ei_unpacket_traits<Packet>::type* from) { return *from; }
ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
/** \internal \returns a packet with elements of \a *from duplicated, e.g.: (from[0],from[0],from[1],from[1]) */
template<typename Packet> inline Packet
ei_ploaddup(const typename ei_unpacket_traits<Packet>::type* from) { return *from; }
ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
template<typename Packet> inline Packet
ei_pset1(const typename ei_unpacket_traits<Packet>::type& a) { return a; }
pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type
ei_plset(const Scalar& a) { return a; }
template<typename Scalar> inline typename packet_traits<Scalar>::type
plset(const Scalar& a) { return a; }
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
template<typename Scalar, typename Packet> inline void ei_pstore(Scalar* to, const Packet& from)
template<typename Scalar, typename Packet> inline void pstore(Scalar* to, const Packet& from)
{ (*to) = from; }
/** \internal copy the packet \a from to \a *to, (un-aligned store) */
template<typename Scalar, typename Packet> inline void ei_pstoreu(Scalar* to, const Packet& from)
template<typename Scalar, typename Packet> inline void pstoreu(Scalar* to, const Packet& from)
{ (*to) = from; }
/** \internal tries to do cache prefetching of \a addr */
template<typename Scalar> inline void ei_prefetch(const Scalar* addr)
template<typename Scalar> inline void prefetch(const Scalar* addr)
{
#if !defined(_MSC_VER)
__builtin_prefetch(addr);
@@ -196,93 +198,118 @@ __builtin_prefetch(addr);
}
/** \internal \returns the first element of a packet */
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_pfirst(const Packet& a)
template<typename Packet> inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
{ return a; }
/** \internal \returns a packet where the element i contains the sum of the packet of \a vecs[i] */
template<typename Packet> inline Packet
ei_preduxp(const Packet* vecs) { return vecs[0]; }
preduxp(const Packet* vecs) { return vecs[0]; }
/** \internal \returns the sum of the elements of \a a*/
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux(const Packet& a)
template<typename Packet> inline typename unpacket_traits<Packet>::type predux(const Packet& a)
{ return a; }
/** \internal \returns the product of the elements of \a a*/
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_mul(const Packet& a)
template<typename Packet> inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
{ return a; }
/** \internal \returns the min of the elements of \a a*/
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_min(const Packet& a)
template<typename Packet> inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
{ return a; }
/** \internal \returns the max of the elements of \a a*/
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_max(const Packet& a)
template<typename Packet> inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
{ return a; }
/** \internal \returns the reversed elements of \a a*/
template<typename Packet> inline Packet ei_preverse(const Packet& a)
template<typename Packet> inline Packet preverse(const Packet& a)
{ return a; }
/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
template<typename Packet> inline Packet pcplxflip(const Packet& a)
{ return Packet(imag(a),real(a)); }
/**************************
* Special math functions
***************************/
/** \internal \returns the sin of \a a (coeff-wise) */
/** \internal \returns the sine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ei_psin(const Packet& a) { return ei_sin(a); }
Packet psin(const Packet& a) { return sin(a); }
/** \internal \returns the cos of \a a (coeff-wise) */
/** \internal \returns the cosine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ei_pcos(const Packet& a) { return ei_cos(a); }
Packet pcos(const Packet& a) { return cos(a); }
/** \internal \returns the tan of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ptan(const Packet& a) { return tan(a); }
/** \internal \returns the arc sine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pasin(const Packet& a) { return asin(a); }
/** \internal \returns the arc cosine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pacos(const Packet& a) { return acos(a); }
/** \internal \returns the exp of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ei_pexp(const Packet& a) { return ei_exp(a); }
Packet pexp(const Packet& a) { return exp(a); }
/** \internal \returns the log of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ei_plog(const Packet& a) { return ei_log(a); }
Packet plog(const Packet& a) { return log(a); }
/** \internal \returns the square-root of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ei_psqrt(const Packet& a) { return ei_sqrt(a); }
Packet psqrt(const Packet& a) { return sqrt(a); }
/***************************************************************************
* The following functions might not have to be overwritten for vectorized types
***************************************************************************/
/** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
// NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
template<typename Packet>
inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
{
pstore(to, pset1<Packet>(a));
}
/** \internal \returns a * b + c (coeff-wise) */
template<typename Packet> inline Packet
ei_pmadd(const Packet& a,
pmadd(const Packet& a,
const Packet& b,
const Packet& c)
{ return ei_padd(ei_pmul(a, b),c); }
{ return padd(pmul(a, b),c); }
/** \internal \returns a packet version of \a *from.
* \If LoadMode equals Aligned, \a from must be 16 bytes aligned */
* If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
template<typename Packet, int LoadMode>
inline Packet ei_ploadt(const typename ei_unpacket_traits<Packet>::type* from)
inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
{
if(LoadMode == Aligned)
return ei_pload<Packet>(from);
return pload<Packet>(from);
else
return ei_ploadu<Packet>(from);
return ploadu<Packet>(from);
}
/** \internal copy the packet \a from to \a *to.
* If StoreMode equals Aligned, \a to must be 16 bytes aligned */
* If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
template<typename Scalar, typename Packet, int LoadMode>
inline void ei_pstoret(Scalar* to, const Packet& from)
inline void pstoret(Scalar* to, const Packet& from)
{
if(LoadMode == Aligned)
ei_pstore(to, from);
pstore(to, from);
else
ei_pstoreu(to, from);
pstoreu(to, from);
}
/** \internal default implementation of ei_palign() allowing partial specialization */
/** \internal default implementation of palign() allowing partial specialization */
template<int Offset,typename PacketType>
struct ei_palign_impl
struct palign_impl
{
// by default data are aligned, so there is nothing to be done :)
inline static void run(PacketType&, const PacketType&) {}
@@ -291,20 +318,22 @@ struct ei_palign_impl
/** \internal update \a first using the concatenation of the \a Offset last elements
* of \a first and packet_size minus \a Offset first elements of \a second */
template<int Offset,typename PacketType>
inline void ei_palign(PacketType& first, const PacketType& second)
inline void palign(PacketType& first, const PacketType& second)
{
ei_palign_impl<Offset,PacketType>::run(first,second);
palign_impl<Offset,PacketType>::run(first,second);
}
/***************************************************************************
* Fast complex products (GCC generates a function call which is very slow)
***************************************************************************/
template<> inline std::complex<float> ei_pmul(const std::complex<float>& a, const std::complex<float>& b)
{ return std::complex<float>(ei_real(a)*ei_real(b) - ei_imag(a)*ei_imag(b), ei_imag(a)*ei_real(b) + ei_real(a)*ei_imag(b)); }
template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
{ return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
template<> inline std::complex<double> ei_pmul(const std::complex<double>& a, const std::complex<double>& b)
{ return std::complex<double>(ei_real(a)*ei_real(b) - ei_imag(a)*ei_imag(b), ei_imag(a)*ei_real(b) + ei_real(a)*ei_imag(b)); }
template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
{ return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
} // end namespace internal
#endif // EIGEN_GENERIC_PACKET_MATH_H

View File

@@ -28,7 +28,7 @@
#define EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(NAME,FUNCTOR) \
template<typename Derived> \
inline const Eigen::CwiseUnaryOp<Eigen::FUNCTOR<typename Derived::Scalar>, Derived> \
inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
NAME(const Eigen::ArrayBase<Derived>& x) { \
return x.derived(); \
}
@@ -38,7 +38,7 @@
template<typename Derived> \
struct NAME##_retval<ArrayBase<Derived> > \
{ \
typedef const Eigen::CwiseUnaryOp<Eigen::FUNCTOR<typename Derived::Scalar>, Derived> type; \
typedef const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> type; \
}; \
template<typename Derived> \
struct NAME##_impl<ArrayBase<Derived> > \
@@ -52,17 +52,20 @@
namespace std
{
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(real,ei_scalar_real_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(imag,ei_scalar_imag_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(sin,ei_scalar_sin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(cos,ei_scalar_cos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(exp,ei_scalar_exp_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(log,ei_scalar_log_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(abs,ei_scalar_abs_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(sqrt,ei_scalar_sqrt_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(real,scalar_real_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(imag,scalar_imag_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(sin,scalar_sin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(cos,scalar_cos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(asin,scalar_asin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(acos,scalar_acos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(tan,scalar_tan_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(exp,scalar_exp_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(log,scalar_log_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(abs,scalar_abs_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(sqrt,scalar_sqrt_op)
template<typename Derived>
inline const Eigen::CwiseUnaryOp<Eigen::ei_scalar_pow_op<typename Derived::Scalar>, Derived>
inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar>, const Derived>
pow(const Eigen::ArrayBase<Derived>& x, const typename Derived::Scalar& exponent) { \
return x.derived().pow(exponent); \
}
@@ -70,17 +73,23 @@ namespace std
namespace Eigen
{
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_real,ei_scalar_real_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_imag,ei_scalar_imag_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_sin,ei_scalar_sin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_cos,ei_scalar_cos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_exp,ei_scalar_exp_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_log,ei_scalar_log_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_abs,ei_scalar_abs_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_abs2,ei_scalar_abs2_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_sqrt,ei_scalar_sqrt_op)
namespace internal
{
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(imag,scalar_imag_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(sin,scalar_sin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(cos,scalar_cos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(asin,scalar_asin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(acos,scalar_acos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(tan,scalar_tan_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(exp,scalar_exp_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(log,scalar_log_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(abs,scalar_abs_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(abs2,scalar_abs2_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(sqrt,scalar_sqrt_op)
}
}
// TODO: cleanly disable those functions that are not supported on Array (ei_real_ref, ei_random, ei_isApprox...)
// TODO: cleanly disable those functions that are not supported on Array (internal::real_ref, internal::random, internal::isApprox...)
#endif // EIGEN_GLOBAL_FUNCTIONS_H

View File

@@ -30,6 +30,11 @@ enum { DontAlignCols = 1 };
enum { StreamPrecision = -1,
FullPrecision = -2 };
namespace internal {
template<typename Derived>
std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt);
}
/** \class IOFormat
* \ingroup Core_Module
*
@@ -106,7 +111,7 @@ class WithFormat
friend std::ostream & operator << (std::ostream & s, const WithFormat& wf)
{
return ei_print_matrix(s, wf.m_matrix.eval(), wf.m_format);
return internal::print_matrix(s, wf.m_matrix.eval(), wf.m_format);
}
protected:
@@ -128,18 +133,21 @@ DenseBase<Derived>::format(const IOFormat& fmt) const
return WithFormat<Derived>(derived(), fmt);
}
namespace internal {
template<typename Scalar, bool IsInteger>
struct ei_significant_decimals_default_impl
struct significant_decimals_default_impl
{
typedef typename NumTraits<Scalar>::Real RealScalar;
static inline int run()
{
return ei_cast<RealScalar,int>(std::ceil(-ei_log(NumTraits<RealScalar>::epsilon())/ei_log(RealScalar(10))));
using std::ceil;
return cast<RealScalar,int>(ceil(-log(NumTraits<RealScalar>::epsilon())/log(RealScalar(10))));
}
};
template<typename Scalar>
struct ei_significant_decimals_default_impl<Scalar, true>
struct significant_decimals_default_impl<Scalar, true>
{
static inline int run()
{
@@ -148,14 +156,14 @@ struct ei_significant_decimals_default_impl<Scalar, true>
};
template<typename Scalar>
struct ei_significant_decimals_impl
: ei_significant_decimals_default_impl<Scalar, NumTraits<Scalar>::IsInteger>
struct significant_decimals_impl
: significant_decimals_default_impl<Scalar, NumTraits<Scalar>::IsInteger>
{};
/** \internal
* print the matrix \a _m to the output stream \a s using the output format \a fmt */
template<typename Derived>
std::ostream & ei_print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt)
std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt)
{
if(_m.size() == 0)
{
@@ -182,7 +190,7 @@ std::ostream & ei_print_matrix(std::ostream & s, const Derived& _m, const IOForm
}
else
{
explicit_precision = ei_significant_decimals_impl<Scalar>::run();
explicit_precision = significant_decimals_impl<Scalar>::run();
}
}
else
@@ -228,6 +236,8 @@ std::ostream & ei_print_matrix(std::ostream & s, const Derived& _m, const IOForm
return s;
}
} // end namespace internal
/** \relates DenseBase
*
* Outputs the matrix, to the given stream.
@@ -244,7 +254,7 @@ std::ostream & operator <<
(std::ostream & s,
const DenseBase<Derived> & m)
{
return ei_print_matrix(s, m.eval(), EIGEN_DEFAULT_IO_FORMAT);
return internal::print_matrix(s, m.eval(), EIGEN_DEFAULT_IO_FORMAT);
}
#endif // EIGEN_IO_H

View File

@@ -31,10 +31,10 @@
*
* \brief A matrix or vector expression mapping an existing array of data.
*
* \param PlainObjectType the equivalent matrix type of the mapped data
* \param MapOptions specifies whether the pointer is \c Aligned, or \c Unaligned.
* The default is \c Unaligned.
* \param StrideType optionnally specifies strides. By default, Map assumes the memory layout
* \tparam PlainObjectType the equivalent matrix type of the mapped data
* \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned.
* The default is \c #Unaligned.
* \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout
* of an ordinary, contiguous array. This can be overridden by specifying strides.
* The type passed here must be a specialization of the Stride template, see examples below.
*
@@ -44,7 +44,7 @@
* data is laid out contiguously in memory. You can however override this by explicitly specifying
* inner and outer strides.
*
* Here's an example of simply mapping a contiguous array as a column-major matrix:
* Here's an example of simply mapping a contiguous array as a \ref TopicStorageOrders "column-major" matrix:
* \include Map_simple.cpp
* Output: \verbinclude Map_simple.out
*
@@ -72,14 +72,17 @@
* Example: \include Map_placement_new.cpp
* Output: \verbinclude Map_placement_new.out
*
* This class is the return type of Matrix::Map() but can also be used directly.
* This class is the return type of PlainObjectBase::Map() but can also be used directly.
*
* \sa Matrix::Map()
* \sa PlainObjectBase::Map(), \ref TopicStorageOrders
*/
namespace internal {
template<typename PlainObjectType, int MapOptions, typename StrideType>
struct ei_traits<Map<PlainObjectType, MapOptions, StrideType> >
: public ei_traits<PlainObjectType>
struct traits<Map<PlainObjectType, MapOptions, StrideType> >
: public traits<PlainObjectType>
{
typedef traits<PlainObjectType> TraitsBase;
typedef typename PlainObjectType::Index Index;
typedef typename PlainObjectType::Scalar Scalar;
enum {
@@ -92,21 +95,24 @@ struct ei_traits<Map<PlainObjectType, MapOptions, StrideType> >
HasNoInnerStride = InnerStrideAtCompileTime == 1,
HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
HasNoStride = HasNoInnerStride && HasNoOuterStride,
IsAligned = int(int(MapOptions)&Aligned)==Aligned,
IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
KeepsPacketAccess = bool(HasNoInnerStride)
&& ( bool(IsDynamicSize)
|| HasNoOuterStride
|| ( OuterStrideAtCompileTime!=Dynamic
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ),
Flags0 = ei_traits<PlainObjectType>::Flags,
Flags0 = TraitsBase::Flags & (~NestByRefBit),
Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
Flags2 = HasNoStride ? int(Flags1) : int(Flags1 & ~LinearAccessBit),
Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit)
Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
? int(Flags1) : int(Flags1 & ~LinearAccessBit),
Flags3 = is_lvalue<PlainObjectType>::value ? int(Flags2) : (int(Flags2) & ~LvalueBit),
Flags = KeepsPacketAccess ? int(Flags3) : (int(Flags3) & ~PacketAccessBit)
};
private:
enum { Options }; // Expressions don't support Options
enum { Options }; // Expressions don't have Options
};
}
template<typename PlainObjectType, int MapOptions, typename StrideType> class Map
: public MapBase<Map<PlainObjectType, MapOptions, StrideType> >
@@ -114,9 +120,17 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
public:
typedef MapBase<Map> Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Map)
typedef typename Base::PointerType PointerType;
#if EIGEN2_SUPPORT_STAGE <= STAGE30_FULL_EIGEN3_API
typedef const Scalar* PointerArgType;
inline PointerType cast_to_pointer_type(PointerArgType ptr) { return const_cast<PointerType>(ptr); }
#else
typedef PointerType PointerArgType;
inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
#endif
inline Index innerStride() const
{
return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
@@ -135,8 +149,8 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
* \param data pointer to the array to map
* \param stride optional Stride object, passing the strides.
*/
inline Map(const Scalar* data, const StrideType& stride = StrideType())
: Base(data), m_stride(stride)
inline Map(PointerArgType data, const StrideType& stride = StrideType())
: Base(cast_to_pointer_type(data)), m_stride(stride)
{
PlainObjectType::Base::_check_template_params();
}
@@ -147,8 +161,8 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
* \param size the size of the vector expression
* \param stride optional Stride object, passing the strides.
*/
inline Map(const Scalar* data, Index size, const StrideType& stride = StrideType())
: Base(data, size), m_stride(stride)
inline Map(PointerArgType data, Index size, const StrideType& stride = StrideType())
: Base(cast_to_pointer_type(data), size), m_stride(stride)
{
PlainObjectType::Base::_check_template_params();
}
@@ -160,24 +174,30 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
* \param cols the number of columns of the matrix expression
* \param stride optional Stride object, passing the strides.
*/
inline Map(const Scalar* data, Index rows, Index cols, const StrideType& stride = StrideType())
: Base(data, rows, cols), m_stride(stride)
inline Map(PointerArgType data, Index rows, Index cols, const StrideType& stride = StrideType())
: Base(cast_to_pointer_type(data), rows, cols), m_stride(stride)
{
PlainObjectType::Base::_check_template_params();
}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
protected:
StrideType m_stride;
};
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
inline Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>
::Array(const Scalar *data)
{
this->_set_noalias(Eigen::Map<const Array>(data));
}
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
inline Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>
::Matrix(const Scalar *data)
{
_set_noalias(Eigen::Map<Matrix>(data));
this->_set_noalias(Eigen::Map<const Matrix>(data));
}
#endif // EIGEN_MAP_H

View File

@@ -26,6 +26,11 @@
#ifndef EIGEN_MAPBASE_H
#define EIGEN_MAPBASE_H
#define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \
EIGEN_STATIC_ASSERT((int(internal::traits<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT)
/** \class MapBase
* \ingroup Core_Module
*
@@ -33,24 +38,28 @@
*
* \sa class Map, class Block
*/
template<typename Derived> class MapBase
: public ei_dense_xpr_base<Derived>::type
template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
: public internal::dense_xpr_base<Derived>::type
{
public:
typedef typename ei_dense_xpr_base<Derived>::type Base;
typedef typename internal::dense_xpr_base<Derived>::type Base;
enum {
RowsAtCompileTime = ei_traits<Derived>::RowsAtCompileTime,
ColsAtCompileTime = ei_traits<Derived>::ColsAtCompileTime,
RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
SizeAtCompileTime = Base::SizeAtCompileTime
};
typedef typename ei_traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index;
typedef typename ei_traits<Derived>::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename internal::traits<Derived>::Index Index;
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef typename internal::conditional<
bool(internal::is_lvalue<Derived>::value),
Scalar *,
const Scalar *>::type
PointerType;
using Base::derived;
// using Base::RowsAtCompileTime;
@@ -63,10 +72,6 @@ template<typename Derived> class MapBase
using Base::Flags;
using Base::IsRowMajor;
using Base::CoeffReadCost;
// using Base::derived;
using Base::const_cast_derived;
using Base::rows;
using Base::cols;
using Base::size;
@@ -74,17 +79,14 @@ template<typename Derived> class MapBase
using Base::coeffRef;
using Base::lazyAssign;
using Base::eval;
// using Base::operator=;
using Base::operator+=;
using Base::operator-=;
using Base::operator*=;
using Base::operator/=;
using Base::innerStride;
using Base::outerStride;
using Base::rowStride;
using Base::colStride;
// bug 217 - compile error on ICC 11.1
using Base::operator=;
typedef typename Base::CoeffReturnType CoeffReturnType;
@@ -104,98 +106,150 @@ template<typename Derived> class MapBase
return m_data[col * colStride() + row * rowStride()];
}
inline Scalar& coeffRef(Index row, Index col)
{
return const_cast<Scalar*>(m_data)[col * colStride() + row * rowStride()];
}
inline const Scalar& coeff(Index index) const
{
ei_assert(Derived::IsVectorAtCompileTime || (ei_traits<Derived>::Flags & LinearAccessBit));
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
return m_data[index * innerStride()];
}
inline Scalar& coeffRef(Index index)
inline const Scalar& coeffRef(Index row, Index col) const
{
ei_assert(Derived::IsVectorAtCompileTime || (ei_traits<Derived>::Flags & LinearAccessBit));
return const_cast<Scalar*>(m_data)[index * innerStride()];
return this->m_data[col * colStride() + row * rowStride()];
}
inline const Scalar& coeffRef(Index index) const
{
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
return this->m_data[index * innerStride()];
}
template<int LoadMode>
inline PacketScalar packet(Index row, Index col) const
{
return ei_ploadt<PacketScalar, LoadMode>
return internal::ploadt<PacketScalar, LoadMode>
(m_data + (col * colStride() + row * rowStride()));
}
template<int LoadMode>
inline PacketScalar packet(Index index) const
{
return ei_ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
}
template<int StoreMode>
inline void writePacket(Index row, Index col, const PacketScalar& x)
{
ei_pstoret<Scalar, PacketScalar, StoreMode>
(const_cast<Scalar*>(m_data) + (col * colStride() + row * rowStride()), x);
}
template<int StoreMode>
inline void writePacket(Index index, const PacketScalar& x)
{
ei_pstoret<Scalar, PacketScalar, StoreMode>
(const_cast<Scalar*>(m_data) + index * innerStride(), x);
}
inline MapBase(const Scalar* data) : m_data(data), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
inline MapBase(PointerType data) : m_data(data), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
{
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
checkSanity();
}
inline MapBase(const Scalar* data, Index size)
inline MapBase(PointerType data, Index size)
: m_data(data),
m_rows(RowsAtCompileTime == Dynamic ? size : Index(RowsAtCompileTime)),
m_cols(ColsAtCompileTime == Dynamic ? size : Index(ColsAtCompileTime))
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
ei_assert(size >= 0);
ei_assert(data == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == size);
eigen_assert(size >= 0);
eigen_assert(data == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == size);
checkSanity();
}
inline MapBase(const Scalar* data, Index rows, Index cols)
inline MapBase(PointerType data, Index rows, Index cols)
: m_data(data), m_rows(rows), m_cols(cols)
{
ei_assert( (data == 0)
eigen_assert( (data == 0)
|| ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
checkSanity();
}
Derived& operator=(const MapBase& other)
{
Base::operator=(other);
return derived();
}
using Base::operator=;
protected:
void checkSanity() const
{
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(ei_traits<Derived>::Flags&PacketAccessBit,
ei_inner_stride_at_compile_time<Derived>::ret==1),
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
internal::inner_stride_at_compile_time<Derived>::ret==1),
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
ei_assert(EIGEN_IMPLIES(ei_traits<Derived>::Flags&AlignedBit, (size_t(m_data) % (sizeof(Scalar)*ei_packet_traits<Scalar>::size)) == 0)
&& "data is not aligned");
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0)
&& "data is not aligned");
}
const Scalar* EIGEN_RESTRICT m_data;
const ei_variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
const ei_variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
PointerType m_data;
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
};
/** \internal
* Specialization of MapBase for the WriteAccessors level. It derives from the
* ReadOnlyAccessors specialization and adds the mutating members declared below:
* a non-const data(), coeffRef() overloads, writePacket(), and assignment operators.
*/
template<typename Derived> class MapBase<Derived, WriteAccessors>
: public MapBase<Derived, ReadOnlyAccessors>
{
public:
// The read-only specialization this class extends.
typedef MapBase<Derived, ReadOnlyAccessors> Base;
typedef typename Base::Scalar Scalar;
typedef typename Base::PacketScalar PacketScalar;
typedef typename Base::Index Index;
typedef typename Base::PointerType PointerType;
// Re-export base-class members; for coeffRef this keeps the base overloads
// visible next to the overloads declared in this class.
using Base::derived;
using Base::rows;
using Base::cols;
using Base::size;
using Base::coeff;
using Base::coeffRef;
using Base::innerStride;
using Base::outerStride;
using Base::rowStride;
using Base::colStride;
// Scalar when internal::is_lvalue<Derived>::value is true, const Scalar otherwise.
// Used as the pointee/referee type of the non-const accessors below.
typedef typename internal::conditional<
internal::is_lvalue<Derived>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
/** \returns the stored data pointer as a pointer to const. */
inline const Scalar* data() const { return this->m_data; }
/** \returns the stored data pointer; the pointee is const unless Derived is an lvalue type. */
inline ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error
/** \returns a reference to the coefficient at offset col*colStride() + row*rowStride() from the data pointer. */
inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)
{
return this->m_data[col * colStride() + row * rowStride()];
}
/** \returns a reference to the coefficient at offset index*innerStride() from the data pointer.
* Guarded by EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived).
*/
inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
{
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
return this->m_data[index * innerStride()];
}
/** Stores the packet \a x with internal::pstoret, using \a StoreMode, at the address of
* coefficient (\a row, \a col), i.e. data pointer + col*colStride() + row*rowStride().
*/
template<int StoreMode>
inline void writePacket(Index row, Index col, const PacketScalar& x)
{
internal::pstoret<Scalar, PacketScalar, StoreMode>
(this->m_data + (col * colStride() + row * rowStride()), x);
}
/** Stores the packet \a x with internal::pstoret, using \a StoreMode, at
* data pointer + index*innerStride(). Guarded by EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived).
*/
template<int StoreMode>
inline void writePacket(Index index, const PacketScalar& x)
{
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
internal::pstoret<Scalar, PacketScalar, StoreMode>
(this->m_data + index * innerStride(), x);
}
// The three constructors only forward their arguments to the read-only base.
explicit inline MapBase(PointerType data) : Base(data) {}
inline MapBase(PointerType data, Index size) : Base(data, size) {}
inline MapBase(PointerType data, Index rows, Index cols) : Base(data, rows, cols) {}
/** Copy assignment: delegates to operator= of Base::Base (the type named Base inside the
* read-only specialization) and returns the derived object.
*/
Derived& operator=(const MapBase& other)
{
Base::Base::operator=(other);
return derived();
}
// Make the remaining operator= overloads of Base::Base available on this class as well.
using Base::Base::operator=;
};
#endif // EIGEN_MAPBASE_H

File diff suppressed because it is too large Load Diff

View File

@@ -43,9 +43,9 @@
* \tparam _Cols Number of columns, or \b Dynamic
*
* The remaining template parameters are optional -- in most cases you don't have to worry about them.
* \tparam _Options \anchor matrix_tparam_options A combination of either \b RowMajor or \b ColMajor, and of either
* \b AutoAlign or \b DontAlign.
* The former controls storage order, and defaults to column-major. The latter controls alignment, which is required
* \tparam _Options \anchor matrix_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either
* \b #AutoAlign or \b #DontAlign.
* The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required
* for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size.
* \tparam _MaxRows Maximum number of rows. Defaults to \a _Rows (\ref maxrows "note").
* \tparam _MaxCols Maximum number of columns. Defaults to \a _Cols (\ref maxrows "note").
@@ -79,6 +79,9 @@
* m(0, 3) = 3;
* \endcode
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN.
*
* <i><b>Some notes:</b></i>
*
* <dl>
@@ -107,10 +110,13 @@
* are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.</dd>
* </dl>
*
* \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy
* \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
* \ref TopicStorageOrders
*/
namespace internal {
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{
typedef _Scalar Scalar;
typedef Dense StorageKind;
@@ -121,24 +127,25 @@ struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
ColsAtCompileTime = _Cols,
MaxRowsAtCompileTime = _MaxRows,
MaxColsAtCompileTime = _MaxCols,
Flags = ei_compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
CoeffReadCost = NumTraits<Scalar>::ReadCost,
Options = _Options,
InnerStrideAtCompileTime = 1,
OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime
};
};
}
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
class Matrix
: public DenseStorageBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
: public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{
public:
/** \brief Base class typedef.
* \sa DenseStorageBase
* \sa PlainObjectBase
*/
typedef DenseStorageBase<Matrix> Base;
typedef PlainObjectBase<Matrix> Base;
enum { Options = _Options };
@@ -146,10 +153,6 @@ class Matrix
typedef typename Base::PlainObject PlainObject;
enum { NeedsToAlign = (!(Options&DontAlign))
&& SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
using Base::base;
using Base::coeffRef;
@@ -217,8 +220,8 @@ class Matrix
}
// FIXME is it still needed
Matrix(ei_constructor_without_unaligned_array_assert)
: Base(ei_constructor_without_unaligned_array_assert())
Matrix(internal::constructor_without_unaligned_array_assert)
: Base(internal::constructor_without_unaligned_array_assert())
{ Base::_check_template_params(); EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED }
/** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
@@ -232,8 +235,8 @@ class Matrix
{
Base::_check_template_params();
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix)
ei_assert(dim > 0);
ei_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
eigen_assert(dim >= 0);
eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
}
@@ -282,6 +285,11 @@ class Matrix
EIGEN_STRONG_INLINE Matrix(const MatrixBase<OtherDerived>& other)
: Base(other.rows() * other.cols(), other.rows(), other.cols())
{
// This test resides here, to bring the error messages closer to the user. Normally, these checks
// are performed deeply within the library, thus causing long and scary error traces.
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
Base::_check_template_params();
Base::_set_noalias(other);
}
@@ -320,7 +328,7 @@ class Matrix
* of same type it is enough to swap the data pointers.
*/
template<typename OtherDerived>
void swap(MatrixBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other)
void swap(MatrixBase<OtherDerived> const & other)
{ this->_swap(other.derived()); }
inline Index innerStride() const { return 1; }
@@ -333,6 +341,13 @@ class Matrix
template<typename OtherDerived>
Matrix& operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
#ifdef EIGEN2_SUPPORT
template<typename OtherDerived>
explicit Matrix(const eigen2_RotationBase<OtherDerived,ColsAtCompileTime>& r);
template<typename OtherDerived>
Matrix& operator=(const eigen2_RotationBase<OtherDerived,ColsAtCompileTime>& r);
#endif
// allow to extend Matrix outside Eigen
#ifdef EIGEN_MATRIX_PLUGIN
#include EIGEN_MATRIX_PLUGIN
@@ -340,7 +355,7 @@ class Matrix
protected:
template <typename Derived, typename OtherDerived, bool IsVector>
friend struct ei_conservative_resize_like_impl;
friend struct internal::conservative_resize_like_impl;
using Base::m_storage;
};

View File

@@ -38,7 +38,7 @@
* Note that some methods are defined in other modules such as the \ref LU_Module LU module
* for all functions related to matrix inversions.
*
* \param Derived is the derived type, e.g. a matrix type, or an expression, etc.
* \tparam Derived is the derived type, e.g. a matrix type, or an expression, etc.
*
* When writing a function taking Eigen objects as argument, if you want your function
* to take as argument any matrix, vector, or expression, just let it take a
@@ -53,6 +53,9 @@
}
* \endcode
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN.
*
* \sa \ref TopicClassHierarchy
*/
template<typename Derived> class MatrixBase
@@ -61,10 +64,10 @@ template<typename Derived> class MatrixBase
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
typedef MatrixBase StorageBaseType;
typedef typename ei_traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index;
typedef typename ei_traits<Derived>::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename internal::traits<Derived>::Index Index;
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef DenseBase<Derived> Base;
@@ -93,6 +96,7 @@ template<typename Derived> class MatrixBase
using Base::operator/=;
typedef typename Base::CoeffReturnType CoeffReturnType;
typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
typedef typename Base::RowXpr RowXpr;
typedef typename Base::ColXpr ColXpr;
#endif // not EIGEN_PARSED_BY_DOXYGEN
@@ -107,7 +111,7 @@ template<typename Derived> class MatrixBase
/** \returns the size of the main diagonal, which is min(rows(),cols()).
* \sa rows(), cols(), SizeAtCompileTime. */
inline Index diagonalSize() const { return std::min(rows(),cols()); }
inline Index diagonalSize() const { return (std::min)(rows(),cols()); }
/** \brief The plain matrix type corresponding to this expression.
*
@@ -115,30 +119,30 @@ template<typename Derived> class MatrixBase
* the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
* that the return type of eval() is either PlainObject or const PlainObject&.
*/
typedef Matrix<typename ei_traits<Derived>::Scalar,
ei_traits<Derived>::RowsAtCompileTime,
ei_traits<Derived>::ColsAtCompileTime,
AutoAlign | (ei_traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
ei_traits<Derived>::MaxRowsAtCompileTime,
ei_traits<Derived>::MaxColsAtCompileTime
typedef Matrix<typename internal::traits<Derived>::Scalar,
internal::traits<Derived>::RowsAtCompileTime,
internal::traits<Derived>::ColsAtCompileTime,
AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
internal::traits<Derived>::MaxRowsAtCompileTime,
internal::traits<Derived>::MaxColsAtCompileTime
> PlainObject;
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ConstantReturnType;
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
/** \internal the return type of MatrixBase::adjoint() */
typedef typename ei_meta_if<NumTraits<Scalar>::IsComplex,
CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>, Eigen::Transpose<Derived> >,
Transpose<Derived>
>::ret AdjointReturnType;
typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
ConstTransposeReturnType
>::type AdjointReturnType;
/** \internal Return type of eigenvalues() */
typedef Matrix<std::complex<RealScalar>, ei_traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType;
typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType;
/** \internal the return type of identity */
typedef CwiseNullaryOp<ei_scalar_identity_op<Scalar>,Derived> IdentityReturnType;
typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,Derived> IdentityReturnType;
/** \internal the return type of unit vectors */
typedef Block<CwiseNullaryOp<ei_scalar_identity_op<Scalar>, SquareMatrixType>,
ei_traits<Derived>::RowsAtCompileTime,
ei_traits<Derived>::ColsAtCompileTime> BasisReturnType;
typedef Block<const CwiseNullaryOp<internal::scalar_identity_op<Scalar>, SquareMatrixType>,
internal::traits<Derived>::RowsAtCompileTime,
internal::traits<Derived>::ColsAtCompileTime> BasisReturnType;
#endif // not EIGEN_PARSED_BY_DOXYGEN
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase
@@ -200,7 +204,14 @@ template<typename Derived> class MatrixBase
operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;
template<typename OtherDerived>
Scalar dot(const MatrixBase<OtherDerived>& other) const;
typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
dot(const MatrixBase<OtherDerived>& other) const;
#ifdef EIGEN2_SUPPORT
template<typename OtherDerived>
Scalar eigen2_dot(const MatrixBase<OtherDerived>& other) const;
#endif
RealScalar squaredNorm() const;
RealScalar norm() const;
RealScalar stableNorm() const;
@@ -212,23 +223,50 @@ template<typename Derived> class MatrixBase
const AdjointReturnType adjoint() const;
void adjointInPlace();
Diagonal<Derived,0> diagonal();
const Diagonal<Derived,0> diagonal() const;
typedef Diagonal<Derived> DiagonalReturnType;
DiagonalReturnType diagonal();
typedef const Diagonal<const Derived> ConstDiagonalReturnType;
const ConstDiagonalReturnType diagonal() const;
template<int Index> Diagonal<Derived,Index> diagonal();
template<int Index> const Diagonal<Derived,Index> diagonal() const;
template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
Diagonal<Derived, Dynamic> diagonal(Index index);
const Diagonal<Derived, Dynamic> diagonal(Index index) const;
template<int Index> typename DiagonalIndexReturnType<Index>::Type diagonal();
template<int Index> typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
template<unsigned int Mode> TriangularView<Derived, Mode> part();
template<unsigned int Mode> const TriangularView<Derived, Mode> part() const;
// Note: The "MatrixBase::" prefixes are added to help MSVC9 to match these declarations with the later implementations.
// On the other hand they confuse MSVC8...
#if (defined _MSC_VER) && (_MSC_VER >= 1500) // 2008 or later
typename MatrixBase::template DiagonalIndexReturnType<Dynamic>::Type diagonal(Index index);
typename MatrixBase::template ConstDiagonalIndexReturnType<Dynamic>::Type diagonal(Index index) const;
#else
typename DiagonalIndexReturnType<Dynamic>::Type diagonal(Index index);
typename ConstDiagonalIndexReturnType<Dynamic>::Type diagonal(Index index) const;
#endif
template<unsigned int Mode> TriangularView<Derived, Mode> triangularView();
template<unsigned int Mode> const TriangularView<Derived, Mode> triangularView() const;
#ifdef EIGEN2_SUPPORT
template<unsigned int Mode> typename internal::eigen2_part_return_type<Derived, Mode>::type part();
template<unsigned int Mode> const typename internal::eigen2_part_return_type<Derived, Mode>::type part() const;
// huuuge hack. make Eigen2's matrix.part<Diagonal>() work in eigen3. Problem: Diagonal is now a class template instead
// of an integer constant. Solution: overload the part() method template wrt template parameters list.
// Note: replacing next line by "template<template<typename T, int n> class U>" produces a mysterious error C2082 in MSVC.
template<template<typename, int> class U>
const DiagonalWrapper<ConstDiagonalReturnType> part() const
{ return diagonal().asDiagonal(); }
#endif // EIGEN2_SUPPORT
template<unsigned int UpLo> SelfAdjointView<Derived, UpLo> selfadjointView();
template<unsigned int UpLo> const SelfAdjointView<Derived, UpLo> selfadjointView() const;
template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
template<unsigned int Mode> typename TriangularViewReturnType<Mode>::Type triangularView();
template<unsigned int Mode> typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };
template<unsigned int UpLo> typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
template<unsigned int UpLo> typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
const SparseView<Derived> sparseView(const Scalar& m_reference = Scalar(0),
typename NumTraits<Scalar>::Real m_epsilon = NumTraits<Scalar>::dummy_precision()) const;
@@ -241,7 +279,8 @@ template<typename Derived> class MatrixBase
static const BasisReturnType UnitZ();
static const BasisReturnType UnitW();
const DiagonalWrapper<Derived> asDiagonal() const;
const DiagonalWrapper<const Derived> asDiagonal() const;
const PermutationWrapper<const Derived> asPermutation() const;
Derived& setIdentity();
Derived& setIdentity(Index rows, Index cols);
@@ -277,8 +316,8 @@ template<typename Derived> class MatrixBase
inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
inline ForceAlignedAccess<Derived> forceAlignedAccess();
template<bool Enable> inline typename ei_makeconst<typename ei_meta_if<Enable,ForceAlignedAccess<Derived>,Derived&>::ret>::type forceAlignedAccessIf() const;
template<bool Enable> inline typename ei_meta_if<Enable,ForceAlignedAccess<Derived>,Derived&>::ret forceAlignedAccessIf();
template<bool Enable> inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type forceAlignedAccessIf() const;
template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
Scalar trace() const;
@@ -298,8 +337,27 @@ template<typename Derived> class MatrixBase
const FullPivLU<PlainObject> fullPivLu() const;
const PartialPivLU<PlainObject> partialPivLu() const;
#if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS
const LU<PlainObject> lu() const;
#endif
#ifdef EIGEN2_SUPPORT
const LU<PlainObject> eigen2_lu() const;
#endif
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
const PartialPivLU<PlainObject> lu() const;
const ei_inverse_impl<Derived> inverse() const;
#endif
#ifdef EIGEN2_SUPPORT
template<typename ResultType>
void computeInverse(MatrixBase<ResultType> *result) const {
*result = this->inverse();
}
#endif
const internal::inverse_impl<Derived> inverse() const;
template<typename ResultType>
void computeInverseAndDetWithCheck(
ResultType& inverse,
@@ -325,37 +383,57 @@ template<typename Derived> class MatrixBase
const HouseholderQR<PlainObject> householderQr() const;
const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
#ifdef EIGEN2_SUPPORT
const QR<PlainObject> qr() const;
#endif
EigenvaluesReturnType eigenvalues() const;
RealScalar operatorNorm() const;
/////////// SVD module ///////////
JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
#ifdef EIGEN2_SUPPORT
SVD<PlainObject> svd() const;
#endif
/////////// Geometry module ///////////
#ifndef EIGEN_PARSED_BY_DOXYGEN
/// \internal helper struct to form the return type of the cross product
template<typename OtherDerived> struct cross_product_return_type {
typedef typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
typedef Matrix<Scalar,MatrixBase::RowsAtCompileTime,MatrixBase::ColsAtCompileTime> type;
};
#endif // EIGEN_PARSED_BY_DOXYGEN
template<typename OtherDerived>
PlainObject cross(const MatrixBase<OtherDerived>& other) const;
typename cross_product_return_type<OtherDerived>::type
cross(const MatrixBase<OtherDerived>& other) const;
template<typename OtherDerived>
PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
PlainObject unitOrthogonal(void) const;
Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
enum {
SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
};
typedef Block<Derived,
ei_traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
ei_traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> StartMinusOne;
typedef CwiseUnaryOp<ei_scalar_quotient1_op<typename ei_traits<Derived>::Scalar>,
StartMinusOne > HNormalizedReturnType;
HNormalizedReturnType hnormalized() const;
// put this as separate enum value to work around possible GCC 4.3 bug (?)
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1?Vertical:Horizontal };
typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
HomogeneousReturnType homogeneous() const;
#endif
enum {
SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
};
typedef Block<const Derived,
internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;
typedef CwiseUnaryOp<internal::scalar_quotient1_op<typename internal::traits<Derived>::Scalar>,
const ConstStartMinusOne > HNormalizedReturnType;
const HNormalizedReturnType hnormalized() const;
////////// Householder module ///////////
@@ -375,13 +453,13 @@ template<typename Derived> class MatrixBase
///////// Jacobi module /////////
template<typename OtherScalar>
void applyOnTheLeft(Index p, Index q, const PlanarRotation<OtherScalar>& j);
void applyOnTheLeft(Index p, Index q, const JacobiRotation<OtherScalar>& j);
template<typename OtherScalar>
void applyOnTheRight(Index p, Index q, const PlanarRotation<OtherScalar>& j);
void applyOnTheRight(Index p, Index q, const JacobiRotation<OtherScalar>& j);
///////// MatrixFunctions module /////////
typedef typename ei_stem_function<Scalar>::type StemFunction;
typedef typename internal::stem_function<Scalar>::type StemFunction;
const MatrixExponentialReturnValue<Derived> exp() const;
const MatrixFunctionReturnValue<Derived> matrixFunction(StemFunction f) const;
const MatrixFunctionReturnValue<Derived> cosh() const;
@@ -412,13 +490,13 @@ template<typename Derived> class MatrixBase
inline Cwise<Derived> cwise();
VectorBlock<Derived> start(Index size);
const VectorBlock<Derived> start(Index size) const;
const VectorBlock<const Derived> start(Index size) const;
VectorBlock<Derived> end(Index size);
const VectorBlock<Derived> end(Index size) const;
const VectorBlock<const Derived> end(Index size) const;
template<int Size> VectorBlock<Derived,Size> start();
template<int Size> const VectorBlock<Derived,Size> start() const;
template<int Size> const VectorBlock<const Derived,Size> start() const;
template<int Size> VectorBlock<Derived,Size> end();
template<int Size> const VectorBlock<Derived,Size> end() const;
template<int Size> const VectorBlock<const Derived,Size> end() const;
Minor<Derived> minor(Index row, Index col);
const Minor<Derived> minor(Index row, Index col) const;
@@ -433,10 +511,10 @@ template<typename Derived> class MatrixBase
template<typename OtherDerived> explicit MatrixBase(const MatrixBase<OtherDerived>&);
protected:
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& array)
template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator-=(const ArrayBase<OtherDerived>& array)
template<typename OtherDerived> Derived& operator-=(const ArrayBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
};

View File

@@ -1,300 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_MATRIXSTORAGE_H
#define EIGEN_MATRIXSTORAGE_H
// EIGEN_INT_DEBUG_MATRIX_CTOR expands to the user-provided EIGEN_DEBUG_MATRIX_CTOR
// followed by a semicolon when that symbol is defined, and to nothing otherwise.
#ifdef EIGEN_DEBUG_MATRIX_CTOR
#define EIGEN_INT_DEBUG_MATRIX_CTOR EIGEN_DEBUG_MATRIX_CTOR;
#else
#define EIGEN_INT_DEBUG_MATRIX_CTOR
#endif
// Empty tag type. Constructors in this file that take it have empty bodies,
// so they do not run EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT.
struct ei_constructor_without_unaligned_array_assert {};
/** \internal
* Plain static array holding \c Size elements of type \c T.
* The \c Alignment parameter defaults to 16 when \c MatrixOptions does not contain
* DontAlign and the total size in bytes is a multiple of 16; otherwise it defaults to 0.
* This primary template declares the array without any alignment attribute.
*/
template <typename T, int Size, int MatrixOptions,
          int Alignment = ((MatrixOptions&DontAlign) || ((Size*sizeof(T))%16)!=0) ? 0 : 16 >
struct ei_matrix_array
{
  // Tag constructor, equivalent to the default one for this primary template.
  ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}
  ei_matrix_array() {}

  T array[Size];
};
// EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) asserts, via ei_assert, that the address of
// an identifier named `array` (which must be in scope at the point of expansion) has none
// of the bits in `sizemask` set. It expands to nothing when
// EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT is defined.
#ifdef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)
#else
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
ei_assert((reinterpret_cast<size_t>(array) & sizemask) == 0 \
&& "this assertion is explained here: " \
"http://eigen.tuxfamily.org/dox/UnalignedArrayAssert.html" \
" **** READ THIS WEB PAGE !!! ****");
#endif
// Specialization selected when Alignment is 16: the array is declared with EIGEN_ALIGN16.
template <typename T, int Size, int MatrixOptions>
struct ei_matrix_array<T, Size, MatrixOptions, 16>
{
  EIGEN_ALIGN16 T array[Size];

  // Tag constructor: skips the alignment assertion.
  ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}

  // Default constructor: checks that the low four address bits of the array are zero.
  ei_matrix_array()
  {
    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf)
  }
};
// Specialization for Size == 0: a one-element array is declared instead of a
// zero-length one, and neither constructor performs any check.
template <typename T, int MatrixOptions, int Alignment>
struct ei_matrix_array<T, 0, MatrixOptions, Alignment>
{
  EIGEN_ALIGN16 T array[1];

  ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}
  ei_matrix_array() {}
};
/** \internal
*
* \class ei_matrix_storage
* \ingroup Core_Module
*
* \brief Stores the data of a matrix
*
* This class stores the data of fixed-size, dynamic-size or mixed matrices
* as compactly as possible.
*
* \sa Matrix
*/
template<typename T, int Size, int _Rows, int _Cols, int _Options> class ei_matrix_storage;
// purely fixed-size matrix
template<typename T, int Size, int _Rows, int _Cols, int _Options> class ei_matrix_storage
{
ei_matrix_array<T,Size,_Options> m_data;
public:
inline explicit ei_matrix_storage() {}
inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert)
: m_data(ei_constructor_without_unaligned_array_assert()) {}
inline ei_matrix_storage(DenseIndex,DenseIndex,DenseIndex) {}
inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); }
inline static DenseIndex rows(void) {return _Rows;}
inline static DenseIndex cols(void) {return _Cols;}
inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
inline const T *data() const { return m_data.array; }
inline T *data() { return m_data.array; }
};
// Null matrix (Size == 0): no data member at all; data() returns a null pointer.
template<typename T, int _Rows, int _Cols, int _Options>
class ei_matrix_storage<T, 0, _Rows, _Cols, _Options>
{
  public:
    explicit ei_matrix_storage() {}
    ei_matrix_storage(ei_constructor_without_unaligned_array_assert) {}
    ei_matrix_storage(DenseIndex, DenseIndex, DenseIndex) {}

    // Nothing to exchange.
    void swap(ei_matrix_storage&) {}

    static DenseIndex rows() { return _Rows; }
    static DenseIndex cols() { return _Cols; }

    // Resizing is a no-op.
    void resize(DenseIndex, DenseIndex, DenseIndex) {}
    void conservativeResize(DenseIndex, DenseIndex, DenseIndex) {}

    T* data() { return 0; }
    const T* data() const { return 0; }
};
// dynamic-size matrix with fixed-size storage
template<typename T, int Size, int _Options> class ei_matrix_storage<T, Size, Dynamic, Dynamic, _Options>
{
ei_matrix_array<T,Size,_Options> m_data;
DenseIndex m_rows;
DenseIndex m_cols;
public:
inline explicit ei_matrix_storage() : m_rows(0), m_cols(0) {}
inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert)
: m_data(ei_constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
inline ei_matrix_storage(DenseIndex, DenseIndex rows, DenseIndex cols) : m_rows(rows), m_cols(cols) {}
inline void swap(ei_matrix_storage& other)
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
inline DenseIndex rows(void) const {return m_rows;}
inline DenseIndex cols(void) const {return m_cols;}
inline void conservativeResize(DenseIndex, DenseIndex rows, DenseIndex cols) { m_rows = rows; m_cols = cols; }
inline void resize(DenseIndex, DenseIndex rows, DenseIndex cols) { m_rows = rows; m_cols = cols; }
inline const T *data() const { return m_data.array; }
inline T *data() { return m_data.array; }
};
// Dynamic-size matrix with fixed-size storage and fixed width: the column count is
// the template argument _Cols, only the row count is stored at runtime.
template<typename T, int Size, int _Cols, int _Options>
class ei_matrix_storage<T, Size, Dynamic, _Cols, _Options>
{
    ei_matrix_array<T,Size,_Options> m_data;
    DenseIndex m_rows;

  public:
    /** Starts out with zero rows. */
    inline explicit ei_matrix_storage() : m_rows(0) {}

    /** Same as the default constructor, but forwards the tag to the array member. */
    inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert)
      : m_data(ei_constructor_without_unaligned_array_assert()),
        m_rows(0)
    {}

    /** Records the row count; the size and cols arguments are unused. */
    inline ei_matrix_storage(DenseIndex, DenseIndex rows, DenseIndex)
      : m_rows(rows)
    {}

    /** Exchanges the array contents and the row count with \a other. */
    inline void swap(ei_matrix_storage& other)
    {
      std::swap(m_data, other.m_data);
      std::swap(m_rows, other.m_rows);
    }

    inline DenseIndex rows() const { return m_rows; }
    inline DenseIndex cols() const { return _Cols; }

    /** Only the stored row count changes. */
    inline void resize(DenseIndex, DenseIndex rows, DenseIndex)
    {
      m_rows = rows;
    }

    /** Identical to resize(): there is no buffer to reallocate. */
    inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex cols)
    {
      resize(size, rows, cols);
    }

    inline const T* data() const { return m_data.array; }
    inline T* data() { return m_data.array; }
};
// Dynamic-size matrix with fixed-size storage and fixed height: the row count is
// the template argument _Rows, only the column count is stored at runtime.
template<typename T, int Size, int _Rows, int _Options>
class ei_matrix_storage<T, Size, _Rows, Dynamic, _Options>
{
    ei_matrix_array<T,Size,_Options> m_data;
    DenseIndex m_cols;

  public:
    /** Starts out with zero columns. */
    inline explicit ei_matrix_storage() : m_cols(0) {}

    /** Same as the default constructor, but forwards the tag to the array member. */
    inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert)
      : m_data(ei_constructor_without_unaligned_array_assert()),
        m_cols(0)
    {}

    /** Records the column count; the size and rows arguments are unused. */
    inline ei_matrix_storage(DenseIndex, DenseIndex, DenseIndex cols)
      : m_cols(cols)
    {}

    /** Exchanges the array contents and the column count with \a other. */
    inline void swap(ei_matrix_storage& other)
    {
      std::swap(m_data, other.m_data);
      std::swap(m_cols, other.m_cols);
    }

    inline DenseIndex rows() const { return _Rows; }
    inline DenseIndex cols() const { return m_cols; }

    /** Only the stored column count changes. */
    inline void resize(DenseIndex, DenseIndex, DenseIndex cols)
    {
      m_cols = cols;
    }

    /** Identical to resize(): there is no buffer to reallocate. */
    inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex cols)
    {
      resize(size, rows, cols);
    }

    inline const T* data() const { return m_data.array; }
    inline T* data() { return m_data.array; }
};
// purely dynamic matrix: rows, cols and the coefficient buffer are all runtime state.
// The buffer is allocated with ei_conditional_aligned_new and released in the destructor.
template<typename T, int _Options> class ei_matrix_storage<T, Dynamic, Dynamic, Dynamic, _Options>
{
    T *m_data;          // 0 after default construction or after resize() to size 0
    DenseIndex m_rows;
    DenseIndex m_cols;
    // NOTE(review): no copy constructor / copy assignment is declared, so the
    // compiler-generated ones would copy m_data and both objects would release the
    // same buffer -- confirm that storage objects are never copied.
  public:
    /** Creates an empty 0x0 storage without allocating. */
    inline explicit ei_matrix_storage() : m_data(0), m_rows(0), m_cols(0) {}
    /** Same as the default constructor; the tag argument is ignored. */
    inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert)
      : m_data(0), m_rows(0), m_cols(0) {}
    /** Allocates \a size coefficients and records the dimensions \a rows x \a cols. */
    inline ei_matrix_storage(DenseIndex size, DenseIndex rows, DenseIndex cols)
      : m_data(ei_conditional_aligned_new<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
    { EIGEN_INT_DEBUG_MATRIX_CTOR }
    /** Releases the buffer, passing the current coefficient count m_rows*m_cols. */
    inline ~ei_matrix_storage() { ei_conditional_aligned_delete<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
    /** Exchanges buffer pointer and dimensions with \a other (no coefficient is copied). */
    inline void swap(ei_matrix_storage& other)
    { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
    inline DenseIndex rows(void) const {return m_rows;}
    inline DenseIndex cols(void) const {return m_cols;}
    /** Reallocates the buffer from m_rows*m_cols to \a size coefficients through
      * ei_conditional_aligned_realloc_new, then records the new dimensions.
      */
    inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex cols)
    {
      m_data = ei_conditional_aligned_realloc_new<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
      m_rows = rows;
      m_cols = cols;
    }
    /** Resizes to \a rows x \a cols. The buffer is released and reallocated only when
      * the total coefficient count changes; in that case the old contents are dropped.
      */
    void resize(DenseIndex size, DenseIndex rows, DenseIndex cols)
    {
      if(size != m_rows*m_cols)
      {
        ei_conditional_aligned_delete<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols);
        // The old buffer is gone: put the object into a valid empty state *before*
        // allocating, so that a throwing allocation cannot leave m_data dangling and
        // make the destructor release the same buffer a second time.
        m_data = 0;
        m_rows = 0;
        m_cols = 0;
        if (size)
          m_data = ei_conditional_aligned_new<T,(_Options&DontAlign)==0>(size);
        EIGEN_INT_DEBUG_MATRIX_CTOR
      }
      m_rows = rows;
      m_cols = cols;
    }
    inline const T *data() const { return m_data; }
    inline T *data() { return m_data; }
};
// matrix with dynamic width and fixed height (so that matrix has dynamic size).
// The row count is the template argument _Rows; the column count and the heap
// buffer are runtime state.
template<typename T, int _Rows, int _Options> class ei_matrix_storage<T, Dynamic, _Rows, Dynamic, _Options>
{
    T *m_data;          // 0 after default construction or after resize() to size 0
    DenseIndex m_cols;
    // NOTE(review): no copy constructor / copy assignment is declared, so the
    // compiler-generated ones would copy m_data and both objects would release the
    // same buffer -- confirm that storage objects are never copied.
  public:
    /** Creates an empty storage (zero columns) without allocating. */
    inline explicit ei_matrix_storage() : m_data(0), m_cols(0) {}
    /** Same as the default constructor; the tag argument is ignored. */
    inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
    /** Allocates \a size coefficients and records \a cols; the rows argument is unused. */
    inline ei_matrix_storage(DenseIndex size, DenseIndex, DenseIndex cols) : m_data(ei_conditional_aligned_new<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
    { EIGEN_INT_DEBUG_MATRIX_CTOR }
    /** Releases the buffer, passing the current coefficient count _Rows*m_cols. */
    inline ~ei_matrix_storage() { ei_conditional_aligned_delete<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
    /** Exchanges buffer pointer and column count with \a other. */
    inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
    inline static DenseIndex rows(void) {return _Rows;}
    inline DenseIndex cols(void) const {return m_cols;}
    /** Reallocates the buffer from _Rows*m_cols to \a size coefficients through
      * ei_conditional_aligned_realloc_new, then records the new column count.
      */
    inline void conservativeResize(DenseIndex size, DenseIndex, DenseIndex cols)
    {
      m_data = ei_conditional_aligned_realloc_new<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
      m_cols = cols;
    }
    /** Resizes to \a cols columns. The buffer is released and reallocated only when
      * the total coefficient count changes; in that case the old contents are dropped.
      */
    EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex, DenseIndex cols)
    {
      if(size != _Rows*m_cols)
      {
        ei_conditional_aligned_delete<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols);
        // The old buffer is gone: put the object into a valid empty state *before*
        // allocating, so that a throwing allocation cannot leave m_data dangling and
        // make the destructor release the same buffer a second time.
        m_data = 0;
        m_cols = 0;
        if (size)
          m_data = ei_conditional_aligned_new<T,(_Options&DontAlign)==0>(size);
        EIGEN_INT_DEBUG_MATRIX_CTOR
      }
      m_cols = cols;
    }
    inline const T *data() const { return m_data; }
    inline T *data() { return m_data; }
};
// matrix with dynamic height and fixed width (so that matrix has dynamic size).
// The column count is the template argument _Cols; the row count and the heap
// buffer are runtime state.
template<typename T, int _Cols, int _Options> class ei_matrix_storage<T, Dynamic, Dynamic, _Cols, _Options>
{
    T *m_data;          // 0 after default construction or after resize() to size 0
    DenseIndex m_rows;
    // NOTE(review): no copy constructor / copy assignment is declared, so the
    // compiler-generated ones would copy m_data and both objects would release the
    // same buffer -- confirm that storage objects are never copied.
  public:
    /** Creates an empty storage (zero rows) without allocating. */
    inline explicit ei_matrix_storage() : m_data(0), m_rows(0) {}
    /** Same as the default constructor; the tag argument is ignored. */
    inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
    /** Allocates \a size coefficients and records \a rows; the cols argument is unused. */
    inline ei_matrix_storage(DenseIndex size, DenseIndex rows, DenseIndex) : m_data(ei_conditional_aligned_new<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
    { EIGEN_INT_DEBUG_MATRIX_CTOR }
    /** Releases the buffer, passing the current coefficient count _Cols*m_rows. */
    inline ~ei_matrix_storage() { ei_conditional_aligned_delete<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
    /** Exchanges buffer pointer and row count with \a other. */
    inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
    inline DenseIndex rows(void) const {return m_rows;}
    inline static DenseIndex cols(void) {return _Cols;}
    /** Reallocates the buffer from m_rows*_Cols to \a size coefficients through
      * ei_conditional_aligned_realloc_new, then records the new row count.
      */
    inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex)
    {
      m_data = ei_conditional_aligned_realloc_new<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
      m_rows = rows;
    }
    /** Resizes to \a rows rows. The buffer is released and reallocated only when
      * the total coefficient count changes; in that case the old contents are dropped.
      */
    EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex rows, DenseIndex)
    {
      if(size != m_rows*_Cols)
      {
        ei_conditional_aligned_delete<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows);
        // The old buffer is gone: put the object into a valid empty state *before*
        // allocating, so that a throwing allocation cannot leave m_data dangling and
        // make the destructor release the same buffer a second time.
        m_data = 0;
        m_rows = 0;
        if (size)
          m_data = ei_conditional_aligned_new<T,(_Options&DontAlign)==0>(size);
        EIGEN_INT_DEBUG_MATRIX_CTOR
      }
      m_rows = rows;
    }
    inline const T *data() const { return m_data; }
    inline T *data() { return m_data; }
};
#endif // EIGEN_MATRIX_H

View File

@@ -38,16 +38,19 @@
*
* \sa MatrixBase::nestByValue()
*/
namespace internal {
template<typename ExpressionType>
struct ei_traits<NestByValue<ExpressionType> > : public ei_traits<ExpressionType>
struct traits<NestByValue<ExpressionType> > : public traits<ExpressionType>
{};
}
template<typename ExpressionType> class NestByValue
: public ei_dense_xpr_base< NestByValue<ExpressionType> >::type
: public internal::dense_xpr_base< NestByValue<ExpressionType> >::type
{
public:
typedef typename ei_dense_xpr_base<NestByValue>::type Base;
typedef typename internal::dense_xpr_base<NestByValue>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue)
inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}

View File

@@ -51,17 +51,17 @@ class NoAlias
* \sa MatrixBase::lazyAssign() */
template<typename OtherDerived>
EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
{ return ei_assign_selector<ExpressionType,OtherDerived,false>::run(m_expression,other.derived()); }
{ return internal::assign_selector<ExpressionType,OtherDerived,false>::run(m_expression,other.derived()); }
/** \sa MatrixBase::operator+= */
template<typename OtherDerived>
EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
{
typedef SelfCwiseBinaryOp<ei_scalar_sum_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
typedef SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
SelfAdder tmp(m_expression);
typedef typename ei_nested<OtherDerived>::type OtherDerivedNested;
typedef typename ei_cleantype<OtherDerivedNested>::type _OtherDerivedNested;
ei_assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
typedef typename internal::nested<OtherDerived>::type OtherDerivedNested;
typedef typename internal::remove_all<OtherDerivedNested>::type _OtherDerivedNested;
internal::assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
return m_expression;
}
@@ -69,11 +69,11 @@ class NoAlias
template<typename OtherDerived>
EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
{
typedef SelfCwiseBinaryOp<ei_scalar_difference_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
typedef SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
SelfAdder tmp(m_expression);
typedef typename ei_nested<OtherDerived>::type OtherDerivedNested;
typedef typename ei_cleantype<OtherDerivedNested>::type _OtherDerivedNested;
ei_assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
typedef typename internal::nested<OtherDerived>::type OtherDerivedNested;
typedef typename internal::remove_all<OtherDerivedNested>::type _OtherDerivedNested;
internal::assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
return m_expression;
}

View File

@@ -40,7 +40,7 @@
* is a typedef to \a U.
* \li A typedef \a NonInteger, giving the type that should be used for operations producing non-integral values,
* such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives
* \a T again. Note however that many Eigen functions such as ei_sqrt simply refuse to
* \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to
* take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is
* only intended as a helper for code that needs to explicitly promote types.
* \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what
@@ -53,6 +53,8 @@
* to by move / add / mul instructions respectively, assuming the data is already stored in CPU registers.
* Stay vague here. No need to do architecture-specific stuff.
* \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned.
* \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must
* be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise.
* \li An epsilon() function which, unlike std::numeric_limits::epsilon(), returns a \a Real instead of a \a T.
* \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default
* value by the fuzzy comparison operators.
@@ -65,17 +67,18 @@ template<typename T> struct GenericNumTraits
IsInteger = std::numeric_limits<T>::is_integer,
IsSigned = std::numeric_limits<T>::is_signed,
IsComplex = 0,
RequireInitialization = internal::is_arithmetic<T>::value ? 0 : 1,
ReadCost = 1,
AddCost = 1,
MulCost = 1
};
typedef T Real;
typedef typename ei_meta_if<
typedef typename internal::conditional<
IsInteger,
typename ei_meta_if<sizeof(T)<=2, float, double>::ret,
typename internal::conditional<sizeof(T)<=2, float, double>::type,
T
>::ret NonInteger;
>::type NonInteger;
typedef T Nested;
inline static Real epsilon() { return std::numeric_limits<T>::epsilon(); }
@@ -84,8 +87,15 @@ template<typename T> struct GenericNumTraits
// make sure to override this for floating-point types
return Real(0);
}
inline static T highest() { return std::numeric_limits<T>::max(); }
inline static T lowest() { return IsInteger ? std::numeric_limits<T>::min() : (-std::numeric_limits<T>::max()); }
inline static T highest() { return (std::numeric_limits<T>::max)(); }
inline static T lowest() { return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)()); }
#ifdef EIGEN2_SUPPORT
enum {
HasFloatingPoint = !IsInteger
};
typedef NonInteger FloatingPoint;
#endif
};
template<typename T> struct NumTraits : GenericNumTraits<T>
@@ -114,6 +124,7 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
typedef _Real Real;
enum {
IsComplex = 1,
RequireInitialization = NumTraits<_Real>::RequireInitialization,
ReadCost = 2 * NumTraits<_Real>::ReadCost,
AddCost = 2 * NumTraits<Real>::AddCost,
MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
@@ -137,6 +148,7 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
IsComplex = NumTraits<Scalar>::IsComplex,
IsInteger = NumTraits<Scalar>::IsInteger,
IsSigned = NumTraits<Scalar>::IsSigned,
RequireInitialization = 1,
ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::ReadCost,
AddCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
MulCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost

View File

@@ -2,7 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2009-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -26,15 +26,17 @@
#ifndef EIGEN_PERMUTATIONMATRIX_H
#define EIGEN_PERMUTATIONMATRIX_H
/** \class PermutationMatrix
template<int RowCol,typename IndicesType,typename MatrixType, typename StorageKind> class PermutedImpl;
/** \class PermutationBase
* \ingroup Core_Module
*
* \brief Permutation matrix
* \brief Base class for permutations
*
* \param SizeAtCompileTime the number of rows/cols, or Dynamic
* \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
* \param Derived the derived class
*
* This class represents a permutation matrix, internally stored as a vector of integers.
* This class is the base class for all expressions representing a permutation matrix,
* internally stored as a vector of integers.
* The convention followed here is that if \f$ \sigma \f$ is a permutation, the corresponding permutation matrix
* \f$ P_\sigma \f$ is such that if \f$ (e_1,\ldots,e_p) \f$ is the canonical basis, we have:
* \f[ P_\sigma(e_i) = e_{\sigma(i)}. \f]
@@ -44,26 +46,29 @@
* Permutation matrices are square and invertible.
*
* Notice that in addition to the member functions and operators listed here, there also are non-member
* operator* to multiply a PermutationMatrix with any kind of matrix expression (MatrixBase) on either side.
* operator* to multiply any kind of permutation object with any kind of matrix expression (MatrixBase)
* on either side.
*
* \sa class DiagonalMatrix
* \sa class PermutationMatrix, class PermutationWrapper
*/
template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false> struct ei_permut_matrix_product_retval;
template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
struct ei_traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
: ei_traits<Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
{};
namespace internal {
template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false>
struct permut_matrix_product_retval;
enum PermPermProduct_t {PermPermProduct};
} // end namespace internal
template<typename Derived>
class PermutationBase : public EigenBase<Derived>
{
typedef internal::traits<Derived> Traits;
typedef EigenBase<Derived> Base;
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
typedef ei_traits<PermutationMatrix> Traits;
typedef Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime>
DenseMatrixType;
typedef typename Traits::IndicesType IndicesType;
enum {
Flags = Traits::Flags,
CoeffReadCost = Traits::CoeffReadCost,
@@ -74,9 +79,227 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
};
typedef typename Traits::Scalar Scalar;
typedef typename Traits::Index Index;
typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime,0,MaxRowsAtCompileTime,MaxColsAtCompileTime>
DenseMatrixType;
typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,Index>
PlainPermutationType;
using Base::derived;
#endif
typedef Matrix<int, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
/** Assigns the index vector of \a other to the index vector of *this.
  * \returns a reference to the derived object.
  */
template<typename OtherDerived>
Derived& operator=(const PermutationBase<OtherDerived>& other)
{
  this->indices() = other.indices();
  return this->derived();
}
/** Builds the permutation from the Transpositions \a tr: *this is first reset to the
  * identity of size tr.size(), then the transpositions (k, tr.coeff(k)) are applied
  * on the right, walking k from the last index down to 0.
  */
template<typename OtherDerived>
Derived& operator=(const TranspositionsBase<OtherDerived>& tr)
{
  setIdentity(tr.size());
  for(Index k=size()-1; k>=0; --k)
  {
    applyTranspositionOnTheRight(k, tr.coeff(k));
  }
  return derived();
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** Non-template counterpart of the templated operator=. It exists so that a
  * default operator= does not hide the templated one.
  */
Derived& operator=(const PermutationBase& other)
{
  this->indices() = other.indices();
  return this->derived();
}
#endif
/** \returns the size of a side of the respective square matrix, i.e., the number of indices */
inline Index size() const
{
  return indices().size();
}
/** \returns the number of rows (the number of indices) */
inline Index rows() const
{
  return indices().size();
}
/** \returns the number of columns (the number of indices) */
inline Index cols() const
{
  return indices().size();
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** Writes the dense form of this permutation into \a other: \a other is zeroed,
  * then for every column i the coefficient at row indices().coeff(i) is set to 1.
  */
template<typename DenseDerived>
void evalTo(MatrixBase<DenseDerived>& other) const
{
  other.setZero();
  // The counter has the same type as rows(); a plain int could be narrower than Index.
  for (Index i=0; i<rows(); ++i)
    other.coeffRef(indices().coeff(i),i) = typename DenseDerived::Scalar(1);
}
#endif
/** \returns a DenseMatrixType object initialized from this permutation matrix.
  *
  * Returning the dense matrix by value is inefficient; for efficiency, favor
  * the Matrix constructor taking EigenBase objects.
  */
DenseMatrixType toDenseMatrix() const
{
  return this->derived();
}
/** Read-only access to the index vector; forwarded to the derived class. */
const IndicesType& indices() const
{
  return derived().indices();
}
/** \returns a reference to the stored array representing the permutation
  * (forwarded to the derived class).
  */
IndicesType& indices()
{
  return derived().indices();
}
/** Resizes the index vector to \a size entries. */
inline void resize(Index size)
{
  this->indices().resize(size);
}
/** Sets *this to be the identity permutation matrix of the current size:
  * every index i is mapped to itself.
  */
void setIdentity()
{
  Index i = 0;
  while(i < size())
  {
    indices().coeffRef(i) = i;
    ++i;
  }
}
/** Resizes to \a size, then sets *this to be the identity permutation matrix. */
void setIdentity(Index size)
{
  this->resize(size);
  this->setIdentity();
}
/** Multiplies *this by the transposition \f$(ij)\f$ on the left.
  *
  * Every stored index equal to \a i is replaced by \a j and every stored index
  * equal to \a j is replaced by \a i.
  *
  * \returns a reference to *this.
  *
  * \warning This is much slower than applyTranspositionOnTheRight(int,int):
  * this has linear complexity and requires a lot of branching.
  *
  * \sa applyTranspositionOnTheRight(int,int)
  */
Derived& applyTranspositionOnTheLeft(Index i, Index j)
{
  eigen_assert(i>=0 && j>=0 && i<size() && j<size());
  for(Index k = 0; k < size(); ++k)
  {
    if(indices().coeff(k) == i)
    {
      indices().coeffRef(k) = j;
      continue;
    }
    if(indices().coeff(k) == j)
      indices().coeffRef(k) = i;
  }
  return derived();
}
/** Multiplies *this by the transposition \f$(ij)\f$ on the right.
  *
  * This is a fast operation: it only swaps the entries at positions \a i and \a j
  * of the index vector.
  *
  * \returns a reference to *this.
  *
  * \sa applyTranspositionOnTheLeft(int,int)
  */
Derived& applyTranspositionOnTheRight(Index i, Index j)
{
  eigen_assert(i>=0 && j>=0 && i<size() && j<size());
  std::swap(
    indices().coeffRef(i),
    indices().coeffRef(j)
  );
  return derived();
}
/** \returns the inverse permutation matrix, as a Transpose<PermutationBase>
* expression built from derived() (same expression as transpose()).
*
* \note \note_try_to_help_rvo
*/
inline Transpose<PermutationBase> inverse() const
{ return derived(); }
/** \returns the transpose permutation matrix, as a Transpose<PermutationBase>
* expression built from derived() (same expression as inverse()).
*
* \note \note_try_to_help_rvo
*/
inline Transpose<PermutationBase> transpose() const
{ return derived(); }
/**** multiplication helpers to hopefully get RVO ****/
#ifndef EIGEN_PARSED_BY_DOXYGEN
protected:
template<typename OtherDerived>
void assignTranspose(const PermutationBase<OtherDerived>& other)
{
for (int i=0; i<rows();++i) indices().coeffRef(other.indices().coeff(i)) = i;
}
template<typename Lhs,typename Rhs>
void assignProduct(const Lhs& lhs, const Rhs& rhs)
{
eigen_assert(lhs.cols() == rhs.rows());
for (int i=0; i<rows();++i) indices().coeffRef(i) = lhs.indices().coeff(rhs.indices().coeff(i));
}
#endif
public:
/** \returns the product permutation matrix.
*
* The result is built directly by the PlainPermutationType constructor tagged
* with internal::PermPermProduct, from derived() and other.derived().
*
* \note \note_try_to_help_rvo
*/
template<typename Other>
inline PlainPermutationType operator*(const PermutationBase<Other>& other) const
{ return PlainPermutationType(internal::PermPermProduct, derived(), other.derived()); }
/** \returns the product of a permutation with another inverse permutation.
*
* The transposed operand is evaluated with other.eval() before being passed
* to the PermPermProduct-tagged constructor.
*
* \note \note_try_to_help_rvo
*/
template<typename Other>
inline PlainPermutationType operator*(const Transpose<PermutationBase<Other> >& other) const
{ return PlainPermutationType(internal::PermPermProduct, *this, other.eval()); }
/** \returns the product of an inverse permutation with another permutation.
*
* Declared as a friend so that the transposed expression can appear on the
* left-hand side; it is evaluated with other.eval() first.
*
* \note \note_try_to_help_rvo
*/
template<typename Other> friend
inline PlainPermutationType operator*(const Transpose<PermutationBase<Other> >& other, const PermutationBase& perm)
{ return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); }
protected:
};
/** \class PermutationMatrix
* \ingroup Core_Module
*
* \brief Permutation matrix
*
* \param SizeAtCompileTime the number of rows/cols, or Dynamic
* \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
* \param IndexType the integer type of the indices
*
* This class represents a permutation matrix, internally stored as a vector of integers.
*
* \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix
*/
namespace internal {
// Traits of PermutationMatrix: everything is inherited from the traits of the square
// Matrix<IndexType,...> of the same (max) size, plus the two typedefs below.
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
  : traits<Matrix<IndexType,
                  SizeAtCompileTime, SizeAtCompileTime,
                  0,
                  MaxSizeAtCompileTime, MaxSizeAtCompileTime> >
{
  // Column vector type holding the permutation indices.
  typedef Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
  // The index type is the integer type the permutation is stored with.
  typedef IndexType Index;
};
}
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
{
typedef PermutationBase<PermutationMatrix> Base;
typedef internal::traits<PermutationMatrix> Traits;
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
typedef typename Traits::IndicesType IndicesType;
#endif
inline PermutationMatrix()
{}
@@ -87,8 +310,8 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
{}
/** Copy constructor. */
template<int OtherSize, int OtherMaxSize>
inline PermutationMatrix(const PermutationMatrix<OtherSize, OtherMaxSize>& other)
template<typename OtherDerived>
inline PermutationMatrix(const PermutationBase<OtherDerived>& other)
: m_indices(other.indices()) {}
#ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -109,29 +332,26 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
{}
/** Convert the Transpositions \a tr to a permutation matrix */
template<int OtherSize, int OtherMaxSize>
explicit PermutationMatrix(const Transpositions<OtherSize,OtherMaxSize>& tr)
template<typename Other>
explicit PermutationMatrix(const TranspositionsBase<Other>& tr)
: m_indices(tr.size())
{
*this = tr;
}
/** Copies the other permutation into *this */
template<int OtherSize, int OtherMaxSize>
PermutationMatrix& operator=(const PermutationMatrix<OtherSize, OtherMaxSize>& other)
template<typename Other>
PermutationMatrix& operator=(const PermutationBase<Other>& other)
{
m_indices = other.indices();
return *this;
}
/** Assignment from the Transpositions \a tr */
template<int OtherSize, int OtherMaxSize>
PermutationMatrix& operator=(const Transpositions<OtherSize,OtherMaxSize>& tr)
template<typename Other>
PermutationMatrix& operator=(const TranspositionsBase<Other>& tr)
{
setIdentity(tr.size());
for(Index k=size()-1; k>=0; --k)
applyTranspositionOnTheRight(k,tr.coeff(k));
return *this;
return Base::operator=(tr.derived());
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -145,197 +365,195 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
}
#endif
/** \returns the number of rows */
inline Index rows() const { return m_indices.size(); }
/** \returns the number of columns */
inline Index cols() const { return m_indices.size(); }
/** \returns the size of a side of the respective square matrix, i.e., the number of indices */
inline Index size() const { return m_indices.size(); }
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename DenseDerived>
void evalTo(MatrixBase<DenseDerived>& other) const
{
other.setZero();
for (int i=0; i<rows();++i)
other.coeffRef(m_indices.coeff(i),i) = typename DenseDerived::Scalar(1);
}
#endif
/** \returns a Matrix object initialized from this permutation matrix. Notice that it
* is inefficient to return this Matrix object by value. For efficiency, favor using
* the Matrix constructor taking EigenBase objects.
*/
DenseMatrixType toDenseMatrix() const
{
return *this;
}
/** const version of indices(). */
const IndicesType& indices() const { return m_indices; }
/** \returns a reference to the stored array representing the permutation. */
IndicesType& indices() { return m_indices; }
/** Resizes to given size.
*/
inline void resize(Index size)
{
m_indices.resize(size);
}
/** Sets *this to be the identity permutation matrix */
void setIdentity()
{
for(Index i = 0; i < m_indices.size(); ++i)
m_indices.coeffRef(i) = i;
}
/** Sets *this to be the identity permutation matrix of given size.
*/
void setIdentity(Index size)
{
resize(size);
setIdentity();
}
/** Multiplies *this by the transposition \f$(ij)\f$ on the left.
*
* \returns a reference to *this.
*
* \warning This is much slower than applyTranspositionOnTheRight(int,int):
* this has linear complexity and requires a lot of branching.
*
* \sa applyTranspositionOnTheRight(int,int)
*/
PermutationMatrix& applyTranspositionOnTheLeft(Index i, Index j)
{
ei_assert(i>=0 && j>=0 && i<m_indices.size() && j<m_indices.size());
for(Index k = 0; k < m_indices.size(); ++k)
{
if(m_indices.coeff(k) == i) m_indices.coeffRef(k) = j;
else if(m_indices.coeff(k) == j) m_indices.coeffRef(k) = i;
}
return *this;
}
/** Multiplies *this by the transposition \f$(ij)\f$ on the right.
*
* \returns a reference to *this.
*
* This is a fast operation, it only consists in swapping two indices.
*
* \sa applyTranspositionOnTheLeft(int,int)
*/
PermutationMatrix& applyTranspositionOnTheRight(Index i, Index j)
{
ei_assert(i>=0 && j>=0 && i<m_indices.size() && j<m_indices.size());
std::swap(m_indices.coeffRef(i), m_indices.coeffRef(j));
return *this;
}
/** \returns the inverse permutation matrix.
*
* \note \note_try_to_help_rvo
*/
inline Transpose<PermutationMatrix> inverse() const
{ return *this; }
/** \returns the transpose permutation matrix.
*
* \note \note_try_to_help_rvo
*/
inline Transpose<PermutationMatrix> transpose() const
{ return *this; }
/**** multiplication helpers to hopefully get RVO ****/
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<int OtherSize, int OtherMaxSize>
PermutationMatrix(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other)
template<typename Other>
PermutationMatrix(const Transpose<PermutationBase<Other> >& other)
: m_indices(other.nestedPermutation().size())
{
for (int i=0; i<rows();++i) m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i;
for (int i=0; i<m_indices.size();++i) m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i;
}
protected:
enum Product_t {Product};
PermutationMatrix(Product_t, const PermutationMatrix& lhs, const PermutationMatrix& rhs)
: m_indices(lhs.m_indices.size())
template<typename Lhs,typename Rhs>
PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs)
: m_indices(lhs.indices().size())
{
ei_assert(lhs.cols() == rhs.rows());
for (int i=0; i<rows();++i) m_indices.coeffRef(i) = lhs.m_indices.coeff(rhs.m_indices.coeff(i));
Base::assignProduct(lhs,rhs);
}
#endif
public:
/** \returns the product permutation matrix.
*
* \note \note_try_to_help_rvo
*/
template<int OtherSize, int OtherMaxSize>
inline PermutationMatrix operator*(const PermutationMatrix<OtherSize, OtherMaxSize>& other) const
{ return PermutationMatrix(Product, *this, other); }
/** \returns the product of a permutation with another inverse permutation.
*
* \note \note_try_to_help_rvo
*/
template<int OtherSize, int OtherMaxSize>
inline PermutationMatrix operator*(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other) const
{ return PermutationMatrix(Product, *this, other.eval()); }
/** \returns the product of an inverse permutation with another permutation.
*
* \note \note_try_to_help_rvo
*/
template<int OtherSize, int OtherMaxSize> friend
inline PermutationMatrix operator*(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other, const PermutationMatrix& perm)
{ return PermutationMatrix(Product, other.eval(), perm); }
protected:
IndicesType m_indices;
};
namespace internal {
// Traits of a mapped PermutationMatrix: inherited from the traits of the square
// Matrix<IndexType,...> of the same (max) size; the indices are viewed through a
// Map over a const column vector.
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess> >
  : traits<Matrix<IndexType,
                  SizeAtCompileTime, SizeAtCompileTime,
                  0,
                  MaxSizeAtCompileTime, MaxSizeAtCompileTime> >
{
  typedef Map<const Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
  typedef IndexType Index;
};
}
// Specialization of Map for PermutationMatrix: a permutation whose index vector is a
// Map built from a caller-supplied pointer.
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess>
  : public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess> >
{
    typedef PermutationBase<Map> Base;
    typedef internal::traits<Map> Traits;

  public:

    #ifndef EIGEN_PARSED_BY_DOXYGEN
    typedef typename Traits::IndicesType IndicesType;
    typedef typename IndicesType::Scalar Index;
    #endif

    /** Maps the indices starting at \a indices. */
    inline Map(const Index* indices)
      : m_indices(indices)
    {
    }

    /** Maps \a size indices starting at \a indices. */
    inline Map(const Index* indices, Index size)
      : m_indices(indices,size)
    {
    }

    /** Copies the other permutation into *this (delegates to PermutationBase). */
    template<typename Other>
    Map& operator=(const PermutationBase<Other>& other)
    {
      return Base::operator=(other.derived());
    }

    /** Assignment from the Transpositions \a tr (delegates to PermutationBase). */
    template<typename Other>
    Map& operator=(const TranspositionsBase<Other>& tr)
    {
      return Base::operator=(tr.derived());
    }

    #ifndef EIGEN_PARSED_BY_DOXYGEN
    /** Non-template counterpart of the templated operator=. It exists so that a
      * default operator= does not hide the templated one.
      */
    Map& operator=(const Map& other)
    {
      m_indices = other.m_indices;
      return *this;
    }
    #endif

    /** const version of indices(). */
    const IndicesType& indices() const
    {
      return m_indices;
    }

    /** \returns a reference to the stored array representing the permutation. */
    IndicesType& indices()
    {
      return m_indices;
    }

  protected:

    IndicesType m_indices;
};
/** \class PermutationWrapper
* \ingroup Core_Module
*
* \brief Class to view a vector of integers as a permutation matrix
*
* \param _IndicesType the type of the vector of integers (can be any compatible expression)
*
* This class allows viewing any vector expression of integers as a permutation matrix.
*
* \sa class PermutationBase, class PermutationMatrix
*/
// Empty tag type; used as the StorageKind of permutation expressions in their traits.
struct PermutationStorage {};
// Forward declaration only; the class is defined elsewhere.
template<typename _IndicesType> class TranspositionsWrapper;
namespace internal {
// Traits of PermutationWrapper<_IndicesType>: a square permutation expression whose
// compile-time dimensions are both derived from the size of the wrapped index vector.
template<typename _IndicesType>
struct traits<PermutationWrapper<_IndicesType> >
{
  typedef PermutationStorage StorageKind;
  // Scalar and Index are both the integer type stored in the index vector.
  typedef typename _IndicesType::Scalar Scalar;
  typedef typename _IndicesType::Scalar Index;
  typedef _IndicesType IndicesType;
  enum {
    RowsAtCompileTime = _IndicesType::SizeAtCompileTime,
    ColsAtCompileTime = _IndicesType::SizeAtCompileTime,
    // The permutation is square, so both maxima must follow the maximal *size* of the
    // index vector. Using the vector's own MaxRows/MaxCols would yield a maximum of 1
    // along one dimension (e.g. Cols == N but MaxCols == 1 for a column vector).
    MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime,
    MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime,
    Flags = 0,
    CoeffReadCost = _IndicesType::CoeffReadCost
  };
};
}
// Adapter exposing an existing index-vector expression through the PermutationBase
// interface. Only read access to the indices is provided by this class.
template<typename _IndicesType>
class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesType> >
{
typedef PermutationBase<PermutationWrapper> Base;
typedef internal::traits<PermutationWrapper> Traits;
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
typedef typename Traits::IndicesType IndicesType;
#endif
// Wraps the expression \a indices; it is stored as IndicesType::Nested.
// NOTE(review): whether that is a copy or a reference depends on IndicesType::Nested,
// which is not defined here - confirm lifetime requirements at the call sites.
inline PermutationWrapper(const IndicesType& indices)
: m_indices(indices)
{}
/** const version of indices(). */
const typename internal::remove_all<typename IndicesType::Nested>::type&
indices() const { return m_indices; }
protected:
// The wrapped index expression (const, never modified through the wrapper).
const typename IndicesType::Nested m_indices;
};
/** \returns the matrix with the permutation applied to the columns.
*/
template<typename Derived, int SizeAtCompileTime, int MaxSizeAtCompileTime>
inline const ei_permut_matrix_product_retval<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheRight>
template<typename Derived, typename PermutationDerived>
inline const internal::permut_matrix_product_retval<PermutationDerived, Derived, OnTheRight>
operator*(const MatrixBase<Derived>& matrix,
const PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> &permutation)
const PermutationBase<PermutationDerived> &permutation)
{
return ei_permut_matrix_product_retval
<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheRight>
(permutation, matrix.derived());
return internal::permut_matrix_product_retval
<PermutationDerived, Derived, OnTheRight>
(permutation.derived(), matrix.derived());
}
/** \returns the matrix with the permutation applied to the rows.
*/
template<typename Derived, int SizeAtCompileTime, int MaxSizeAtCompileTime>
inline const ei_permut_matrix_product_retval
<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheLeft>
operator*(const PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> &permutation,
template<typename Derived, typename PermutationDerived>
inline const internal::permut_matrix_product_retval
<PermutationDerived, Derived, OnTheLeft>
operator*(const PermutationBase<PermutationDerived> &permutation,
const MatrixBase<Derived>& matrix)
{
return ei_permut_matrix_product_retval
<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheLeft>
(permutation, matrix.derived());
return internal::permut_matrix_product_retval
<PermutationDerived, Derived, OnTheLeft>
(permutation.derived(), matrix.derived());
}
namespace internal {
template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
struct ei_traits<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
struct traits<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
{
typedef typename MatrixType::PlainObject ReturnType;
};
template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
struct ei_permut_matrix_product_retval
: public ReturnByValue<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
struct permut_matrix_product_retval
: public ReturnByValue<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
{
typedef typename ei_cleantype<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
ei_permut_matrix_product_retval(const PermutationType& perm, const MatrixType& matrix)
permut_matrix_product_retval(const PermutationType& perm, const MatrixType& matrix)
: m_permutation(perm), m_matrix(matrix)
{}
@@ -346,7 +564,7 @@ struct ei_permut_matrix_product_retval
{
const int n = Side==OnTheLeft ? rows() : cols();
if(ei_is_same_type<MatrixTypeNestedCleaned,Dest>::ret && ei_extract_data(dst) == ei_extract_data(m_matrix))
if(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix))
{
// apply the permutation inplace
Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
@@ -382,7 +600,7 @@ struct ei_permut_matrix_product_retval
=
Block<MatrixTypeNestedCleaned,Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime>
Block<const MatrixTypeNestedCleaned,Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime>
(m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i);
}
}
@@ -395,23 +613,25 @@ struct ei_permut_matrix_product_retval
/* Template partial specialization for transposed/inverse permutations */
template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
struct ei_traits<Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > >
: ei_traits<Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
template<typename Derived>
struct traits<Transpose<PermutationBase<Derived> > >
: traits<Derived>
{};
template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
class Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
: public EigenBase<Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > >
} // end namespace internal
template<typename Derived>
class Transpose<PermutationBase<Derived> >
: public EigenBase<Transpose<PermutationBase<Derived> > >
{
typedef PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> PermutationType;
typedef Derived PermutationType;
typedef typename PermutationType::IndicesType IndicesType;
typedef typename PermutationType::PlainPermutationType PlainPermutationType;
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
typedef ei_traits<PermutationType> Traits;
typedef Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime>
DenseMatrixType;
typedef internal::traits<PermutationType> Traits;
typedef typename Derived::DenseMatrixType DenseMatrixType;
enum {
Flags = Traits::Flags,
CoeffReadCost = Traits::CoeffReadCost,
@@ -439,26 +659,26 @@ class Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
#endif
/** \return the equivalent permutation matrix */
PermutationType eval() const { return *this; }
PlainPermutationType eval() const { return *this; }
DenseMatrixType toDenseMatrix() const { return *this; }
/** \returns the matrix with the inverse permutation applied to the columns.
*/
template<typename Derived> friend
inline const ei_permut_matrix_product_retval<PermutationType, Derived, OnTheRight, true>
operator*(const MatrixBase<Derived>& matrix, const Transpose& trPerm)
template<typename OtherDerived> friend
inline const internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheRight, true>
operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trPerm)
{
return ei_permut_matrix_product_retval<PermutationType, Derived, OnTheRight, true>(trPerm.m_permutation, matrix.derived());
return internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheRight, true>(trPerm.m_permutation, matrix.derived());
}
/** \returns the matrix with the inverse permutation applied to the rows.
*/
template<typename Derived>
inline const ei_permut_matrix_product_retval<PermutationType, Derived, OnTheLeft, true>
operator*(const MatrixBase<Derived>& matrix) const
template<typename OtherDerived>
inline const internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheLeft, true>
operator*(const MatrixBase<OtherDerived>& matrix) const
{
return ei_permut_matrix_product_retval<PermutationType, Derived, OnTheLeft, true>(m_permutation, matrix.derived());
return internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheLeft, true>(m_permutation, matrix.derived());
}
const PermutationType& nestedPermutation() const { return m_permutation; }
@@ -467,4 +687,10 @@ class Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
const PermutationType& m_permutation;
};
/** \returns \c *this viewed as a permutation matrix, i.e. a PermutationWrapper built
  * around the derived expression.
  */
template<typename Derived>
const PermutationWrapper<const Derived> MatrixBase<Derived>::asPermutation() const
{
  return PermutationWrapper<const Derived>(derived());
}
#endif // EIGEN_PERMUTATIONMATRIX_H

View File

@@ -32,25 +32,48 @@
# define EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
#endif
template <typename Derived, typename OtherDerived = Derived, bool IsVector = static_cast<bool>(Derived::IsVectorAtCompileTime)> struct ei_conservative_resize_like_impl;
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct ei_matrix_swap_impl;
namespace internal {
/** \internal
  * Checks that \a rows * \a cols can be represented by \c Index and that neither
  * dimension is negative; calls throw_std_bad_alloc() otherwise.
  */
template<typename Index>
EIGEN_ALWAYS_INLINE void check_rows_cols_for_overflow(Index rows, Index cols)
{
  // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
  // We assume Index is a signed integer type made of 8-bit bytes.
  // The largest Index value is built as (2^(N-2) - 1) + 2^(N-2) using Index arithmetic
  // only. Shifting a size_t instead is undefined whenever Index is wider than size_t.
  const Index half_range = Index(1) << (8 * sizeof(Index) - 2);
  const Index max_index = (half_range - 1) + half_range;
  // Division (rather than multiplication) keeps the test itself free of overflow;
  // the zero case is handled first so that we never divide by zero.
  bool error = (rows < 0 || cols < 0)  ? true
             : (rows == 0 || cols == 0) ? false
                                        : (rows > max_index / cols);
  if (error)
    throw_std_bad_alloc();
}
template <typename Derived, typename OtherDerived = Derived, bool IsVector = bool(Derived::IsVectorAtCompileTime)> struct conservative_resize_like_impl;
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct matrix_swap_impl;
} // end namespace internal
/**
* \brief %Dense storage base class for matrices and arrays.
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN.
*
* \sa \ref TopicClassHierarchy
*/
template<typename Derived>
class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
{
public:
enum { Options = ei_traits<Derived>::Options };
typedef typename ei_dense_xpr_base<Derived>::type Base;
enum { Options = internal::traits<Derived>::Options };
typedef typename internal::dense_xpr_base<Derived>::type Base;
typedef typename ei_traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index;
typedef typename ei_traits<Derived>::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename internal::traits<Derived>::Index Index;
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Derived DenseType;
using Base::RowsAtCompileTime;
using Base::ColsAtCompileTime;
@@ -61,17 +84,25 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
using Base::IsVectorAtCompileTime;
using Base::Flags;
template<typename PlainObjectType, int MapOptions, typename StrideType> friend class Eigen::Map;
friend class Eigen::Map<Derived, Unaligned>;
typedef class Eigen::Map<Derived, Unaligned> UnalignedMapType;
typedef Eigen::Map<Derived, Unaligned> MapType;
friend class Eigen::Map<const Derived, Unaligned>;
typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;
friend class Eigen::Map<Derived, Aligned>;
typedef class Eigen::Map<Derived, Aligned> AlignedMapType;
typedef Eigen::Map<Derived, Aligned> AlignedMapType;
friend class Eigen::Map<const Derived, Aligned>;
typedef const Eigen::Map<const Derived, Aligned> ConstAlignedMapType;
// Metafunctions giving the Map type over Derived for a given StrideType; the four
// variants differ in constness of the mapped type (Derived / const Derived) and in the
// alignment option (Unaligned / Aligned).
template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };
template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, Aligned, StrideType> type; };
template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, Aligned, StrideType> type; };
protected:
ei_matrix_storage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
public:
enum { NeedsToAlign = (!(Options&DontAlign))
&& SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::Flags & AlignedBit) != 0 };
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
Base& base() { return *static_cast<Base*>(this); }
@@ -106,34 +137,51 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
return m_storage.data()[index];
}
/** Read-only reference to the coefficient at (\a row, \a col). The position inside the
  * storage buffer is computed according to the storage order given by \c Flags.
  */
EIGEN_STRONG_INLINE const Scalar& coeffRef(Index row, Index col) const
{
  return m_storage.data()[(Flags & RowMajorBit)
                            ? col + row * m_storage.cols()   // row-major
                            : row + col * m_storage.rows()]; // column-major
}
/** Read-only reference to the coefficient at position \a index of the storage buffer. */
EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const
{
  return *(m_storage.data() + index);
}
/** \internal */
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
{
return ei_ploadt<PacketScalar, LoadMode>
return internal::ploadt<PacketScalar, LoadMode>
(m_storage.data() + (Flags & RowMajorBit
? col + row * m_storage.cols()
: row + col * m_storage.rows()));
}
/** \internal */
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
{
return ei_ploadt<PacketScalar, LoadMode>(m_storage.data() + index);
return internal::ploadt<PacketScalar, LoadMode>(m_storage.data() + index);
}
/** \internal */
template<int StoreMode>
EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketScalar& x)
{
ei_pstoret<Scalar, PacketScalar, StoreMode>
internal::pstoret<Scalar, PacketScalar, StoreMode>
(m_storage.data() + (Flags & RowMajorBit
? col + row * m_storage.cols()
: row + col * m_storage.rows()), x);
}
/** \internal */
template<int StoreMode>
EIGEN_STRONG_INLINE void writePacket(Index index, const PacketScalar& x)
{
ei_pstoret<Scalar, PacketScalar, StoreMode>(m_storage.data() + index, x);
internal::pstoret<Scalar, PacketScalar, StoreMode>(m_storage.data() + index, x);
}
/** \returns a const pointer to the data array of this matrix */
@@ -163,11 +211,13 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
{
  // The product rows*cols is validated once, before it is computed below.
  internal::check_rows_cols_for_overflow(rows, cols);
#ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
  const Index new_size = rows*cols;
  // Remember whether the total size changes, then resize the storage.
  const bool size_changed = new_size != this->size();
  m_storage.resize(new_size, rows, cols);
  if(size_changed) EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
#else
  m_storage.resize(rows*cols, rows, cols);
#endif
}
@@ -185,8 +235,8 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
*/
inline void resize(Index size)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(DenseStorageBase)
ei_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == size);
EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == size);
#ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
bool size_changed = size != this->size();
#endif
@@ -236,47 +286,62 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
{
const OtherDerived& other = _other.derived();
internal::check_rows_cols_for_overflow(other.rows(), other.cols());
const Index othersize = other.rows()*other.cols();
if(RowsAtCompileTime == 1)
{
ei_assert(other.rows() == 1 || other.cols() == 1);
eigen_assert(other.rows() == 1 || other.cols() == 1);
resize(1, othersize);
}
else if(ColsAtCompileTime == 1)
{
ei_assert(other.rows() == 1 || other.cols() == 1);
eigen_assert(other.rows() == 1 || other.cols() == 1);
resize(othersize, 1);
}
else resize(other.rows(), other.cols());
}
/** Resizes \c *this to a \a rows x \a cols matrix while leaving old values of \c *this untouched.
/** Resizes the matrix to \a rows x \a cols while leaving old values untouched.
*
* This method is intended for dynamic-size matrices. If you only want to change the number
* of rows and/or of columns, you can use conservativeResize(NoChange_t, Index),
* The method is intended for matrices of dynamic size. If you only want to change the number
* of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or
* conservativeResize(Index, NoChange_t).
*
* The top-left part of the resized matrix will be the same as the overlapping top-left corner
* of \c *this. In case values need to be appended to the matrix they will be uninitialized.
* Matrices are resized relative to the top-left element. In case values need to be
* appended to the matrix they will be uninitialized.
*/
EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols)
{
ei_conservative_resize_like_impl<Derived>::run(*this, rows, cols);
internal::conservative_resize_like_impl<Derived>::run(*this, rows, cols);
}
/** Resizes the matrix to \a rows rows while keeping the current number of columns
* and leaving old values untouched.
*
* As opposed to conservativeResize(Index rows, Index cols), this version leaves
* the number of columns unchanged.
*
* In case the matrix is growing, new rows will be uninitialized.
*/
EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t)
{
// Note: see the comment in conservativeResize(Index,Index)
// Forwards to the two-argument overload, passing the current cols().
conservativeResize(rows, cols());
}
/** Resizes the matrix to \a cols columns while keeping the current number of rows
* and leaving old values untouched.
*
* As opposed to conservativeResize(Index rows, Index cols), this version leaves
* the number of rows unchanged.
*
* In case the matrix is growing, new columns will be uninitialized.
*/
EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols)
{
// Note: see the comment in conservativeResize(Index,Index)
// Forwards to the two-argument overload, passing the current rows().
conservativeResize(rows(), cols);
}
/** Resizes \c *this to a vector of length \a size while retaining old values of *this.
/** Resizes the vector to \a size while retaining old values.
*
* \only_for_vectors. This method does not work for
* partially dynamic matrices when the static dimension is anything other
@@ -286,19 +351,28 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
*/
EIGEN_STRONG_INLINE void conservativeResize(Index size)
{
ei_conservative_resize_like_impl<Derived>::run(*this, size);
internal::conservative_resize_like_impl<Derived>::run(*this, size);
}
/** Resizes the matrix to \a rows x \a cols of \c other, while leaving old values untouched.
*
* The method is intended for matrices of dynamic size. If you only want to change the number
* of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or
* conservativeResize(Index, NoChange_t).
*
* Matrices are resized relative to the top-left element. In case values need to be
* appended to the matrix they will be copied from \c other.
*/
template<typename OtherDerived>
EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase<OtherDerived>& other)
{
ei_conservative_resize_like_impl<Derived,OtherDerived>::run(*this, other);
internal::conservative_resize_like_impl<Derived,OtherDerived>::run(*this, other);
}
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
EIGEN_STRONG_INLINE Derived& operator=(const DenseStorageBase& other)
EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other)
{
return _set(other);
}
@@ -318,7 +392,7 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
return Base::operator=(func);
}
EIGEN_STRONG_INLINE explicit DenseStorageBase() : m_storage()
EIGEN_STRONG_INLINE explicit PlainObjectBase() : m_storage()
{
// _check_template_params();
// EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
@@ -327,14 +401,14 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
#ifndef EIGEN_PARSED_BY_DOXYGEN
// FIXME is it still needed ?
/** \internal */
DenseStorageBase(ei_constructor_without_unaligned_array_assert)
: m_storage(ei_constructor_without_unaligned_array_assert())
PlainObjectBase(internal::constructor_without_unaligned_array_assert)
: m_storage(internal::constructor_without_unaligned_array_assert())
{
// _check_template_params(); EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
}
#endif
EIGEN_STRONG_INLINE DenseStorageBase(Index size, Index rows, Index cols)
EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)
: m_storage(size, rows, cols)
{
// _check_template_params();
@@ -353,10 +427,11 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
template<typename OtherDerived>
EIGEN_STRONG_INLINE DenseStorageBase(const EigenBase<OtherDerived> &other)
EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
: m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
{
_check_template_params();
internal::check_rows_cols_for_overflow(other.derived().rows(), other.derived().cols());
Base::operator=(other.derived());
}
@@ -365,37 +440,72 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
* while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
* \a data pointers.
*
* These methods do not allow to specify strides. If you need to specify strides, you have to
* use the Map class directly.
*
* \see class Map
*/
//@{
inline static const UnalignedMapType Map(const Scalar* data)
{ return UnalignedMapType(data); }
inline static UnalignedMapType Map(Scalar* data)
{ return UnalignedMapType(data); }
inline static const UnalignedMapType Map(const Scalar* data, Index size)
{ return UnalignedMapType(data, size); }
inline static UnalignedMapType Map(Scalar* data, Index size)
{ return UnalignedMapType(data, size); }
inline static const UnalignedMapType Map(const Scalar* data, Index rows, Index cols)
{ return UnalignedMapType(data, rows, cols); }
inline static UnalignedMapType Map(Scalar* data, Index rows, Index cols)
{ return UnalignedMapType(data, rows, cols); }
inline static ConstMapType Map(const Scalar* data)
{ return ConstMapType(data); }
inline static MapType Map(Scalar* data)
{ return MapType(data); }
inline static ConstMapType Map(const Scalar* data, Index size)
{ return ConstMapType(data, size); }
inline static MapType Map(Scalar* data, Index size)
{ return MapType(data, size); }
inline static ConstMapType Map(const Scalar* data, Index rows, Index cols)
{ return ConstMapType(data, rows, cols); }
inline static MapType Map(Scalar* data, Index rows, Index cols)
{ return MapType(data, rows, cols); }
inline static const AlignedMapType MapAligned(const Scalar* data)
{ return AlignedMapType(data); }
inline static ConstAlignedMapType MapAligned(const Scalar* data)
{ return ConstAlignedMapType(data); }
inline static AlignedMapType MapAligned(Scalar* data)
{ return AlignedMapType(data); }
inline static const AlignedMapType MapAligned(const Scalar* data, Index size)
{ return AlignedMapType(data, size); }
inline static ConstAlignedMapType MapAligned(const Scalar* data, Index size)
{ return ConstAlignedMapType(data, size); }
inline static AlignedMapType MapAligned(Scalar* data, Index size)
{ return AlignedMapType(data, size); }
inline static const AlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
{ return AlignedMapType(data, rows, cols); }
inline static ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
{ return ConstAlignedMapType(data, rows, cols); }
inline static AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)
{ return AlignedMapType(data, rows, cols); }
// Strided overloads of Map(): same argument lists as the non-strided versions (data only;
// data + size; data + rows + cols) with an additional Stride<Outer, Inner>. Each one
// constructs and returns the unaligned strided Map type; a const data pointer selects
// the StridedConstMapType variant.
template<int Outer, int Inner>
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, stride); }
// data + size
template<int Outer, int Inner>
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
// data + rows + cols
template<int Outer, int Inner>
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
template<int Outer, int Inner>
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
// Strided overloads of MapAligned(): identical in shape to the strided Map() overloads,
// but they construct and return the Aligned strided Map types; a const data pointer
// selects the StridedConstAlignedMapType variant.
template<int Outer, int Inner>
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
// data + size
template<int Outer, int Inner>
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
// data + rows + cols
template<int Outer, int Inner>
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
template<int Outer, int Inner>
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
//@}
using Base::setConstant;
@@ -414,8 +524,8 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
Derived& setRandom(Index size);
Derived& setRandom(Index rows, Index cols);
#ifdef EIGEN_DENSESTORAGEBASE_PLUGIN
#include EIGEN_DENSESTORAGEBASE_PLUGIN
#ifdef EIGEN_PLAINOBJECTBASE_PLUGIN
#include EIGEN_PLAINOBJECTBASE_PLUGIN
#endif
protected:
@@ -430,9 +540,10 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase<OtherDerived>& other)
{
#ifdef EIGEN_NO_AUTOMATIC_RESIZING
ei_assert((this->size()==0 || (IsVectorAtCompileTime ? (this->size() == other.size())
eigen_assert((this->size()==0 || (IsVectorAtCompileTime ? (this->size() == other.size())
: (rows() == other.rows() && cols() == other.cols())))
&& "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
EIGEN_ONLY_USED_FOR_DEBUG(other);
#else
resizeLike(other);
#endif
@@ -455,15 +566,15 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other)
{
_set_selector(other.derived(), typename ei_meta_if<static_cast<bool>(int(OtherDerived::Flags) & EvalBeforeAssigningBit), ei_meta_true, ei_meta_false>::ret());
_set_selector(other.derived(), typename internal::conditional<static_cast<bool>(int(OtherDerived::Flags) & EvalBeforeAssigningBit), internal::true_type, internal::false_type>::type());
return this->derived();
}
template<typename OtherDerived>
EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const ei_meta_true&) { _set_noalias(other.eval()); }
EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::true_type&) { _set_noalias(other.eval()); }
template<typename OtherDerived>
EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const ei_meta_false&) { _set_noalias(other); }
EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::false_type&) { _set_noalias(other); }
/** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which
* is the case when creating a new matrix) so one can enforce lazy evaluation.
@@ -478,36 +589,37 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
//_resize_to_match(other);
// the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because
// it wouldn't allow to copy a row-vector into a column-vector.
return ei_assign_selector<Derived,OtherDerived,false>::run(this->derived(), other.derived());
return internal::assign_selector<Derived,OtherDerived,false>::run(this->derived(), other.derived());
}
template<typename T0, typename T1>
EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename ei_enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
{
ei_assert(rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
eigen_assert(rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
internal::check_rows_cols_for_overflow(rows, cols);
m_storage.resize(rows*cols,rows,cols);
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
}
template<typename T0, typename T1>
EIGEN_STRONG_INLINE void _init2(const Scalar& x, const Scalar& y, typename ei_enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
EIGEN_STRONG_INLINE void _init2(const Scalar& x, const Scalar& y, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(DenseStorageBase, 2)
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
m_storage.data()[0] = x;
m_storage.data()[1] = y;
}
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
friend struct ei_matrix_swap_impl;
friend struct internal::matrix_swap_impl;
/** \internal generic implementation of swap for dense storage since for dynamic-sized matrices of same type it is enough to swap the
* data pointers.
*/
template<typename OtherDerived>
void _swap(DenseBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other)
void _swap(DenseBase<OtherDerived> const & other)
{
enum { SwapPointers = ei_is_same_type<Derived, OtherDerived>::ret && Base::SizeAtCompileTime==Dynamic };
ei_matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.const_cast_derived());
enum { SwapPointers = internal::is_same<Derived, OtherDerived>::value && Base::SizeAtCompileTime==Dynamic };
internal::matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.const_cast_derived());
}
public:
@@ -526,10 +638,13 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
INVALID_MATRIX_TEMPLATE_PARAMETERS)
}
#endif
private:
enum { ThisConstantIsPrivateInPlainObjectBase };
};
template <typename Derived, typename OtherDerived, bool IsVector>
struct ei_conservative_resize_like_impl
struct internal::conservative_resize_like_impl
{
typedef typename Derived::Index Index;
static void run(DenseBase<Derived>& _this, Index rows, Index cols)
@@ -540,14 +655,15 @@ struct ei_conservative_resize_like_impl
if ( ( Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows
(!Derived::IsRowMajor && _this.rows() == rows) ) // column-major and we change only the number of columns
{
internal::check_rows_cols_for_overflow(rows, cols);
_this.derived().m_storage.conservativeResize(rows*cols,rows,cols);
}
else
{
// The storage order does not allow us to use reallocation.
typename Derived::PlainObject tmp(rows,cols);
const Index common_rows = std::min(rows, _this.rows());
const Index common_cols = std::min(cols, _this.cols());
const Index common_rows = (std::min)(rows, _this.rows());
const Index common_cols = (std::min)(cols, _this.cols());
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
_this.derived().swap(tmp);
}
@@ -580,16 +696,18 @@ struct ei_conservative_resize_like_impl
{
// The storage order does not allow us to use reallocation.
typename Derived::PlainObject tmp(other);
const Index common_rows = std::min(tmp.rows(), _this.rows());
const Index common_cols = std::min(tmp.cols(), _this.cols());
const Index common_rows = (std::min)(tmp.rows(), _this.rows());
const Index common_cols = (std::min)(tmp.cols(), _this.cols());
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
_this.derived().swap(tmp);
}
}
};
namespace internal {
template <typename Derived, typename OtherDerived>
struct ei_conservative_resize_like_impl<Derived,OtherDerived,true>
struct conservative_resize_like_impl<Derived,OtherDerived,true>
{
typedef typename Derived::Index Index;
static void run(DenseBase<Derived>& _this, Index size)
@@ -615,7 +733,7 @@ struct ei_conservative_resize_like_impl<Derived,OtherDerived,true>
};
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
struct ei_matrix_swap_impl
struct matrix_swap_impl
{
static inline void run(MatrixTypeA& a, MatrixTypeB& b)
{
@@ -624,7 +742,7 @@ struct ei_matrix_swap_impl
};
template<typename MatrixTypeA, typename MatrixTypeB>
struct ei_matrix_swap_impl<MatrixTypeA, MatrixTypeB, true>
struct matrix_swap_impl<MatrixTypeA, MatrixTypeB, true>
{
static inline void run(MatrixTypeA& a, MatrixTypeB& b)
{
@@ -632,4 +750,6 @@ struct ei_matrix_swap_impl<MatrixTypeA, MatrixTypeB, true>
}
};
} // end namespace internal
#endif // EIGEN_DENSESTORAGEBASE_H

View File

@@ -45,39 +45,57 @@
*
* \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
*/
template<typename Lhs, typename Rhs, int ProductType = ei_product_type<Lhs,Rhs>::value>
template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
class GeneralProduct;
template<int Rows, int Cols, int Depth> struct ei_product_type_selector;
// Size categories passed to the product-type selector as its Rows/Cols/Depth
// template arguments. A dimension that is exactly one is passed as the literal
// value 1 rather than through a named constant; both values below differ from 1.
enum {
Large = 2,
Small = 3
};
template<typename Lhs, typename Rhs> struct ei_product_type
namespace internal {
template<int Rows, int Cols, int Depth> struct product_type_selector;
template<int Size, int MaxSize> struct product_size_category
{
typedef typename ei_cleantype<Lhs>::type _Lhs;
typedef typename ei_cleantype<Rhs>::type _Rhs;
enum { is_large = MaxSize == Dynamic ||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
value = is_large ? Large
: Size == 1 ? 1
: Small
};
};
template<typename Lhs, typename Rhs> struct product_type
{
typedef typename remove_all<Lhs>::type _Lhs;
typedef typename remove_all<Rhs>::type _Rhs;
enum {
Rows = _Lhs::MaxRowsAtCompileTime,
Cols = _Rhs::MaxColsAtCompileTime,
Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,_Rhs::MaxRowsAtCompileTime)
MaxRows = _Lhs::MaxRowsAtCompileTime,
Rows = _Lhs::RowsAtCompileTime,
MaxCols = _Rhs::MaxColsAtCompileTime,
Cols = _Rhs::ColsAtCompileTime,
MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
_Rhs::MaxRowsAtCompileTime),
Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
_Rhs::RowsAtCompileTime),
LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
};
// the splitting into different lines of code here, introducing the _select enums and the typedef below,
// is to work around an internal compiler error with gcc 4.1 and 4.2.
private:
enum {
rows_select = Rows == Dynamic || Rows >=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Rows==1 ? 1 : Small),
cols_select = Cols == Dynamic || Cols >=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Cols==1 ? 1 : Small),
depth_select = Depth == Dynamic || Depth>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Depth==1 ? 1 : Small)
rows_select = product_size_category<Rows,MaxRows>::value,
cols_select = product_size_category<Cols,MaxCols>::value,
depth_select = product_size_category<Depth,MaxDepth>::value
};
typedef ei_product_type_selector<rows_select, cols_select, depth_select> product_type_selector;
typedef product_type_selector<rows_select, cols_select, depth_select> selector;
public:
enum {
value = product_type_selector::ret
value = selector::ret
};
#ifdef EIGEN_DEBUG_PRODUCT
static void debug()
@@ -93,32 +111,35 @@ public:
#endif
};
/* The following selects the kind of product at compile time
* based on the three dimensions of the product.
* This is a compile-time mapping from {1,Small,Large}^3 -> {product types} */
// FIXME I'm not sure the current mapping is the ideal one.
template<int M, int N> struct ei_product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
template<int Depth> struct ei_product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
template<> struct ei_product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
template<> struct ei_product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct ei_product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
template<> struct ei_product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; };
template<> struct ei_product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; };
template<> struct ei_product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
template<> struct ei_product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
template<> struct ei_product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
template<> struct ei_product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; };
template<> struct ei_product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; };
template<> struct ei_product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
// Template arguments are <Rows, Cols, Depth>, each being 1, Small or Large.
//
// Generic rules (partial specializations). A full specialization further down
// always takes precedence over these when it matches.
template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
// <1,1,1> matches both partial specializations above and neither is more
// specialized than the other, so it needs its own explicit specialization.
template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
// Every non-unit dimension is Small: coefficient-based product.
template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; };
// Depth == 1 with non-unit rows and columns, at most one of them Large:
// these override the <M,N,1> OuterProduct rule with a lazy coefficient-based product.
template<> struct product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
// Exactly one of Rows/Cols is 1 (matrix-vector shapes): GemvProduct only when
// both remaining dimensions are Large, coefficient-based otherwise.
template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; };
template<> struct product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; };
// Rows and Cols both non-unit, Depth non-unit, and at least one of the three
// dimensions Large: GemmProduct.
template<> struct product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
} // end namespace internal
/** \class ProductReturnType
* \ingroup Core_Module
@@ -127,7 +148,7 @@ template<> struct ei_product_type_selector<Large,Large,Small> { en
*
* \param Lhs the type of the left-hand side
* \param Rhs the type of the right-hand side
* \param ProductMode the type of the product (determined automatically by ei_product_mode)
* \param ProductType the type of the product (determined automatically by internal::product_type)
*
* This class defines the typename Type representing the optimized product expression
* between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
@@ -141,8 +162,8 @@ template<typename Lhs, typename Rhs, int ProductType>
struct ProductReturnType
{
// TODO use the nested type to reduce instantiations ????
// typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
// typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
// typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
// typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
};
@@ -150,16 +171,16 @@ struct ProductReturnType
template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
{
typedef typename ei_nested<Lhs, Rhs::ColsAtCompileTime, typename ei_plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename ei_nested<Rhs, Lhs::RowsAtCompileTime, typename ei_plain_matrix_type<Rhs>::type >::type RhsNested;
typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
};
template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
{
typedef typename ei_nested<Lhs, Rhs::ColsAtCompileTime, typename ei_plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename ei_nested<Rhs, Lhs::RowsAtCompileTime, typename ei_plain_matrix_type<Rhs>::type >::type RhsNested;
typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
};
@@ -179,28 +200,30 @@ struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedPr
// product ends up to a row-vector times col-vector product... To tackle this use
// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
namespace internal {
template<typename Lhs, typename Rhs>
struct ei_traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
: ei_traits<Matrix<typename ei_scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
: traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
{};
}
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, InnerProduct>
: ei_no_assignment_operator,
public Matrix<typename ei_scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
: internal::no_assignment_operator,
public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
{
typedef Matrix<typename ei_scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
public:
GeneralProduct(const Lhs& lhs, const Rhs& rhs)
{
EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::RealScalar, typename Rhs::RealScalar>::ret),
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
}
typename Base::Scalar value() const { return Base::coeff(0,0); }
/** Conversion to scalar */
operator const typename Base::Scalar() const {
return Base::coeff(0,0);
@@ -210,13 +233,17 @@ class GeneralProduct<Lhs, Rhs, InnerProduct>
/***********************************************************************
* Implementation of Outer Vector Vector Product
***********************************************************************/
template<int StorageOrder> struct ei_outer_product_selector;
namespace internal {
template<int StorageOrder> struct outer_product_selector;
template<typename Lhs, typename Rhs>
struct ei_traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
: ei_traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
: traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
{};
}
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, OuterProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
@@ -226,17 +253,19 @@ class GeneralProduct<Lhs, Rhs, OuterProduct>
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{
EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::RealScalar, typename Rhs::RealScalar>::ret),
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
}
template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
{
ei_outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
}
};
template<> struct ei_outer_product_selector<ColMajor> {
namespace internal {
template<> struct outer_product_selector<ColMajor> {
template<typename ProductType, typename Dest>
static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
typedef typename Dest::Index Index;
@@ -248,7 +277,7 @@ template<> struct ei_outer_product_selector<ColMajor> {
}
};
template<> struct ei_outer_product_selector<RowMajor> {
template<> struct outer_product_selector<RowMajor> {
template<typename ProductType, typename Dest>
static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
typedef typename Dest::Index Index;
@@ -260,6 +289,8 @@ template<> struct ei_outer_product_selector<RowMajor> {
}
};
} // end namespace internal
/***********************************************************************
* Implementation of General Matrix Vector Product
***********************************************************************/
@@ -271,13 +302,17 @@ template<> struct ei_outer_product_selector<RowMajor> {
* Therefore we need a lower level meta selector.
* Furthermore, if the matrix is the rhs, then the product has to be transposed.
*/
namespace internal {
template<typename Lhs, typename Rhs>
struct ei_traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
: ei_traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
: traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
{};
template<int Side, int StorageOrder, bool BlasCompatible>
struct ei_gemv_selector;
struct gemv_selector;
} // end namespace internal
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, GemvProduct>
@@ -291,40 +326,78 @@ class GeneralProduct<Lhs, Rhs, GemvProduct>
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{
// EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret),
// EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
}
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
typedef typename ei_meta_if<int(Side)==OnTheRight,_LhsNested,_RhsNested>::ret MatrixType;
typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
ei_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
ei_gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
bool(ei_blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
}
};
namespace internal {
// The vector is on the left => transposition
template<int StorageOrder, bool BlasCompatible>
struct ei_gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
Transpose<Dest> destT(dest);
enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
ei_gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
::run(GeneralProduct<Transpose<typename ProductType::_RhsNested>,Transpose<typename ProductType::_LhsNested>, GemvProduct>
gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
(prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
}
};
template<> struct ei_gemv_selector<OnTheRight,ColMajor,true>
// Helper exposing a data() pointer to an optional temporary buffer of Scalar.
// The bool parameter Cond selects whether the object carries any storage;
// the behaviour is defined entirely by the partial specializations.
template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
// Cond == false: no storage is held. data() is not expected to be reached:
// it triggers eigen_internal_assert and returns a null pointer.
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
{
EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
};
// Cond == true but MaxSize == Dynamic: there is no compile-time size bound, so
// no member storage is declared and data() returns a null pointer (without asserting).
// NOTE(review): callers presumably fall back to another allocation when they
// receive null here -- confirm against the code that consumes data().
template<typename Scalar,int Size>
struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
{
EIGEN_STRONG_INLINE Scalar* data() { return 0; }
};
// Cond == true with a compile-time MaxSize: the buffer is a member array whose
// element count is EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize), and data() returns
// a pointer into it.
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
{
// Static alignment available: hand out the array start directly.
#if EIGEN_ALIGN_STATICALLY
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
#else
// Some architectures cannot align on the stack,
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
enum {
ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
PacketSize = internal::packet_traits<Scalar>::size
};
// When ForceAlignment is set, PacketSize extra elements are appended as slack
// for the pointer adjustment done in data().
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() {
// (addr & ~15) + 16 clears the low four address bits and then adds 16 bytes:
// the result is 16-byte aligned and always lies past the array start, even
// when m_data.array was already aligned.
// NOTE(review): assumes PacketSize*sizeof(Scalar) >= 16 so the shifted buffer
// still fits in m_data -- confirm for every vectorizable Scalar.
return ForceAlignment
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(15))) + 16)
: m_data.array;
}
#endif
};
template<> struct gemv_selector<OnTheRight,ColMajor,true>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{
typedef typename ProductType::Index Index;
typedef typename ProductType::LhsScalar LhsScalar;
@@ -337,60 +410,65 @@ template<> struct ei_gemv_selector<OnTheRight,ColMajor,true>
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
const ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
const ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
enum {
// FIXME find a way to allow an inner stride on the result if ei_packet_traits<Scalar>::size==1
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on the other hand, it is good for the cache to pack the vector anyways...
EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex)
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
};
bool alphaIsCompatible = (!ComplexByReal) || (ei_imag(actualAlpha)==RealScalar(0));
gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
// this is written like this (i.e., with a ?:) to workaround an ICE with ICC 12
bool alphaIsCompatible = (!ComplexByReal) ? true : (imag(actualAlpha)==RealScalar(0));
bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
RhsScalar compatibleAlpha = ei_get_factor<ResScalar,RhsScalar>::run(actualAlpha);
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
ResScalar* actualDest;
if (evalToDest)
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
evalToDest ? dest.data() : static_dest.data());
if(!evalToDest)
{
actualDest = &dest.coeffRef(0);
}
else
{
actualDest = ei_aligned_stack_new(ResScalar,dest.size());
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if(!alphaIsCompatible)
{
MappedDest(actualDest, dest.size()).setZero();
MappedDest(actualDestPtr, dest.size()).setZero();
compatibleAlpha = RhsScalar(1);
}
else
MappedDest(actualDest, dest.size()) = dest;
MappedDest(actualDestPtr, dest.size()) = dest;
}
ei_general_matrix_vector_product
general_matrix_vector_product
<Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
&actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(),
&actualLhs.coeffRef(0,0), actualLhs.outerStride(),
actualRhs.data(), actualRhs.innerStride(),
actualDest, 1,
actualDestPtr, 1,
compatibleAlpha);
if (!evalToDest)
{
if(!alphaIsCompatible)
dest += actualAlpha * MappedDest(actualDest, dest.size());
dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
else
dest = MappedDest(actualDest, dest.size());
ei_aligned_stack_delete(ResScalar, actualDest, dest.size());
dest = MappedDest(actualDestPtr, dest.size());
}
}
};
template<> struct ei_gemv_selector<OnTheRight,RowMajor,true>
template<> struct gemv_selector<OnTheRight,RowMajor,true>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
@@ -405,41 +483,43 @@ template<> struct ei_gemv_selector<OnTheRight,RowMajor,true>
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
enum {
// FIXME I think here we really have to check for ei_packet_traits<Scalar>::size==1
// because in this case it is fine to have an inner stride
DirectlyUseRhs = ((ei_packet_traits<RhsScalar>::size==1) || (_ActualRhsType::Flags&ActualPacketAccessBit))
&& (!(_ActualRhsType::Flags & RowMajorBit))
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on, the other hand it is good for the cache to pack the vector anyways...
DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
};
RhsScalar* rhs_data;
if (DirectlyUseRhs)
rhs_data = &actualRhs.const_cast_derived().coeffRef(0);
else
gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
if(!DirectlyUseRhs)
{
rhs_data = ei_aligned_stack_new(RhsScalar, actualRhs.size());
Map<typename _ActualRhsType::PlainObject>(rhs_data, actualRhs.size()) = actualRhs;
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = actualRhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
}
ei_general_matrix_vector_product
general_matrix_vector_product
<Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
&actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(),
rhs_data, 1,
&actualLhs.coeffRef(0,0), actualLhs.outerStride(),
actualRhsPtr, 1,
&dest.coeffRef(0,0), dest.innerStride(),
actualAlpha);
if (!DirectlyUseRhs) ei_aligned_stack_delete(RhsScalar, rhs_data, prod.rhs().size());
}
};
template<> struct ei_gemv_selector<OnTheRight,ColMajor,false>
template<> struct gemv_selector<OnTheRight,ColMajor,false>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
@@ -452,7 +532,7 @@ template<> struct ei_gemv_selector<OnTheRight,ColMajor,false>
}
};
template<> struct ei_gemv_selector<OnTheRight,RowMajor,false>
template<> struct gemv_selector<OnTheRight,RowMajor,false>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
@@ -465,6 +545,8 @@ template<> struct ei_gemv_selector<OnTheRight,RowMajor,false>
}
};
} // end namespace internal
/***************************************************************************
* Implementation of matrix base methods
***************************************************************************/
@@ -481,7 +563,7 @@ inline const typename ProductReturnType<Derived,OtherDerived>::Type
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
{
// A note regarding the function declaration: In MSVC, this function will sometimes
// not be inlined since ei_matrix_storage is an unwindable object for dynamic
// not be inlined since DenseStorage is an unwindable object for dynamic
// matrices and product types are holding a member to store the result.
// Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
enum {
@@ -500,7 +582,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
#ifdef EIGEN_DEBUG_PRODUCT
ei_product_type<Derived,OtherDerived>::debug();
internal::product_type<Derived,OtherDerived>::debug();
#endif
return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
}

View File

@@ -29,29 +29,32 @@
* \ingroup Core_Module
*
*/
namespace internal {
// Compile-time traits of ProductBase<Derived,_Lhs,_Rhs>.
// NOTE(review): this listing shows the removed (ei_-prefixed) and the added
// (un-prefixed, inside namespace internal) form of every changed line back to back.
//  - Lhs / Rhs    : the operand types after ei_cleantype / remove_all.
//  - Scalar       : ReturnType of scalar_product_traits for the two operand scalars.
//  - StorageKind  : promoted from the storage kinds of both operands.
//  - Index        : promoted from the index types of both operands.
template<typename Derived, typename _Lhs, typename _Rhs>
struct ei_traits<ProductBase<Derived,_Lhs,_Rhs> >
struct traits<ProductBase<Derived,_Lhs,_Rhs> >
{
typedef MatrixXpr XprKind;
typedef typename ei_cleantype<_Lhs>::type Lhs;
typedef typename ei_cleantype<_Rhs>::type Rhs;
typedef typename ei_scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
typedef typename ei_promote_storage_type<typename ei_traits<Lhs>::StorageKind,
typename ei_traits<Rhs>::StorageKind>::ret StorageKind;
typedef typename ei_promote_index_type<typename ei_traits<Lhs>::Index,
typename ei_traits<Rhs>::Index>::type Index;
typedef typename remove_all<_Lhs>::type Lhs;
typedef typename remove_all<_Rhs>::type Rhs;
typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
typedef typename promote_storage_type<typename traits<Lhs>::StorageKind,
typename traits<Rhs>::StorageKind>::ret StorageKind;
typedef typename promote_index_type<typename traits<Lhs>::Index,
typename traits<Rhs>::Index>::type Index;
enum {
// Row counts (actual and maximum) are taken from the left operand,
// column counts from the right operand.
RowsAtCompileTime = ei_traits<Lhs>::RowsAtCompileTime,
ColsAtCompileTime = ei_traits<Rhs>::ColsAtCompileTime,
MaxRowsAtCompileTime = ei_traits<Lhs>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = ei_traits<Rhs>::MaxColsAtCompileTime,
RowsAtCompileTime = traits<Lhs>::RowsAtCompileTime,
ColsAtCompileTime = traits<Rhs>::ColsAtCompileTime,
MaxRowsAtCompileTime = traits<Lhs>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = traits<Rhs>::MaxColsAtCompileTime,
// RowMajorBit is set only when the product has at most one row.
Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0)
| EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit,
// Note that EvalBeforeNestingBit and NestByRefBit
// are not used in practice because ei_nested is overloaded for products
// are not used in practice because nested is overloaded for products
CoeffReadCost = 0 // FIXME why is it needed ?
};
};
}
#define EIGEN_PRODUCT_PUBLIC_INTERFACE(Derived) \
typedef ProductBase<Derived, Lhs, Rhs > Base; \
@@ -75,18 +78,20 @@ class ProductBase : public MatrixBase<Derived>
public:
typedef MatrixBase<Derived> Base;
EIGEN_DENSE_PUBLIC_INTERFACE(ProductBase)
protected:
typedef typename Lhs::Nested LhsNested;
typedef typename ei_cleantype<LhsNested>::type _LhsNested;
typedef ei_blas_traits<_LhsNested> LhsBlasTraits;
typedef typename internal::remove_all<LhsNested>::type _LhsNested;
typedef internal::blas_traits<_LhsNested> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef typename ei_cleantype<ActualLhsType>::type _ActualLhsType;
typedef typename internal::remove_all<ActualLhsType>::type _ActualLhsType;
typedef typename internal::traits<Lhs>::Scalar LhsScalar;
typedef typename Rhs::Nested RhsNested;
typedef typename ei_cleantype<RhsNested>::type _RhsNested;
typedef ei_blas_traits<_RhsNested> RhsBlasTraits;
typedef typename internal::remove_all<RhsNested>::type _RhsNested;
typedef internal::blas_traits<_RhsNested> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef typename ei_cleantype<ActualRhsType>::type _ActualRhsType;
typedef typename internal::remove_all<ActualRhsType>::type _ActualRhsType;
typedef typename internal::traits<Rhs>::Scalar RhsScalar;
// Diagonal of a product: no need to evaluate the arguments because they are going to be evaluated only once
typedef CoeffBasedProduct<LhsNested, RhsNested, 0> FullyLazyCoeffBaseProductType;
@@ -98,7 +103,7 @@ class ProductBase : public MatrixBase<Derived>
// Stores both operands in m_lhs / m_rhs and asserts that the inner dimensions
// agree (lhs.cols() == rhs.rows()). The string literals and-ed into the condition
// are always true; they only make the failed-assertion text more helpful.
ProductBase(const Lhs& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs)
{
ei_assert(lhs.cols() == rhs.rows()
eigen_assert(lhs.cols() == rhs.rows()
&& "invalid matrix product"
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
}
@@ -129,7 +134,7 @@ class ProductBase : public MatrixBase<Derived>
return m_result;
}
// Main diagonal of the product: builds a FullyLazyCoeffBaseProductType from the
// stored operands and returns it converted to a Diagonal expression.
// (Old and new return-type lines are both shown; the new one adds const to the nested type.)
const Diagonal<FullyLazyCoeffBaseProductType,0> diagonal() const
const Diagonal<const FullyLazyCoeffBaseProductType,0> diagonal() const
{ return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); }
template<int Index>
@@ -139,29 +144,58 @@ class ProductBase : public MatrixBase<Derived>
// Diagonal selected at run time: builds the lazy coefficient-based product of the
// stored operands and asks it for diagonal number `index`.
const Diagonal<FullyLazyCoeffBaseProductType,Dynamic> diagonal(Index index) const
{ return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); }
// Restrict the coefficient accessors to 1x1 expressions. There is no need to care
// about mutators here since this is not an lvalue expression.
//
// With EIGEN2_SUPPORT defined, the coefficient is computed directly as the sum of
// the coefficient-wise product of row `row` of lhs() and the transposed column
// `col` of rhs(). Otherwise the expression must be 1x1 (checked at compile time
// and at run time); it is evaluated into a 1x1 Matrix and read from there.
typename Base::CoeffReturnType coeff(Index row, Index col) const
{
#ifdef EIGEN2_SUPPORT
return lhs().row(row).cwiseProduct(rhs().col(col).transpose()).sum();
#else
EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
eigen_assert(this->rows() == 1 && this->cols() == 1);
Matrix<Scalar,1,1> result = *this;
return result.coeff(row,col);
#endif
}
// Linear-index coefficient access, allowed only for 1x1 products: the size is
// checked statically and at run time, then the product is evaluated into a
// 1x1 Matrix whose coefficient `i` is returned.
typename Base::CoeffReturnType coeff(Index i) const
{
EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
eigen_assert(this->rows() == 1 && this->cols() == 1);
Matrix<Scalar,1,1> result = *this;
return result.coeff(i);
}
// Read-only reference access by (row, col), allowed only for 1x1 products
// (checked statically and at run time); forwards to the derived class.
const Scalar& coeffRef(Index row, Index col) const
{
EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
eigen_assert(this->rows() == 1 && this->cols() == 1);
return derived().coeffRef(row,col);
}
// Read-only reference access by linear index, allowed only for 1x1 products
// (checked statically and at run time); forwards to the derived class.
const Scalar& coeffRef(Index i) const
{
EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
eigen_assert(this->rows() == 1 && this->cols() == 1);
return derived().coeffRef(i);
}
protected:
const LhsNested m_lhs;
const RhsNested m_rhs;
mutable PlainObject m_result;
private:
// discard coeff methods
void coeff(Index,Index) const;
void coeffRef(Index,Index);
void coeff(Index) const;
void coeffRef(Index);
};
// Here we need to overload the nesting rule for products
// such that the nested type is a const reference to a plain matrix.
// The specialization applies to every GeneralProduct<Lhs,Rhs,Mode>, whatever N is.
// (The ei_nested line is the removed spelling, the nested line the added one.)
namespace internal {
template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject>
struct ei_nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject>
struct nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject>
{
typedef PlainObject const& type;
};
}
template<typename NestedProduct>
class ScaledProduct;
@@ -178,7 +212,7 @@ operator*(const ProductBase<Derived,Lhs,Rhs>& prod, typename Derived::Scalar x)
{ return ScaledProduct<Derived>(prod.derived(), x); }
// product * real scalar: wraps the product and the factor in a ScaledProduct.
// The overload only participates when Derived::Scalar and Derived::RealScalar are
// different types (enable_if on !is_same) -- presumably so that it does not clash
// with the overload taking Derived::Scalar; TODO confirm.
template<typename Derived,typename Lhs,typename Rhs>
typename ei_enable_if<!ei_is_same_type<typename Derived::Scalar,typename Derived::RealScalar>::ret,
typename internal::enable_if<!internal::is_same<typename Derived::Scalar,typename Derived::RealScalar>::value,
const ScaledProduct<Derived> >::type
operator*(const ProductBase<Derived,Lhs,Rhs>& prod, typename Derived::RealScalar x)
{ return ScaledProduct<Derived>(prod.derived(), x); }
@@ -190,20 +224,21 @@ operator*(typename Derived::Scalar x,const ProductBase<Derived,Lhs,Rhs>& prod)
{ return ScaledProduct<Derived>(prod.derived(), x); }
// real scalar * product: same as the product * real scalar overload with the
// operands swapped. Only enabled when Derived::Scalar differs from
// Derived::RealScalar; returns a ScaledProduct wrapping prod.derived() and x.
template<typename Derived,typename Lhs,typename Rhs>
typename ei_enable_if<!ei_is_same_type<typename Derived::Scalar,typename Derived::RealScalar>::ret,
typename internal::enable_if<!internal::is_same<typename Derived::Scalar,typename Derived::RealScalar>::value,
const ScaledProduct<Derived> >::type
operator*(typename Derived::RealScalar x,const ProductBase<Derived,Lhs,Rhs>& prod)
{ return ScaledProduct<Derived>(prod.derived(), x); }
namespace internal {
// Traits of ScaledProduct<NestedProduct>: inherited from the traits of a
// ProductBase built on the nested product's _LhsNested / _RhsNested operand
// types, except for StorageKind which is taken from the nested product itself.
template<typename NestedProduct>
struct ei_traits<ScaledProduct<NestedProduct> >
: ei_traits<ProductBase<ScaledProduct<NestedProduct>,
struct traits<ScaledProduct<NestedProduct> >
: traits<ProductBase<ScaledProduct<NestedProduct>,
typename NestedProduct::_LhsNested,
typename NestedProduct::_RhsNested> >
{
typedef typename ei_traits<NestedProduct>::StorageKind StorageKind;
typedef typename traits<NestedProduct>::StorageKind StorageKind;
};
}
template<typename NestedProduct>
class ScaledProduct
@@ -223,16 +258,18 @@ class ScaledProduct
: Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {}
// Evaluation entry points of ScaledProduct. For each method the removed line comes
// first and the added line second.
//  - Removed form: evalTo/addTo/subTo passed m_alpha (or -m_alpha) to
//    scaleAndAddTo, which forwarded its alpha unchanged to the nested product.
//  - Added form: evalTo/addTo/subTo pass Scalar(1) / Scalar(1) / Scalar(-1) and
//    scaleAndAddTo itself multiplies the incoming alpha by m_alpha, so the stored
//    factor is also applied when scaleAndAddTo is called directly.
// evalTo: zeroes dst, then accumulates the scaled product into it.
template<typename Dest>
inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,m_alpha); }
inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst, Scalar(1)); }
// addTo: accumulates the scaled product into dst.
template<typename Dest>
inline void addTo(Dest& dst) const { scaleAndAddTo(dst,m_alpha); }
inline void addTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(1)); }
// subTo: accumulates the negated scaled product into dst.
template<typename Dest>
inline void subTo(Dest& dst) const { scaleAndAddTo(dst,-m_alpha); }
inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); }
// scaleAndAddTo: delegates to the nested product's scaleAndAddTo.
template<typename Dest>
inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { m_prod.derived().scaleAndAddTo(dst,alpha); }
inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { m_prod.derived().scaleAndAddTo(dst,alpha * m_alpha); }
// Returns the stored scaling factor.
const Scalar& alpha() const { return m_alpha; }
protected:
const NestedProduct& m_prod;

View File

@@ -25,15 +25,20 @@
#ifndef EIGEN_RANDOM_H
#define EIGEN_RANDOM_H
// Nullary functor producing random scalars, followed by its functor traits.
// The removed lines define ei_scalar_random_op at global scope; the added lines
// define scalar_random_op inside namespace internal.
// operator() ignores its one or two index arguments and returns a fresh random
// value of type Scalar on every call.
template<typename Scalar> struct ei_scalar_random_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_random_op)
namespace internal {
template<typename Scalar> struct scalar_random_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op)
template<typename Index>
inline const Scalar operator() (Index, Index = 0) const { return ei_random<Scalar>(); }
inline const Scalar operator() (Index, Index = 0) const { return random<Scalar>(); }
};
// Traits: cost is five scalar multiplications, no packet access, and the functor
// is flagged as not repeatable.
template<typename Scalar>
struct ei_functor_traits<ei_scalar_random_op<Scalar> >
struct functor_traits<scalar_random_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false, IsRepeatable = false }; };
} // end namespace internal
/** \returns a random matrix expression
*
* The parameters \a rows and \a cols are the number of rows and of columns of
@@ -53,10 +58,10 @@ struct ei_functor_traits<ei_scalar_random_op<Scalar> >
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index), MatrixBase::Random()
*/
// Builds a rows x cols nullary expression driven by the random-scalar functor.
// (Removed ei_ spelling first, added internal:: spelling second.)
template<typename Derived>
inline const CwiseNullaryOp<ei_scalar_random_op<typename ei_traits<Derived>::Scalar>, Derived>
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
DenseBase<Derived>::Random(Index rows, Index cols)
{
return NullaryExpr(rows, cols, ei_scalar_random_op<Scalar>());
return NullaryExpr(rows, cols, internal::scalar_random_op<Scalar>());
}
/** \returns a random vector expression
@@ -80,10 +85,10 @@ DenseBase<Derived>::Random(Index rows, Index cols)
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random()
*/
// Builds a nullary expression of `size` coefficients driven by the random-scalar
// functor. (Removed ei_ spelling first, added internal:: spelling second.)
template<typename Derived>
inline const CwiseNullaryOp<ei_scalar_random_op<typename ei_traits<Derived>::Scalar>, Derived>
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
DenseBase<Derived>::Random(Index size)
{
return NullaryExpr(size, ei_scalar_random_op<Scalar>());
return NullaryExpr(size, internal::scalar_random_op<Scalar>());
}
/** \returns a fixed-size random matrix or vector expression
@@ -101,10 +106,10 @@ DenseBase<Derived>::Random(Index size)
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random(Index)
*/
// Fixed-size variant: the expression dimensions are RowsAtCompileTime x
// ColsAtCompileTime. (Removed ei_ spelling first, added internal:: spelling second.)
template<typename Derived>
inline const CwiseNullaryOp<ei_scalar_random_op<typename ei_traits<Derived>::Scalar>, Derived>
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
DenseBase<Derived>::Random()
{
return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, ei_scalar_random_op<Scalar>());
return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op<Scalar>());
}
/** Sets all coefficients in this expression to random values.
@@ -131,7 +136,7 @@ inline Derived& DenseBase<Derived>::setRandom()
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
DenseStorageBase<Derived>::setRandom(Index size)
PlainObjectBase<Derived>::setRandom(Index size)
{
resize(size);
return setRandom();
@@ -149,7 +154,7 @@ DenseStorageBase<Derived>::setRandom(Index size)
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
DenseStorageBase<Derived>::setRandom(Index rows, Index cols)
PlainObjectBase<Derived>::setRandom(Index rows, Index cols)
{
resize(rows, cols);
return setRandom();

View File

@@ -26,6 +26,8 @@
#ifndef EIGEN_REDUX_H
#define EIGEN_REDUX_H
namespace internal {
// TODO
// * implement other kinds of vectorization
// * factorize code
@@ -35,11 +37,11 @@
***************************************************************************/
template<typename Func, typename Derived>
struct ei_redux_traits
struct redux_traits
{
public:
enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
PacketSize = packet_traits<typename Derived::Scalar>::size,
InnerMaxSize = int(Derived::IsRowMajor)
? Derived::MaxColsAtCompileTime
: Derived::MaxRowsAtCompileTime
@@ -47,7 +49,7 @@ public:
enum {
MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
&& (ei_functor_traits<Func>::PacketAccess),
&& (functor_traits<Func>::PacketAccess),
MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
};
@@ -63,10 +65,10 @@ public:
enum {
Cost = ( Derived::SizeAtCompileTime == Dynamic
|| Derived::CoeffReadCost == Dynamic
|| (Derived::SizeAtCompileTime!=1 && ei_functor_traits<Func>::Cost == Dynamic)
|| (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic)
) ? Dynamic
: Derived::SizeAtCompileTime * Derived::CoeffReadCost
+ (Derived::SizeAtCompileTime-1) * ei_functor_traits<Func>::Cost,
+ (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
};
@@ -85,7 +87,7 @@ public:
/*** no vectorization ***/
template<typename Func, typename Derived, int Start, int Length>
struct ei_redux_novec_unroller
struct redux_novec_unroller
{
enum {
HalfLength = Length/2
@@ -95,13 +97,13 @@ struct ei_redux_novec_unroller
// Unrolled scalar reduction over Length coefficients starting at Start: the range
// is split into a first part of HalfLength coefficients and the remaining
// Length-HalfLength ones, each part is reduced recursively, and the two partial
// results are combined with func.
// (The two-line return statement appears twice: removed ei_ form, then added form.)
EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func& func)
{
return func(ei_redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
ei_redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
}
};
template<typename Func, typename Derived, int Start>
struct ei_redux_novec_unroller<Func, Derived, Start, 1>
struct redux_novec_unroller<Func, Derived, Start, 1>
{
enum {
outer = Start / Derived::InnerSizeAtCompileTime,
@@ -120,7 +122,7 @@ struct ei_redux_novec_unroller<Func, Derived, Start, 1>
// to prevent false warnings regarding failed inlining though
// for 0 length run() will never be called at all.
template<typename Func, typename Derived, int Start>
struct ei_redux_novec_unroller<Func, Derived, Start, 0>
struct redux_novec_unroller<Func, Derived, Start, 0>
{
typedef typename Derived::Scalar Scalar;
EIGEN_STRONG_INLINE static Scalar run(const Derived&, const Func&) { return Scalar(); }
@@ -129,36 +131,36 @@ struct ei_redux_novec_unroller<Func, Derived, Start, 0>
/*** vectorization ***/
// Unrolled packet reduction. Start and Length are passed down to the halves in
// the same way as in the scalar unroller; run() reduces the first HalfLength
// units and the remaining Length-HalfLength units recursively and merges the two
// resulting packets with func.packetOp.
// (Each changed line is shown in its removed ei_ form followed by its added form.)
template<typename Func, typename Derived, int Start, int Length>
struct ei_redux_vec_unroller
struct redux_vec_unroller
{
enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
PacketSize = packet_traits<typename Derived::Scalar>::size,
HalfLength = Length/2
};
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename packet_traits<Scalar>::type PacketScalar;
EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func& func)
{
return func.packetOp(
ei_redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
ei_redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
}
};
template<typename Func, typename Derived, int Start>
struct ei_redux_vec_unroller<Func, Derived, Start, 1>
struct redux_vec_unroller<Func, Derived, Start, 1>
{
enum {
index = Start * ei_packet_traits<typename Derived::Scalar>::size,
index = Start * packet_traits<typename Derived::Scalar>::size,
outer = index / int(Derived::InnerSizeAtCompileTime),
inner = index % int(Derived::InnerSizeAtCompileTime),
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
};
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename packet_traits<Scalar>::type PacketScalar;
EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func&)
{
@@ -171,19 +173,19 @@ struct ei_redux_vec_unroller<Func, Derived, Start, 1>
***************************************************************************/
// Forward declaration of the reduction implementation. The Traversal and
// Unrolling template parameters default to the values computed by redux_traits
// for the given functor and expression; the specializations select on them.
// (Removed ei_ lines first, added lines second.)
template<typename Func, typename Derived,
int Traversal = ei_redux_traits<Func, Derived>::Traversal,
int Unrolling = ei_redux_traits<Func, Derived>::Unrolling
int Traversal = redux_traits<Func, Derived>::Traversal,
int Unrolling = redux_traits<Func, Derived>::Unrolling
>
struct ei_redux_impl;
struct redux_impl;
template<typename Func, typename Derived>
struct ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename Derived::Index Index;
static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
{
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
Scalar res;
res = mat.coeffByOuterInner(0, 0);
for(Index i = 1; i < mat.innerSize(); ++i)
@@ -196,25 +198,25 @@ struct ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
};
// Default traversal, complete unrolling: adds nothing of its own and simply
// inherits run() from the scalar unroller instantiated over the whole
// expression (Start = 0, Length = Derived::SizeAtCompileTime).
template<typename Func, typename Derived>
struct ei_redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
: public ei_redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
: public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
{};
template<typename Func, typename Derived>
struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename packet_traits<Scalar>::type PacketScalar;
typedef typename Derived::Index Index;
static Scalar run(const Derived& mat, const Func& func)
{
const Index size = mat.size();
ei_assert(size && "you are using an empty matrix");
const Index packetSize = ei_packet_traits<Scalar>::size;
const Index alignedStart = ei_first_aligned(mat);
eigen_assert(size && "you are using an empty matrix");
const Index packetSize = packet_traits<Scalar>::size;
const Index alignedStart = first_aligned(mat);
enum {
alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
? Aligned : Unaligned
};
const Index alignedSize = ((size-alignedStart)/packetSize)*packetSize;
@@ -246,19 +248,19 @@ struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
};
template<typename Func, typename Derived>
struct ei_redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename packet_traits<Scalar>::type PacketScalar;
typedef typename Derived::Index Index;
static Scalar run(const Derived& mat, const Func& func)
{
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
const Index innerSize = mat.innerSize();
const Index outerSize = mat.outerSize();
enum {
packetSize = ei_packet_traits<Scalar>::size
packetSize = packet_traits<Scalar>::size
};
const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
Scalar res;
@@ -277,7 +279,7 @@ struct ei_redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
else // too small to vectorize anything.
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
{
res = ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
}
return res;
@@ -285,25 +287,31 @@ struct ei_redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
};
// Linear vectorized traversal, complete unrolling.
//  - VectorizedSize is Size rounded down to a multiple of PacketSize.
//  - run() asserts a non-empty expression, reduces the Size/PacketSize full
//    packets with the packet unroller and collapses the resulting packet to a
//    scalar with func.predux; if coefficients remain (VectorizedSize != Size)
//    they are reduced with the scalar unroller and merged in with func.
// (Each changed line is shown in its removed ei_ form followed by its added form.)
template<typename Func, typename Derived>
struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename packet_traits<Scalar>::type PacketScalar;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
PacketSize = packet_traits<Scalar>::size,
Size = Derived::SizeAtCompileTime,
VectorizedSize = (Size / PacketSize) * PacketSize
};
EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func)
{
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
Scalar res = func.predux(ei_redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
if (VectorizedSize != Size)
res = func(res,ei_redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
return res;
}
};
} // end namespace internal
/***************************************************************************
* Part 4 : public API
***************************************************************************/
/** \returns the result of a full redux operation on the whole matrix or vector using \a func
*
@@ -314,30 +322,30 @@ struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling
*/
// Public entry point of the reduction machinery: ThisNested is Derived::Nested
// with references and qualifiers stripped; the call is dispatched to
// redux_impl<Func, ThisNested>::run on derived() with the user functor.
// (Removed ei_ lines and added internal:: lines are interleaved; the final
// "::run(derived(), func);" line is shared by both forms.)
template<typename Derived>
template<typename Func>
EIGEN_STRONG_INLINE typename ei_result_of<Func(typename ei_traits<Derived>::Scalar)>::type
EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
DenseBase<Derived>::redux(const Func& func) const
{
typedef typename ei_cleantype<typename Derived::Nested>::type ThisNested;
return ei_redux_impl<Func, ThisNested>
typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested;
return internal::redux_impl<Func, ThisNested>
::run(derived(), func);
}
/** \returns the minimum of all coefficients of *this
*
* Implemented as a redux() with the scalar min functor.
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff() const
{
return this->redux(Eigen::ei_scalar_min_op<Scalar>());
return this->redux(Eigen::internal::scalar_min_op<Scalar>());
}
/** \returns the maximum of all coefficients of *this
*
* Implemented as a redux() with the scalar max functor.
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff() const
{
return this->redux(Eigen::ei_scalar_max_op<Scalar>());
return this->redux(Eigen::internal::scalar_max_op<Scalar>());
}
/** \returns the sum of all coefficients of *this
@@ -345,12 +353,12 @@ DenseBase<Derived>::maxCoeff() const
* \sa trace(), prod(), mean()
*/
// Sum of all coefficients. An empty expression (size known to be 0 at compile
// time, or dynamic size equal to 0 at run time) yields Scalar(0) without calling
// redux(); otherwise the coefficients are reduced with the scalar sum functor.
template<typename Derived>
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::sum() const
{
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
return Scalar(0);
return this->redux(Eigen::ei_scalar_sum_op<Scalar>());
return this->redux(Eigen::internal::scalar_sum_op<Scalar>());
}
/** \returns the mean of all coefficients of *this
@@ -358,10 +366,10 @@ DenseBase<Derived>::sum() const
* \sa trace(), prod(), sum()
*/
// Mean of all coefficients: the sum obtained through redux() divided by the
// number of coefficients converted to Scalar.
// NOTE(review): unlike sum() and prod(), no special case for an empty expression
// is visible here, so redux() is invoked even when size() is 0 -- confirm that
// callers never pass empty expressions.
template<typename Derived>
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::mean() const
{
return Scalar(this->redux(Eigen::ei_scalar_sum_op<Scalar>())) / Scalar(this->size());
return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
}
/** \returns the product of all coefficients of *this
@@ -372,12 +380,12 @@ DenseBase<Derived>::mean() const
* \sa sum(), mean(), trace()
*/
// Product of all coefficients. An empty expression (size known to be 0 at compile
// time, or dynamic size equal to 0 at run time) yields Scalar(1) without calling
// redux(); otherwise the coefficients are reduced with the scalar product functor.
template<typename Derived>
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::prod() const
{
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
return Scalar(1);
return this->redux(Eigen::ei_scalar_product_op<Scalar>());
return this->redux(Eigen::internal::scalar_product_op<Scalar>());
}
/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
@@ -387,7 +395,7 @@ DenseBase<Derived>::prod() const
* \sa diagonal(), sum()
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
MatrixBase<Derived>::trace() const
{
return derived().diagonal().sum();

View File

@@ -39,15 +39,20 @@
*
* \sa DenseBase::replicate()
*/
namespace internal {
template<typename MatrixType,int RowFactor,int ColFactor>
struct ei_traits<Replicate<MatrixType,RowFactor,ColFactor> >
: ei_traits<MatrixType>
struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
: traits<MatrixType>
{
typedef typename MatrixType::Scalar Scalar;
typedef typename ei_traits<MatrixType>::StorageKind StorageKind;
typedef typename ei_traits<MatrixType>::XprKind XprKind;
typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
typedef typename traits<MatrixType>::StorageKind StorageKind;
typedef typename traits<MatrixType>::XprKind XprKind;
enum {
Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor
};
typedef typename nested<MatrixType,Factor>::type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
enum {
RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic
? Dynamic
@@ -65,29 +70,32 @@ struct ei_traits<Replicate<MatrixType,RowFactor,ColFactor> >
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
};
};
}
template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
: public ei_dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type
: public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type
{
typedef typename internal::traits<Replicate>::MatrixTypeNested MatrixTypeNested;
typedef typename internal::traits<Replicate>::_MatrixTypeNested _MatrixTypeNested;
public:
typedef typename ei_dense_xpr_base<Replicate>::type Base;
typedef typename internal::dense_xpr_base<Replicate>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Replicate)
// Constructor for compile-time replication factors: the stored factors are
// initialised from the RowFactor / ColFactor template arguments.
//  - Static check: the argument type must be exactly MatrixType (the added form
//    first strips const from MatrixType before comparing).
//  - Run-time check: neither template factor may be Dynamic, since this overload
//    receives no run-time factors.
template<typename OriginalMatrixType>
inline explicit Replicate(const OriginalMatrixType& matrix)
: m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
{
EIGEN_STATIC_ASSERT((ei_is_same_type<MatrixType,OriginalMatrixType>::ret),
EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
ei_assert(RowFactor!=Dynamic && ColFactor!=Dynamic);
eigen_assert(RowFactor!=Dynamic && ColFactor!=Dynamic);
}
// Constructor taking the replication factors at run time and storing them in
// m_rowFactor / m_colFactor. The only check is the static one: the argument type
// must be exactly MatrixType (the added form strips const from MatrixType first).
template<typename OriginalMatrixType>
inline Replicate(const OriginalMatrixType& matrix, int rowFactor, int colFactor)
: m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
{
EIGEN_STATIC_ASSERT((ei_is_same_type<MatrixType,OriginalMatrixType>::ret),
EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
}
@@ -97,10 +105,10 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
inline Scalar coeff(Index row, Index col) const
{
// try to avoid using modulo; this is a pure optimization strategy
const Index actual_row = ei_traits<MatrixType>::RowsAtCompileTime==1 ? 0
const Index actual_row = internal::traits<MatrixType>::RowsAtCompileTime==1 ? 0
: RowFactor==1 ? row
: row%m_matrix.rows();
const Index actual_col = ei_traits<MatrixType>::ColsAtCompileTime==1 ? 0
const Index actual_col = internal::traits<MatrixType>::ColsAtCompileTime==1 ? 0
: ColFactor==1 ? col
: col%m_matrix.cols();
@@ -109,10 +117,10 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
template<int LoadMode>
inline PacketScalar packet(Index row, Index col) const
{
const Index actual_row = ei_traits<MatrixType>::RowsAtCompileTime==1 ? 0
const Index actual_row = internal::traits<MatrixType>::RowsAtCompileTime==1 ? 0
: RowFactor==1 ? row
: row%m_matrix.rows();
const Index actual_col = ei_traits<MatrixType>::ColsAtCompileTime==1 ? 0
const Index actual_col = internal::traits<MatrixType>::ColsAtCompileTime==1 ? 0
: ColFactor==1 ? col
: col%m_matrix.cols();
@@ -121,9 +129,9 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
protected:
const typename MatrixType::Nested m_matrix;
const ei_variable_if_dynamic<Index, RowFactor> m_rowFactor;
const ei_variable_if_dynamic<Index, ColFactor> m_colFactor;
const MatrixTypeNested m_matrix;
const internal::variable_if_dynamic<Index, RowFactor> m_rowFactor;
const internal::variable_if_dynamic<Index, ColFactor> m_colFactor;
};
/**

View File

@@ -30,43 +30,50 @@
* \ingroup Core_Module
*
*/
namespace internal {
// Traits of ReturnByValue<Derived>: inherited from the traits of the type named
// by traits<Derived>::ReturnType, with the Flags adjusted below.
// (Removed ei_ lines first, added lines second.)
template<typename Derived>
struct ei_traits<ReturnByValue<Derived> >
: public ei_traits<typename ei_traits<Derived>::ReturnType>
struct traits<ReturnByValue<Derived> >
: public traits<typename traits<Derived>::ReturnType>
{
enum {
// We disable DirectAccess because e.g. the constructor of the
// Block-with-DirectAccess expression requires a coeffRef method.
// Also, we don't want to have to implement the stride stuff.
// EvalBeforeNestingBit is added on top of the return type's flags.
Flags = (ei_traits<typename ei_traits<Derived>::ReturnType>::Flags
Flags = (traits<typename traits<Derived>::ReturnType>::Flags
| EvalBeforeNestingBit) & ~DirectAccessBit
};
};
/* The ReturnByValue object doesn't even have a coeff() method.
* So the only way that nesting it in an expression can work, is by evaluating it into a plain matrix.
* So ei_nested always gives the plain return matrix type.
* So internal::nested always gives the plain return matrix type.
*
* FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ??
*/
// The nested type is traits<Derived>::ReturnType, held by value rather than by
// reference, whatever n and PlainObject are.
template<typename Derived,int n,typename PlainObject>
struct ei_nested<ReturnByValue<Derived>, n, PlainObject>
struct nested<ReturnByValue<Derived>, n, PlainObject>
{
typedef typename ei_traits<Derived>::ReturnType type;
typedef typename traits<Derived>::ReturnType type;
};
} // end namespace internal
template<typename Derived> class ReturnByValue
: public ei_dense_xpr_base< ReturnByValue<Derived> >::type
: public internal::dense_xpr_base< ReturnByValue<Derived> >::type
{
public:
typedef typename ei_traits<Derived>::ReturnType ReturnType;
typedef typename internal::traits<Derived>::ReturnType ReturnType;
typedef typename ei_dense_xpr_base<ReturnByValue>::type Base;
typedef typename internal::dense_xpr_base<ReturnByValue>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue)
// evalTo(), rows() and cols() all forward to the Derived class by down-casting
// this. The three removed bodies (cast to "const Derived* const") come first,
// followed by the three added bodies (cast to "const Derived*"); the evalTo
// signature line is shared by both forms.
template<typename Dest>
inline void evalTo(Dest& dst) const
{ static_cast<const Derived* const>(this)->evalTo(dst); }
inline Index rows() const { return static_cast<const Derived* const>(this)->rows(); }
inline Index cols() const { return static_cast<const Derived* const>(this)->cols(); }
{ static_cast<const Derived*>(this)->evalTo(dst); }
inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
#ifndef EIGEN_PARSED_BY_DOXYGEN
#define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT

View File

@@ -40,15 +40,18 @@
*
* \sa MatrixBase::reverse(), VectorwiseOp::reverse()
*/
namespace internal {
template<typename MatrixType, int Direction>
struct ei_traits<Reverse<MatrixType, Direction> >
: ei_traits<MatrixType>
struct traits<Reverse<MatrixType, Direction> >
: traits<MatrixType>
{
typedef typename MatrixType::Scalar Scalar;
typedef typename ei_traits<MatrixType>::StorageKind StorageKind;
typedef typename ei_traits<MatrixType>::XprKind XprKind;
typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
typedef typename traits<MatrixType>::StorageKind StorageKind;
typedef typename traits<MatrixType>::XprKind XprKind;
typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
enum {
RowsAtCompileTime = MatrixType::RowsAtCompileTime,
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
@@ -65,21 +68,24 @@ struct ei_traits<Reverse<MatrixType, Direction> >
};
};
// Conditional packet reversal, primary template: run() returns the packet with
// its coefficients reversed (ei_preverse in the removed line, preverse in the
// added one). The <PacketScalar,false> specialization handles the no-op case.
template<typename PacketScalar, bool ReversePacket> struct ei_reverse_packet_cond
template<typename PacketScalar, bool ReversePacket> struct reverse_packet_cond
{
static inline PacketScalar run(const PacketScalar& x) { return ei_preverse(x); }
static inline PacketScalar run(const PacketScalar& x) { return preverse(x); }
};
// Specialization for ReversePacket == false: run() returns the packet unchanged.
template<typename PacketScalar> struct ei_reverse_packet_cond<PacketScalar,false>
template<typename PacketScalar> struct reverse_packet_cond<PacketScalar,false>
{
static inline PacketScalar run(const PacketScalar& x) { return x; }
};
} // end namespace internal
template<typename MatrixType, int Direction> class Reverse
: public ei_dense_xpr_base< Reverse<MatrixType, Direction> >::type
: public internal::dense_xpr_base< Reverse<MatrixType, Direction> >::type
{
public:
typedef typename ei_dense_xpr_base<Reverse>::type Base;
typedef typename internal::dense_xpr_base<Reverse>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Reverse)
using Base::IsRowMajor;
@@ -89,7 +95,7 @@ template<typename MatrixType, int Direction> class Reverse
protected:
enum {
PacketSize = ei_packet_traits<Scalar>::size,
PacketSize = internal::packet_traits<Scalar>::size,
IsColMajor = !IsRowMajor,
ReverseRow = (Direction == Vertical) || (Direction == BothDirections),
ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),
@@ -99,7 +105,7 @@ template<typename MatrixType, int Direction> class Reverse
|| ((Direction == Vertical) && IsColMajor)
|| ((Direction == Horizontal) && IsRowMajor)
};
typedef ei_reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
public:
inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { }
@@ -116,7 +122,7 @@ template<typename MatrixType, int Direction> class Reverse
inline Scalar& operator()(Index row, Index col)
{
ei_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
return coeffRef(row, col);
}
@@ -144,7 +150,7 @@ template<typename MatrixType, int Direction> class Reverse
inline Scalar& operator()(Index index)
{
ei_assert(index >= 0 && index < m_matrix.size());
eigen_assert(index >= 0 && index < m_matrix.size());
return coeffRef(index);
}
@@ -168,13 +174,13 @@ template<typename MatrixType, int Direction> class Reverse
template<int LoadMode>
inline const PacketScalar packet(Index index) const
{
return ei_preverse(m_matrix.template packet<LoadMode>( m_matrix.size() - index - PacketSize ));
return internal::preverse(m_matrix.template packet<LoadMode>( m_matrix.size() - index - PacketSize ));
}
template<int LoadMode>
inline void writePacket(Index index, const PacketScalar& x)
{
m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, ei_preverse(x));
m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x));
}
protected:
@@ -188,7 +194,7 @@ template<typename MatrixType, int Direction> class Reverse
*
*/
template<typename Derived>
inline Reverse<Derived, BothDirections>
inline typename DenseBase<Derived>::ReverseReturnType
DenseBase<Derived>::reverse()
{
return derived();
@@ -196,7 +202,7 @@ DenseBase<Derived>::reverse()
/** This is the const version of reverse(). */
template<typename Derived>
inline const Reverse<Derived, BothDirections>
inline const typename DenseBase<Derived>::ConstReverseReturnType
DenseBase<Derived>::reverse() const
{
return derived();
@@ -210,7 +216,7 @@ DenseBase<Derived>::reverse() const
* the following additional features:
* - less error prone: doing the same operation with .reverse() requires special care:
* \code m = m.reverse().eval(); \endcode
* - no temporary object is created (currently there is one created but could be avoided using swap)
* - this API makes it possible to avoid creating a temporary (the current implementation creates a temporary, but that could be avoided using swap)
* - it allows future optimizations (cache friendliness, etc.)
*
* \sa reverse() */

View File

@@ -40,13 +40,14 @@
* \sa DenseBase::select(const DenseBase<ThenDerived>&, const DenseBase<ElseDerived>&) const
*/
namespace internal {
template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
struct ei_traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
: ei_traits<ThenMatrixType>
struct traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
: traits<ThenMatrixType>
{
typedef typename ei_traits<ThenMatrixType>::Scalar Scalar;
typedef typename traits<ThenMatrixType>::Scalar Scalar;
typedef Dense StorageKind;
typedef typename ei_traits<ThenMatrixType>::XprKind XprKind;
typedef typename traits<ThenMatrixType>::XprKind XprKind;
typedef typename ConditionMatrixType::Nested ConditionMatrixNested;
typedef typename ThenMatrixType::Nested ThenMatrixNested;
typedef typename ElseMatrixType::Nested ElseMatrixNested;
@@ -56,19 +57,20 @@ struct ei_traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime,
Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits,
CoeffReadCost = ei_traits<typename ei_cleantype<ConditionMatrixNested>::type>::CoeffReadCost
+ EIGEN_SIZE_MAX(ei_traits<typename ei_cleantype<ThenMatrixNested>::type>::CoeffReadCost,
ei_traits<typename ei_cleantype<ElseMatrixNested>::type>::CoeffReadCost)
CoeffReadCost = traits<typename remove_all<ConditionMatrixNested>::type>::CoeffReadCost
+ EIGEN_SIZE_MAX(traits<typename remove_all<ThenMatrixNested>::type>::CoeffReadCost,
traits<typename remove_all<ElseMatrixNested>::type>::CoeffReadCost)
};
};
}
template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
class Select : ei_no_assignment_operator,
public ei_dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type
class Select : internal::no_assignment_operator,
public internal::dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type
{
public:
typedef typename ei_dense_xpr_base<Select>::type Base;
typedef typename internal::dense_xpr_base<Select>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Select)
Select(const ConditionMatrixType& conditionMatrix,
@@ -76,8 +78,8 @@ class Select : ei_no_assignment_operator,
const ElseMatrixType& elseMatrix)
: m_condition(conditionMatrix), m_then(thenMatrix), m_else(elseMatrix)
{
ei_assert(m_condition.rows() == m_then.rows() && m_condition.rows() == m_else.rows());
ei_assert(m_condition.cols() == m_then.cols() && m_condition.cols() == m_else.cols());
eigen_assert(m_condition.rows() == m_then.rows() && m_condition.rows() == m_else.rows());
eigen_assert(m_condition.cols() == m_then.cols() && m_condition.cols() == m_else.cols());
}
Index rows() const { return m_condition.rows(); }

View File

@@ -32,27 +32,31 @@
* \brief Expression of a selfadjoint matrix from a triangular part of a dense matrix
*
* \param MatrixType the type of the dense matrix storing the coefficients
* \param TriangularPart can be either \c Lower or \c Upper
* \param TriangularPart can be either \c #Lower or \c #Upper
*
* This class is an expression of a selfadjoint matrix from a triangular part of a matrix
* with given dense storage of the coefficients. It is the return type of MatrixBase::selfadjointView()
* and most of the time this is the only way that it is used.
*
* \sa class TriangularBase, MatrixBase::selfAdjointView()
* \sa class TriangularBase, MatrixBase::selfadjointView()
*/
namespace internal {
template<typename MatrixType, unsigned int UpLo>
struct ei_traits<SelfAdjointView<MatrixType, UpLo> > : ei_traits<MatrixType>
struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
{
typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
typedef MatrixType ExpressionType;
typedef typename MatrixType::PlainObject DenseMatrixType;
enum {
Mode = UpLo | SelfAdjoint,
Flags = _MatrixTypeNested::Flags & (HereditaryBits)
Flags = MatrixTypeNestedCleaned::Flags & (HereditaryBits)
& (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)), // FIXME these flags should be preserved
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
CoeffReadCost = MatrixTypeNestedCleaned::CoeffReadCost
};
};
}
template <typename Lhs, int LhsMode, bool LhsIsVector,
typename Rhs, int RhsMode, bool RhsIsVector>
@@ -65,19 +69,21 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
public:
typedef TriangularBase<SelfAdjointView> Base;
typedef typename internal::traits<SelfAdjointView>::MatrixTypeNested MatrixTypeNested;
typedef typename internal::traits<SelfAdjointView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
/** \brief The type of coefficients in this matrix */
typedef typename ei_traits<SelfAdjointView>::Scalar Scalar;
typedef typename internal::traits<SelfAdjointView>::Scalar Scalar;
typedef typename MatrixType::Index Index;
enum {
Mode = ei_traits<SelfAdjointView>::Mode
Mode = internal::traits<SelfAdjointView>::Mode
};
typedef typename MatrixType::PlainObject PlainObject;
inline SelfAdjointView(const MatrixType& matrix) : m_matrix(matrix)
{ ei_assert(ei_are_flags_consistent<Mode>::ret); }
{}
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
@@ -103,10 +109,10 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
}
/** \internal */
const MatrixType& _expression() const { return m_matrix; }
const MatrixTypeNestedCleaned& _expression() const { return m_matrix; }
const MatrixType& nestedExpression() const { return m_matrix; }
MatrixType& nestedExpression() { return const_cast<MatrixType&>(m_matrix); }
const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
/** Efficient self-adjoint matrix times vector/matrix product */
template<typename OtherDerived>
@@ -129,7 +135,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
}
/** Perform a symmetric rank 2 update of the selfadjoint matrix \c *this:
* \f$ this = this + \alpha ( u v^* + v u^*) \f$
* \f$ this = this + \alpha u v^* + conj(\alpha) v u^* \f$
* \returns a reference to \c *this
*
* The vectors \a u and \c v \b must be column vectors, however they can be
@@ -164,27 +170,52 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
/** Real part of #Scalar */
typedef typename NumTraits<Scalar>::Real RealScalar;
/** Return type of eigenvalues() */
typedef Matrix<RealScalar, ei_traits<MatrixType>::ColsAtCompileTime, 1> EigenvaluesReturnType;
typedef Matrix<RealScalar, internal::traits<MatrixType>::ColsAtCompileTime, 1> EigenvaluesReturnType;
EigenvaluesReturnType eigenvalues() const;
RealScalar operatorNorm() const;
#ifdef EIGEN2_SUPPORT
/** Assignment from a dense expression.
  *
  * Writes \a other into the \c UpLo triangular view of the referenced matrix, then
  * writes \c other.adjoint() into the strictly opposite triangular view.
  *
  * \returns a reference to \c *this
  */
template<typename OtherDerived>
SelfAdjointView& operator=(const MatrixBase<OtherDerived>& other)
{
  // strict triangle lying on the other side of the diagonal from UpLo
  enum { MirroredPart = (UpLo == Upper) ? StrictlyLower : StrictlyUpper };

  m_matrix.const_cast_derived().template triangularView<UpLo>() = other;
  m_matrix.const_cast_derived().template triangularView<MirroredPart>() = other.adjoint();

  return *this;
}
/** Assignment from a triangular view.
  *
  * Writes \c other.toDenseMatrix() into the \c UpLo triangular view of the referenced
  * matrix, then writes its adjoint into the strictly opposite triangular view.
  *
  * \returns a reference to \c *this
  */
template<typename OtherMatrixType, unsigned int OtherMode>
SelfAdjointView& operator=(const TriangularView<OtherMatrixType, OtherMode>& other)
{
  // strict triangle lying on the other side of the diagonal from UpLo
  enum { MirroredPart = (UpLo == Upper) ? StrictlyLower : StrictlyUpper };

  m_matrix.const_cast_derived().template triangularView<UpLo>() = other.toDenseMatrix();
  m_matrix.const_cast_derived().template triangularView<MirroredPart>() = other.toDenseMatrix().adjoint();

  return *this;
}
#endif
protected:
const typename MatrixType::Nested m_matrix;
const MatrixTypeNested m_matrix;
};
// template<typename OtherDerived, typename MatrixType, unsigned int UpLo>
// ei_selfadjoint_matrix_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo> >
// internal::selfadjoint_matrix_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo> >
// operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView<MatrixType,UpLo>& rhs)
// {
// return ei_matrix_selfadjoint_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo> >(lhs.derived(),rhs);
// return internal::matrix_selfadjoint_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo> >(lhs.derived(),rhs);
// }
// selfadjoint to dense matrix
namespace internal {
template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount, ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount, ClearOpposite>
{
enum {
col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
@@ -193,23 +224,23 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper)
inline static void run(Derived1 &dst, const Derived2 &src)
{
ei_triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);
if(row == col)
dst.coeffRef(row, col) = ei_real(src.coeff(row, col));
dst.coeffRef(row, col) = real(src.coeff(row, col));
else if(row < col)
dst.coeffRef(col, row) = ei_conj(dst.coeffRef(row, col) = src.coeff(row, col));
dst.coeffRef(col, row) = conj(dst.coeffRef(row, col) = src.coeff(row, col));
}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
{
inline static void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount, ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount, ClearOpposite>
{
enum {
col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
@@ -218,23 +249,23 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower)
inline static void run(Derived1 &dst, const Derived2 &src)
{
ei_triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);
if(row == col)
dst.coeffRef(row, col) = ei_real(src.coeff(row, col));
dst.coeffRef(row, col) = real(src.coeff(row, col));
else if(row > col)
dst.coeffRef(col, row) = ei_conj(dst.coeffRef(row, col) = src.coeff(row, col));
dst.coeffRef(col, row) = conj(dst.coeffRef(row, col) = src.coeff(row, col));
}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
{
inline static void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
@@ -244,7 +275,7 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper,
for(Index i = 0; i < j; ++i)
{
dst.copyCoeff(i, j, src);
dst.coeffRef(j,i) = ei_conj(dst.coeff(i,j));
dst.coeffRef(j,i) = conj(dst.coeff(i,j));
}
dst.copyCoeff(j, j, src);
}
@@ -252,7 +283,7 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper,
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
{
inline static void run(Derived1 &dst, const Derived2 &src)
{
@@ -262,27 +293,31 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower,
for(Index j = 0; j < i; ++j)
{
dst.copyCoeff(i, j, src);
dst.coeffRef(j,i) = ei_conj(dst.coeff(i,j));
dst.coeffRef(j,i) = conj(dst.coeff(i,j));
}
dst.copyCoeff(i, i, src);
}
}
};
} // end namespace internal
/***************************************************************************
* Implementation of MatrixBase methods
***************************************************************************/
template<typename Derived>
template<unsigned int UpLo>
const SelfAdjointView<Derived, UpLo> MatrixBase<Derived>::selfadjointView() const
typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type
MatrixBase<Derived>::selfadjointView() const
{
return derived();
}
template<typename Derived>
template<unsigned int UpLo>
SelfAdjointView<Derived, UpLo> MatrixBase<Derived>::selfadjointView()
typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type
MatrixBase<Derived>::selfadjointView()
{
return derived();
}

View File

@@ -39,28 +39,31 @@
*
* \sa class SwapWrapper for a similar trick.
*/
namespace internal {
template<typename BinaryOp, typename Lhs, typename Rhs>
struct ei_traits<SelfCwiseBinaryOp<BinaryOp,Lhs,Rhs> >
: ei_traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >
struct traits<SelfCwiseBinaryOp<BinaryOp,Lhs,Rhs> >
: traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >
{
enum {
// Note that it is still a good idea to preserve the DirectAccessBit
// so that assign can correctly align the data.
Flags = ei_traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit),
Flags = traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit),
OuterStrideAtCompileTime = Lhs::OuterStrideAtCompileTime,
InnerStrideAtCompileTime = Lhs::InnerStrideAtCompileTime
};
};
}
template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
: public ei_dense_xpr_base< SelfCwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
: public internal::dense_xpr_base< SelfCwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
{
public:
typedef typename ei_dense_xpr_base<SelfCwiseBinaryOp>::type Base;
typedef typename internal::dense_xpr_base<SelfCwiseBinaryOp>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(SelfCwiseBinaryOp)
typedef typename ei_packet_traits<Scalar>::type Packet;
typedef typename internal::packet_traits<Scalar>::type Packet;
inline SelfCwiseBinaryOp(Lhs& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}
@@ -74,12 +77,22 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
// TODO make Assign use .data()
/** \returns a writable reference to the coefficient at (\a row, \a col), obtained by
  * forwarding to \c coeffRef() on \c m_matrix.const_cast_derived(). */
inline Scalar& coeffRef(Index row, Index col)
{
// NOTE(review): presumably a compile-time check that Lhs is writable -- confirm against the macro definition
EIGEN_STATIC_ASSERT_LVALUE(Lhs)
return m_matrix.const_cast_derived().coeffRef(row, col);
}
/** \returns a read-only reference to the coefficient at (\a row, \a col), forwarded
  * directly from \c m_matrix.coeffRef(). */
inline const Scalar& coeffRef(Index row, Index col) const
{
return m_matrix.coeffRef(row, col);
}
// note that this function is needed by assign to correctly align loads/stores
// TODO make Assign use .data()
/** \returns a writable reference to the coefficient at linear position \a index, obtained by
  * forwarding to \c coeffRef() on \c m_matrix.const_cast_derived(). */
inline Scalar& coeffRef(Index index)
{
// NOTE(review): presumably a compile-time check that Lhs is writable -- confirm against the macro definition
EIGEN_STATIC_ASSERT_LVALUE(Lhs)
return m_matrix.const_cast_derived().coeffRef(index);
}
/** \returns a read-only reference to the coefficient at linear position \a index.
  * Unlike the const (row, col) overload, this one goes through \c const_cast_derived()
  * before calling \c coeffRef(). */
inline const Scalar& coeffRef(Index index) const
{
return m_matrix.const_cast_derived().coeffRef(index);
}
@@ -88,7 +101,7 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(row >= 0 && row < rows()
eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
Scalar& tmp = m_matrix.coeffRef(row,col);
tmp = m_functor(tmp, _other.coeff(row,col));
@@ -98,7 +111,7 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(index >= 0 && index < m_matrix.size());
eigen_internal_assert(index >= 0 && index < m_matrix.size());
Scalar& tmp = m_matrix.coeffRef(index);
tmp = m_functor(tmp, _other.coeff(index));
}
@@ -107,7 +120,7 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(row >= 0 && row < rows()
eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
m_matrix.template writePacket<StoreMode>(row, col,
m_functor.packetOp(m_matrix.template packet<StoreMode>(row, col),_other.template packet<LoadMode>(row, col)) );
@@ -117,7 +130,7 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
void copyPacket(Index index, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(index >= 0 && index < m_matrix.size());
eigen_internal_assert(index >= 0 && index < m_matrix.size());
m_matrix.template writePacket<StoreMode>(index,
m_functor.packetOp(m_matrix.template packet<StoreMode>(index),_other.template packet<LoadMode>(index)) );
}
@@ -131,10 +144,10 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename RhsDerived::Scalar);
#ifdef EIGEN_DEBUG_ASSIGN
ei_assign_traits<SelfCwiseBinaryOp, RhsDerived>::debug();
internal::assign_traits<SelfCwiseBinaryOp, RhsDerived>::debug();
#endif
ei_assert(rows() == rhs.rows() && cols() == rhs.cols());
ei_assign_impl<SelfCwiseBinaryOp, RhsDerived>::run(*this,rhs.derived());
eigen_assert(rows() == rhs.rows() && cols() == rhs.cols());
internal::assign_impl<SelfCwiseBinaryOp, RhsDerived>::run(*this,rhs.derived());
#ifndef EIGEN_NO_DEBUG
this->checkTransposeAliasing(rhs.derived());
#endif
@@ -146,7 +159,7 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
// at first...
SelfCwiseBinaryOp& operator=(const Rhs& _rhs)
{
typename ei_nested<Rhs>::type rhs(_rhs);
typename internal::nested<Rhs>::type rhs(_rhs);
return Base::operator=(rhs);
}
@@ -162,7 +175,7 @@ template<typename Derived>
inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
SelfCwiseBinaryOp<ei_scalar_product_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
tmp = PlainObject::Constant(rows(),cols(),other);
return derived();
}
@@ -170,9 +183,9 @@ inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
template<typename Derived>
inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
{
typedef typename ei_meta_if<NumTraits<Scalar>::IsInteger,
ei_scalar_quotient_op<Scalar>,
ei_scalar_product_op<Scalar> >::ret BinOp;
typedef typename internal::conditional<NumTraits<Scalar>::IsInteger,
internal::scalar_quotient_op<Scalar>,
internal::scalar_product_op<Scalar> >::type BinOp;
typedef typename Derived::PlainObject PlainObject;
SelfCwiseBinaryOp<BinOp, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
tmp = PlainObject::Constant(rows(),cols(), NumTraits<Scalar>::IsInteger ? other : Scalar(1)/other);

View File

@@ -25,8 +25,19 @@
#ifndef EIGEN_SOLVETRIANGULAR_H
#define EIGEN_SOLVETRIANGULAR_H
namespace internal {
// Forward declarations:
// The following two routines are implemented in the products/TriangularSolver*.h files
template<typename LhsScalar, typename RhsScalar, typename Index, int Side, int Mode, bool Conjugate, int StorageOrder>
struct triangular_solve_vector;
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherStorageOrder>
struct triangular_solve_matrix;
// small helper struct extracting some traits on the underlying solver operation
template<typename Lhs, typename Rhs, int Side>
class ei_trsolve_traits
class trsolve_traits
{
private:
enum {
@@ -43,150 +54,56 @@ class ei_trsolve_traits
template<typename Lhs, typename Rhs,
int Side, // can be OnTheLeft/OnTheRight
int Mode, // can be Upper/Lower | UnitDiag
int Unrolling = ei_trsolve_traits<Lhs,Rhs,Side>::Unrolling,
int StorageOrder = (int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
int RhsVectors = ei_trsolve_traits<Lhs,Rhs,Side>::RhsVectors
int Unrolling = trsolve_traits<Lhs,Rhs,Side>::Unrolling,
int RhsVectors = trsolve_traits<Lhs,Rhs,Side>::RhsVectors
>
struct ei_triangular_solver_selector;
struct triangular_solver_selector;
// forward and backward substitution, row-major, rhs is a vector
template<typename Lhs, typename Rhs, int Mode>
struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,RowMajor,1>
template<typename Lhs, typename Rhs, int Side, int Mode>
struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>
{
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef ei_blas_traits<Lhs> LhsProductTraits;
typedef blas_traits<Lhs> LhsProductTraits;
typedef typename LhsProductTraits::ExtractType ActualLhsType;
typedef typename Lhs::Index Index;
enum {
IsLower = ((Mode&Lower)==Lower)
};
static void run(const Lhs& lhs, Rhs& other)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
const Index size = lhs.cols();
for(Index pi=IsLower ? 0 : size;
IsLower ? pi<size : pi>0;
IsLower ? pi+=PanelWidth : pi-=PanelWidth)
{
Index actualPanelWidth = std::min(IsLower ? size - pi : pi, PanelWidth);
Index r = IsLower ? pi : size - pi; // remaining size
if (r > 0)
{
// let's directly call the low level product function because:
// 1 - it is faster to compile
// 2 - it is slightly faster at runtime
Index startRow = IsLower ? pi : pi-actualPanelWidth;
Index startCol = IsLower ? 0 : pi;
ei_general_matrix_vector_product<Index,LhsScalar,RowMajor,LhsProductTraits::NeedToConjugate,RhsScalar,false>::run(
actualPanelWidth, r,
&(actualLhs.const_cast_derived().coeffRef(startRow,startCol)), actualLhs.outerStride(),
&(other.coeffRef(startCol)), other.innerStride(),
&other.coeffRef(startRow), other.innerStride(),
RhsScalar(-1));
}
for(Index k=0; k<actualPanelWidth; ++k)
{
Index i = IsLower ? pi+k : pi-k-1;
Index s = IsLower ? pi : i+1;
if (k>0)
other.coeffRef(i) -= (lhs.row(i).segment(s,k).transpose().cwiseProduct(other.segment(s,k))).sum();
if(!(Mode & UnitDiag))
other.coeffRef(i) /= lhs.coeff(i,i);
}
}
}
};
// forward and backward substitution, column-major, rhs is a vector
template<typename Lhs, typename Rhs, int Mode>
struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,ColMajor,1>
{
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef ei_blas_traits<Lhs> LhsProductTraits;
typedef typename LhsProductTraits::ExtractType ActualLhsType;
typedef typename Lhs::Index Index;
enum {
IsLower = ((Mode&Lower)==Lower)
};
static void run(const Lhs& lhs, Rhs& other)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
const Index size = lhs.cols();
for(Index pi=IsLower ? 0 : size;
IsLower ? pi<size : pi>0;
IsLower ? pi+=PanelWidth : pi-=PanelWidth)
{
Index actualPanelWidth = std::min(IsLower ? size - pi : pi, PanelWidth);
Index startBlock = IsLower ? pi : pi-actualPanelWidth;
Index endBlock = IsLower ? pi + actualPanelWidth : 0;
for(Index k=0; k<actualPanelWidth; ++k)
{
Index i = IsLower ? pi+k : pi-k-1;
if(!(Mode & UnitDiag))
other.coeffRef(i) /= lhs.coeff(i,i);
Index r = actualPanelWidth - k - 1; // remaining size
Index s = IsLower ? i+1 : i-r;
if (r>0)
other.segment(s,r) -= other.coeffRef(i) * Block<Lhs,Dynamic,1>(lhs, s, i, r, 1);
}
Index r = IsLower ? size - endBlock : startBlock; // remaining size
if (r > 0)
{
// let's directly call the low level product function because:
// 1 - it is faster to compile
// 2 - it is slightly faster at runtime
ei_general_matrix_vector_product<Index,LhsScalar,ColMajor,LhsProductTraits::NeedToConjugate,RhsScalar,false>::run(
r, actualPanelWidth,
&(actualLhs.const_cast_derived().coeffRef(endBlock,startBlock)), actualLhs.outerStride(),
&other.coeff(startBlock), other.innerStride(),
&(other.coeffRef(endBlock, 0)), other.innerStride(), RhsScalar(-1));
}
}
}
};
// transpose OnTheRight cases for vectors
template<typename Lhs, typename Rhs, int Mode, int Unrolling, int StorageOrder>
struct ei_triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,Unrolling,StorageOrder,1>
{
typedef Map<Matrix<RhsScalar,Dynamic,1>, Aligned> MappedRhs;
static void run(const Lhs& lhs, Rhs& rhs)
{
Transpose<Rhs> rhsTr(rhs);
Transpose<Lhs> lhsTr(lhs);
ei_triangular_solver_selector<Transpose<Lhs>,Transpose<Rhs>,OnTheLeft,TriangularView<Lhs,Mode>::TransposeMode>::run(lhsTr,rhsTr);
ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
// FIXME find a way to allow an inner stride if packet_traits<Scalar>::size==1
bool useRhsDirectly = Rhs::InnerStrideAtCompileTime==1 || rhs.innerStride()==1;
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhs,rhs.size(),
(useRhsDirectly ? rhs.data() : 0));
if(!useRhsDirectly)
MappedRhs(actualRhs,rhs.size()) = rhs;
triangular_solve_vector<LhsScalar, RhsScalar, typename Lhs::Index, Side, Mode, LhsProductTraits::NeedToConjugate,
(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor>
::run(actualLhs.cols(), actualLhs.data(), actualLhs.outerStride(), actualRhs);
if(!useRhsDirectly)
rhs = MappedRhs(actualRhs, rhs.size());
}
};
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherStorageOrder>
struct ei_triangular_solve_matrix;
// the rhs is a matrix
template<typename Lhs, typename Rhs, int Side, int Mode, int StorageOrder>
struct ei_triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,StorageOrder,Dynamic>
template<typename Lhs, typename Rhs, int Side, int Mode>
struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
{
typedef typename Rhs::Scalar Scalar;
typedef typename Rhs::Index Index;
typedef ei_blas_traits<Lhs> LhsProductTraits;
typedef blas_traits<Lhs> LhsProductTraits;
typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
static void run(const Lhs& lhs, Rhs& rhs)
{
const ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
ei_triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,StorageOrder,
triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor>
::run(lhs.rows(), Side==OnTheLeft? rhs.cols() : rhs.rows(), &actualLhs.coeff(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride());
::run(lhs.rows(), Side==OnTheLeft? rhs.cols() : rhs.rows(), &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride());
}
};
@@ -196,10 +113,10 @@ struct ei_triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,StorageOrder,
template<typename Lhs, typename Rhs, int Mode, int Index, int Size,
bool Stop = Index==Size>
struct ei_triangular_solver_unroller;
struct triangular_solver_unroller;
template<typename Lhs, typename Rhs, int Mode, int Index, int Size>
struct ei_triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,false> {
struct triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,false> {
enum {
IsLower = ((Mode&Lower)==Lower),
I = IsLower ? Index : Size - Index - 1,
@@ -208,33 +125,47 @@ struct ei_triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,false> {
static void run(const Lhs& lhs, Rhs& rhs)
{
if (Index>0)
rhs.coeffRef(I) -= lhs.row(I).template segment<Index>(S).transpose().cwiseProduct(rhs.template segment<Index>(S)).sum();
rhs.coeffRef(I) -= lhs.row(I).template segment<Index>(S).transpose()
.cwiseProduct(rhs.template segment<Index>(S)).sum();
if(!(Mode & UnitDiag))
rhs.coeffRef(I) /= lhs.coeff(I,I);
ei_triangular_solver_unroller<Lhs,Rhs,Mode,Index+1,Size>::run(lhs,rhs);
triangular_solver_unroller<Lhs,Rhs,Mode,Index+1,Size>::run(lhs,rhs);
}
};
template<typename Lhs, typename Rhs, int Mode, int Index, int Size>
struct ei_triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,true> {
struct triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,true> {
static void run(const Lhs&, Rhs&) {}
};
template<typename Lhs, typename Rhs, int Mode, int StorageOrder>
struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,CompleteUnrolling,StorageOrder,1> {
template<typename Lhs, typename Rhs, int Mode>
struct triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,CompleteUnrolling,1> {
static void run(const Lhs& lhs, Rhs& rhs)
{ ei_triangular_solver_unroller<Lhs,Rhs,Mode,0,Rhs::SizeAtCompileTime>::run(lhs,rhs); }
{ triangular_solver_unroller<Lhs,Rhs,Mode,0,Rhs::SizeAtCompileTime>::run(lhs,rhs); }
};
// Fully unrolled triangular solve applied on the right (OnTheRight) for the
// case where the last selector argument is 1 (presumably a single-vector
// right-hand side -- confirm against the primary template, not visible here).
//
// The problem is reduced to the left-sided unroller by transposing both
// operands: lhs and rhs are wrapped in Transpose expressions, and since
// transposing a triangular matrix flips its triangle, the Upper/Lower part
// of Mode is swapped while the UnitDiag bit is forwarded unchanged.
template<typename Lhs, typename Rhs, int Mode>
struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
static void run(const Lhs& lhs, Rhs& rhs)
{
// Transposed views of the two operands; rhs stays writable through trRhs.
Transpose<const Lhs> trLhs(lhs);
Transpose<Rhs> trRhs(rhs);
// Mode seen by the unroller: Upper -> Lower, anything else -> Upper,
// OR-ed with the original UnitDiag bit. Unrolling starts at index 0 and
// covers Rhs::SizeAtCompileTime coefficients.
triangular_solver_unroller<Transpose<const Lhs>,Transpose<Rhs>,
((Mode&Upper)==Upper ? Lower : Upper) | (Mode&UnitDiag),
0,Rhs::SizeAtCompileTime>::run(trLhs,trRhs);
}
};
} // end namespace internal
/***************************************************************************
* TriangularView methods
***************************************************************************/
/** "in-place" version of TriangularView::solve() where the result is written in \a other
*
*
*
* \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
* This function will const_cast it, so constness isn't honored here.
@@ -246,17 +177,17 @@ template<int Side, typename OtherDerived>
void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
{
OtherDerived& other = _other.const_cast_derived();
ei_assert(cols() == rows());
ei_assert( (Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols()) );
ei_assert(!(Mode & ZeroDiag));
ei_assert(Mode & (Upper|Lower));
eigen_assert(cols() == rows());
eigen_assert( (Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols()) );
eigen_assert(!(Mode & ZeroDiag));
eigen_assert((Mode & (Upper|Lower)) != 0);
enum { copy = ei_traits<OtherDerived>::Flags & RowMajorBit && OtherDerived::IsVectorAtCompileTime };
typedef typename ei_meta_if<copy,
typename ei_plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::ret OtherCopy;
enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit && OtherDerived::IsVectorAtCompileTime };
typedef typename internal::conditional<copy,
typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy;
OtherCopy otherCopy(other);
ei_triangular_solver_selector<MatrixType, typename ei_unref<OtherCopy>::type,
internal::triangular_solver_selector<MatrixType, typename internal::remove_reference<OtherCopy>::type,
Side, Mode>::run(nestedExpression(), otherCopy);
if (copy)
@@ -265,43 +196,68 @@ void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived
/** \returns the product of the inverse of \c *this with \a other, \a *this being triangular.
*
* This function computes the inverse-matrix matrix product inverse(\c *this) * \a other if
* \a Side==OnTheLeft (the default), or the right-inverse-multiply \a other * inverse(\c *this) if
* \a Side==OnTheRight.
*
*
* This function computes the inverse-matrix matrix product inverse(\c *this) * \a other.
* The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the
* diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this
* is a lower (resp. upper) triangular matrix.
*
* It is required that \c *this be marked as either an upper or a lower triangular matrix, which
* can be done by marked(), and that is automatically the case with expressions such as those returned
* by extract().
*
* Example: \include MatrixBase_marked.cpp
* Output: \verbinclude MatrixBase_marked.out
*
* This function is essentially a wrapper to the faster solveTriangularInPlace() function creating
* a temporary copy of \a other, calling solveTriangularInPlace() on the copy and returning it.
* Therefore, if \a other is not needed anymore, it is quite faster to call solveTriangularInPlace()
* instead of solveTriangular().
* This function returns an expression of the inverse-multiply and can work in-place if it is assigned
* to the same matrix or vector \a other.
*
* For users coming from BLAS, this function (and more specifically solveTriangularInPlace()) offer
* For users coming from BLAS, this function (and more specifically solveInPlace()) offers
* all the operations supported by the \c *TRSV and \c *TRSM BLAS routines.
*
* \b Tips: to perform a \em "right-inverse-multiply" you can simply transpose the operation, e.g.:
* \code
* M * T^-1 <=> T.transpose().solveInPlace(M.transpose());
* \endcode
*
* \sa TriangularView::solveInPlace()
*/
template<typename Derived, unsigned int Mode>
template<int Side, typename RhsDerived>
typename ei_plain_matrix_type_column_major<RhsDerived>::type
TriangularView<Derived,Mode>::solve(const MatrixBase<RhsDerived>& rhs) const
template<int Side, typename Other>
const internal::triangular_solve_retval<Side,TriangularView<Derived,Mode>,Other>
TriangularView<Derived,Mode>::solve(const MatrixBase<Other>& other) const
{
typename ei_plain_matrix_type_column_major<RhsDerived>::type res(rhs);
solveInPlace<Side>(res);
return res;
return internal::triangular_solve_retval<Side,TriangularView,Other>(*this, other.derived());
}
namespace internal {
// Traits specialization for the triangular_solve_retval expression.
// It only declares ReturnType, defined as the type produced by
// plain_matrix_type_column_major for the right-hand side Rhs (by its name, a
// plain column-major matrix matching Rhs -- the helper itself is not visible
// here).
template<int Side, typename TriangularType, typename Rhs>
struct traits<triangular_solve_retval<Side, TriangularType, Rhs> >
{
typedef typename internal::plain_matrix_type_column_major<Rhs>::type ReturnType;
};
// Return-by-value proxy produced by TriangularView::solve().
//
// It stores a reference to the triangular view and the (nested) right-hand
// side, reports the dimensions of the right-hand side as its own, and performs
// the actual solve only when evalTo() is called with a destination.
//
// Template parameters:
//   Side           - forwarded to solveInPlace<Side>() (OnTheLeft / OnTheRight).
//   TriangularType - type of the triangular view being inverted.
//   Rhs            - type of the right-hand side expression.
template<int Side, typename TriangularType, typename Rhs> struct triangular_solve_retval
: public ReturnByValue<triangular_solve_retval<Side, TriangularType, Rhs> >
{
// Rhs::Nested with references/qualifiers stripped; used below to compare
// against the destination type.
typedef typename remove_all<typename Rhs::Nested>::type RhsNestedCleaned;
typedef ReturnByValue<triangular_solve_retval> Base;
typedef typename Base::Index Index;
// Stores the operands only; no computation happens at construction.
triangular_solve_retval(const TriangularType& tri, const Rhs& rhs)
: m_triangularMatrix(tri), m_rhs(rhs)
{}
// The result has the same dimensions as the right-hand side.
inline Index rows() const { return m_rhs.rows(); }
inline Index cols() const { return m_rhs.cols(); }
// Writes the solution into dst.
// The right-hand side is first copied into dst, unless dst has the same type
// as the nested rhs AND both refer to the same data pointer (i.e. the result
// is being assigned back to the rhs itself), in which case the copy is
// skipped. The system is then solved in place on dst.
template<typename Dest> inline void evalTo(Dest& dst) const
{
if(!(is_same<RhsNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_rhs)))
dst = m_rhs;
m_triangularMatrix.template solveInPlace<Side>(dst);
}
protected:
// Held by reference: the triangular view must outlive this proxy.
const TriangularType& m_triangularMatrix;
// Stored as Rhs::Nested (whether that is a copy or a reference is decided by
// Rhs, not visible here).
const typename Rhs::Nested m_rhs;
};
} // namespace internal
#endif // EIGEN_SOLVETRIANGULAR_H

View File

@@ -25,13 +25,14 @@
#ifndef EIGEN_STABLENORM_H
#define EIGEN_STABLENORM_H
namespace internal {
template<typename ExpressionType, typename Scalar>
inline void ei_stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
{
Scalar max = bl.cwiseAbs().maxCoeff();
if (max>scale)
{
ssq = ssq * ei_abs2(scale/max);
ssq = ssq * abs2(scale/max);
scale = max;
invScale = Scalar(1)/scale;
}
@@ -39,6 +40,7 @@ inline void ei_stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar&
// then we can neglect this sub vector
ssq += (bl*invScale).squaredNorm();
}
}
/** \returns the \em l2 norm of \c *this avoiding underflow and overflow.
* This version uses a blockwise two-pass algorithm:
@@ -51,9 +53,10 @@ inline void ei_stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar&
* \sa norm(), blueNorm(), hypotNorm()
*/
template<typename Derived>
inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::stableNorm() const
{
using std::min;
const Index blockSize = 4096;
RealScalar scale = 0;
RealScalar invScale = 1;
@@ -62,12 +65,12 @@ MatrixBase<Derived>::stableNorm() const
Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0
};
Index n = size();
Index bi = ei_first_aligned(derived());
Index bi = internal::first_aligned(derived());
if (bi>0)
ei_stable_norm_kernel(this->head(bi), ssq, scale, invScale);
internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale);
for (; bi<n; bi+=blockSize)
ei_stable_norm_kernel(this->segment(bi,std::min(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
return scale * ei_sqrt(ssq);
internal::stable_norm_kernel(this->segment(bi,(min)(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
return scale * internal::sqrt(ssq);
}
/** \returns the \em l2 norm of \c *this using the Blue's algorithm.
@@ -80,9 +83,12 @@ MatrixBase<Derived>::stableNorm() const
* \sa norm(), stableNorm(), hypotNorm()
*/
template<typename Derived>
inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::blueNorm() const
{
using std::pow;
using std::min;
using std::max;
static Index nmax = -1;
static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr;
if(nmax <= 0)
@@ -97,26 +103,26 @@ MatrixBase<Derived>::blueNorm() const
// For portability, the PORT subprograms "ilmaeh" and "rlmach"
// are used. For any specific computer, each of the assignment
// statements can be replaced
nbig = std::numeric_limits<Index>::max(); // largest integer
nbig = (std::numeric_limits<Index>::max)(); // largest integer
ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
rbig = std::numeric_limits<RealScalar>::max(); // largest floating-point number
rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number
iexp = -((1-iemin)/2);
b1 = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
iexp = (iemax + 1 - it)/2;
b2 = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
iexp = (2-iemin)/2;
s1m = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
iexp = - ((iemax+it)/2);
s2m = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
overfl = rbig*s2m; // overflow boundary for abig
eps = RealScalar(std::pow(double(ibeta), 1-it));
relerr = ei_sqrt(eps); // tolerance for neglecting asml
eps = RealScalar(pow(double(ibeta), 1-it));
relerr = internal::sqrt(eps); // tolerance for neglecting asml
abig = RealScalar(1.0/eps - 1.0);
if (RealScalar(nbig)>abig) nmax = int(abig); // largest safe n
else nmax = nbig;
@@ -128,23 +134,23 @@ MatrixBase<Derived>::blueNorm() const
RealScalar abig = RealScalar(0);
for(Index j=0; j<n; ++j)
{
RealScalar ax = ei_abs(coeff(j));
if(ax > ab2) abig += ei_abs2(ax*s2m);
else if(ax < b1) asml += ei_abs2(ax*s1m);
else amed += ei_abs2(ax);
RealScalar ax = internal::abs(coeff(j));
if(ax > ab2) abig += internal::abs2(ax*s2m);
else if(ax < b1) asml += internal::abs2(ax*s1m);
else amed += internal::abs2(ax);
}
if(abig > RealScalar(0))
{
abig = ei_sqrt(abig);
abig = internal::sqrt(abig);
if(abig > overfl)
{
ei_assert(false && "overflow");
eigen_assert(false && "overflow");
return rbig;
}
if(amed > RealScalar(0))
{
abig = abig/s2m;
amed = ei_sqrt(amed);
amed = internal::sqrt(amed);
}
else
return abig/s2m;
@@ -153,20 +159,20 @@ MatrixBase<Derived>::blueNorm() const
{
if (amed > RealScalar(0))
{
abig = ei_sqrt(amed);
amed = ei_sqrt(asml) / s1m;
abig = internal::sqrt(amed);
amed = internal::sqrt(asml) / s1m;
}
else
return ei_sqrt(asml)/s1m;
return internal::sqrt(asml)/s1m;
}
else
return ei_sqrt(amed);
asml = std::min(abig, amed);
abig = std::max(abig, amed);
return internal::sqrt(amed);
asml = (min)(abig, amed);
abig = (max)(abig, amed);
if(asml <= abig*relerr)
return abig;
else
return abig * ei_sqrt(RealScalar(1) + ei_abs2(asml/abig));
return abig * internal::sqrt(RealScalar(1) + internal::abs2(asml/abig));
}
/** \returns the \em l2 norm of \c *this avoiding underflow and overflow.
@@ -175,10 +181,10 @@ MatrixBase<Derived>::blueNorm() const
* \sa norm(), stableNorm()
*/
template<typename Derived>
inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::hypotNorm() const
{
return this->cwiseAbs().redux(ei_scalar_hypot_op<RealScalar>());
return this->cwiseAbs().redux(internal::scalar_hypot_op<RealScalar>());
}
#endif // EIGEN_STABLENORM_H

View File

@@ -51,7 +51,7 @@
* \include Map_general_stride.cpp
* Output: \verbinclude Map_general_stride.out
*
* \sa class InnerStride, class OuterStride
* \sa class InnerStride, class OuterStride, \ref TopicStorageOrders
*/
template<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime>
class Stride
@@ -67,14 +67,14 @@ class Stride
Stride()
: m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
{
ei_assert(InnerStrideAtCompileTime != Dynamic && OuterStrideAtCompileTime != Dynamic);
eigen_assert(InnerStrideAtCompileTime != Dynamic && OuterStrideAtCompileTime != Dynamic);
}
/** Constructor allowing to pass the strides at runtime */
Stride(Index outerStride, Index innerStride)
: m_outer(outerStride), m_inner(innerStride)
{
ei_assert(innerStride>=0 && outerStride>=0);
eigen_assert(innerStride>=0 && outerStride>=0);
}
/** Copy constructor */
@@ -88,8 +88,8 @@ class Stride
inline Index inner() const { return m_inner.value(); }
protected:
ei_variable_if_dynamic<Index, OuterStrideAtCompileTime> m_outer;
ei_variable_if_dynamic<Index, InnerStrideAtCompileTime> m_inner;
internal::variable_if_dynamic<Index, OuterStrideAtCompileTime> m_outer;
internal::variable_if_dynamic<Index, InnerStrideAtCompileTime> m_inner;
};
/** \brief Convenience specialization of Stride to specify only an inner stride

View File

@@ -32,17 +32,19 @@
*
* \brief Internal helper class for swapping two expressions
*/
namespace internal {
template<typename ExpressionType>
struct ei_traits<SwapWrapper<ExpressionType> > : ei_traits<ExpressionType> {};
struct traits<SwapWrapper<ExpressionType> > : traits<ExpressionType> {};
}
template<typename ExpressionType> class SwapWrapper
: public ei_dense_xpr_base<SwapWrapper<ExpressionType> >::type
: public internal::dense_xpr_base<SwapWrapper<ExpressionType> >::type
{
public:
typedef typename ei_dense_xpr_base<SwapWrapper>::type Base;
typedef typename internal::dense_xpr_base<SwapWrapper>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(SwapWrapper)
typedef typename ei_packet_traits<Scalar>::type Packet;
typedef typename internal::packet_traits<Scalar>::type Packet;
inline SwapWrapper(ExpressionType& xpr) : m_expression(xpr) {}
@@ -61,11 +63,21 @@ template<typename ExpressionType> class SwapWrapper
return m_expression.const_cast_derived().coeffRef(index);
}
/** \returns a writable reference to the coefficient at (\a row, \a col) of the
  * wrapped expression. This const overload forwards directly to
  * m_expression.coeffRef(row, col). */
inline Scalar& coeffRef(Index row, Index col) const
{
return m_expression.coeffRef(row, col);
}
/** \returns a writable reference to the coefficient at linear position
  * \a index of the wrapped expression. This const overload forwards directly
  * to m_expression.coeffRef(index). */
inline Scalar& coeffRef(Index index) const
{
return m_expression.coeffRef(index);
}
template<typename OtherDerived>
void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(row >= 0 && row < rows()
eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
Scalar tmp = m_expression.coeff(row, col);
m_expression.coeffRef(row, col) = _other.coeff(row, col);
@@ -76,7 +88,7 @@ template<typename ExpressionType> class SwapWrapper
void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(index >= 0 && index < m_expression.size());
eigen_internal_assert(index >= 0 && index < m_expression.size());
Scalar tmp = m_expression.coeff(index);
m_expression.coeffRef(index) = _other.coeff(index);
_other.coeffRef(index) = tmp;
@@ -86,7 +98,7 @@ template<typename ExpressionType> class SwapWrapper
void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(row >= 0 && row < rows()
eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
Packet tmp = m_expression.template packet<StoreMode>(row, col);
m_expression.template writePacket<StoreMode>(row, col,
@@ -99,7 +111,7 @@ template<typename ExpressionType> class SwapWrapper
void copyPacket(Index index, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(index >= 0 && index < m_expression.size());
eigen_internal_assert(index >= 0 && index < m_expression.size());
Packet tmp = m_expression.template packet<StoreMode>(index);
m_expression.template writePacket<StoreMode>(index,
_other.template packet<LoadMode>(index)
@@ -111,18 +123,4 @@ template<typename ExpressionType> class SwapWrapper
ExpressionType& m_expression;
};
/** swaps *this with the expression \a other.
*
* \note \a other is only marked for internal reasons, but of course
* it gets const-casted. One reason is that one will often call swap
* on temporary objects (hence non-const references are forbidden).
* Another reason is that lazyAssign takes a const argument anyway.
*/
template<typename Derived>
template<typename OtherDerived>
void DenseBase<Derived>::swap(DenseBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other)
{
(SwapWrapper<Derived>(derived())).lazyAssign(other);
}
#endif // EIGEN_SWAP_H

View File

@@ -39,37 +39,43 @@
*
* \sa MatrixBase::transpose(), MatrixBase::adjoint()
*/
namespace internal {
template<typename MatrixType>
struct ei_traits<Transpose<MatrixType> > : ei_traits<MatrixType>
struct traits<Transpose<MatrixType> > : traits<MatrixType>
{
typedef typename MatrixType::Scalar Scalar;
typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
typedef typename ei_traits<MatrixType>::StorageKind StorageKind;
typedef typename ei_traits<MatrixType>::XprKind XprKind;
typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain;
typedef typename traits<MatrixType>::StorageKind StorageKind;
typedef typename traits<MatrixType>::XprKind XprKind;
enum {
RowsAtCompileTime = MatrixType::ColsAtCompileTime,
ColsAtCompileTime = MatrixType::RowsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
Flags = int(_MatrixTypeNested::Flags & ~NestByRefBit) ^ RowMajorBit,
CoeffReadCost = _MatrixTypeNested::CoeffReadCost,
InnerStrideAtCompileTime = ei_inner_stride_at_compile_time<MatrixType>::ret,
OuterStrideAtCompileTime = ei_outer_stride_at_compile_time<MatrixType>::ret
FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit),
Flags1 = Flags0 | FlagsLvalueBit,
Flags = Flags1 ^ RowMajorBit,
CoeffReadCost = MatrixTypeNestedPlain::CoeffReadCost,
InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret,
OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
};
};
}
template<typename MatrixType, typename StorageKind> class TransposeImpl;
template<typename MatrixType> class Transpose
: public TransposeImpl<MatrixType,typename ei_traits<MatrixType>::StorageKind>
: public TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>
{
public:
typedef typename TransposeImpl<MatrixType,typename ei_traits<MatrixType>::StorageKind>::Base Base;
typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
inline Transpose(const MatrixType& matrix) : m_matrix(matrix) {}
inline Transpose(MatrixType& matrix) : m_matrix(matrix) {}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
@@ -77,50 +83,73 @@ template<typename MatrixType> class Transpose
inline Index cols() const { return m_matrix.rows(); }
/** \returns the nested expression */
const typename ei_cleantype<typename MatrixType::Nested>::type&
const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const { return m_matrix; }
/** \returns the nested expression */
typename ei_cleantype<typename MatrixType::Nested>::type&
typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() { return m_matrix.const_cast_derived(); }
protected:
const typename MatrixType::Nested m_matrix;
};
template<typename MatrixType, bool HasDirectAccess = ei_has_direct_access<MatrixType>::ret>
struct ei_TransposeImpl_base
namespace internal {
template<typename MatrixType, bool HasDirectAccess = has_direct_access<MatrixType>::ret>
struct TransposeImpl_base
{
typedef typename ei_dense_xpr_base<Transpose<MatrixType> >::type type;
typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;
};
template<typename MatrixType>
struct ei_TransposeImpl_base<MatrixType, false>
struct TransposeImpl_base<MatrixType, false>
{
typedef typename ei_dense_xpr_base<Transpose<MatrixType> >::type type;
typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;
};
} // end namespace internal
template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
: public ei_TransposeImpl_base<MatrixType>::type
: public internal::TransposeImpl_base<MatrixType>::type
{
public:
typedef typename ei_TransposeImpl_base<MatrixType>::type Base;
typedef typename internal::TransposeImpl_base<MatrixType>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
inline Scalar* data() { return derived().nestedExpression().data(); }
typedef typename internal::conditional<
internal::is_lvalue<MatrixType>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
inline const Scalar* data() const { return derived().nestedExpression().data(); }
inline Scalar& coeffRef(Index row, Index col)
inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)
{
return const_cast_derived().nestedExpression().coeffRef(col, row);
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return derived().nestedExpression().const_cast_derived().coeffRef(col, row);
}
inline Scalar& coeffRef(Index index)
inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
{
return const_cast_derived().nestedExpression().coeffRef(index);
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return derived().nestedExpression().const_cast_derived().coeffRef(index);
}
/** \returns a read-only reference to the coefficient at (\a row, \a col) of the
  * transposed expression, i.e. the coefficient at (\a col, \a row) of the
  * nested expression (note the swapped indices). */
inline const Scalar& coeffRef(Index row, Index col) const
{
return derived().nestedExpression().coeffRef(col, row);
}
/** \returns a read-only reference to the coefficient at linear position
  * \a index; the index is forwarded unchanged to the nested expression. */
inline const Scalar& coeffRef(Index index) const
{
return derived().nestedExpression().coeffRef(index);
}
inline const CoeffReturnType coeff(Index row, Index col) const
@@ -142,7 +171,7 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
template<int LoadMode>
inline void writePacket(Index row, Index col, const PacketScalar& x)
{
const_cast_derived().nestedExpression().template writePacket<LoadMode>(col, row, x);
derived().nestedExpression().const_cast_derived().template writePacket<LoadMode>(col, row, x);
}
template<int LoadMode>
@@ -154,7 +183,7 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
template<int LoadMode>
inline void writePacket(Index index, const PacketScalar& x)
{
const_cast_derived().nestedExpression().template writePacket<LoadMode>(index, x);
derived().nestedExpression().const_cast_derived().template writePacket<LoadMode>(index, x);
}
};
@@ -190,10 +219,10 @@ DenseBase<Derived>::transpose()
*
* \sa transposeInPlace(), adjoint() */
template<typename Derived>
inline const Transpose<Derived>
inline const typename DenseBase<Derived>::ConstTransposeReturnType
DenseBase<Derived>::transpose() const
{
return derived();
return ConstTransposeReturnType(derived());
}
/** \returns an expression of the adjoint (i.e. conjugate transpose) of *this.
@@ -214,31 +243,34 @@ DenseBase<Derived>::transpose() const
* m = m.adjoint().eval();
* \endcode
*
* \sa adjointInPlace(), transpose(), conjugate(), class Transpose, class ei_scalar_conjugate_op */
* \sa adjointInPlace(), transpose(), conjugate(), class Transpose, class internal::scalar_conjugate_op */
template<typename Derived>
inline const typename MatrixBase<Derived>::AdjointReturnType
MatrixBase<Derived>::adjoint() const
{
return this->transpose();
return this->transpose(); // in the complex case, the .conjugate() is be implicit here
// due to implicit conversion to return type
}
/***************************************************************************
* "in place" transpose implementation
***************************************************************************/
namespace internal {
template<typename MatrixType,
bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic>
struct ei_inplace_transpose_selector;
struct inplace_transpose_selector;
template<typename MatrixType>
struct ei_inplace_transpose_selector<MatrixType,true> { // square matrix
struct inplace_transpose_selector<MatrixType,true> { // square matrix
static void run(MatrixType& m) {
m.template triangularView<StrictlyUpper>().swap(m.transpose());
}
};
template<typename MatrixType>
struct ei_inplace_transpose_selector<MatrixType,false> { // non square matrix
struct inplace_transpose_selector<MatrixType,false> { // non square matrix
static void run(MatrixType& m) {
if (m.rows()==m.cols())
m.template triangularView<StrictlyUpper>().swap(m.transpose());
@@ -247,6 +279,8 @@ struct ei_inplace_transpose_selector<MatrixType,false> { // non square matrix
}
};
} // end namespace internal
/** This is the "in place" version of transpose(): it replaces \c *this by its own transpose.
* Thus, doing
* \code
@@ -268,7 +302,7 @@ struct ei_inplace_transpose_selector<MatrixType,false> { // non square matrix
template<typename Derived>
inline void DenseBase<Derived>::transposeInPlace()
{
ei_inplace_transpose_selector<Derived>::run(derived());
internal::inplace_transpose_selector<Derived>::run(derived());
}
/***************************************************************************
@@ -303,45 +337,46 @@ inline void MatrixBase<Derived>::adjointInPlace()
// The following is to detect aliasing problems in most common cases.
namespace internal {
template<typename BinOp,typename NestedXpr,typename Rhs>
struct ei_blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> >
: ei_blas_traits<NestedXpr>
struct blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> >
: blas_traits<NestedXpr>
{
typedef SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> XprType;
static inline const XprType extract(const XprType& x) { return x; }
};
template<bool DestIsTransposed, typename OtherDerived>
struct ei_check_transpose_aliasing_compile_time_selector
struct check_transpose_aliasing_compile_time_selector
{
enum { ret = ei_blas_traits<OtherDerived>::IsTransposed != DestIsTransposed
};
enum { ret = bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed };
};
template<bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
struct ei_check_transpose_aliasing_compile_time_selector<DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
struct check_transpose_aliasing_compile_time_selector<DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
{
enum { ret = ei_blas_traits<DerivedA>::IsTransposed != DestIsTransposed
|| ei_blas_traits<DerivedB>::IsTransposed != DestIsTransposed
enum { ret = bool(blas_traits<DerivedA>::IsTransposed) != DestIsTransposed
|| bool(blas_traits<DerivedB>::IsTransposed) != DestIsTransposed
};
};
template<typename Scalar, bool DestIsTransposed, typename OtherDerived>
struct ei_check_transpose_aliasing_run_time_selector
struct check_transpose_aliasing_run_time_selector
{
static bool run(const Scalar* dest, const OtherDerived& src)
{
return (ei_blas_traits<OtherDerived>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(Scalar*)ei_extract_data(src));
return (bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed) && (dest!=0 && dest==(Scalar*)extract_data(src));
}
};
template<typename Scalar, bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
struct ei_check_transpose_aliasing_run_time_selector<Scalar,DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
struct check_transpose_aliasing_run_time_selector<Scalar,DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
{
static bool run(const Scalar* dest, const CwiseBinaryOp<BinOp,DerivedA,DerivedB>& src)
{
return ((ei_blas_traits<DerivedA>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(Scalar*)ei_extract_data(src.lhs())))
|| ((ei_blas_traits<DerivedB>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(Scalar*)ei_extract_data(src.rhs())));
return ((blas_traits<DerivedA>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(Scalar*)extract_data(src.lhs())))
|| ((blas_traits<DerivedB>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(Scalar*)extract_data(src.rhs())));
}
};
@@ -353,16 +388,16 @@ struct ei_check_transpose_aliasing_run_time_selector<Scalar,DestIsTransposed,Cwi
template<typename Derived, typename OtherDerived,
bool MightHaveTransposeAliasing
= ei_check_transpose_aliasing_compile_time_selector
<ei_blas_traits<Derived>::IsTransposed,OtherDerived>::ret
= check_transpose_aliasing_compile_time_selector
<blas_traits<Derived>::IsTransposed,OtherDerived>::ret
>
struct checkTransposeAliasing_impl
{
static void run(const Derived& dst, const OtherDerived& other)
{
ei_assert((!ei_check_transpose_aliasing_run_time_selector
<typename Derived::Scalar,ei_blas_traits<Derived>::IsTransposed,OtherDerived>
::run(ei_extract_data(dst), other))
eigen_assert((!check_transpose_aliasing_run_time_selector
<typename Derived::Scalar,blas_traits<Derived>::IsTransposed,OtherDerived>
::run(extract_data(dst), other))
&& "aliasing detected during tranposition, use transposeInPlace() "
"or evaluate the rhs into a temporary using .eval()");
@@ -377,12 +412,13 @@ struct checkTransposeAliasing_impl<Derived, OtherDerived, false>
}
};
} // end namespace internal
template<typename Derived>
template<typename OtherDerived>
void DenseBase<Derived>::checkTransposeAliasing(const OtherDerived& other) const
{
checkTransposeAliasing_impl<Derived, OtherDerived>::run(derived(), other);
internal::checkTransposeAliasing_impl<Derived, OtherDerived>::run(derived(), other);
}
#endif

View File

@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2010-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -53,90 +53,75 @@
*
* \sa class PermutationMatrix
*/
template<typename TranspositionType, typename MatrixType, int Side, bool Transposed=false> struct ei_transposition_matrix_product_retval;
template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
class Transpositions
namespace internal {
template<typename TranspositionType, typename MatrixType, int Side, bool Transposed=false> struct transposition_matrix_product_retval;
}
template<typename Derived>
class TranspositionsBase
{
typedef internal::traits<Derived> Traits;
public:
typedef Matrix<DenseIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
typedef typename IndicesType::Index Index;
typedef typename Traits::IndicesType IndicesType;
typedef typename IndicesType::Scalar Index;
inline Transpositions() {}
/** Copy constructor. */
template<int OtherSize, int OtherMaxSize>
inline Transpositions(const Transpositions<OtherSize, OtherMaxSize>& other)
: m_indices(other.indices()) {}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** Standard copy constructor. Defined only to prevent a default copy constructor
* from hiding the other templated constructor */
inline Transpositions(const Transpositions& other) : m_indices(other.indices()) {}
#endif
/** Generic constructor from expression of the transposition indices. */
template<typename Other>
explicit inline Transpositions(const MatrixBase<Other>& indices) : m_indices(indices)
{}
Derived& derived() { return *static_cast<Derived*>(this); }
const Derived& derived() const { return *static_cast<const Derived*>(this); }
/** Copies the \a other transpositions into \c *this */
template<int OtherSize, int OtherMaxSize>
Transpositions& operator=(const Transpositions<OtherSize, OtherMaxSize>& other)
template<typename OtherDerived>
Derived& operator=(const TranspositionsBase<OtherDerived>& other)
{
m_indices = other.indices();
return *this;
indices() = other.indices();
return derived();
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
Transpositions& operator=(const Transpositions& other)
Derived& operator=(const TranspositionsBase& other)
{
m_indices = other.m_indices;
return *this;
indices() = other.indices();
return derived();
}
#endif
/** Constructs an uninitialized permutation matrix of given size.
*/
inline Transpositions(Index size) : m_indices(size)
{}
/** \returns the number of transpositions */
inline Index size() const { return m_indices.size(); }
inline Index size() const { return indices().size(); }
/** Direct access to the underlying index vector */
inline const Index& coeff(Index i) const { return m_indices.coeff(i); }
inline const Index& coeff(Index i) const { return indices().coeff(i); }
/** Direct access to the underlying index vector */
inline Index& coeffRef(Index i) { return m_indices.coeffRef(i); }
inline Index& coeffRef(Index i) { return indices().coeffRef(i); }
/** Direct access to the underlying index vector */
inline const Index& operator()(Index i) const { return m_indices(i); }
inline const Index& operator()(Index i) const { return indices()(i); }
/** Direct access to the underlying index vector */
inline Index& operator()(Index i) { return m_indices(i); }
inline Index& operator()(Index i) { return indices()(i); }
/** Direct access to the underlying index vector */
inline const Index& operator[](Index i) const { return m_indices(i); }
inline const Index& operator[](Index i) const { return indices()(i); }
/** Direct access to the underlying index vector */
inline Index& operator[](Index i) { return m_indices(i); }
inline Index& operator[](Index i) { return indices()(i); }
/** const version of indices(). */
const IndicesType& indices() const { return m_indices; }
const IndicesType& indices() const { return derived().indices(); }
/** \returns a reference to the stored array representing the transpositions. */
IndicesType& indices() { return m_indices; }
IndicesType& indices() { return derived().indices(); }
/** Resizes to given size. */
inline void resize(int size)
{
m_indices.resize(size);
indices().resize(size);
}
/** Sets \c *this to represent an identity transformation */
void setIdentity()
{
for(int i = 0; i < m_indices.size(); ++i)
m_indices.coeffRef(i) = i;
for(int i = 0; i < indices().size(); ++i)
coeffRef(i) = i;
}
// FIXME: do we want such methods ?
@@ -161,69 +146,238 @@ class Transpositions
*/
/** \returns the inverse transformation */
inline Transpose<Transpositions> inverse() const
{ return *this; }
inline Transpose<TranspositionsBase> inverse() const
{ return Transpose<TranspositionsBase>(derived()); }
/** \returns the transpose transformation */
inline Transpose<Transpositions> transpose() const
{ return *this; }
inline Transpose<TranspositionsBase> transpose() const
{ return Transpose<TranspositionsBase>(derived()); }
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<int OtherSize, int OtherMaxSize>
Transpositions(const Transpose<Transpositions<OtherSize,OtherMaxSize> >& other)
: m_indices(other.size())
protected:
};
namespace internal {

/** \internal
  * Traits of the owning Transpositions class: exposes the scalar type used for
  * the indices and the Matrix type (SizeAtCompileTime x 1) that stores them.
  */
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
struct traits<Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
{
  typedef Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
  typedef IndexType Index;
};

} // end namespace internal
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType> >
{
typedef internal::traits<Transpositions> Traits;
public:
typedef TranspositionsBase<Transpositions> Base;
typedef typename Traits::IndicesType IndicesType;
typedef typename IndicesType::Scalar Index;
inline Transpositions() {}
/** Copy constructor. */
template<typename OtherDerived>
inline Transpositions(const TranspositionsBase<OtherDerived>& other)
: m_indices(other.indices()) {}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** Standard copy constructor. Defined only to prevent a default copy constructor
* from hiding the other templated constructor */
inline Transpositions(const Transpositions& other) : m_indices(other.indices()) {}
#endif
/** Generic constructor from expression of the transposition indices. */
template<typename Other>
explicit inline Transpositions(const MatrixBase<Other>& indices) : m_indices(indices)
{}
/** Copies the \a other transpositions into \c *this */
template<typename OtherDerived>
Transpositions& operator=(const TranspositionsBase<OtherDerived>& other)
{
Index n = size();
Index j = size-1;
for(Index i=0; i<n;++i,--j)
m_indices.coeffRef(j) = other.nestedTranspositions().indices().coeff(i);
return Base::operator=(other);
}
#endif
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
Transpositions& operator=(const Transpositions& other)
{
m_indices = other.m_indices;
return *this;
}
#endif
/** Constructs an uninitialized sequence of transpositions of the given \a size.
*/
inline Transpositions(Index size) : m_indices(size)
{}
/** const version of indices(). */
const IndicesType& indices() const { return m_indices; }
/** \returns a reference to the stored array representing the transpositions. */
IndicesType& indices() { return m_indices; }
protected:
IndicesType m_indices;
};
namespace internal {

/** \internal
  * Traits of Map<Transpositions<...> >: the indices are exposed through a Map
  * over a const Matrix (SizeAtCompileTime x 1) instead of an owned Matrix.
  */
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
struct traits<Map<Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>, _PacketAccess> >
{
  typedef Map<const Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
  typedef IndexType Index;
};

} // end namespace internal
/** Specialization of Map for Transpositions: presents a caller-provided array
  * of indices through the TranspositionsBase interface. The index storage is
  * the IndicesType declared by internal::traits<Map>.
  */
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int PacketAccess>
class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,PacketAccess>
 : public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,PacketAccess> >
{
    typedef internal::traits<Map> Traits;

  public:

    typedef TranspositionsBase<Map> Base;
    typedef typename Traits::IndicesType IndicesType;
    typedef typename IndicesType::Scalar Index;

    /** Maps the index array starting at \a indices.
      * NOTE(review): no size is passed, so this presumably requires a
      * fixed SizeAtCompileTime -- confirm. */
    inline Map(const Index* indices) : m_indices(indices) {}

    /** Maps \a size indices starting at \a indices. */
    inline Map(const Index* indices, Index size) : m_indices(indices, size) {}

    /** Copies the \a other transpositions into \c *this */
    template<typename OtherDerived>
    Map& operator=(const TranspositionsBase<OtherDerived>& other)
    {
      return Base::operator=(other);
    }

    #ifndef EIGEN_PARSED_BY_DOXYGEN
    /** Non-template copy assignment, declared so that a compiler-generated
      * operator= does not hide the templated operator= above.
      */
    Map& operator=(const Map& other)
    {
      indices() = other.indices();
      return *this;
    }
    #endif

    /** \returns a read-only reference to the mapped index vector. */
    const IndicesType& indices() const { return m_indices; }

    /** \returns a reference to the mapped index vector. */
    IndicesType& indices() { return m_indices; }

  protected:

    IndicesType m_indices;
};
namespace internal {

/** \internal
  * Traits of TranspositionsWrapper: the wrapped expression type is used as-is
  * for the indices, and its Scalar is the index type.
  */
template<typename _IndicesType>
struct traits<TranspositionsWrapper<_IndicesType> >
{
  typedef _IndicesType IndicesType;
  typedef typename _IndicesType::Scalar Index;
};

} // end namespace internal
/** Wraps an existing expression of indices (\c _IndicesType) so that it can be
  * used through the TranspositionsBase interface without copying it.
  *
  * The wrapped expression is held as <tt>const IndicesType::Nested</tt>, so
  * write access has to go through const_cast_derived(); the constructor takes
  * a non-const reference, which is what makes handing out a mutable reference
  * legitimate.
  */
template<typename _IndicesType>
class TranspositionsWrapper
 : public TranspositionsBase<TranspositionsWrapper<_IndicesType> >
{
    typedef internal::traits<TranspositionsWrapper> Traits;

  public:

    typedef TranspositionsBase<TranspositionsWrapper> Base;
    typedef typename Traits::IndicesType IndicesType;
    typedef typename IndicesType::Scalar Index;

    /** Wraps \a indices; no copy of the indices is made by this constructor. */
    inline TranspositionsWrapper(IndicesType& indices)
      : m_indices(indices)
    {}

    /** Copies the \a other transpositions into \c *this */
    template<typename OtherDerived>
    TranspositionsWrapper& operator=(const TranspositionsBase<OtherDerived>& other)
    {
      return Base::operator=(other);
    }

    #ifndef EIGEN_PARSED_BY_DOXYGEN
    /** This is a special case of the templated operator=. Its purpose is to
      * prevent a default operator= from hiding the templated operator=.
      */
    TranspositionsWrapper& operator=(const TranspositionsWrapper& other)
    {
      // m_indices is const-qualified: assign through the mutable accessor
      // instead of writing to the member directly.
      indices() = other.indices();
      return *this;
    }
    #endif

    /** const version of indices(). */
    const IndicesType& indices() const { return m_indices; }

    /** \returns a reference to the wrapped expression of indices. */
    IndicesType& indices() { return m_indices.const_cast_derived(); }

  protected:

    const typename IndicesType::Nested m_indices;
};
/** \returns the \a matrix with the \a transpositions applied to the columns.
*/
template<typename Derived, int SizeAtCompileTime, int MaxSizeAtCompileTime>
inline const ei_transposition_matrix_product_retval<Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheRight>
template<typename Derived, typename TranspositionsDerived>
inline const internal::transposition_matrix_product_retval<TranspositionsDerived, Derived, OnTheRight>
operator*(const MatrixBase<Derived>& matrix,
const Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime> &transpositions)
const TranspositionsBase<TranspositionsDerived> &transpositions)
{
return ei_transposition_matrix_product_retval
<Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheRight>
(transpositions, matrix.derived());
return internal::transposition_matrix_product_retval
<TranspositionsDerived, Derived, OnTheRight>
(transpositions.derived(), matrix.derived());
}
/** \returns the \a matrix with the \a transpositions applied to the rows.
*/
template<typename Derived, int SizeAtCompileTime, int MaxSizeAtCompileTime>
inline const ei_transposition_matrix_product_retval
<Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheLeft>
operator*(const Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime> &transpositions,
template<typename Derived, typename TranspositionDerived>
inline const internal::transposition_matrix_product_retval
<TranspositionDerived, Derived, OnTheLeft>
operator*(const TranspositionsBase<TranspositionDerived> &transpositions,
const MatrixBase<Derived>& matrix)
{
return ei_transposition_matrix_product_retval
<Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheLeft>
(transpositions, matrix.derived());
return internal::transposition_matrix_product_retval
<TranspositionDerived, Derived, OnTheLeft>
(transpositions.derived(), matrix.derived());
}
namespace internal {
template<typename TranspositionType, typename MatrixType, int Side, bool Transposed>
struct ei_traits<ei_transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> >
struct traits<transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> >
{
typedef typename MatrixType::PlainObject ReturnType;
};
template<typename TranspositionType, typename MatrixType, int Side, bool Transposed>
struct ei_transposition_matrix_product_retval
: public ReturnByValue<ei_transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> >
struct transposition_matrix_product_retval
: public ReturnByValue<transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> >
{
typedef typename ei_cleantype<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
typedef typename TranspositionType::Index Index;
ei_transposition_matrix_product_retval(const TranspositionType& tr, const MatrixType& matrix)
transposition_matrix_product_retval(const TranspositionType& tr, const MatrixType& matrix)
: m_transpositions(tr), m_matrix(matrix)
{}
@@ -235,7 +389,7 @@ struct ei_transposition_matrix_product_retval
const int size = m_transpositions.size();
Index j = 0;
if(!(ei_is_same_type<MatrixTypeNestedCleaned,Dest>::ret && ei_extract_data(dst) == ei_extract_data(m_matrix)))
if(!(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix)))
dst = m_matrix;
for(int k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
@@ -253,12 +407,14 @@ struct ei_transposition_matrix_product_retval
const typename MatrixType::Nested m_matrix;
};
} // end namespace internal
/* Template partial specialization for transposed/inverse transpositions */
template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
class Transpose<Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime> >
template<typename TranspositionsDerived>
class Transpose<TranspositionsBase<TranspositionsDerived> >
{
typedef Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime> TranspositionType;
typedef TranspositionsDerived TranspositionType;
typedef typename TranspositionType::IndicesType IndicesType;
public:
@@ -269,23 +425,21 @@ class Transpose<Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime> >
/** \returns the \a matrix with the inverse transpositions applied to the columns.
*/
template<typename Derived> friend
inline const ei_transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true>
inline const internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true>
operator*(const MatrixBase<Derived>& matrix, const Transpose& trt)
{
return ei_transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true>(trt.m_transpositions, matrix.derived());
return internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true>(trt.m_transpositions, matrix.derived());
}
/** \returns the \a matrix with the inverse transpositions applied to the rows.
*/
template<typename Derived>
inline const ei_transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true>
inline const internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true>
operator*(const MatrixBase<Derived>& matrix) const
{
return ei_transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true>(m_transpositions, matrix.derived());
return internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true>(m_transpositions, matrix.derived());
}
const TranspositionType& nestedTranspositions() const { return m_transpositions; }
protected:
const TranspositionType& m_transpositions;
};

View File

@@ -26,6 +26,12 @@
#ifndef EIGEN_TRIANGULARMATRIX_H
#define EIGEN_TRIANGULARMATRIX_H
namespace internal {
// Forward declaration of the type returned by TriangularView::solve().
template<int Side, typename TriangularType, typename Rhs> struct triangular_solve_retval;
}
/** \internal
*
* \class TriangularBase
@@ -38,18 +44,20 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
public:
enum {
Mode = ei_traits<Derived>::Mode,
CoeffReadCost = ei_traits<Derived>::CoeffReadCost,
RowsAtCompileTime = ei_traits<Derived>::RowsAtCompileTime,
ColsAtCompileTime = ei_traits<Derived>::ColsAtCompileTime,
MaxRowsAtCompileTime = ei_traits<Derived>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = ei_traits<Derived>::MaxColsAtCompileTime
Mode = internal::traits<Derived>::Mode,
CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime
};
typedef typename ei_traits<Derived>::Scalar Scalar;
typedef typename ei_traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index;
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename internal::traits<Derived>::Index Index;
typedef typename internal::traits<Derived>::DenseMatrixType DenseMatrixType;
typedef DenseMatrixType DenseType;
inline TriangularBase() { ei_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); }
inline TriangularBase() { eigen_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); }
inline Index rows() const { return derived().rows(); }
inline Index cols() const { return derived().cols(); }
@@ -88,17 +96,26 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
template<typename DenseDerived>
void evalToLazy(MatrixBase<DenseDerived> &other) const;
/** \returns a DenseMatrixType of size rows() x cols() filled by evalToLazy(). */
DenseMatrixType toDenseMatrix() const
{
  DenseMatrixType dense(rows(), cols());
  evalToLazy(dense);
  return dense;
}
protected:
void check_coordinates(Index row, Index col) const
{
EIGEN_ONLY_USED_FOR_DEBUG(row);
EIGEN_ONLY_USED_FOR_DEBUG(col);
ei_assert(col>=0 && col<cols() && row>=0 && row<rows());
ei_assert( (Mode==Upper && col>=row)
|| (Mode==Lower && col<=row)
|| ((Mode==StrictlyUpper || Mode==UnitUpper) && col>row)
|| ((Mode==StrictlyLower || Mode==UnitLower) && col<row));
eigen_assert(col>=0 && col<cols() && row>=0 && row<rows());
const int mode = int(Mode) & ~SelfAdjoint;
EIGEN_ONLY_USED_FOR_DEBUG(mode);
eigen_assert((mode==Upper && col>=row)
|| (mode==Lower && col<=row)
|| ((mode==StrictlyUpper || mode==UnitUpper) && col>row)
|| ((mode==StrictlyLower || mode==UnitLower) && col<row));
}
#ifdef EIGEN_INTERNAL_DEBUGGING
@@ -118,29 +135,33 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
* \brief Base class for triangular part in a matrix
*
* \param MatrixType the type of the object in which we are taking the triangular part
* \param Mode the kind of triangular matrix expression to construct. Can be Upper,
* Lower, UpperSelfadjoint, or LowerSelfadjoint. This is in fact a bit field;
* it must have either Upper or Lower, and additionnaly it may have either
* UnitDiag or Selfadjoint.
* \param Mode the kind of triangular matrix expression to construct. Can be #Upper,
* #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower.
* This is in fact a bit field; it must have either #Upper or #Lower,
* and additionally it may have #UnitDiag or #ZeroDiag or neither.
*
* This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular
* matrices one should speak ok "trapezoid" parts. This class is the return type
* matrices one should speak of "trapezoid" parts. This class is the return type
* of MatrixBase::triangularView() and most of the time this is the only way it is used.
*
* \sa MatrixBase::triangularView()
*/
namespace internal {
template<typename MatrixType, unsigned int _Mode>
struct ei_traits<TriangularView<MatrixType, _Mode> > : ei_traits<MatrixType>
struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType>
{
typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
typedef MatrixType ExpressionType;
typedef typename MatrixType::PlainObject DenseMatrixType;
enum {
Mode = _Mode,
Flags = (_MatrixTypeNested::Flags & (HereditaryBits) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit))) | Mode,
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
Flags = (MatrixTypeNestedCleaned::Flags & (HereditaryBits) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit))) | Mode,
CoeffReadCost = MatrixTypeNestedCleaned::CoeffReadCost
};
};
}
template<int Mode, bool LhsIsTriangular,
typename Lhs, bool LhsIsVector,
@@ -153,22 +174,25 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
public:
typedef TriangularBase<TriangularView> Base;
typedef typename ei_traits<TriangularView>::Scalar Scalar;
typedef typename internal::traits<TriangularView>::Scalar Scalar;
typedef _MatrixType MatrixType;
typedef typename MatrixType::PlainObject DenseMatrixType;
typedef typename internal::traits<TriangularView>::DenseMatrixType DenseMatrixType;
typedef DenseMatrixType PlainObject;
protected:
typedef typename MatrixType::Nested MatrixTypeNested;
typedef typename ei_cleantype<MatrixTypeNested>::type _MatrixTypeNested;
typedef typename ei_cleantype<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
typedef typename internal::traits<TriangularView>::MatrixTypeNested MatrixTypeNested;
typedef typename internal::traits<TriangularView>::MatrixTypeNestedNonRef MatrixTypeNestedNonRef;
typedef typename internal::traits<TriangularView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
public:
using Base::evalToLazy;
typedef typename ei_traits<TriangularView>::StorageKind StorageKind;
typedef typename ei_traits<TriangularView>::Index Index;
typedef typename internal::traits<TriangularView>::StorageKind StorageKind;
typedef typename internal::traits<TriangularView>::Index Index;
enum {
Mode = _Mode,
@@ -179,7 +203,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
};
inline TriangularView(const MatrixType& matrix) : m_matrix(matrix)
{ ei_assert(ei_are_flags_consistent<Mode>::ret); }
{}
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
@@ -187,13 +211,13 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
inline Index innerStride() const { return m_matrix.innerStride(); }
/** \sa MatrixBase::operator+=() */
template<typename Other> TriangularView& operator+=(const Other& other) { return *this = m_matrix + other; }
template<typename Other> TriangularView& operator+=(const DenseBase<Other>& other) { return *this = m_matrix + other.derived(); }
/** \sa MatrixBase::operator-=() */
template<typename Other> TriangularView& operator-=(const Other& other) { return *this = m_matrix - other; }
template<typename Other> TriangularView& operator-=(const DenseBase<Other>& other) { return *this = m_matrix - other.derived(); }
/** \sa MatrixBase::operator*=() */
TriangularView& operator*=(const typename ei_traits<MatrixType>::Scalar& other) { return *this = m_matrix * other; }
TriangularView& operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix * other; }
/** \sa MatrixBase::operator/=() */
TriangularView& operator/=(const typename ei_traits<MatrixType>::Scalar& other) { return *this = m_matrix / other; }
TriangularView& operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix / other; }
/** \sa MatrixBase::fill() */
void fill(const Scalar& value) { setConstant(value); }
@@ -223,8 +247,8 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
return m_matrix.const_cast_derived().coeffRef(row, col);
}
const MatrixType& nestedExpression() const { return m_matrix; }
MatrixType& nestedExpression() { return const_cast<MatrixType&>(m_matrix); }
const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
/** Assigns a triangular matrix to a triangular part of a dense matrix */
template<typename OtherDerived>
@@ -258,18 +282,14 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
/** \sa MatrixBase::transpose() */
inline TriangularView<Transpose<MatrixType>,TransposeMode> transpose()
{ return m_matrix.transpose(); }
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return m_matrix.const_cast_derived().transpose();
}
/** \sa MatrixBase::transpose() const */
inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const
{ return m_matrix.transpose(); }
DenseMatrixType toDenseMatrix() const
{
DenseMatrixType res(rows(), cols());
evalToLazy(res);
return res;
}
/** Efficient triangular matrix times vector/matrix product */
template<typename OtherDerived>
TriangularProduct<Mode,true,MatrixType,false,OtherDerived,OtherDerived::IsVectorAtCompileTime>
@@ -290,42 +310,70 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
(lhs.derived(),rhs.m_matrix);
}
#ifdef EIGEN2_SUPPORT
// Helper for the Eigen2-compatibility operator*: computes the plain object type
// of the product between the dense form of this view (DenseMatrixType) and the
// dense form of OtherDerived's plain object.
template<typename OtherDerived>
struct eigen2_product_return_type
{
// Dense type this triangular view evaluates to.
typedef typename TriangularView<MatrixType,Mode>::DenseMatrixType DenseMatrixType;
// Dense type of the right-hand side once evaluated.
typedef typename OtherDerived::PlainObject::DenseType OtherPlainObject;
// Expression type of the dense * dense product.
typedef typename ProductReturnType<DenseMatrixType, OtherPlainObject>::Type ProdRetType;
// Plain object type of that product; this is what operator* returns.
typedef typename ProdRetType::PlainObject type;
};
/** Eigen2-compatibility product: evaluates \a rhs into a plain dense object,
  * then multiplies the dense form of this view by it.
  */
template<typename OtherDerived>
const typename eigen2_product_return_type<OtherDerived>::type
operator*(const EigenBase<OtherDerived>& rhs) const
{
  typedef typename OtherDerived::PlainObject::DenseType RhsDense;
  RhsDense denseRhs;
  rhs.evalTo(denseRhs);
  return this->toDenseMatrix() * denseRhs;
}
template<typename OtherMatrixType>
bool isApprox(const TriangularView<OtherMatrixType, Mode>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
{
return this->toDenseMatrix().isApprox(other.toDenseMatrix(), precision);
}
template<typename OtherDerived>
bool isApprox(const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
{
return this->toDenseMatrix().isApprox(other, precision);
}
#endif // EIGEN2_SUPPORT
template<int Side, typename OtherDerived>
typename ei_plain_matrix_type_column_major<OtherDerived>::type
solve(const MatrixBase<OtherDerived>& other) const;
template<int Side, typename Other>
inline const internal::triangular_solve_retval<Side,TriangularView, Other>
solve(const MatrixBase<Other>& other) const;
template<int Side, typename OtherDerived>
void solveInPlace(const MatrixBase<OtherDerived>& other) const;
template<typename OtherDerived>
typename ei_plain_matrix_type_column_major<OtherDerived>::type
solve(const MatrixBase<OtherDerived>& other) const
template<typename Other>
inline const internal::triangular_solve_retval<OnTheLeft,TriangularView, Other>
solve(const MatrixBase<Other>& other) const
{ return solve<OnTheLeft>(other); }
template<typename OtherDerived>
void solveInPlace(const MatrixBase<OtherDerived>& other) const
{ return solveInPlace<OnTheLeft>(other); }
const SelfAdjointView<_MatrixTypeNested,Mode> selfadjointView() const
const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const
{
EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
return SelfAdjointView<_MatrixTypeNested,Mode>(m_matrix);
return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
}
SelfAdjointView<_MatrixTypeNested,Mode> selfadjointView()
SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView()
{
EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
return SelfAdjointView<_MatrixTypeNested,Mode>(m_matrix);
return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
}
template<typename OtherDerived>
void swap(TriangularBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other)
void swap(TriangularBase<OtherDerived> const & other)
{
TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived());
}
template<typename OtherDerived>
void swap(MatrixBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other)
void swap(MatrixBase<OtherDerived> const & other)
{
TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived());
}
@@ -339,8 +387,51 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
else
return m_matrix.diagonal().prod();
}
// TODO simplify the following:
// Assignment operators taking a product expression. Each one forwards to
// assignProduct() with a scaling factor; the plain '=' variants first clear
// this view with setZero().
// NOTE(review): assignProduct() is only declared here; presumably it accumulates
// alpha * product into the triangular part -- confirm against its definition.
// *this = product: clear, then forward with factor 1.
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_STRONG_INLINE TriangularView& operator=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
{
setZero();
return assignProduct(other,1);
}
// *this += product: forward with factor 1, without clearing.
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_STRONG_INLINE TriangularView& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
{
return assignProduct(other,1);
}
// *this -= product: forward with factor -1, without clearing.
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_STRONG_INLINE TriangularView& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
{
return assignProduct(other,-1);
}
// *this = scaled product: clear, then forward with the product's own alpha().
template<typename ProductDerived>
EIGEN_STRONG_INLINE TriangularView& operator=(const ScaledProduct<ProductDerived>& other)
{
setZero();
return assignProduct(other,other.alpha());
}
// *this += scaled product: forward with alpha(), without clearing.
template<typename ProductDerived>
EIGEN_STRONG_INLINE TriangularView& operator+=(const ScaledProduct<ProductDerived>& other)
{
return assignProduct(other,other.alpha());
}
// *this -= scaled product: forward with -alpha(), without clearing.
template<typename ProductDerived>
EIGEN_STRONG_INLINE TriangularView& operator-=(const ScaledProduct<ProductDerived>& other)
{
return assignProduct(other,-other.alpha());
}
protected:
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha);
const MatrixTypeNested m_matrix;
};
@@ -349,19 +440,23 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
* Implementation of triangular evaluation/assignment
***************************************************************************/
namespace internal {
template<typename Derived1, typename Derived2, unsigned int Mode, int UnrollCount, bool ClearOpposite>
struct ei_triangular_assignment_selector
struct triangular_assignment_selector
{
enum {
col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
};
typedef typename Derived1::Scalar Scalar;
inline static void run(Derived1 &dst, const Derived2 &src)
{
ei_triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src);
triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src);
ei_assert( Mode == Upper || Mode == Lower
eigen_assert( Mode == Upper || Mode == Lower
|| Mode == StrictlyUpper || Mode == StrictlyLower
|| Mode == UnitUpper || Mode == UnitLower);
if((Mode == Upper && row <= col)
@@ -374,40 +469,41 @@ struct ei_triangular_assignment_selector
else if(ClearOpposite)
{
if (Mode&UnitDiag && row==col)
dst.coeffRef(row, col) = 1;
dst.coeffRef(row, col) = Scalar(1);
else
dst.coeffRef(row, col) = 0;
dst.coeffRef(row, col) = Scalar(0);
}
}
};
// prevent buggy user code from causing an infinite recursion
// Partial specialization for UnrollCount == 0: run() does nothing, which ends the
// UnrollCount-1 recursion performed by the primary template's run().
// Diff rendering: the ei_-prefixed struct line is the removed name; the line after it is its replacement.
template<typename Derived1, typename Derived2, unsigned int Mode, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite>
{
inline static void run(Derived1 &, const Derived2 &) {}
};
// Runtime-loop (Dynamic unroll count) specialization for Mode == Upper.
// Diff rendering: where two near-identical lines follow each other, the first is the
// removed version and the second is its replacement.
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
typedef typename Derived1::Scalar Scalar;
// Copies the upper triangle (diagonal included) of src into dst, column by column.
// When ClearOpposite is true, the entries below the copied range are set to zero.
inline static void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
// Last row to copy in column j: the diagonal row, clamped to the last row of dst.
// The parenthesized (std::min) presumably guards against a function-like min macro.
Index maxi = std::min(j, dst.rows()-1);
Index maxi = (std::min)(j, dst.rows()-1);
for(Index i = 0; i <= maxi; ++i)
dst.copyCoeff(i, j, src);
if (ClearOpposite)
for(Index i = maxi+1; i < dst.rows(); ++i)
// The replacement line spells the zero as an explicit Scalar.
dst.coeffRef(i, j) = 0;
dst.coeffRef(i, j) = Scalar(0);
}
}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
@@ -416,23 +512,23 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, Cle
{
for(Index i = j; i < dst.rows(); ++i)
dst.copyCoeff(i, j, src);
Index maxi = std::min(j, dst.rows());
Index maxi = (std::min)(j, dst.rows());
if (ClearOpposite)
for(Index i = 0; i < maxi; ++i)
dst.coeffRef(i, j) = 0;
dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
}
}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic, ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
Index maxi = std::min(j, dst.rows());
Index maxi = (std::min)(j, dst.rows());
for(Index i = 0; i < maxi; ++i)
dst.copyCoeff(i, j, src);
if (ClearOpposite)
@@ -443,7 +539,7 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dyna
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
@@ -452,23 +548,23 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dyna
{
for(Index i = j+1; i < dst.rows(); ++i)
dst.copyCoeff(i, j, src);
Index maxi = std::min(j, dst.rows()-1);
Index maxi = (std::min)(j, dst.rows()-1);
if (ClearOpposite)
for(Index i = 0; i <= maxi; ++i)
dst.coeffRef(i, j) = 0;
dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
}
}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
Index maxi = std::min(j, dst.rows());
Index maxi = (std::min)(j, dst.rows());
for(Index i = 0; i < maxi; ++i)
dst.copyCoeff(i, j, src);
if (ClearOpposite)
@@ -481,14 +577,14 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic,
}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
Index maxi = std::min(j, dst.rows());
Index maxi = (std::min)(j, dst.rows());
for(Index i = maxi+1; i < dst.rows(); ++i)
dst.copyCoeff(i, j, src);
if (ClearOpposite)
@@ -501,6 +597,8 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic,
}
};
} // end namespace internal
// FIXME should we keep that possibility
template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived>
@@ -509,7 +607,7 @@ TriangularView<MatrixType, Mode>::operator=(const MatrixBase<OtherDerived>& othe
{
if(OtherDerived::Flags & EvalBeforeAssigningBit)
{
typename ei_plain_matrix_type<OtherDerived>::type other_evaluated(other.rows(), other.cols());
typename internal::plain_matrix_type<OtherDerived>::type other_evaluated(other.rows(), other.cols());
other_evaluated.template triangularView<Mode>().lazyAssign(other.derived());
lazyAssign(other_evaluated);
}
@@ -525,12 +623,12 @@ void TriangularView<MatrixType, Mode>::lazyAssign(const MatrixBase<OtherDerived>
{
enum {
unroll = MatrixType::SizeAtCompileTime != Dynamic
&& ei_traits<OtherDerived>::CoeffReadCost != Dynamic
&& MatrixType::SizeAtCompileTime*ei_traits<OtherDerived>::CoeffReadCost/2 <= EIGEN_UNROLLING_LIMIT
&& internal::traits<OtherDerived>::CoeffReadCost != Dynamic
&& MatrixType::SizeAtCompileTime*internal::traits<OtherDerived>::CoeffReadCost/2 <= EIGEN_UNROLLING_LIMIT
};
ei_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
eigen_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
ei_triangular_assignment_selector
internal::triangular_assignment_selector
<MatrixType, OtherDerived, int(Mode),
unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic,
false // do not change the opposite triangular part
@@ -544,8 +642,8 @@ template<typename OtherDerived>
inline TriangularView<MatrixType, Mode>&
TriangularView<MatrixType, Mode>::operator=(const TriangularBase<OtherDerived>& other)
{
ei_assert(Mode == int(OtherDerived::Mode));
if(ei_traits<OtherDerived>::Flags & EvalBeforeAssigningBit)
eigen_assert(Mode == int(OtherDerived::Mode));
if(internal::traits<OtherDerived>::Flags & EvalBeforeAssigningBit)
{
typename OtherDerived::DenseMatrixType other_evaluated(other.rows(), other.cols());
other_evaluated.template triangularView<Mode>().lazyAssign(other.derived().nestedExpression());
@@ -562,13 +660,13 @@ void TriangularView<MatrixType, Mode>::lazyAssign(const TriangularBase<OtherDeri
{
enum {
unroll = MatrixType::SizeAtCompileTime != Dynamic
&& ei_traits<OtherDerived>::CoeffReadCost != Dynamic
&& MatrixType::SizeAtCompileTime * ei_traits<OtherDerived>::CoeffReadCost / 2
&& internal::traits<OtherDerived>::CoeffReadCost != Dynamic
&& MatrixType::SizeAtCompileTime * internal::traits<OtherDerived>::CoeffReadCost / 2
<= EIGEN_UNROLLING_LIMIT
};
ei_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
eigen_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
ei_triangular_assignment_selector
internal::triangular_assignment_selector
<MatrixType, OtherDerived, int(Mode),
unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic,
false // preserve the opposite triangular part
@@ -585,9 +683,9 @@ template<typename Derived>
template<typename DenseDerived>
void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
{
if(ei_traits<Derived>::Flags & EvalBeforeAssigningBit)
if(internal::traits<Derived>::Flags & EvalBeforeAssigningBit)
{
typename ei_plain_matrix_type<Derived>::type other_evaluated(rows(), cols());
typename internal::plain_matrix_type<Derived>::type other_evaluated(rows(), cols());
evalToLazy(other_evaluated);
other.derived().swap(other_evaluated);
}
@@ -603,14 +701,14 @@ void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
{
enum {
unroll = DenseDerived::SizeAtCompileTime != Dynamic
&& ei_traits<Derived>::CoeffReadCost != Dynamic
&& DenseDerived::SizeAtCompileTime * ei_traits<Derived>::CoeffReadCost / 2
&& internal::traits<Derived>::CoeffReadCost != Dynamic
&& DenseDerived::SizeAtCompileTime * internal::traits<Derived>::CoeffReadCost / 2
<= EIGEN_UNROLLING_LIMIT
};
ei_assert(this->rows() == other.rows() && this->cols() == other.cols());
other.derived().resize(this->rows(), this->cols());
ei_triangular_assignment_selector
<DenseDerived, typename ei_traits<Derived>::ExpressionType, Derived::Mode,
internal::triangular_assignment_selector
<DenseDerived, typename internal::traits<Derived>::MatrixTypeNestedCleaned, Derived::Mode,
unroll ? int(DenseDerived::SizeAtCompileTime) : Dynamic,
true // clear the opposite triangular part
>::run(other.derived(), derived().nestedExpression());
@@ -624,10 +722,28 @@ void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
* Implementation of MatrixBase methods
***************************************************************************/
#ifdef EIGEN2_SUPPORT
// implementation of part<>(), including the SelfAdjoint case.
namespace internal {

/** \internal
  * Maps a (matrix type, mode) pair to the return type of the Eigen2-style part<>() accessor.
  * Generic case: a TriangularView with the same mode. */
template<typename XprType, unsigned int PartMode>
struct eigen2_part_return_type
{
  typedef TriangularView<XprType, PartMode> type;
};

/** \internal
  * Specialization for the SelfAdjoint mode: the return type is a SelfAdjointView
  * on the Upper part. */
template<typename XprType>
struct eigen2_part_return_type<XprType, SelfAdjoint>
{
  typedef SelfAdjointView<XprType, Upper> type;
};

} // end namespace internal
/** \deprecated use MatrixBase::triangularView() */
// Const overload of the Eigen2-compatibility accessor (inside #ifdef EIGEN2_SUPPORT).
// Diff rendering: the EIGEN_DEPRECATED signature is the removed line; the one using
// internal::eigen2_part_return_type replaces it.
template<typename Derived>
template<unsigned int Mode>
EIGEN_DEPRECATED const TriangularView<Derived, Mode> MatrixBase<Derived>::part() const
const typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Derived>::part() const
{
// derived() is converted to the declared return type on return.
return derived();
}
@@ -635,16 +751,17 @@ EIGEN_DEPRECATED const TriangularView<Derived, Mode> MatrixBase<Derived>::part()
/** \deprecated use MatrixBase::triangularView() */
// Non-const overload of the Eigen2-compatibility accessor.
// Diff rendering: the EIGEN_DEPRECATED signature is the removed line; the one using
// internal::eigen2_part_return_type replaces it.
template<typename Derived>
template<unsigned int Mode>
EIGEN_DEPRECATED TriangularView<Derived, Mode> MatrixBase<Derived>::part()
typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Derived>::part()
{
// derived() is converted to the declared return type on return.
return derived();
}
#endif
/**
* \returns an expression of a triangular view extracted from the current matrix
*
* The parameter \a Mode can have the following values: \c Upper, \c StrictlyUpper, \c UnitUpper,
* \c Lower, \c StrictlyLower, \c UnitLower.
* The parameter \a Mode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
* \c #Lower, \c #StrictlyLower, \c #UnitLower.
*
* Example: \include MatrixBase_extract.cpp
* Output: \verbinclude MatrixBase_extract.out
@@ -653,7 +770,8 @@ EIGEN_DEPRECATED TriangularView<Derived, Mode> MatrixBase<Derived>::part()
*/
template<typename Derived>
template<unsigned int Mode>
// Diff rendering: the TriangularView<Derived, Mode> return type is the removed line; the
// two-line form naming TriangularViewReturnType<Mode>::Type replaces it.
TriangularView<Derived, Mode> MatrixBase<Derived>::triangularView()
typename MatrixBase<Derived>::template TriangularViewReturnType<Mode>::Type
MatrixBase<Derived>::triangularView()
{
// The view is built by converting derived() to the declared return type.
return derived();
}
@@ -661,7 +779,8 @@ TriangularView<Derived, Mode> MatrixBase<Derived>::triangularView()
/** This is the const version of MatrixBase::triangularView() */
template<typename Derived>
template<unsigned int Mode>
// Diff rendering: the "const TriangularView<Derived, Mode>" return type is the removed line;
// the two-line form naming ConstTriangularViewReturnType<Mode>::Type replaces it.
const TriangularView<Derived, Mode> MatrixBase<Derived>::triangularView() const
typename MatrixBase<Derived>::template ConstTriangularViewReturnType<Mode>::Type
MatrixBase<Derived>::triangularView() const
{
// The view is built by converting derived() to the declared return type.
return derived();
}
@@ -669,7 +788,7 @@ const TriangularView<Derived, Mode> MatrixBase<Derived>::triangularView() const
/** \returns true if *this is approximately equal to an upper triangular matrix,
* within the precision given by \a prec.
*
* \sa isLowerTriangular(), extract(), part(), marked()
* \sa isLowerTriangular()
*/
template<typename Derived>
bool MatrixBase<Derived>::isUpperTriangular(RealScalar prec) const
@@ -677,24 +796,24 @@ bool MatrixBase<Derived>::isUpperTriangular(RealScalar prec) const
RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);
for(Index j = 0; j < cols(); ++j)
{
Index maxi = std::min(j, rows()-1);
Index maxi = (std::min)(j, rows()-1);
for(Index i = 0; i <= maxi; ++i)
{
RealScalar absValue = ei_abs(coeff(i,j));
RealScalar absValue = internal::abs(coeff(i,j));
if(absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue;
}
}
RealScalar threshold = maxAbsOnUpperPart * prec;
for(Index j = 0; j < cols(); ++j)
for(Index i = j+1; i < rows(); ++i)
if(ei_abs(coeff(i, j)) > threshold) return false;
if(internal::abs(coeff(i, j)) > threshold) return false;
return true;
}
/** \returns true if *this is approximately equal to a lower triangular matrix,
* within the precision given by \a prec.
*
* \sa isUpperTriangular(), extract(), part(), marked()
* \sa isUpperTriangular()
*/
template<typename Derived>
bool MatrixBase<Derived>::isLowerTriangular(RealScalar prec) const
@@ -703,15 +822,15 @@ bool MatrixBase<Derived>::isLowerTriangular(RealScalar prec) const
for(Index j = 0; j < cols(); ++j)
for(Index i = j; i < rows(); ++i)
{
RealScalar absValue = ei_abs(coeff(i,j));
RealScalar absValue = internal::abs(coeff(i,j));
if(absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue;
}
RealScalar threshold = maxAbsOnLowerPart * prec;
for(Index j = 1; j < cols(); ++j)
{
Index maxi = std::min(j, rows()-1);
Index maxi = (std::min)(j, rows()-1);
for(Index i = 0; i < maxi; ++i)
if(ei_abs(coeff(i, j)) > threshold) return false;
if(internal::abs(coeff(i, j)) > threshold) return false;
}
return true;
}

View File

@@ -56,24 +56,27 @@
*
* \sa class Block, DenseBase::segment(Index,Index,Index,Index), DenseBase::segment(Index,Index)
*/
namespace internal {
// Traits of VectorBlock are inherited from the matching Block type: a 1 x Size block
// when VectorType has RowMajorBit set in its Flags, a Size x 1 block otherwise.
// Diff rendering: the ei_traits lines are the removed version; the traits lines replace them.
template<typename VectorType, int Size>
struct ei_traits<VectorBlock<VectorType, Size> >
: public ei_traits<Block<VectorType,
ei_traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
ei_traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
struct traits<VectorBlock<VectorType, Size> >
: public traits<Block<VectorType,
traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
{
};
}
template<typename VectorType, int Size> class VectorBlock
: public Block<VectorType,
ei_traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
ei_traits<VectorType>::Flags & RowMajorBit ? Size : 1>
internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
internal::traits<VectorType>::Flags & RowMajorBit ? Size : 1>
{
typedef Block<VectorType,
ei_traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
ei_traits<VectorType>::Flags & RowMajorBit ? Size : 1> Base;
internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
internal::traits<VectorType>::Flags & RowMajorBit ? Size : 1> Base;
enum {
IsColVector = !(ei_traits<VectorType>::Flags & RowMajorBit)
IsColVector = !(internal::traits<VectorType>::Flags & RowMajorBit)
};
public:
EIGEN_DENSE_PUBLIC_INTERFACE(VectorBlock)
@@ -82,7 +85,7 @@ template<typename VectorType, int Size> class VectorBlock
/** Dynamic-size constructor
*/
inline VectorBlock(const VectorType& vector, Index start, Index size)
inline VectorBlock(VectorType& vector, Index start, Index size)
: Base(vector,
IsColVector ? start : 0, IsColVector ? 0 : start,
IsColVector ? size : 1, IsColVector ? 1 : size)
@@ -92,7 +95,7 @@ template<typename VectorType, int Size> class VectorBlock
/** Fixed-size constructor
*/
inline VectorBlock(const VectorType& vector, Index start)
inline VectorBlock(VectorType& vector, Index start)
: Base(vector, IsColVector ? start : 0, IsColVector ? 0 : start)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock);
@@ -117,20 +120,20 @@ template<typename VectorType, int Size> class VectorBlock
* \sa class Block, segment(Index)
*/
template<typename Derived>
// Diff rendering: the VectorBlock<Derived> signature and return lines are the removed
// version; the SegmentReturnType lines replace them.
inline VectorBlock<Derived> DenseBase<Derived>
::segment(Index start, Index size)
inline typename DenseBase<Derived>::SegmentReturnType
DenseBase<Derived>::segment(Index start, Index size)
{
// Static assertion; per its name it restricts this accessor to vector expressions.
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
// Block of `size` coefficients beginning at index `start`.
return VectorBlock<Derived>(derived(), start, size);
return SegmentReturnType(derived(), start, size);
}
/** This is the const version of segment(Index,Index).*/
template<typename Derived>
// Diff rendering: the "const VectorBlock<Derived>" return type and the VectorBlock return
// statement are the removed lines; the ConstSegmentReturnType lines replace them.
inline const VectorBlock<Derived>
inline typename DenseBase<Derived>::ConstSegmentReturnType
DenseBase<Derived>::segment(Index start, Index size) const
{
// Static assertion; per its name it restricts this accessor to vector expressions.
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
// Block of `size` coefficients beginning at index `start`.
return VectorBlock<Derived>(derived(), start, size);
return ConstSegmentReturnType(derived(), start, size);
}
/** \returns a dynamic-size expression of the first coefficients of *this.
@@ -149,20 +152,20 @@ DenseBase<Derived>::segment(Index start, Index size) const
* \sa class Block, block(Index,Index)
*/
template<typename Derived>
// Diff rendering: the VectorBlock<Derived> lines are the removed version; the
// SegmentReturnType lines replace them.
inline VectorBlock<Derived>
inline typename DenseBase<Derived>::SegmentReturnType
DenseBase<Derived>::head(Index size)
{
// Static assertion; per its name it restricts this accessor to vector expressions.
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
// Same construction as segment(start, size) with the start index fixed to 0.
return VectorBlock<Derived>(derived(), 0, size);
return SegmentReturnType(derived(), 0, size);
}
/** This is the const version of head(Index).*/
template<typename Derived>
// Diff rendering: the VectorBlock<Derived> lines are the removed version; the
// ConstSegmentReturnType lines replace them.
inline const VectorBlock<Derived>
inline typename DenseBase<Derived>::ConstSegmentReturnType
DenseBase<Derived>::head(Index size) const
{
// Static assertion; per its name it restricts this accessor to vector expressions.
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
// Same construction as segment(start, size) with the start index fixed to 0.
return VectorBlock<Derived>(derived(), 0, size);
return ConstSegmentReturnType(derived(), 0, size);
}
/** \returns a dynamic-size expression of the last coefficients of *this.
@@ -181,20 +184,20 @@ DenseBase<Derived>::head(Index size) const
* \sa class Block, block(Index,Index)
*/
template<typename Derived>
// Diff rendering: the VectorBlock<Derived> lines are the removed version; the
// SegmentReturnType lines replace them.
inline VectorBlock<Derived>
inline typename DenseBase<Derived>::SegmentReturnType
DenseBase<Derived>::tail(Index size)
{
// Static assertion; per its name it restricts this accessor to vector expressions.
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
// Start index is this->size() - size, so the block ends at the last coefficient.
// this->size() is the member function; the bare `size` is the parameter.
return VectorBlock<Derived>(derived(), this->size() - size, size);
return SegmentReturnType(derived(), this->size() - size, size);
}
/** This is the const version of tail(Index).*/
template<typename Derived>
// Diff rendering: the VectorBlock<Derived> lines are the removed version; the
// ConstSegmentReturnType lines replace them.
inline const VectorBlock<Derived>
inline typename DenseBase<Derived>::ConstSegmentReturnType
DenseBase<Derived>::tail(Index size) const
{
// Static assertion; per its name it restricts this accessor to vector expressions.
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
// Start index is this->size() - size, so the block ends at the last coefficient.
// this->size() is the member function; the bare `size` is the parameter.
return VectorBlock<Derived>(derived(), this->size() - size, size);
return ConstSegmentReturnType(derived(), this->size() - size, size);
}
/** \returns a fixed-size expression of a segment (i.e. a vector block) in \c *this
@@ -212,21 +215,21 @@ DenseBase<Derived>::tail(Index size) const
*/
template<typename Derived>
template<int Size>
// Fixed-size overload: the length comes from the template parameter Size.
// Diff rendering: the VectorBlock<Derived,Size> lines are the removed version; the
// FixedSegmentReturnType<Size>::Type lines replace them.
inline VectorBlock<Derived,Size>
inline typename DenseBase<Derived>::template FixedSegmentReturnType<Size>::Type
DenseBase<Derived>::segment(Index start)
{
// Static assertion; per its name it restricts this accessor to vector expressions.
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return VectorBlock<Derived,Size>(derived(), start);
return typename FixedSegmentReturnType<Size>::Type(derived(), start);
}
/** This is the const version of segment<int>(Index).*/
template<typename Derived>
template<int Size>
// Diff rendering: the VectorBlock<Derived,Size> lines are the removed version; the
// ConstFixedSegmentReturnType<Size>::Type lines replace them.
inline const VectorBlock<Derived,Size>
inline typename DenseBase<Derived>::template ConstFixedSegmentReturnType<Size>::Type
DenseBase<Derived>::segment(Index start) const
{
// Static assertion; per its name it restricts this accessor to vector expressions.
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return VectorBlock<Derived,Size>(derived(), start);
return typename ConstFixedSegmentReturnType<Size>::Type(derived(), start);
}
/** \returns a fixed-size expression of the first coefficients of *this.
@@ -242,21 +245,21 @@ DenseBase<Derived>::segment(Index start) const
*/
template<typename Derived>
template<int Size>
// Fixed-size overload: the length comes from the template parameter Size.
// Diff rendering: the VectorBlock<Derived,Size> lines are the removed version; the
// FixedSegmentReturnType<Size>::Type lines replace them.
inline VectorBlock<Derived,Size>
inline typename DenseBase<Derived>::template FixedSegmentReturnType<Size>::Type
DenseBase<Derived>::head()
{
// Static assertion; per its name it restricts this accessor to vector expressions.
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
// Start index fixed to 0.
return VectorBlock<Derived,Size>(derived(), 0);
return typename FixedSegmentReturnType<Size>::Type(derived(), 0);
}
/** This is the const version of head<int>().*/
template<typename Derived>
template<int Size>
// Diff rendering: the VectorBlock<Derived,Size> lines are the removed version; the
// ConstFixedSegmentReturnType<Size>::Type lines replace them.
inline const VectorBlock<Derived,Size>
inline typename DenseBase<Derived>::template ConstFixedSegmentReturnType<Size>::Type
DenseBase<Derived>::head() const
{
// Static assertion; per its name it restricts this accessor to vector expressions.
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
// Start index fixed to 0.
return VectorBlock<Derived,Size>(derived(), 0);
return typename ConstFixedSegmentReturnType<Size>::Type(derived(), 0);
}
/** \returns a fixed-size expression of the last coefficients of *this.
@@ -272,21 +275,21 @@ DenseBase<Derived>::head() const
*/
template<typename Derived>
template<int Size>
// Fixed-size overload: the length comes from the template parameter Size.
// Diff rendering: the VectorBlock<Derived, Size> lines are the removed version; the
// FixedSegmentReturnType<Size>::Type lines replace them.
inline VectorBlock<Derived,Size>
inline typename DenseBase<Derived>::template FixedSegmentReturnType<Size>::Type
DenseBase<Derived>::tail()
{
// Static assertion; per its name it restricts this accessor to vector expressions.
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
// Start index is size() - Size, so the block ends at the last coefficient.
return VectorBlock<Derived, Size>(derived(), size() - Size);
return typename FixedSegmentReturnType<Size>::Type(derived(), size() - Size);
}
/** This is the const version of tail<int>.*/
template<typename Derived>
template<int Size>
// Diff rendering: the VectorBlock<Derived, Size> lines are the removed version; the
// ConstFixedSegmentReturnType<Size>::Type lines replace them.
inline const VectorBlock<Derived,Size>
inline typename DenseBase<Derived>::template ConstFixedSegmentReturnType<Size>::Type
DenseBase<Derived>::tail() const
{
// Static assertion; per its name it restricts this accessor to vector expressions.
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
// Start index is size() - Size, so the block ends at the last coefficient.
return VectorBlock<Derived, Size>(derived(), size() - Size);
return typename ConstFixedSegmentReturnType<Size>::Type(derived(), size() - Size);
}

View File

@@ -31,9 +31,9 @@
*
* \brief Generic expression of a partially reduxed matrix
*
* \param MatrixType the type of the matrix we are applying the redux operation
* \param MemberOp type of the member functor
* \param Direction indicates the direction of the redux (Vertical or Horizontal)
* \tparam MatrixType the type of the matrix we are applying the redux operation
* \tparam MemberOp type of the member functor
* \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal)
*
* This class represents an expression of a partial redux operator of a matrix.
* It is the return type of some VectorwiseOp functions,
@@ -45,16 +45,17 @@
template< typename MatrixType, typename MemberOp, int Direction>
class PartialReduxExpr;
namespace internal {
template<typename MatrixType, typename MemberOp, int Direction>
struct ei_traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
: ei_traits<MatrixType>
struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
: traits<MatrixType>
{
typedef typename MemberOp::result_type Scalar;
typedef typename ei_traits<MatrixType>::StorageKind StorageKind;
typedef typename ei_traits<MatrixType>::XprKind XprKind;
typedef typename traits<MatrixType>::StorageKind StorageKind;
typedef typename traits<MatrixType>::XprKind XprKind;
typedef typename MatrixType::Scalar InputScalar;
typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_cleantype<MatrixTypeNested>::type _MatrixTypeNested;
typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
enum {
RowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::RowsAtCompileTime,
ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime,
@@ -70,20 +71,21 @@ struct ei_traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
typedef typename MemberOp::template Cost<InputScalar,TraversalSize> CostOpType;
#endif
enum {
CoeffReadCost = TraversalSize * ei_traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
CoeffReadCost = TraversalSize * traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
};
};
}
template< typename MatrixType, typename MemberOp, int Direction>
class PartialReduxExpr : ei_no_assignment_operator,
public ei_dense_xpr_base< PartialReduxExpr<MatrixType, MemberOp, Direction> >::type
class PartialReduxExpr : internal::no_assignment_operator,
public internal::dense_xpr_base< PartialReduxExpr<MatrixType, MemberOp, Direction> >::type
{
public:
typedef typename ei_dense_xpr_base<PartialReduxExpr>::type Base;
typedef typename internal::dense_xpr_base<PartialReduxExpr>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(PartialReduxExpr)
typedef typename ei_traits<PartialReduxExpr>::MatrixTypeNested MatrixTypeNested;
typedef typename ei_traits<PartialReduxExpr>::_MatrixTypeNested _MatrixTypeNested;
typedef typename internal::traits<PartialReduxExpr>::MatrixTypeNested MatrixTypeNested;
typedef typename internal::traits<PartialReduxExpr>::_MatrixTypeNested _MatrixTypeNested;
PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())
: m_matrix(mat), m_functor(func) {}
@@ -114,8 +116,8 @@ class PartialReduxExpr : ei_no_assignment_operator,
#define EIGEN_MEMBER_FUNCTOR(MEMBER,COST) \
template <typename ResultType> \
struct ei_member_##MEMBER { \
EIGEN_EMPTY_STRUCT_CTOR(ei_member_##MEMBER) \
struct member_##MEMBER { \
EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
typedef ResultType result_type; \
template<typename Scalar, int Size> struct Cost \
{ enum { value = COST }; }; \
@@ -124,11 +126,13 @@ class PartialReduxExpr : ei_no_assignment_operator,
{ return mat.MEMBER(); } \
}
namespace internal {
// Each invocation expands EIGEN_MEMBER_FUNCTOR(MEMBER, COST) into a functor struct named
// after MEMBER: its nested Cost<Scalar, Size>::value is the COST expression given here, and
// its call operator returns mat.MEMBER(). Scalar and Size in the cost expressions are the
// template parameters of that nested Cost struct.
EIGEN_MEMBER_FUNCTOR(squaredNorm, Size * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(norm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(stableNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(blueNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
// Diff rendering: the ei_functor_traits line is the removed version; the functor_traits line replaces it.
EIGEN_MEMBER_FUNCTOR(hypotNorm, (Size-1) * ei_functor_traits<ei_scalar_hypot_op<Scalar> >::Cost );
EIGEN_MEMBER_FUNCTOR(hypotNorm, (Size-1) * functor_traits<scalar_hypot_op<Scalar> >::Cost );
EIGEN_MEMBER_FUNCTOR(sum, (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(mean, (Size-1)*NumTraits<Scalar>::AddCost + NumTraits<Scalar>::MulCost);
EIGEN_MEMBER_FUNCTOR(minCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
@@ -139,20 +143,20 @@ EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost);
/** \internal */
// Functor wrapping a user-supplied binary operation: applied to an expression, it returns
// mat.redux(m_functor).
// Diff rendering: each ei_-prefixed line is the removed version and is followed (directly
// or one line later) by its unprefixed replacement.
template <typename BinaryOp, typename Scalar>
struct ei_member_redux {
typedef typename ei_result_of<
struct member_redux {
typedef typename result_of<
BinaryOp(Scalar)
>::type result_type;
// Cost of the reduction: (Size-1) applications of BinaryOp.
template<typename _Scalar, int Size> struct Cost
{ enum { value = (Size-1) * ei_functor_traits<BinaryOp>::Cost }; };
ei_member_redux(const BinaryOp func) : m_functor(func) {}
{ enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };
member_redux(const BinaryOp func) : m_functor(func) {}
template<typename Derived>
inline result_type operator()(const DenseBase<Derived>& mat) const
{ return mat.redux(m_functor); }
// Copy of the binary operation given to the constructor.
const BinaryOp m_functor;
};
}
/** \class VectorwiseOp
* \ingroup Core_Module
@@ -160,7 +164,7 @@ struct ei_member_redux {
* \brief Pseudo expression providing partial reduction operations
*
* \param ExpressionType the type of the object on which to do partial reductions
* \param Direction indicates the direction of the redux (Vertical or Horizontal)
* \param Direction indicates the direction of the redux (#Vertical or #Horizontal)
*
* This class represents a pseudo expression with partial reduction features.
* It is the return type of DenseBase::colwise() and DenseBase::rowwise()
@@ -178,11 +182,12 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
typedef typename ExpressionType::Scalar Scalar;
typedef typename ExpressionType::RealScalar RealScalar;
typedef typename ExpressionType::Index Index;
typedef typename ei_meta_if<ei_must_nest_by_value<ExpressionType>::ret,
ExpressionType, const ExpressionType&>::ret ExpressionTypeNested;
typedef typename internal::conditional<internal::must_nest_by_value<ExpressionType>::ret,
ExpressionType, ExpressionType&>::type ExpressionTypeNested;
typedef typename internal::remove_all<ExpressionTypeNested>::type ExpressionTypeNestedCleaned;
template<template<typename _Scalar> class Functor,
typename Scalar=typename ei_traits<ExpressionType>::Scalar> struct ReturnType
typename Scalar=typename internal::traits<ExpressionType>::Scalar> struct ReturnType
{
typedef PartialReduxExpr<ExpressionType,
Functor<Scalar>,
@@ -193,7 +198,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
// Names the PartialReduxExpr type obtained when reducing along Direction with a
// member_redux functor built from BinaryOp and the expression's Scalar type.
// Diff rendering: the ei_member_redux line is the removed version; the
// internal::member_redux line replaces it.
template<typename BinaryOp> struct ReduxReturnType
{
typedef PartialReduxExpr<ExpressionType,
ei_member_redux<BinaryOp,typename ei_traits<ExpressionType>::Scalar>,
internal::member_redux<BinaryOp,typename internal::traits<ExpressionType>::Scalar>,
Direction
> Type;
};
@@ -207,9 +212,9 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \internal
* \returns the i-th subvector according to the \c Direction */
typedef typename ei_meta_if<Direction==Vertical,
typedef typename internal::conditional<Direction==Vertical,
typename ExpressionType::ColXpr,
typename ExpressionType::RowXpr>::ret SubVector;
typename ExpressionType::RowXpr>::type SubVector;
SubVector subVector(Index i)
{
return SubVector(m_matrix.derived(),i);
@@ -241,7 +246,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
public:
// Constructor: stores the given expression in m_matrix.
// Diff rendering: the overload taking "const ExpressionType&" is the removed line; the one
// taking "ExpressionType&" replaces it.
inline VectorwiseOp(const ExpressionType& matrix) : m_matrix(matrix) {}
inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {}
/** \internal
  * \returns a const reference to the expression stored in m_matrix */
inline const ExpressionType& _expression() const
{
  return m_matrix;
}
@@ -265,7 +270,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_minCoeff.out
*
* \sa DenseBase::minCoeff() */
const typename ReturnType<ei_member_minCoeff>::Type minCoeff() const
const typename ReturnType<internal::member_minCoeff>::Type minCoeff() const
// Diff rendering: the ei_member_minCoeff signature is the removed line; the internal:: one replaces it.
// The expression is converted implicitly to the PartialReduxExpr return type (default-constructed functor).
{ return _expression(); }
/** \returns a row (or column) vector expression of the largest coefficient
@@ -275,7 +280,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_maxCoeff.out
*
* \sa DenseBase::maxCoeff() */
const typename ReturnType<ei_member_maxCoeff>::Type maxCoeff() const
const typename ReturnType<internal::member_maxCoeff>::Type maxCoeff() const
// Diff rendering: the ei_member_maxCoeff signature is the removed line; the internal:: one replaces it.
// The expression is converted implicitly to the PartialReduxExpr return type (default-constructed functor).
{ return _expression(); }
/** \returns a row (or column) vector expression of the squared norm
@@ -285,7 +290,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_squaredNorm.out
*
* \sa DenseBase::squaredNorm() */
const typename ReturnType<ei_member_squaredNorm,RealScalar>::Type squaredNorm() const
const typename ReturnType<internal::member_squaredNorm,RealScalar>::Type squaredNorm() const
// Diff rendering: the ei_member_squaredNorm signature is the removed line; the internal:: one replaces it.
// ReturnType is instantiated with RealScalar, so the functor's result type is RealScalar.
{ return _expression(); }
/** \returns a row (or column) vector expression of the norm
@@ -295,7 +300,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_norm.out
*
* \sa DenseBase::norm() */
const typename ReturnType<ei_member_norm,RealScalar>::Type norm() const
const typename ReturnType<internal::member_norm,RealScalar>::Type norm() const
// Diff rendering: the ei_member_norm signature is the removed line; the internal:: one replaces it.
// ReturnType is instantiated with RealScalar, so the functor's result type is RealScalar.
{ return _expression(); }
@@ -304,7 +309,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* blue's algorithm.
*
* \sa DenseBase::blueNorm() */
const typename ReturnType<ei_member_blueNorm,RealScalar>::Type blueNorm() const
const typename ReturnType<internal::member_blueNorm,RealScalar>::Type blueNorm() const
// Diff rendering: the ei_member_blueNorm signature is the removed line; the internal:: one replaces it.
// ReturnType is instantiated with RealScalar, so the functor's result type is RealScalar.
{ return _expression(); }
@@ -313,7 +318,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* underflow and overflow.
*
* \sa DenseBase::stableNorm() */
const typename ReturnType<ei_member_stableNorm,RealScalar>::Type stableNorm() const
const typename ReturnType<internal::member_stableNorm,RealScalar>::Type stableNorm() const
// Diff rendering: the ei_member_stableNorm signature is the removed line; the internal:: one replaces it.
// ReturnType is instantiated with RealScalar, so the functor's result type is RealScalar.
{ return _expression(); }
@@ -322,7 +327,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* underflow and overflow using a concatenation of hypot() calls.
*
* \sa DenseBase::hypotNorm() */
const typename ReturnType<ei_member_hypotNorm,RealScalar>::Type hypotNorm() const
const typename ReturnType<internal::member_hypotNorm,RealScalar>::Type hypotNorm() const
// Diff rendering: the ei_member_hypotNorm signature is the removed line; the internal:: one replaces it.
// ReturnType is instantiated with RealScalar, so the functor's result type is RealScalar.
{ return _expression(); }
/** \returns a row (or column) vector expression of the sum
@@ -332,28 +337,28 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_sum.out
*
* \sa DenseBase::sum() */
const typename ReturnType<ei_member_sum>::Type sum() const
const typename ReturnType<internal::member_sum>::Type sum() const
{ return _expression(); }
/** \returns a row (or column) vector expression of the mean
* of each column (or row) of the referenced expression.
*
* \sa DenseBase::mean() */
const typename ReturnType<ei_member_mean>::Type mean() const
const typename ReturnType<internal::member_mean>::Type mean() const
{ return _expression(); }
/** \returns a row (or column) vector expression representing
* whether \b all coefficients of each respective column (or row) are \c true.
*
* \sa DenseBase::all() */
const typename ReturnType<ei_member_all>::Type all() const
const typename ReturnType<internal::member_all>::Type all() const
{ return _expression(); }
/** \returns a row (or column) vector expression representing
* whether \b at \b least one coefficient of each respective column (or row) is \c true.
*
* \sa DenseBase::any() */
const typename ReturnType<ei_member_any>::Type any() const
const typename ReturnType<internal::member_any>::Type any() const
{ return _expression(); }
/** \returns a row (or column) vector expression representing
@@ -363,7 +368,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_count.out
*
* \sa DenseBase::count() */
const PartialReduxExpr<ExpressionType, ei_member_count<Index>, Direction> count() const
const PartialReduxExpr<ExpressionType, internal::member_count<Index>, Direction> count() const
{ return _expression(); }
/** \returns a row (or column) vector expression of the product
@@ -373,7 +378,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_prod.out
*
* \sa DenseBase::prod() */
const typename ReturnType<ei_member_prod>::Type prod() const
const typename ReturnType<internal::member_prod>::Type prod() const
{ return _expression(); }
@@ -413,7 +418,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
ExpressionType& operator=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
//ei_assert((m_matrix.isNull()) == (other.isNull())); FIXME
//eigen_assert((m_matrix.isNull()) == (other.isNull())); FIXME
for(Index j=0; j<subVectors(); ++j)
subVector(j) = other;
return const_cast<ExpressionType&>(m_matrix);
@@ -440,10 +445,10 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
}
/** Returns the expression of the sum of the vector \a other to each subvector of \c *this */
template<typename OtherDerived> EIGEN_STRONG_INLINE
CwiseBinaryOp<ei_scalar_sum_op<Scalar>,
ExpressionType,
typename ExtendedType<OtherDerived>::Type>
template<typename OtherDerived> EIGEN_STRONG_INLINE
CwiseBinaryOp<internal::scalar_sum_op<Scalar>,
const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type>
operator+(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
@@ -452,9 +457,9 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Returns the expression of the difference between each subvector of \c *this and the vector \a other */
template<typename OtherDerived>
CwiseBinaryOp<ei_scalar_difference_op<Scalar>,
ExpressionType,
typename ExtendedType<OtherDerived>::Type>
CwiseBinaryOp<internal::scalar_difference_op<Scalar>,
const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type>
operator-(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
@@ -463,35 +468,37 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/////////// Geometry module ///////////
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
Homogeneous<ExpressionType,Direction> homogeneous() const;
#endif
typedef typename ExpressionType::PlainObject CrossReturnType;
template<typename OtherDerived>
const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const;
enum {
HNormalized_Size = Direction==Vertical ? ei_traits<ExpressionType>::RowsAtCompileTime
: ei_traits<ExpressionType>::ColsAtCompileTime,
HNormalized_Size = Direction==Vertical ? internal::traits<ExpressionType>::RowsAtCompileTime
: internal::traits<ExpressionType>::ColsAtCompileTime,
HNormalized_SizeMinusOne = HNormalized_Size==Dynamic ? Dynamic : HNormalized_Size-1
};
typedef Block<ExpressionType,
typedef Block<const ExpressionType,
Direction==Vertical ? int(HNormalized_SizeMinusOne)
: int(ei_traits<ExpressionType>::RowsAtCompileTime),
: int(internal::traits<ExpressionType>::RowsAtCompileTime),
Direction==Horizontal ? int(HNormalized_SizeMinusOne)
: int(ei_traits<ExpressionType>::ColsAtCompileTime)>
: int(internal::traits<ExpressionType>::ColsAtCompileTime)>
HNormalized_Block;
typedef Block<ExpressionType,
Direction==Vertical ? 1 : int(ei_traits<ExpressionType>::RowsAtCompileTime),
Direction==Horizontal ? 1 : int(ei_traits<ExpressionType>::ColsAtCompileTime)>
typedef Block<const ExpressionType,
Direction==Vertical ? 1 : int(internal::traits<ExpressionType>::RowsAtCompileTime),
Direction==Horizontal ? 1 : int(internal::traits<ExpressionType>::ColsAtCompileTime)>
HNormalized_Factors;
typedef CwiseBinaryOp<ei_scalar_quotient_op<typename ei_traits<ExpressionType>::Scalar>,
HNormalized_Block,
Replicate<HNormalized_Factors,
typedef CwiseBinaryOp<internal::scalar_quotient_op<typename internal::traits<ExpressionType>::Scalar>,
const HNormalized_Block,
const Replicate<HNormalized_Factors,
Direction==Vertical ? HNormalized_SizeMinusOne : 1,
Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >
HNormalizedReturnType;
HNormalizedReturnType hnormalized() const;
const HNormalizedReturnType hnormalized() const;
protected:
ExpressionTypeNested m_matrix;
@@ -505,7 +512,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* \sa rowwise(), class VectorwiseOp
*/
template<typename Derived>
inline const VectorwiseOp<Derived,Vertical>
inline const typename DenseBase<Derived>::ConstColwiseReturnType
DenseBase<Derived>::colwise() const
{
return derived();
@@ -516,7 +523,7 @@ DenseBase<Derived>::colwise() const
* \sa rowwise(), class VectorwiseOp
*/
template<typename Derived>
inline VectorwiseOp<Derived,Vertical>
inline typename DenseBase<Derived>::ColwiseReturnType
DenseBase<Derived>::colwise()
{
return derived();
@@ -530,7 +537,7 @@ DenseBase<Derived>::colwise()
* \sa colwise(), class VectorwiseOp
*/
template<typename Derived>
inline const VectorwiseOp<Derived,Horizontal>
inline const typename DenseBase<Derived>::ConstRowwiseReturnType
DenseBase<Derived>::rowwise() const
{
return derived();
@@ -541,7 +548,7 @@ DenseBase<Derived>::rowwise() const
* \sa colwise(), class VectorwiseOp
*/
template<typename Derived>
inline VectorwiseOp<Derived,Horizontal>
inline typename DenseBase<Derived>::RowwiseReturnType
DenseBase<Derived>::rowwise()
{
return derived();

View File

@@ -25,8 +25,10 @@
#ifndef EIGEN_VISITOR_H
#define EIGEN_VISITOR_H
namespace internal {
template<typename Visitor, typename Derived, int UnrollCount>
struct ei_visitor_impl
struct visitor_impl
{
enum {
col = (UnrollCount-1) / Derived::RowsAtCompileTime,
@@ -35,13 +37,13 @@ struct ei_visitor_impl
inline static void run(const Derived &mat, Visitor& visitor)
{
ei_visitor_impl<Visitor, Derived, UnrollCount-1>::run(mat, visitor);
visitor_impl<Visitor, Derived, UnrollCount-1>::run(mat, visitor);
visitor(mat.coeff(row, col), row, col);
}
};
template<typename Visitor, typename Derived>
struct ei_visitor_impl<Visitor, Derived, 1>
struct visitor_impl<Visitor, Derived, 1>
{
inline static void run(const Derived &mat, Visitor& visitor)
{
@@ -50,7 +52,7 @@ struct ei_visitor_impl<Visitor, Derived, 1>
};
template<typename Visitor, typename Derived>
struct ei_visitor_impl<Visitor, Derived, Dynamic>
struct visitor_impl<Visitor, Derived, Dynamic>
{
typedef typename Derived::Index Index;
inline static void run(const Derived& mat, Visitor& visitor)
@@ -64,6 +66,7 @@ struct ei_visitor_impl<Visitor, Derived, Dynamic>
}
};
} // end namespace internal
/** Applies the visitor \a visitor to the whole coefficients of the matrix or vector.
*
@@ -88,19 +91,21 @@ void DenseBase<Derived>::visit(Visitor& visitor) const
{
enum { unroll = SizeAtCompileTime != Dynamic
&& CoeffReadCost != Dynamic
&& (SizeAtCompileTime == 1 || ei_functor_traits<Visitor>::Cost != Dynamic)
&& SizeAtCompileTime * CoeffReadCost + (SizeAtCompileTime-1) * ei_functor_traits<Visitor>::Cost
&& (SizeAtCompileTime == 1 || internal::functor_traits<Visitor>::Cost != Dynamic)
&& SizeAtCompileTime * CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost
<= EIGEN_UNROLLING_LIMIT };
return ei_visitor_impl<Visitor, Derived,
return internal::visitor_impl<Visitor, Derived,
unroll ? int(SizeAtCompileTime) : Dynamic
>::run(derived(), visitor);
}
namespace internal {
/** \internal
* \brief Base class to implement min and max visitors
*/
template <typename Derived>
struct ei_coeff_visitor
struct coeff_visitor
{
typedef typename Derived::Index Index;
typedef typename Derived::Scalar Scalar;
@@ -120,7 +125,7 @@ struct ei_coeff_visitor
* \sa DenseBase::minCoeff(Index*, Index*)
*/
template <typename Derived>
struct ei_min_coeff_visitor : ei_coeff_visitor<Derived>
struct min_coeff_visitor : coeff_visitor<Derived>
{
typedef typename Derived::Index Index;
typedef typename Derived::Scalar Scalar;
@@ -136,7 +141,7 @@ struct ei_min_coeff_visitor : ei_coeff_visitor<Derived>
};
template<typename Scalar>
struct ei_functor_traits<ei_min_coeff_visitor<Scalar> > {
struct functor_traits<min_coeff_visitor<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost
};
@@ -148,7 +153,7 @@ struct ei_functor_traits<ei_min_coeff_visitor<Scalar> > {
* \sa DenseBase::maxCoeff(Index*, Index*)
*/
template <typename Derived>
struct ei_max_coeff_visitor : ei_coeff_visitor<Derived>
struct max_coeff_visitor : coeff_visitor<Derived>
{
typedef typename Derived::Index Index;
typedef typename Derived::Scalar Scalar;
@@ -164,22 +169,25 @@ struct ei_max_coeff_visitor : ei_coeff_visitor<Derived>
};
template<typename Scalar>
struct ei_functor_traits<ei_max_coeff_visitor<Scalar> > {
struct functor_traits<max_coeff_visitor<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost
};
};
} // end namespace internal
/** \returns the minimum of all coefficients of *this
* and puts in *row and *col its location.
*
* \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::minCoeff()
*/
template<typename Derived>
typename ei_traits<Derived>::Scalar
DenseBase<Derived>::minCoeff(Index* row, Index* col) const
template<typename IndexType>
typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff(IndexType* row, IndexType* col) const
{
ei_min_coeff_visitor<Derived> minVisitor;
internal::min_coeff_visitor<Derived> minVisitor;
this->visit(minVisitor);
*row = minVisitor.row;
if (col) *col = minVisitor.col;
@@ -189,14 +197,15 @@ DenseBase<Derived>::minCoeff(Index* row, Index* col) const
/** \returns the minimum of all coefficients of *this
* and puts in *index its location.
*
* \sa DenseBase::minCoeff(Index*,Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::minCoeff()
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::minCoeff()
*/
template<typename Derived>
typename ei_traits<Derived>::Scalar
DenseBase<Derived>::minCoeff(Index* index) const
template<typename IndexType>
typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff(IndexType* index) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
ei_min_coeff_visitor<Derived> minVisitor;
internal::min_coeff_visitor<Derived> minVisitor;
this->visit(minVisitor);
*index = (RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row;
return minVisitor.res;
@@ -205,13 +214,14 @@ DenseBase<Derived>::minCoeff(Index* index) const
/** \returns the maximum of all coefficients of *this
* and puts in *row and *col its location.
*
* \sa DenseBase::minCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::maxCoeff()
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
*/
template<typename Derived>
typename ei_traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff(Index* row, Index* col) const
template<typename IndexType>
typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff(IndexType* row, IndexType* col) const
{
ei_max_coeff_visitor<Derived> maxVisitor;
internal::max_coeff_visitor<Derived> maxVisitor;
this->visit(maxVisitor);
*row = maxVisitor.row;
if (col) *col = maxVisitor.col;
@@ -221,14 +231,15 @@ DenseBase<Derived>::maxCoeff(Index* row, Index* col) const
/** \returns the maximum of all coefficients of *this
* and puts in *index its location.
*
* \sa DenseBase::maxCoeff(Index*,Index*), DenseBase::minCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::maxCoeff()
* \sa DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
*/
template<typename Derived>
typename ei_traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff(Index* index) const
template<typename IndexType>
typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff(IndexType* index) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
ei_max_coeff_visitor<Derived> maxVisitor;
internal::max_coeff_visitor<Derived> maxVisitor;
this->visit(maxVisitor);
*index = (RowsAtCompileTime==1) ? maxVisitor.col : maxVisitor.row;
return maxVisitor.res;

View File

@@ -25,13 +25,15 @@
#ifndef EIGEN_COMPLEX_ALTIVEC_H
#define EIGEN_COMPLEX_ALTIVEC_H
static Packet4ui ei_p4ui_CONJ_XOR = vec_mergeh((Packet4ui)ei_p4i_ZERO, (Packet4ui)ei_p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
static Packet16uc ei_p16uc_COMPLEX_RE = vec_sld((Packet16uc) vec_splat((Packet4ui)ei_p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)ei_p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
static Packet16uc ei_p16uc_COMPLEX_IM = vec_sld((Packet16uc) vec_splat((Packet4ui)ei_p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)ei_p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
static Packet16uc ei_p16uc_COMPLEX_REV = vec_sld(ei_p16uc_REVERSE, ei_p16uc_REVERSE, 8);//{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
static Packet16uc ei_p16uc_COMPLEX_REV2 = vec_sld(ei_p16uc_FORWARD, ei_p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
static Packet16uc ei_p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)ei_p16uc_FORWARD, 0), (Packet4ui) vec_splat((Packet4ui)ei_p16uc_FORWARD, 1));//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
static Packet16uc ei_p16uc_PSET_LO = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)ei_p16uc_FORWARD, 2), (Packet4ui) vec_splat((Packet4ui)ei_p16uc_FORWARD, 3));//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
namespace internal {
static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
static Packet16uc p16uc_COMPLEX_RE = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
static Packet16uc p16uc_COMPLEX_IM = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
static Packet16uc p16uc_COMPLEX_REV = vec_sld(p16uc_REVERSE, p16uc_REVERSE, 8);//{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
static Packet16uc p16uc_COMPLEX_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 1));//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 2), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 3));//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
//---------- float ----------
struct Packet2cf
@@ -41,11 +43,12 @@ struct Packet2cf
Packet4f v;
};
template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits
template<> struct packet_traits<std::complex<float> > : default_packet_traits
{
typedef Packet2cf type;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
HasAdd = 1,
@@ -61,106 +64,109 @@ template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_tr
};
};
template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
template<> EIGEN_STRONG_INLINE Packet2cf ei_pset1<Packet2cf>(const std::complex<float>& from)
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
Packet2cf res;
/* On AltiVec we cannot load 64-bit registers, so wa have to take care of alignment */
if ((ptrdiff_t)&from % 16 == 0) {
res.v = ei_pload((const float *)&from);
res.v = vec_perm(res.v, res.v, ei_p16uc_PSET_HI);
} else {
res.v = ei_ploadu((const float *)&from);
res.v = vec_perm(res.v, res.v, ei_p16uc_PSET_LO);
}
if((ptrdiff_t(&from) % 16) == 0)
res.v = pload<Packet4f>((const float *)&from);
else
res.v = ploadu<Packet4f>((const float *)&from);
res.v = vec_perm(res.v, res.v, p16uc_PSET_HI);
return res;
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_add(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_sub(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a) { return Packet2cf(ei_psub<Packet4f>(ei_p4f_ZERO, a.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a) { return Packet2cf((Packet4f)vec_xor((Packet4ui)a.v, ei_p4ui_CONJ_XOR)); }
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_add(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_sub(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf((Packet4f)vec_xor((Packet4ui)a.v, p4ui_CONJ_XOR)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
Packet4f v1, v2;
// Permute and multiply the real parts of a and b
v1 = vec_perm(a.v, a.v, ei_p16uc_COMPLEX_RE);
v1 = vec_perm(a.v, a.v, p16uc_COMPLEX_RE);
// Get the imaginary parts of a
v2 = vec_perm(a.v, a.v, ei_p16uc_COMPLEX_IM);
v2 = vec_perm(a.v, a.v, p16uc_COMPLEX_IM);
// multiply a_re * b
v1 = vec_madd(v1, b.v, ei_p4f_ZERO);
v1 = vec_madd(v1, b.v, p4f_ZERO);
// multiply a_im * b and get the conjugate result
v2 = vec_madd(v2, b.v, ei_p4f_ZERO);
v2 = (Packet4f) vec_xor((Packet4ui)v2, ei_p4ui_CONJ_XOR);
v2 = vec_madd(v2, b.v, p4f_ZERO);
v2 = (Packet4f) vec_xor((Packet4ui)v2, p4ui_CONJ_XOR);
// permute back to a proper order
v2 = vec_perm(v2, v2, ei_p16uc_COMPLEX_REV);
v2 = vec_perm(v2, v2, p16uc_COMPLEX_REV);
return Packet2cf(vec_add(v1, v2));
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_or(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_xor(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v, vec_nor(b.v,b.v))); }
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_or(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_xor(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v, vec_nor(b.v,b.v))); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_pload <std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(ei_pload((const float*)from)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_ploadu<std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ei_ploadu((const float*)from)); }
template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstore((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)
{
return pset1<Packet2cf>(*from);
}
template<> EIGEN_STRONG_INLINE void ei_prefetch<std::complex<float> >(const std::complex<float> * addr) { vec_dstt((float *)addr, DST_CTRL(2,2,32), DST_CHAN); }
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE std::complex<float> ei_pfirst<Packet2cf>(const Packet2cf& a)
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { vec_dstt((float *)addr, DST_CTRL(2,2,32), DST_CHAN); }
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
std::complex<float> EIGEN_ALIGN16 res[2];
ei_pstore((float *)&res, a.v);
pstore((float *)&res, a.v);
return res[0];
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_preverse(const Packet2cf& a)
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
{
Packet4f rev_a;
rev_a = vec_perm(a.v, a.v, ei_p16uc_COMPLEX_REV2);
rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX_REV2);
return Packet2cf(rev_a);
}
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux<Packet2cf>(const Packet2cf& a)
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
{
Packet4f b;
b = (Packet4f) vec_sld(a.v, a.v, 8);
b = ei_padd(a.v, b);
return ei_pfirst(Packet2cf(sum));
b = padd(a.v, b);
return pfirst(Packet2cf(b));
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_preduxp<Packet2cf>(const Packet2cf* vecs)
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
{
Packet4f b1, b2;
b1 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8);
b2 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8);
b2 = (Packet4f) vec_sld(b2, b2, 8);
b2 = ei_padd(b1, b2);
b2 = padd(b1, b2);
return Packet2cf(b2);
}
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux_mul<Packet2cf>(const Packet2cf& a)
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
{
Packet4f b;
Packet2cf prod;
b = (Packet4f) vec_sld(a.v, a.v, 8);
prod = ei_pmul(a, Packet2cf(b));
prod = pmul(a, Packet2cf(b));
return ei_pfirst(prod);
return pfirst(prod);
}
template<int Offset>
struct ei_palign_impl<Offset,Packet2cf>
struct palign_impl<Offset,Packet2cf>
{
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
{
@@ -171,45 +177,52 @@ struct ei_palign_impl<Offset,Packet2cf>
}
};
template<> struct ei_conj_helper<Packet2cf, Packet2cf, false,true>
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return ei_padd(pmul(x,y),c); }
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return ei_pmul(a, ei_pconj(b));
return internal::pmul(a, pconj(b));
}
};
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,false>
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return ei_padd(pmul(x,y),c); }
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return ei_pmul(ei_pconj(a), b);
return internal::pmul(pconj(a), b);
}
};
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,true>
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return ei_padd(pmul(x,y),c); }
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return ei_pconj(ei_pmul(a, b));
return pconj(internal::pmul(a, b));
}
};
template<> EIGEN_STRONG_INLINE Packet2cf ei_pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for AltiVec
Packet2cf res = ei_conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
Packet4f s = vec_madd(b.v, b.v, ei_p4f_ZERO);
return Packet2cf(ei_pdiv(res.v, vec_add(s,vec_perm(s, s, ei_p16uc_COMPLEX_REV))));
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
Packet4f s = vec_madd(b.v, b.v, p4f_ZERO);
return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX_REV))));
}
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
{
return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV));
}
} // end namespace internal
#endif // EIGEN_COMPLEX_ALTIVEC_H

View File

@@ -25,6 +25,8 @@
#ifndef EIGEN_PACKET_MATH_ALTIVEC_H
#define EIGEN_PACKET_MATH_ALTIVEC_H
namespace internal {
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
#endif
@@ -33,10 +35,6 @@
#define EIGEN_HAS_FUSE_CJMADD 1
#endif
#ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE
#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE 8*256*256
#endif
// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
@@ -53,38 +51,39 @@ typedef __vector unsigned char Packet16uc;
// and it doesn't really work to declare them global, so we define macros instead
#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
Packet4f ei_p4f_##NAME = (Packet4f) vec_splat_s32(X)
Packet4f p4f_##NAME = (Packet4f) vec_splat_s32(X)
#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
Packet4i ei_p4i_##NAME = vec_splat_s32(X)
Packet4i p4i_##NAME = vec_splat_s32(X)
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
Packet4f ei_p4f_##NAME = ei_pset1<Packet4f>(X)
Packet4f p4f_##NAME = pset1<Packet4f>(X)
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
Packet4f ei_p4f_##NAME = vreinterpretq_f32_u32(ei_pset1<int>(X))
Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int>(X))
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
Packet4i ei_p4i_##NAME = ei_pset1<Packet4i>(X)
Packet4i p4i_##NAME = pset1<Packet4i>(X)
#define DST_CHAN 1
#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
// Define global static constants:
static Packet4f ei_p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 };
static Packet4i ei_p4i_COUNTDOWN = { 3, 2, 1, 0 };
static Packet16uc ei_p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
static Packet16uc ei_p16uc_FORWARD = vec_lvsl(0, (float*)0);
static Packet4f p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 };
static Packet4i p4i_COUNTDOWN = { 3, 2, 1, 0 };
static Packet16uc p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
static Packet16uc p16uc_DUPLICATE = {0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7};
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0);
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0);
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1);
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16);
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1);
static Packet4f ei_p4f_ONE = vec_ctf(ei_p4i_ONE, 0);
static Packet4f ei_p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)ei_p4i_MINUS1, (Packet4ui)ei_p4i_MINUS1);
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0);
static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1);
template<> struct ei_packet_traits<float> : ei_default_packet_traits
template<> struct packet_traits<float> : default_packet_traits
{
typedef Packet4f type;
enum {
@@ -100,7 +99,7 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
HasSqrt = 0
};
};
template<> struct ei_packet_traits<int> : ei_default_packet_traits
template<> struct packet_traits<int> : default_packet_traits
{
typedef Packet4i type;
enum {
@@ -111,8 +110,8 @@ template<> struct ei_packet_traits<int> : ei_default_packet_traits
};
};
template<> struct ei_unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
/*
inline std::ostream & operator <<(std::ostream & s, const Packet4f & v)
{
@@ -158,7 +157,7 @@ inline std::ostream & operator <<(std::ostream & s, const Packetbi & v)
return s;
}
*/
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) {
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
float EIGEN_ALIGN16 af[4];
af[0] = from;
@@ -167,7 +166,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) {
return vc;
}
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<Packet4i>(const int& from) {
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
int EIGEN_ALIGN16 ai[4];
ai[0] = from;
Packet4i vc = vec_ld(0, ai);
@@ -175,22 +174,22 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<Packet4i>(const int& from)
return vc;
}
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) { return vec_add(ei_pset1<Packet4f>(a), ei_p4f_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) { return vec_add(ei_pset1<Packet4i>(a), ei_p4i_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return vec_add(pset1<Packet4f>(a), p4f_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return vec_add(pset1<Packet4i>(a), p4i_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_sub(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_sub(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_sub(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_sub(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pnegate(const Packet4f& a) { return ei_psub<Packet4f>(ei_p4f_ZERO, a); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pnegate(const Packet4i& a) { return ei_psub<Packet4i>(ei_p4i_ZERO, a); }
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return psub<Packet4f>(p4f_ZERO, a); }
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return psub<Packet4i>(p4i_ZERO, a); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b,ei_p4f_ZERO); }
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b,p4f_ZERO); }
/* Commented out: it's actually slower than processing it scalar
*
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
{
// Detailed in: http://freevec.org/content/32bit_signed_integer_multiplication_altivec
//Set up constants, variables
@@ -201,21 +200,21 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, con
b1 = vec_abs(b);
// Get the signs using xor
Packet4bi sgn = (Packet4bi) vec_cmplt(vec_xor(a, b), ei_p4i_ZERO);
Packet4bi sgn = (Packet4bi) vec_cmplt(vec_xor(a, b), p4i_ZERO);
// Do the multiplication for the absolute values.
bswap = (Packet4i) vec_rl((Packet4ui) b1, (Packet4ui) ei_p4i_MINUS16 );
bswap = (Packet4i) vec_rl((Packet4ui) b1, (Packet4ui) p4i_MINUS16 );
low_prod = vec_mulo((Packet8i) a1, (Packet8i)b1);
high_prod = vec_msum((Packet8i) a1, (Packet8i) bswap, ei_p4i_ZERO);
high_prod = (Packet4i) vec_sl((Packet4ui) high_prod, (Packet4ui) ei_p4i_MINUS16);
high_prod = vec_msum((Packet8i) a1, (Packet8i) bswap, p4i_ZERO);
high_prod = (Packet4i) vec_sl((Packet4ui) high_prod, (Packet4ui) p4i_MINUS16);
prod = vec_add( low_prod, high_prod );
// NOR the product and select only the negative elements according to the sign mask
prod_ = vec_nor(prod, prod);
prod_ = vec_sel(ei_p4i_ZERO, prod_, sgn);
prod_ = vec_sel(p4i_ZERO, prod_, sgn);
// Add 1 to the result to get the negative numbers
v1sel = vec_sel(ei_p4i_ZERO, ei_p4i_ONE, sgn);
v1sel = vec_sel(p4i_ZERO, p4i_ONE, sgn);
prod_ = vec_add(prod_, v1sel);
// Merge the results back to the final vector.
@@ -224,7 +223,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, con
return prod;
}
*/
template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
{
Packet4f t, y_0, y_1, res;
@@ -232,45 +231,45 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, con
y_0 = vec_re(b);
// Do one Newton-Raphson iteration to get the needed accuracy
t = vec_nmsub(y_0, b, ei_p4f_ONE);
t = vec_nmsub(y_0, b, p4f_ONE);
y_1 = vec_madd(y_0, t, y_0);
res = vec_madd(a, y_1, ei_p4f_ZERO);
res = vec_madd(a, y_1, p4f_ZERO);
return res;
}
template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
{ ei_assert(false && "packet integer division are not supported by AltiVec");
return ei_pset1<Packet4i>(0);
template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
{ eigen_assert(false && "packet integer division are not supported by AltiVec");
return pset1<Packet4i>(0);
}
// for some weird reasons, it has to be overloaded for packets of integers
template<> EIGEN_STRONG_INLINE Packet4f ei_pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a, b, c); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return ei_padd(ei_pmul(a,b), c); }
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a, b, c); }
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
// Logical operations: the AltiVec vec_and/vec_or/vec_xor/vec_nor intrinsics are applied directly to float and integer packets alike
template<> EIGEN_STRONG_INLINE Packet4f ei_pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_por<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_or(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_or(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu<Packet4f>(const float* from)
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
{
EIGEN_DEBUG_ALIGNED_LOAD
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
@@ -282,7 +281,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu<Packet4f>(const float* from)
return (Packet4f) vec_perm(MSQ, LSQ, mask); // align the data
}
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<Packet4i>(const int* from)
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
{
EIGEN_DEBUG_ALIGNED_LOAD
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
@@ -294,10 +293,25 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<Packet4i>(const int* from)
return (Packet4i) vec_perm(MSQ, LSQ, mask); // align the data
}
template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
// Loads a packet starting at `from` and rearranges it through the
// p16uc_DUPLICATE permutation mask (presumably yielding
// {from[0], from[0], from[1], from[1]} -- the mask is defined outside this
// block, confirm there).
//
// The aligned or unaligned load is chosen from the alignment of the address
// stored in `from`. Testing `&from` instead would inspect the address of the
// pointer parameter itself, which says nothing about the data being loaded.
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
{
  Packet4f p;
  if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from);
  else                            p = ploadu<Packet4f>(from);
  return vec_perm(p, p, p16uc_DUPLICATE);
}
// Integer counterpart of ploaddup<Packet4f>: loads a packet starting at `from`
// and rearranges it through the p16uc_DUPLICATE permutation mask (presumably
// yielding {from[0], from[0], from[1], from[1]} -- the mask is defined outside
// this block, confirm there).
//
// The aligned or unaligned load is chosen from the alignment of the address
// stored in `from`. Testing `&from` instead would inspect the address of the
// pointer parameter itself, which says nothing about the data being loaded.
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
{
  Packet4i p;
  if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from);
  else                            p = ploadu<Packet4i>(from);
  return vec_perm(p, p, p16uc_DUPLICATE);
}
template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f& from)
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
{
EIGEN_DEBUG_UNALIGNED_STORE
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
@@ -315,7 +329,7 @@ template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f
vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first
vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part
}
template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i& from)
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
{
EIGEN_DEBUG_UNALIGNED_STORE
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
@@ -334,29 +348,29 @@ template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i
vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part
}
template<> EIGEN_STRONG_INLINE void ei_prefetch<float>(const float* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
template<> EIGEN_STRONG_INLINE void ei_prefetch<int>(const int* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, ei_p16uc_REVERSE); }
template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, ei_p16uc_REVERSE); }
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); }
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pabs(const Packet4f& a) { return vec_abs(a); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a) { return vec_abs(a); }
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); }
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a)
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
{
Packet4f b, sum;
b = (Packet4f) vec_sld(a, a, 8);
sum = vec_add(a, b);
b = (Packet4f) vec_sld(sum, sum, 4);
sum = vec_add(sum, b);
return ei_pfirst(sum);
return pfirst(sum);
}
template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs)
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
{
Packet4f v[4], sum[4];
@@ -384,15 +398,15 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vec
return sum[0];
}
template<> EIGEN_STRONG_INLINE int ei_predux<Packet4i>(const Packet4i& a)
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
{
Packet4i sum;
sum = vec_sums(a, ei_p4i_ZERO);
sum = vec_sld(sum, ei_p4i_ZERO, 12);
return ei_pfirst(sum);
sum = vec_sums(a, p4i_ZERO);
sum = vec_sld(sum, p4i_ZERO, 12);
return pfirst(sum);
}
template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vecs)
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
{
Packet4i v[4], sum[4];
@@ -422,56 +436,56 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vec
// Other reduction functions:
// mul
template<> EIGEN_STRONG_INLINE float ei_predux_mul<Packet4f>(const Packet4f& a)
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
{
Packet4f prod;
prod = ei_pmul(a, (Packet4f)vec_sld(a, a, 8));
return ei_pfirst(ei_pmul(prod, (Packet4f)vec_sld(prod, prod, 4)));
prod = pmul(a, (Packet4f)vec_sld(a, a, 8));
return pfirst(pmul(prod, (Packet4f)vec_sld(prod, prod, 4)));
}
template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a)
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
{
EIGEN_ALIGN16 int aux[4];
ei_pstore(aux, a);
pstore(aux, a);
return aux[0] * aux[1] * aux[2] * aux[3];
}
// min
template<> EIGEN_STRONG_INLINE float ei_predux_min<Packet4f>(const Packet4f& a)
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
{
Packet4f b, res;
b = vec_min(a, vec_sld(a, a, 8));
res = vec_min(b, vec_sld(b, b, 4));
return ei_pfirst(res);
return pfirst(res);
}
template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a)
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
{
Packet4i b, res;
b = vec_min(a, vec_sld(a, a, 8));
res = vec_min(b, vec_sld(b, b, 4));
return ei_pfirst(res);
return pfirst(res);
}
// max
template<> EIGEN_STRONG_INLINE float ei_predux_max<Packet4f>(const Packet4f& a)
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
{
Packet4f b, res;
b = vec_max(a, vec_sld(a, a, 8));
res = vec_max(b, vec_sld(b, b, 4));
return ei_pfirst(res);
return pfirst(res);
}
template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
{
Packet4i b, res;
b = vec_max(a, vec_sld(a, a, 8));
res = vec_max(b, vec_sld(b, b, 4));
return ei_pfirst(res);
return pfirst(res);
}
template<int Offset>
struct ei_palign_impl<Offset,Packet4f>
struct palign_impl<Offset,Packet4f>
{
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
{
@@ -481,7 +495,7 @@ struct ei_palign_impl<Offset,Packet4f>
};
template<int Offset>
struct ei_palign_impl<Offset,Packet4i>
struct palign_impl<Offset,Packet4i>
{
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
{
@@ -489,4 +503,7 @@ struct ei_palign_impl<Offset,Packet4i>
first = vec_sld(first, second, Offset*4);
}
};
} // end namespace internal
#endif // EIGEN_PACKET_MATH_ALTIVEC_H

View File

@@ -46,15 +46,6 @@
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif
/** Defines the maximal size in Bytes of blocks fitting in CPU cache.
* The current value is set to generate blocks of 256x256 for float
*
* Typically for a single-threaded application you would set that to 25% of the size of your CPU caches in bytes
*/
#ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE
#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (sizeof(float)*512*512)
#endif
/** Defines the maximal width of the blocks used in the triangular product and solver
* for vectors (level 2 blas xTRMV and xTRSV). The default is 8.
*/

View File

@@ -22,11 +22,13 @@
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_COMPLEX_ALTIVEC_H
#define EIGEN_COMPLEX_ALTIVEC_H
#ifndef EIGEN_COMPLEX_NEON_H
#define EIGEN_COMPLEX_NEON_H
static uint32x4_t ei_p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
static uint32x2_t ei_p2ui_CONJ_XOR = { 0x00000000, 0x80000000 };
namespace internal {
static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000);
static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000);
//---------- float ----------
struct Packet2cf
@@ -36,11 +38,12 @@ struct Packet2cf
Packet4f v;
};
template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits
template<> struct packet_traits<std::complex<float> > : default_packet_traits
{
typedef Packet2cf type;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
HasAdd = 1,
@@ -56,9 +59,9 @@ template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_tr
};
};
template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
template<> EIGEN_STRONG_INLINE Packet2cf ei_pset1<Packet2cf>(const std::complex<float>& from)
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
float32x2_t r64;
r64 = vld1_f32((float *)&from);
@@ -66,15 +69,16 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_pset1<Packet2cf>(const std::complex<
return Packet2cf(vcombine_f32(r64, r64));
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(ei_padd<Packet4f>(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(ei_psub<Packet4f>(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a) { return Packet2cf(ei_pnegate<Packet4f>(a.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a)
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
{
return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v), ei_p4ui_CONJ_XOR)));
Packet4ui b = vreinterpretq_u32_f32(a.v);
return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR)));
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
Packet4f v1, v2;
float32x2_t a_lo, a_hi;
@@ -88,7 +92,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a,
// Multiply the imag a with b
v2 = vmulq_f32(v2, b.v);
// Conjugate v2
v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), ei_p4ui_CONJ_XOR));
v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR));
// Swap real/imag elements in v2.
a_lo = vrev64_f32(vget_low_f32(v2));
a_hi = vrev64_f32(vget_high_f32(v2));
@@ -97,39 +101,41 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a,
return Packet2cf(vaddq_f32(v1, v2));
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
// Bitwise AND of two complex packets.
// The float lanes are viewed as uint32 lanes, combined with vandq_u32, and
// viewed back as floats. (Using vorrq_u32 here would make this specialization
// compute exactly what por<Packet2cf> computes.)
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_por <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
// Bitwise OR of two complex packets: both operands are viewed as uint32 lanes,
// OR-ed with vorrq_u32, and the result is viewed back as float lanes.
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4ui lhs_bits = vreinterpretq_u32_f32(a.v);
  const Packet4ui rhs_bits = vreinterpretq_u32_f32(b.v);
  const Packet4ui or_bits  = vorrq_u32(lhs_bits, rhs_bits);
  return Packet2cf(vreinterpretq_f32_u32(or_bits));
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
// Bitwise XOR of two complex packets: both operands are viewed as uint32 lanes,
// XOR-ed with veorq_u32, and the result is viewed back as float lanes.
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4ui lhs_bits = vreinterpretq_u32_f32(a.v);
  const Packet4ui rhs_bits = vreinterpretq_u32_f32(b.v);
  const Packet4ui xor_bits = veorq_u32(lhs_bits, rhs_bits);
  return Packet2cf(vreinterpretq_f32_u32(xor_bits));
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
// "a and not b" on complex packets: both operands are viewed as uint32 lanes
// and combined with the vbicq_u32 intrinsic (first argument `a`, second `b`);
// the result is viewed back as float lanes.
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4ui keep_bits  = vreinterpretq_u32_f32(a.v);
  const Packet4ui clear_bits = vreinterpretq_u32_f32(b.v);
  const Packet4ui masked     = vbicq_u32(keep_bits, clear_bits);
  return Packet2cf(vreinterpretq_f32_u32(masked));
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_pload <std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(ei_pload((const float*)from)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_ploadu<std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ei_ploadu((const float*)from)); }
template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstore((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
template<> EIGEN_STRONG_INLINE void ei_prefetch<std::complex<float> >(const std::complex<float> * addr) { __pld((float *)addr); }
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE std::complex<float> ei_pfirst<Packet2cf>(const Packet2cf& a)
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { __pld((float *)addr); }
// Returns the first complex coefficient of the packet.
// The whole packet is stored into an aligned two-element scratch array and the
// element at index 0 is returned.
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
  std::complex<float> EIGEN_ALIGN16 unpacked[2];
  float* const dst = (float *)unpacked;
  vst1q_f32(dst, a.v);
  return unpacked[0];
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_preverse(const Packet2cf& a)
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
{
float32x2_t a_lo, a_hi;
Packet4f a_r128;
@@ -141,12 +147,12 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_preverse(const Packet2cf& a)
return Packet2cf(a_r128);
}
EIGEN_STRONG_INLINE Packet2cf ei_pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
// Applies vrev64q_f32 to the underlying float packet, i.e. reverses the two
// floats inside each 64-bit half, and wraps the result as a complex packet.
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
{
  const Packet4f flipped = vrev64q_f32(a.v);
  return Packet2cf(flipped);
}
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux<Packet2cf>(const Packet2cf& a)
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
{
float32x2_t a1, a2;
std::complex<float> s;
@@ -159,7 +165,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux<Packet2cf>(const Pa
return s;
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_preduxp<Packet2cf>(const Packet2cf* vecs)
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
{
Packet4f sum1, sum2, sum;
@@ -171,7 +177,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_preduxp<Packet2cf>(const Packet2cf*
return Packet2cf(sum);
}
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux_mul<Packet2cf>(const Packet2cf& a)
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
{
float32x2_t a1, a2, v1, v2, prod;
std::complex<float> s;
@@ -187,7 +193,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux_mul<Packet2cf>(cons
// Multiply the imag a with b
v2 = vmul_f32(v2, a2);
// Conjugate v2
v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), ei_p2ui_CONJ_XOR));
v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR));
// Swap real/imag elements in v2.
v2 = vrev64_f32(v2);
// Add v1, v2
@@ -199,7 +205,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux_mul<Packet2cf>(cons
}
template<int Offset>
struct ei_palign_impl<Offset,Packet2cf>
struct palign_impl<Offset,Packet2cf>
{
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
{
@@ -210,43 +216,43 @@ struct ei_palign_impl<Offset,Packet2cf>
}
};
template<> struct ei_conj_helper<Packet2cf, Packet2cf, false,true>
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return ei_padd(pmul(x,y),c); }
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return ei_pmul(a, ei_pconj(b));
return internal::pmul(a, pconj(b));
}
};
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,false>
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return ei_padd(pmul(x,y),c); }
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return ei_pmul(ei_pconj(a), b);
return internal::pmul(pconj(a), b);
}
};
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,true>
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return ei_padd(pmul(x,y),c); }
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return ei_pconj(ei_pmul(a, b));
return pconj(internal::pmul(a, b));
}
};
template<> EIGEN_STRONG_INLINE Packet2cf ei_pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for NEON
Packet2cf res = ei_conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
Packet4f s, rev_s;
float32x2_t a_lo, a_hi;
@@ -256,7 +262,9 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_pdiv<Packet2cf>(const Packet2cf& a,
a_hi = vrev64_f32(vget_high_f32(s));
rev_s = vcombine_f32(a_lo, a_hi);
return Packet2cf(ei_pdiv(res.v, vaddq_f32(s,rev_s)));
return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
}
#endif // EIGEN_COMPLEX_ALTIVEC_H
} // end namespace internal
#endif // EIGEN_COMPLEX_NEON_H

View File

@@ -27,14 +27,12 @@
#ifndef EIGEN_PACKET_MATH_NEON_H
#define EIGEN_PACKET_MATH_NEON_H
namespace internal {
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif
#ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE
#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE 4*192*192
#endif
// FIXME NEON has 16 quad registers, but since the current register allocator
// is so bad, it is much better to reduce it to 8
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
@@ -43,21 +41,32 @@
typedef float32x4_t Packet4f;
typedef int32x4_t Packet4i;
typedef uint32x4_t Packet4ui;
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
const Packet4f ei_p4f_##NAME = ei_pset1<Packet4f>(X)
const Packet4f p4f_##NAME = pset1<Packet4f>(X)
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
const Packet4f ei_p4f_##NAME = vreinterpretq_f32_u32(ei_pset1<int>(X))
const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int>(X))
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i ei_p4i_##NAME = ei_pset1<Packet4i>(X)
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
// EIGEN_INIT_NEON_PACKET2 / EIGEN_INIT_NEON_PACKET4 expand to the brace
// initializer used to build a 2- or 4-lane NEON packet from literal lane
// values. Two spellings are needed depending on the compiler.
#if defined(__llvm__) && !defined(__clang__)
// Special treatment for Apple's llvm-gcc: its NEON packet types are unions,
// so the lane values need an extra level of braces.
#define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}}
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}}
#else
// Default initializer for packets: a single brace-enclosed list of lanes.
#define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y}
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
#endif
#ifndef __pld
#define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" );
#endif
template<> struct ei_packet_traits<float> : ei_default_packet_traits
template<> struct packet_traits<float> : default_packet_traits
{
typedef Packet4f type;
enum {
@@ -74,7 +83,7 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
HasSqrt = 0
};
};
template<> struct ei_packet_traits<int> : ei_default_packet_traits
template<> struct packet_traits<int> : default_packet_traits
{
typedef Packet4i type;
enum {
@@ -85,36 +94,44 @@ template<> struct ei_packet_traits<int> : ei_default_packet_traits
};
};
template<> struct ei_unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
#if EIGEN_GNUC_AT_MOST(4,4) && !defined(__llvm__)
// Workaround for a compilation issue with gcc 4.2, 4.3 and 4.4.
// These overloads take plain `float*` arguments, cast them to `float32_t*`,
// and forward to the global-namespace NEON load/store intrinsics of the same
// name (hence the explicit `::` qualification).
EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q_f32((float32_t*)to,from); }
EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
#endif
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); }
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a)
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); }
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a)
{
Packet4f countdown = { 3, 2, 1, 0 };
return vaddq_f32(ei_pset1<Packet4f>(a), countdown);
Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
return vaddq_f32(pset1<Packet4f>(a), countdown);
}
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a)
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a)
{
Packet4i countdown = { 3, 2, 1, 0 };
return vaddq_s32(ei_pset1<Packet4i>(a), countdown);
Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
return vaddq_s32(pset1<Packet4i>(a), countdown);
}
template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pnegate(const Packet4f& a) { return vnegq_f32(a); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pnegate(const Packet4i& a) { return vnegq_s32(a); }
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return vnegq_f32(a); }
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return vnegq_s32(a); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
{
Packet4f inv, restep, div;
@@ -135,80 +152,80 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, con
return div;
}
template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
{ ei_assert(false && "packet integer division are not supported by NEON");
return ei_pset1<Packet4i>(0);
template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
{ eigen_assert(false && "packet integer division are not supported by NEON");
return pset1<Packet4i>(0);
}
// for some weird reasons, it has to be overloaded for packets of integers
template<> EIGEN_STRONG_INLINE Packet4i ei_pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return ei_padd(ei_pmul(a,b), c); }
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); }
// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics
template<> EIGEN_STRONG_INLINE Packet4f ei_pand<Packet4f>(const Packet4f& a, const Packet4f& b)
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
{
return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
}
template<> EIGEN_STRONG_INLINE Packet4i ei_pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_por<Packet4f>(const Packet4f& a, const Packet4f& b)
template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
{
return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
}
template<> EIGEN_STRONG_INLINE Packet4i ei_por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
{
return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
}
template<> EIGEN_STRONG_INLINE Packet4i ei_pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
{
return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
}
template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); }
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); }
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_ploaddup<Packet4f>(const float* from)
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
{
float32x2_t lo, ho;
float32x2_t lo, hi;
lo = vdup_n_f32(*from);
hi = vdup_n_f32(*from);
hi = vdup_n_f32(*(from+1));
return vcombine_f32(lo, hi);
}
template<> EIGEN_STRONG_INLINE Packet4i ei_ploaddup<Packet4i>(const float* from)
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
{
int32x2_t lo, ho;
int32x2_t lo, hi;
lo = vdup_n_s32(*from);
hi = vdup_n_s32(*from);
hi = vdup_n_s32(*(from+1));
return vcombine_s32(lo, hi);
}
template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
template<> EIGEN_STRONG_INLINE void ei_prefetch<float>(const float* addr) { __pld(addr); }
template<> EIGEN_STRONG_INLINE void ei_prefetch<int>(const int* addr) { __pld(addr); }
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { __pld(addr); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { __pld(addr); }
// FIXME only store the 2 first elements ?
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a) {
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
float32x2_t a_lo, a_hi;
Packet4f a_r64;
@@ -217,7 +234,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a) {
a_hi = vget_high_f32(a_r64);
return vcombine_f32(a_hi, a_lo);
}
template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a) {
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
int32x2_t a_lo, a_hi;
Packet4i a_r64;
@@ -226,10 +243,10 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a) {
a_hi = vget_high_s32(a_r64);
return vcombine_s32(a_hi, a_lo);
}
template<> EIGEN_STRONG_INLINE Packet4f ei_pabs(const Packet4f& a) { return vabsq_f32(a); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a) { return vabsq_s32(a); }
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); }
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); }
template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a)
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
{
float32x2_t a_lo, a_hi, sum;
float s[2];
@@ -243,7 +260,7 @@ template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a)
return s[0];
}
template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs)
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
{
float32x4x2_t vtrn1, vtrn2, res1, res2;
Packet4f sum1, sum2, sum;
@@ -263,7 +280,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vec
return sum;
}
template<> EIGEN_STRONG_INLINE int ei_predux<Packet4i>(const Packet4i& a)
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
{
int32x2_t a_lo, a_hi, sum;
int32_t s[2];
@@ -277,7 +294,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux<Packet4i>(const Packet4i& a)
return s[0];
}
template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vecs)
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
{
int32x4x2_t vtrn1, vtrn2, res1, res2;
Packet4i sum1, sum2, sum;
@@ -299,7 +316,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vec
// Other reduction functions:
// mul
template<> EIGEN_STRONG_INLINE float ei_predux_mul<Packet4f>(const Packet4f& a)
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
{
float32x2_t a_lo, a_hi, prod;
float s[2];
@@ -315,7 +332,7 @@ template<> EIGEN_STRONG_INLINE float ei_predux_mul<Packet4f>(const Packet4f& a)
return s[0];
}
template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a)
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
{
int32x2_t a_lo, a_hi, prod;
int32_t s[2];
@@ -333,7 +350,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a)
}
// min
template<> EIGEN_STRONG_INLINE float ei_predux_min<Packet4f>(const Packet4f& a)
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
{
float32x2_t a_lo, a_hi, min;
float s[2];
@@ -346,7 +363,7 @@ template<> EIGEN_STRONG_INLINE float ei_predux_min<Packet4f>(const Packet4f& a)
return s[0];
}
template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a)
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
{
int32x2_t a_lo, a_hi, min;
int32_t s[2];
@@ -361,7 +378,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a)
}
// max
template<> EIGEN_STRONG_INLINE float ei_predux_max<Packet4f>(const Packet4f& a)
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
{
float32x2_t a_lo, a_hi, max;
float s[2];
@@ -374,7 +391,7 @@ template<> EIGEN_STRONG_INLINE float ei_predux_max<Packet4f>(const Packet4f& a)
return s[0];
}
template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
{
int32x2_t a_lo, a_hi, max;
int32_t s[2];
@@ -388,23 +405,30 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
return s[0];
}
template<int Offset>
struct ei_palign_impl<Offset,Packet4f>
{
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
{
if (Offset!=0)
first = vextq_f32(first, second, Offset);
}
};
// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
#define PALIGN_NEON(Offset,Type,Command) \
template<>\
struct palign_impl<Offset,Type>\
{\
EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
{\
if (Offset!=0)\
first = Command(first, second, Offset);\
}\
};\
PALIGN_NEON(0,Packet4f,vextq_f32)
PALIGN_NEON(1,Packet4f,vextq_f32)
PALIGN_NEON(2,Packet4f,vextq_f32)
PALIGN_NEON(3,Packet4f,vextq_f32)
PALIGN_NEON(0,Packet4i,vextq_s32)
PALIGN_NEON(1,Packet4i,vextq_s32)
PALIGN_NEON(2,Packet4i,vextq_s32)
PALIGN_NEON(3,Packet4i,vextq_s32)
#undef PALIGN_NEON
} // end namespace internal
template<int Offset>
struct ei_palign_impl<Offset,Packet4i>
{
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
{
if (Offset!=0)
first = vextq_s32(first, second, Offset);
}
};
#endif // EIGEN_PACKET_MATH_NEON_H

View File

@@ -25,6 +25,8 @@
#ifndef EIGEN_COMPLEX_SSE_H
#define EIGEN_COMPLEX_SSE_H
namespace internal {
//---------- float ----------
struct Packet2cf
{
@@ -33,7 +35,7 @@ struct Packet2cf
__m128 v;
};
template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits
template<> struct packet_traits<std::complex<float> > : default_packet_traits
{
typedef Packet2cf type;
enum {
@@ -54,85 +56,100 @@ template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_tr
};
};
template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
template<> EIGEN_STRONG_INLINE Packet2cf ei_padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a)
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
{
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
return Packet2cf(_mm_xor_ps(a.v,mask));
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a)
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
{
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
return Packet2cf(_mm_xor_ps(a.v,mask));
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for SSE3 and 4
#ifdef EIGEN_VECTORIZE_SSE3
return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
_mm_mul_ps(_mm_movehdup_ps(a.v),
ei_vec4f_swizzle1(b.v, 1, 0, 3, 2))));
// return Packet2cf(_mm_addsub_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
// _mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
// ei_vec4f_swizzle1(b.v, 1, 0, 3, 2))));
vec4f_swizzle1(b.v, 1, 0, 3, 2))));
// return Packet2cf(_mm_addsub_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
// _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
// vec4f_swizzle1(b.v, 1, 0, 3, 2))));
#else
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
return Packet2cf(_mm_add_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
_mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
ei_vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
#endif
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(ei_pload<Packet4f>(&ei_real_ref(*from))); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ei_ploadu<Packet4f>(&ei_real_ref(*from))); }
template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&real_ref(*from))); }
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&real_ref(*from))); }
template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstore(&ei_real_ref(*to), from.v); }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu(&ei_real_ref(*to), from.v); }
template<> EIGEN_STRONG_INLINE void ei_prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE Packet2cf ei_pset1<Packet2cf>(const std::complex<float>& from)
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
Packet2cf res;
#if EIGEN_GNUC_AT_MOST(4,2)
// workaround annoying "may be used uninitialized in this function" warning with gcc 4.2
res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)&from);
#else
res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
#endif
return Packet2cf(_mm_movelh_ps(res.v,res.v));
}
template<> EIGEN_STRONG_INLINE std::complex<float> ei_pfirst<Packet2cf>(const Packet2cf& a)
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&real_ref(*to), from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&real_ref(*to), from.v); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
#if EIGEN_GNUC_AT_MOST(4,3)
// Workaround gcc 4.2 ICE - this is not performance wise ideal, but who cares...
// This workaround also fix invalid code generation with gcc 4.3
EIGEN_ALIGN16 std::complex<float> res[2];
_mm_store_ps((float*)res, a.v);
return res[0];
#else
std::complex<float> res;
_mm_storel_pi((__m64*)&res, a.v);
return res;
#endif
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(ei_preverse(_mm_castps_pd(a.v)))); }
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(_mm_castps_pd(a.v)))); }
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux<Packet2cf>(const Packet2cf& a)
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
{
return ei_pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_preduxp<Packet2cf>(const Packet2cf* vecs)
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
{
return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v)));
}
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux_mul<Packet2cf>(const Packet2cf& a)
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
{
return ei_pfirst(ei_pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
}
template<int Offset>
struct ei_palign_impl<Offset,Packet2cf>
struct palign_impl<Offset,Packet2cf>
{
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
{
@@ -144,89 +161,89 @@ struct ei_palign_impl<Offset,Packet2cf>
}
};
template<> struct ei_conj_helper<Packet2cf, Packet2cf, false,true>
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return ei_padd(pmul(x,y),c); }
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
#ifdef EIGEN_VECTORIZE_SSE3
return ei_pmul(a, ei_pconj(b));
return internal::pmul(a, pconj(b));
#else
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
_mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
ei_vec4f_swizzle1(b.v, 1, 0, 3, 2))));
return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
vec4f_swizzle1(b.v, 1, 0, 3, 2))));
#endif
}
};
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,false>
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return ei_padd(pmul(x,y),c); }
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
#ifdef EIGEN_VECTORIZE_SSE3
return ei_pmul(ei_pconj(a), b);
return internal::pmul(pconj(a), b);
#else
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
return Packet2cf(_mm_add_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
_mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
ei_vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
#endif
}
};
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,true>
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return ei_padd(pmul(x,y),c); }
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
#ifdef EIGEN_VECTORIZE_SSE3
return ei_pconj(ei_pmul(a, b));
return pconj(internal::pmul(a, b));
#else
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
_mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
ei_vec4f_swizzle1(b.v, 1, 0, 3, 2))));
return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
vec4f_swizzle1(b.v, 1, 0, 3, 2))));
#endif
}
};
template<> struct ei_conj_helper<Packet4f, Packet2cf, false,false>
template<> struct conj_helper<Packet4f, Packet2cf, false,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
{ return ei_padd(c, pmul(x,y)); }
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
{ return Packet2cf(ei_pmul(x, y.v)); }
{ return Packet2cf(Eigen::internal::pmul(x, y.v)); }
};
template<> struct ei_conj_helper<Packet2cf, Packet4f, false,false>
template<> struct conj_helper<Packet2cf, Packet4f, false,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
{ return ei_padd(c, pmul(x,y)); }
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
{ return Packet2cf(ei_pmul(x.v, y)); }
{ return Packet2cf(Eigen::internal::pmul(x.v, y)); }
};
template<> EIGEN_STRONG_INLINE Packet2cf ei_pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for SSE3 and 4
Packet2cf res = ei_conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
__m128 s = _mm_mul_ps(b.v,b.v);
return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
}
EIGEN_STRONG_INLINE Packet2cf ei_pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
{
return Packet2cf(ei_vec4f_swizzle1(x.v, 1, 0, 3, 2));
return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
}
@@ -238,7 +255,7 @@ struct Packet1cd
__m128d v;
};
template<> struct ei_packet_traits<std::complex<double> > : ei_default_packet_traits
template<> struct packet_traits<std::complex<double> > : default_packet_traits
{
typedef Packet1cd type;
enum {
@@ -259,77 +276,79 @@ template<> struct ei_packet_traits<std::complex<double> > : ei_default_packet_t
};
};
template<> struct ei_unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; };
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; };
template<> EIGEN_STRONG_INLINE Packet1cd ei_padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd ei_psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd ei_pnegate(const Packet1cd& a) { return Packet1cd(ei_pnegate(a.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd ei_pconj(const Packet1cd& a)
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(a.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
{
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
return Packet1cd(_mm_xor_pd(a.v,mask));
}
template<> EIGEN_STRONG_INLINE Packet1cd ei_pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for SSE3 and 4
#ifdef EIGEN_VECTORIZE_SSE3
return Packet1cd(_mm_addsub_pd(_mm_mul_pd(ei_vec2d_swizzle1(a.v, 0, 0), b.v),
_mm_mul_pd(ei_vec2d_swizzle1(a.v, 1, 1),
ei_vec2d_swizzle1(b.v, 1, 0))));
return Packet1cd(_mm_addsub_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
vec2d_swizzle1(b.v, 1, 0))));
#else
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
return Packet1cd(_mm_add_pd(_mm_mul_pd(ei_vec2d_swizzle1(a.v, 0, 0), b.v),
_mm_xor_pd(_mm_mul_pd(ei_vec2d_swizzle1(a.v, 1, 1),
ei_vec2d_swizzle1(b.v, 1, 0)), mask)));
return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
vec2d_swizzle1(b.v, 1, 0)), mask)));
#endif
}
template<> EIGEN_STRONG_INLINE Packet1cd ei_pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd ei_por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd ei_pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd ei_pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); }
// FIXME force unaligned load, this is a temporary fix
template<> EIGEN_STRONG_INLINE Packet1cd ei_pload <Packet1cd>(const std::complex<double>* from)
{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(ei_pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ei_ploadu<Packet1cd>(const std::complex<double>* from)
{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ei_ploadu<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ei_pset1<Packet1cd>(const std::complex<double>& from)
{ /* here we really have to use unaligned loads :( */ return ei_ploadu<Packet1cd>(&from); }
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from)
{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
// FIXME force unaligned store, this is a temporary fix
template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstore((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void ei_prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE std::complex<double> ei_pfirst<Packet1cd>(const Packet1cd& a)
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
{
EIGEN_ALIGN16 double res[2];
_mm_store_pd(res, a.v);
return std::complex<double>(res[0],res[1]);
}
template<> EIGEN_STRONG_INLINE Packet1cd ei_preverse(const Packet1cd& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
template<> EIGEN_STRONG_INLINE std::complex<double> ei_predux<Packet1cd>(const Packet1cd& a)
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
{
return ei_pfirst(a);
return pfirst(a);
}
template<> EIGEN_STRONG_INLINE Packet1cd ei_preduxp<Packet1cd>(const Packet1cd* vecs)
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
{
return vecs[0];
}
template<> EIGEN_STRONG_INLINE std::complex<double> ei_predux_mul<Packet1cd>(const Packet1cd& a)
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
{
return ei_pfirst(a);
return pfirst(a);
}
template<int Offset>
struct ei_palign_impl<Offset,Packet1cd>
struct palign_impl<Offset,Packet1cd>
{
EIGEN_STRONG_INLINE static void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
{
@@ -338,89 +357,91 @@ struct ei_palign_impl<Offset,Packet1cd>
}
};
template<> struct ei_conj_helper<Packet1cd, Packet1cd, false,true>
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return ei_padd(pmul(x,y),c); }
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{
#ifdef EIGEN_VECTORIZE_SSE3
return ei_pmul(a, ei_pconj(b));
return internal::pmul(a, pconj(b));
#else
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(ei_vec2d_swizzle1(a.v, 0, 0), b.v), mask),
_mm_mul_pd(ei_vec2d_swizzle1(a.v, 1, 1),
ei_vec2d_swizzle1(b.v, 1, 0))));
return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
vec2d_swizzle1(b.v, 1, 0))));
#endif
}
};
template<> struct ei_conj_helper<Packet1cd, Packet1cd, true,false>
template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return ei_padd(pmul(x,y),c); }
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{
#ifdef EIGEN_VECTORIZE_SSE3
return ei_pmul(ei_pconj(a), b);
return internal::pmul(pconj(a), b);
#else
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
return Packet1cd(_mm_add_pd(_mm_mul_pd(ei_vec2d_swizzle1(a.v, 0, 0), b.v),
_mm_xor_pd(_mm_mul_pd(ei_vec2d_swizzle1(a.v, 1, 1),
ei_vec2d_swizzle1(b.v, 1, 0)), mask)));
return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
vec2d_swizzle1(b.v, 1, 0)), mask)));
#endif
}
};
template<> struct ei_conj_helper<Packet1cd, Packet1cd, true,true>
template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return ei_padd(pmul(x,y),c); }
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{
#ifdef EIGEN_VECTORIZE_SSE3
return ei_pconj(ei_pmul(a, b));
return pconj(internal::pmul(a, b));
#else
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(ei_vec2d_swizzle1(a.v, 0, 0), b.v), mask),
_mm_mul_pd(ei_vec2d_swizzle1(a.v, 1, 1),
ei_vec2d_swizzle1(b.v, 1, 0))));
return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
vec2d_swizzle1(b.v, 1, 0))));
#endif
}
};
template<> struct ei_conj_helper<Packet2d, Packet1cd, false,false>
template<> struct conj_helper<Packet2d, Packet1cd, false,false>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
{ return ei_padd(c, pmul(x,y)); }
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
{ return Packet1cd(ei_pmul(x, y.v)); }
{ return Packet1cd(Eigen::internal::pmul(x, y.v)); }
};
template<> struct ei_conj_helper<Packet1cd, Packet2d, false,false>
template<> struct conj_helper<Packet1cd, Packet2d, false,false>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
{ return ei_padd(c, pmul(x,y)); }
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
{ return Packet1cd(ei_pmul(x.v, y)); }
{ return Packet1cd(Eigen::internal::pmul(x.v, y)); }
};
template<> EIGEN_STRONG_INLINE Packet1cd ei_pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for SSE3 and 4
Packet1cd res = ei_conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
__m128d s = _mm_mul_pd(b.v,b.v);
return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
}
EIGEN_STRONG_INLINE Packet1cd ei_pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
{
return Packet1cd(ei_preverse(x.v));
return Packet1cd(preverse(x.v));
}
} // end namespace internal
#endif // EIGEN_COMPLEX_SSE_H

View File

@@ -30,8 +30,10 @@
#ifndef EIGEN_MATH_FUNCTIONS_SSE_H
#define EIGEN_MATH_FUNCTIONS_SSE_H
namespace internal {
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f ei_plog<Packet4f>(const Packet4f& _x)
Packet4f plog<Packet4f>(const Packet4f& _x)
{
Packet4f x = _x;
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
@@ -64,15 +66,15 @@ Packet4f ei_plog<Packet4f>(const Packet4f& _x)
Packet4f invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps());
x = ei_pmax(x, ei_p4f_min_norm_pos); /* cut off denormalized stuff */
x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
/* keep only the fractional part */
x = _mm_and_ps(x, ei_p4f_inv_mant_mask);
x = _mm_or_ps(x, ei_p4f_half);
x = _mm_and_ps(x, p4f_inv_mant_mask);
x = _mm_or_ps(x, p4f_half);
emm0 = _mm_sub_epi32(emm0, ei_p4i_0x7f);
Packet4f e = ei_padd(_mm_cvtepi32_ps(emm0), ei_p4f_1);
emm0 = _mm_sub_epi32(emm0, p4i_0x7f);
Packet4f e = padd(_mm_cvtepi32_ps(emm0), p4f_1);
/* part2:
if( x < SQRTHF ) {
@@ -80,38 +82,38 @@ Packet4f ei_plog<Packet4f>(const Packet4f& _x)
x = x + x - 1.0;
} else { x = x - 1.0; }
*/
Packet4f mask = _mm_cmplt_ps(x, ei_p4f_cephes_SQRTHF);
Packet4f mask = _mm_cmplt_ps(x, p4f_cephes_SQRTHF);
Packet4f tmp = _mm_and_ps(x, mask);
x = ei_psub(x, ei_p4f_1);
e = ei_psub(e, _mm_and_ps(ei_p4f_1, mask));
x = ei_padd(x, tmp);
x = psub(x, p4f_1);
e = psub(e, _mm_and_ps(p4f_1, mask));
x = padd(x, tmp);
Packet4f x2 = ei_pmul(x,x);
Packet4f x3 = ei_pmul(x2,x);
Packet4f x2 = pmul(x,x);
Packet4f x3 = pmul(x2,x);
Packet4f y, y1, y2;
y = ei_pmadd(ei_p4f_cephes_log_p0, x, ei_p4f_cephes_log_p1);
y1 = ei_pmadd(ei_p4f_cephes_log_p3, x, ei_p4f_cephes_log_p4);
y2 = ei_pmadd(ei_p4f_cephes_log_p6, x, ei_p4f_cephes_log_p7);
y = ei_pmadd(y , x, ei_p4f_cephes_log_p2);
y1 = ei_pmadd(y1, x, ei_p4f_cephes_log_p5);
y2 = ei_pmadd(y2, x, ei_p4f_cephes_log_p8);
y = ei_pmadd(y, x3, y1);
y = ei_pmadd(y, x3, y2);
y = ei_pmul(y, x3);
y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
y = pmadd(y , x, p4f_cephes_log_p2);
y1 = pmadd(y1, x, p4f_cephes_log_p5);
y2 = pmadd(y2, x, p4f_cephes_log_p8);
y = pmadd(y, x3, y1);
y = pmadd(y, x3, y2);
y = pmul(y, x3);
y1 = ei_pmul(e, ei_p4f_cephes_log_q1);
tmp = ei_pmul(x2, ei_p4f_half);
y = ei_padd(y, y1);
x = ei_psub(x, tmp);
y2 = ei_pmul(e, ei_p4f_cephes_log_q2);
x = ei_padd(x, y);
x = ei_padd(x, y2);
y1 = pmul(e, p4f_cephes_log_q1);
tmp = pmul(x2, p4f_half);
y = padd(y, y1);
x = psub(x, tmp);
y2 = pmul(e, p4f_cephes_log_q2);
x = padd(x, y);
x = padd(x, y2);
return _mm_or_ps(x, invalid_mask); // negative arg will be NAN
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f ei_pexp<Packet4f>(const Packet4f& _x)
Packet4f pexp<Packet4f>(const Packet4f& _x)
{
Packet4f x = _x;
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
@@ -119,7 +121,7 @@ Packet4f ei_pexp<Packet4f>(const Packet4f& _x)
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
_EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647949f);
_EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
_EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
@@ -137,40 +139,40 @@ Packet4f ei_pexp<Packet4f>(const Packet4f& _x)
Packet4i emm0;
// clamp x
x = ei_pmax(ei_pmin(x, ei_p4f_exp_hi), ei_p4f_exp_lo);
x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
/* express exp(x) as exp(g + n*log(2)) */
fx = ei_pmadd(x, ei_p4f_cephes_LOG2EF, ei_p4f_half);
fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
/* how to perform a floorf with SSE: just below */
emm0 = _mm_cvttps_epi32(fx);
tmp = _mm_cvtepi32_ps(emm0);
/* if greater, substract 1 */
Packet4f mask = _mm_cmpgt_ps(tmp, fx);
mask = _mm_and_ps(mask, ei_p4f_1);
fx = ei_psub(tmp, mask);
mask = _mm_and_ps(mask, p4f_1);
fx = psub(tmp, mask);
tmp = ei_pmul(fx, ei_p4f_cephes_exp_C1);
Packet4f z = ei_pmul(fx, ei_p4f_cephes_exp_C2);
x = ei_psub(x, tmp);
x = ei_psub(x, z);
tmp = pmul(fx, p4f_cephes_exp_C1);
Packet4f z = pmul(fx, p4f_cephes_exp_C2);
x = psub(x, tmp);
x = psub(x, z);
z = ei_pmul(x,x);
z = pmul(x,x);
Packet4f y = ei_p4f_cephes_exp_p0;
y = ei_pmadd(y, x, ei_p4f_cephes_exp_p1);
y = ei_pmadd(y, x, ei_p4f_cephes_exp_p2);
y = ei_pmadd(y, x, ei_p4f_cephes_exp_p3);
y = ei_pmadd(y, x, ei_p4f_cephes_exp_p4);
y = ei_pmadd(y, x, ei_p4f_cephes_exp_p5);
y = ei_pmadd(y, z, x);
y = ei_padd(y, ei_p4f_1);
Packet4f y = p4f_cephes_exp_p0;
y = pmadd(y, x, p4f_cephes_exp_p1);
y = pmadd(y, x, p4f_cephes_exp_p2);
y = pmadd(y, x, p4f_cephes_exp_p3);
y = pmadd(y, x, p4f_cephes_exp_p4);
y = pmadd(y, x, p4f_cephes_exp_p5);
y = pmadd(y, z, x);
y = padd(y, p4f_1);
/* build 2^n */
// build 2^n
emm0 = _mm_cvttps_epi32(fx);
emm0 = _mm_add_epi32(emm0, ei_p4i_0x7f);
emm0 = _mm_add_epi32(emm0, p4i_0x7f);
emm0 = _mm_slli_epi32(emm0, 23);
return ei_pmul(y, _mm_castsi128_ps(emm0));
return pmul(y, _mm_castsi128_ps(emm0));
}
/* evaluation of 4 sines at onces, using SSE2 intrinsics.
@@ -186,7 +188,7 @@ Packet4f ei_pexp<Packet4f>(const Packet4f& _x)
*/
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f ei_psin<Packet4f>(const Packet4f& _x)
Packet4f psin<Packet4f>(const Packet4f& _x)
{
Packet4f x = _x;
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
@@ -215,24 +217,24 @@ Packet4f ei_psin<Packet4f>(const Packet4f& _x)
Packet4i emm0, emm2;
sign_bit = x;
/* take the absolute value */
x = ei_pabs(x);
x = pabs(x);
/* take the modulo */
/* extract the sign bit (upper one) */
sign_bit = _mm_and_ps(sign_bit, ei_p4f_sign_mask);
sign_bit = _mm_and_ps(sign_bit, p4f_sign_mask);
/* scale by 4/Pi */
y = ei_pmul(x, ei_p4f_cephes_FOPI);
y = pmul(x, p4f_cephes_FOPI);
/* store the integer part of y in mm0 */
emm2 = _mm_cvttps_epi32(y);
/* j=(j+1) & (~1) (see the cephes sources) */
emm2 = _mm_add_epi32(emm2, ei_p4i_1);
emm2 = _mm_and_si128(emm2, ei_p4i_not1);
emm2 = _mm_add_epi32(emm2, p4i_1);
emm2 = _mm_and_si128(emm2, p4i_not1);
y = _mm_cvtepi32_ps(emm2);
/* get the swap sign flag */
emm0 = _mm_and_si128(emm2, ei_p4i_4);
emm0 = _mm_and_si128(emm2, p4i_4);
emm0 = _mm_slli_epi32(emm0, 29);
/* get the polynom selection mask
there is one polynom for 0 <= x <= Pi/4
@@ -240,7 +242,7 @@ Packet4f ei_psin<Packet4f>(const Packet4f& _x)
Both branches will be computed.
*/
emm2 = _mm_and_si128(emm2, ei_p4i_2);
emm2 = _mm_and_si128(emm2, p4i_2);
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
Packet4f swap_sign_bit = _mm_castsi128_ps(emm0);
@@ -249,33 +251,33 @@ Packet4f ei_psin<Packet4f>(const Packet4f& _x)
/* The magic pass: "Extended precision modular arithmetic"
x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1 = ei_pmul(y, ei_p4f_minus_cephes_DP1);
xmm2 = ei_pmul(y, ei_p4f_minus_cephes_DP2);
xmm3 = ei_pmul(y, ei_p4f_minus_cephes_DP3);
x = ei_padd(x, xmm1);
x = ei_padd(x, xmm2);
x = ei_padd(x, xmm3);
xmm1 = pmul(y, p4f_minus_cephes_DP1);
xmm2 = pmul(y, p4f_minus_cephes_DP2);
xmm3 = pmul(y, p4f_minus_cephes_DP3);
x = padd(x, xmm1);
x = padd(x, xmm2);
x = padd(x, xmm3);
/* Evaluate the first polynom (0 <= x <= Pi/4) */
y = ei_p4f_coscof_p0;
y = p4f_coscof_p0;
Packet4f z = _mm_mul_ps(x,x);
y = ei_pmadd(y, z, ei_p4f_coscof_p1);
y = ei_pmadd(y, z, ei_p4f_coscof_p2);
y = ei_pmul(y, z);
y = ei_pmul(y, z);
Packet4f tmp = ei_pmul(z, ei_p4f_half);
y = ei_psub(y, tmp);
y = ei_padd(y, ei_p4f_1);
y = pmadd(y, z, p4f_coscof_p1);
y = pmadd(y, z, p4f_coscof_p2);
y = pmul(y, z);
y = pmul(y, z);
Packet4f tmp = pmul(z, p4f_half);
y = psub(y, tmp);
y = padd(y, p4f_1);
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
Packet4f y2 = ei_p4f_sincof_p0;
y2 = ei_pmadd(y2, z, ei_p4f_sincof_p1);
y2 = ei_pmadd(y2, z, ei_p4f_sincof_p2);
y2 = ei_pmul(y2, z);
y2 = ei_pmul(y2, x);
y2 = ei_padd(y2, x);
Packet4f y2 = p4f_sincof_p0;
y2 = pmadd(y2, z, p4f_sincof_p1);
y2 = pmadd(y2, z, p4f_sincof_p2);
y2 = pmul(y2, z);
y2 = pmul(y2, x);
y2 = padd(y2, x);
/* select the correct result from the two polynoms */
y2 = _mm_and_ps(poly_mask, y2);
@@ -285,9 +287,9 @@ Packet4f ei_psin<Packet4f>(const Packet4f& _x)
return _mm_xor_ps(y, sign_bit);
}
/* almost the same as ei_psin */
/* almost the same as psin */
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f ei_pcos<Packet4f>(const Packet4f& _x)
Packet4f pcos<Packet4f>(const Packet4f& _x)
{
Packet4f x = _x;
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
@@ -312,25 +314,25 @@ Packet4f ei_pcos<Packet4f>(const Packet4f& _x)
Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
Packet4i emm0, emm2;
x = ei_pabs(x);
x = pabs(x);
/* scale by 4/Pi */
y = ei_pmul(x, ei_p4f_cephes_FOPI);
y = pmul(x, p4f_cephes_FOPI);
/* get the integer part of y */
emm2 = _mm_cvttps_epi32(y);
/* j=(j+1) & (~1) (see the cephes sources) */
emm2 = _mm_add_epi32(emm2, ei_p4i_1);
emm2 = _mm_and_si128(emm2, ei_p4i_not1);
emm2 = _mm_add_epi32(emm2, p4i_1);
emm2 = _mm_and_si128(emm2, p4i_not1);
y = _mm_cvtepi32_ps(emm2);
emm2 = _mm_sub_epi32(emm2, ei_p4i_2);
emm2 = _mm_sub_epi32(emm2, p4i_2);
/* get the swap sign flag */
emm0 = _mm_andnot_si128(emm2, ei_p4i_4);
emm0 = _mm_andnot_si128(emm2, p4i_4);
emm0 = _mm_slli_epi32(emm0, 29);
/* get the polynom selection mask */
emm2 = _mm_and_si128(emm2, ei_p4i_2);
emm2 = _mm_and_si128(emm2, p4i_2);
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
Packet4f sign_bit = _mm_castsi128_ps(emm0);
@@ -338,31 +340,31 @@ Packet4f ei_pcos<Packet4f>(const Packet4f& _x)
/* The magic pass: "Extended precision modular arithmetic"
x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1 = ei_pmul(y, ei_p4f_minus_cephes_DP1);
xmm2 = ei_pmul(y, ei_p4f_minus_cephes_DP2);
xmm3 = ei_pmul(y, ei_p4f_minus_cephes_DP3);
x = ei_padd(x, xmm1);
x = ei_padd(x, xmm2);
x = ei_padd(x, xmm3);
xmm1 = pmul(y, p4f_minus_cephes_DP1);
xmm2 = pmul(y, p4f_minus_cephes_DP2);
xmm3 = pmul(y, p4f_minus_cephes_DP3);
x = padd(x, xmm1);
x = padd(x, xmm2);
x = padd(x, xmm3);
/* Evaluate the first polynom (0 <= x <= Pi/4) */
y = ei_p4f_coscof_p0;
Packet4f z = ei_pmul(x,x);
y = p4f_coscof_p0;
Packet4f z = pmul(x,x);
y = ei_pmadd(y,z,ei_p4f_coscof_p1);
y = ei_pmadd(y,z,ei_p4f_coscof_p2);
y = ei_pmul(y, z);
y = ei_pmul(y, z);
Packet4f tmp = _mm_mul_ps(z, ei_p4f_half);
y = ei_psub(y, tmp);
y = ei_padd(y, ei_p4f_1);
y = pmadd(y,z,p4f_coscof_p1);
y = pmadd(y,z,p4f_coscof_p2);
y = pmul(y, z);
y = pmul(y, z);
Packet4f tmp = _mm_mul_ps(z, p4f_half);
y = psub(y, tmp);
y = padd(y, p4f_1);
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
Packet4f y2 = ei_p4f_sincof_p0;
y2 = ei_pmadd(y2, z, ei_p4f_sincof_p1);
y2 = ei_pmadd(y2, z, ei_p4f_sincof_p2);
y2 = ei_pmul(y2, z);
y2 = ei_pmadd(y2, x, x);
Packet4f y2 = p4f_sincof_p0;
y2 = pmadd(y2, z, p4f_sincof_p1);
y2 = pmadd(y2, z, p4f_sincof_p2);
y2 = pmul(y2, z);
y2 = pmadd(y2, x, x);
/* select the correct result from the two polynoms */
y2 = _mm_and_ps(poly_mask, y2);
@@ -376,16 +378,18 @@ Packet4f ei_pcos<Packet4f>(const Packet4f& _x)
// This is based on Quake3's fast inverse square root.
// For detail see here: http://www.beyond3d.com/content/articles/8/
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f ei_psqrt<Packet4f>(const Packet4f& _x)
Packet4f psqrt<Packet4f>(const Packet4f& _x)
{
Packet4f half = ei_pmul(_x, ei_pset1<Packet4f>(.5f));
Packet4f half = pmul(_x, pset1<Packet4f>(.5f));
/* select only the inverse sqrt of non-zero inputs */
Packet4f non_zero_mask = _mm_cmpgt_ps(_x, ei_pset1<Packet4f>(std::numeric_limits<float>::epsilon()));
Packet4f non_zero_mask = _mm_cmpgt_ps(_x, pset1<Packet4f>(std::numeric_limits<float>::epsilon()));
Packet4f x = _mm_and_ps(non_zero_mask, _mm_rsqrt_ps(_x));
x = ei_pmul(x, ei_psub(ei_pset1<Packet4f>(1.5f), ei_pmul(half, ei_pmul(x,x))));
return ei_pmul(_x,x);
x = pmul(x, psub(pset1<Packet4f>(1.5f), pmul(half, pmul(x,x))));
return pmul(_x,x);
}
} // end namespace internal
#endif // EIGEN_MATH_FUNCTIONS_SSE_H

View File

@@ -25,6 +25,8 @@
#ifndef EIGEN_PACKET_MATH_SSE_H
#define EIGEN_PACKET_MATH_SSE_H
namespace internal {
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif
@@ -37,36 +39,36 @@ typedef __m128 Packet4f;
typedef __m128i Packet4i;
typedef __m128d Packet2d;
template<> struct ei_is_arithmetic<__m128> { enum { ret = true }; };
template<> struct ei_is_arithmetic<__m128i> { enum { ret = true }; };
template<> struct ei_is_arithmetic<__m128d> { enum { ret = true }; };
template<> struct is_arithmetic<__m128> { enum { value = true }; };
template<> struct is_arithmetic<__m128i> { enum { value = true }; };
template<> struct is_arithmetic<__m128d> { enum { value = true }; };
#define ei_vec4f_swizzle1(v,p,q,r,s) \
#define vec4f_swizzle1(v,p,q,r,s) \
(_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p)))))
#define ei_vec4i_swizzle1(v,p,q,r,s) \
#define vec4i_swizzle1(v,p,q,r,s) \
(_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
#define ei_vec2d_swizzle1(v,p,q) \
#define vec2d_swizzle1(v,p,q) \
(_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
#define ei_vec4f_swizzle2(a,b,p,q,r,s) \
#define vec4f_swizzle2(a,b,p,q,r,s) \
(_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
#define ei_vec4i_swizzle2(a,b,p,q,r,s) \
#define vec4i_swizzle2(a,b,p,q,r,s) \
(_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
const Packet4f ei_p4f_##NAME = ei_pset1<Packet4f>(X)
const Packet4f p4f_##NAME = pset1<Packet4f>(X)
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
const Packet4f ei_p4f_##NAME = _mm_castsi128_ps(ei_pset1<Packet4i>(X))
const Packet4f p4f_##NAME = _mm_castsi128_ps(pset1<Packet4i>(X))
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i ei_p4i_##NAME = ei_pset1<Packet4i>(X)
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
template<> struct ei_packet_traits<float> : ei_default_packet_traits
template<> struct packet_traits<float> : default_packet_traits
{
typedef Packet4f type;
enum {
@@ -82,7 +84,7 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
HasSqrt = 1
};
};
template<> struct ei_packet_traits<double> : ei_default_packet_traits
template<> struct packet_traits<double> : default_packet_traits
{
typedef Packet2d type;
enum {
@@ -93,7 +95,7 @@ template<> struct ei_packet_traits<double> : ei_default_packet_traits
HasDiv = 1
};
};
template<> struct ei_packet_traits<int> : ei_default_packet_traits
template<> struct packet_traits<int> : default_packet_traits
{
typedef Packet4i type;
enum {
@@ -104,125 +106,124 @@ template<> struct ei_packet_traits<int> : ei_default_packet_traits
};
};
template<> struct ei_unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
template<> struct ei_unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
#ifdef __GNUC__
// Sometimes GCC implements _mm_set1_p* using multiple moves,
// that is inefficient :( (e.g., see ei_gemm_pack_rhs)
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) {
Packet4f res = _mm_set_ss(from);
return ei_vec4f_swizzle1(res,0,0,0,0);
}
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<Packet2d>(const double& from) {
// NOTE the SSE3 intrinsic _mm_loaddup_pd is never faster but sometimes much slower
Packet2d res = _mm_set_sd(from);
return ei_vec2d_swizzle1(res, 0, 0);
}
#else
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
#endif
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) { return _mm_add_ps(ei_pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet2d ei_plset<double>(const double& a) { return _mm_add_pd(ei_pset1<Packet2d>(a),_mm_set_pd(1,0)); }
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) { return _mm_add_epi32(ei_pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pnegate(const Packet4f& a)
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
{
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
return _mm_xor_ps(a,mask);
}
template<> EIGEN_STRONG_INLINE Packet2d ei_pnegate(const Packet2d& a)
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
{
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
return _mm_xor_pd(a,mask);
}
template<> EIGEN_STRONG_INLINE Packet4i ei_pnegate(const Packet4i& a)
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
{
return ei_psub(_mm_setr_epi32(0,0,0,0), a);
return psub(_mm_setr_epi32(0,0,0,0), a);
}
template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
return _mm_mullo_epi32(a,b);
#else
// this version is slightly faster than 4 scalar products
return ei_vec4i_swizzle1(
ei_vec4i_swizzle2(
return vec4i_swizzle1(
vec4i_swizzle2(
_mm_mul_epu32(a,b),
_mm_mul_epu32(ei_vec4i_swizzle1(a,1,0,3,2),
ei_vec4i_swizzle1(b,1,0,3,2)),
_mm_mul_epu32(vec4i_swizzle1(a,1,0,3,2),
vec4i_swizzle1(b,1,0,3,2)),
0,2,0,2),
0,2,1,3);
#endif
}
template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
{ ei_assert(false && "packet integer division are not supported by SSE");
return ei_pset1<Packet4i>(0);
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
{ eigen_assert(false && "packet integer division are not supported by SSE");
return pset1<Packet4i>(0);
}
// for some weird raisons, it has to be overloaded for packet of integers
template<> EIGEN_STRONG_INLINE Packet4i ei_pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return ei_padd(ei_pmul(a,b), c); }
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
{
// after some bench, this version *is* faster than a scalar implementation
Packet4i mask = _mm_cmplt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
}
template<> EIGEN_STRONG_INLINE Packet4f ei_pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
// Lane-wise maximum of two packets of four 32-bit signed integers.
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
{
  // Branch-free select; after some bench, this version *is* faster than a scalar implementation.
  // Every 32-bit lane of a_is_larger is all ones where a > b and all zeros elsewhere.
  const Packet4i a_is_larger = _mm_cmpgt_epi32(a,b);
  const Packet4i lanes_from_a = _mm_and_si128(a_is_larger,a);
  const Packet4i lanes_from_b = _mm_andnot_si128(a_is_larger,b);
  return _mm_or_si128(lanes_from_a,lanes_from_b);
}
template<> EIGEN_STRONG_INLINE Packet4f ei_pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
// Bitwise and-not for float, double and 32-bit integer packets; each forwards directly to the SSE intrinsic.
// NOTE(review): _mm_andnot_ps/_mm_andnot_pd/_mm_andnot_si128(a,b) compute (~a) & b, i.e. it is the FIRST
// operand that gets complemented. Confirm this operand order against the generic pandnot contract and its callers.
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); }
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); }
#if defined(_MSC_VER)
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); }
// Unaligned load of four consecutive floats starting at `from` (variant compiled when _MSC_VER is defined).
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
EIGEN_DEBUG_UNALIGNED_LOAD
#if (_MSC_VER==1600)
// NOTE Some versions of MSVC10 generate bad code when using _mm_loadu_ps
// (i.e., they do not generate an unaligned load!!)
// TODO On most architectures this version should also be faster than a single _mm_loadu_ps,
// so we could also enable it for MSVC08, but first we have to make sure the latter does not generate crap when doing so...
// Workaround: assemble the packet from two 64-bit halves, from[0..1] into the low half and from[2..3] into the high half.
__m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from));
res = _mm_loadh_pi(res, (const __m64*)(from+2));
return res;
#else
return _mm_loadu_ps(from);
#endif
}
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); }
#else
// Fast unaligned loads. Note that here we cannot directly use intrinsics: this would
// require pointer casting to incompatible pointer types and leads to invalid code
@@ -230,97 +231,133 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pload<Packet4i>(const int* from)
// a correct instruction dependency.
// TODO: do the same for MSVC (ICC is compatible)
// NOTE: with the code below, MSVC's compiler crashes!
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu<Packet4f>(const float* from)
// EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS selects how the ploadu specializations below are implemented:
// 1 makes them fall back to the plain _mm_loadu_* intrinsics, 0 enables the hand-written
// _mm_load_sd/_mm_loadh_pd sequence. It is forced to 1 on toolchains where that sequence is known to misbehave.
#if defined(__GNUC__) && defined(__i386__)
// bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
#elif defined(__clang__)
// bug 201: Segfaults in _mm_loadh_pd with clang 2.8
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
#else
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 0
#endif
// Unaligned load of four consecutive floats starting at `from` (non-MSVC variant).
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
{
EIGEN_DEBUG_UNALIGNED_LOAD
#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
// Toolchains flagged above as problematic use the plain intrinsic.
return _mm_loadu_ps(from);
#else
// Custom path: read the 16 bytes as two 64-bit halves through the double-precision load
// intrinsics (from[0..1] into the low half, from[2..3] into the high half), then
// reinterpret the register as four floats.
__m128d res;
res = _mm_load_sd((const double*)(from)) ;
res = _mm_loadh_pd(res, (const double*)(from+2)) ;
return _mm_castpd_ps(res);
#endif
}
template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu<Packet2d>(const double* from)
// Unaligned load of two consecutive doubles starting at `from` (non-MSVC variant).
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
{
EIGEN_DEBUG_UNALIGNED_LOAD
#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
// Toolchains flagged above as problematic use the plain intrinsic.
return _mm_loadu_pd(from);
#else
// Custom path: load from[0] into the low lane, then from[1] into the high lane.
__m128d res;
res = _mm_load_sd(from) ;
res = _mm_loadh_pd(res,from+1);
return res;
#endif
}
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<Packet4i>(const int* from)
// Unaligned load of four consecutive ints starting at `from` (non-MSVC variant).
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
{
EIGEN_DEBUG_UNALIGNED_LOAD
#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
// Toolchains flagged above as problematic use the plain intrinsic.
return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from));
#else
// Custom path: read the 16 bytes as two 64-bit halves through the double-precision load
// intrinsics (from[0..1] into the low half, from[2..3] into the high half), then
// reinterpret the register as an integer packet.
__m128d res;
res = _mm_load_sd((const double*)(from)) ;
res = _mm_loadh_pd(res, (const double*)(from+2)) ;
return _mm_castpd_si128(res);
#endif
}
#endif
template<> EIGEN_STRONG_INLINE Packet4f ei_ploaddup<Packet4f>(const float* from)
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
{
return ei_vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd((const double*)from)), 0, 0, 1, 1);
return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd((const double*)from)), 0, 0, 1, 1);
}
template<> EIGEN_STRONG_INLINE Packet2d ei_ploaddup<Packet2d>(const double* from)
{ return ei_pset1<Packet2d>(from[0]); }
template<> EIGEN_STRONG_INLINE Packet4i ei_ploaddup<Packet4i>(const int* from)
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
{ return pset1<Packet2d>(from[0]); }
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
{
Packet4i tmp;
tmp = _mm_loadl_epi64(reinterpret_cast<const Packet4i*>(from));
return ei_vec4i_swizzle1(tmp, 0, 0, 1, 1);
return vec4i_swizzle1(tmp, 0, 0, 1, 1);
}
template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
template<> EIGEN_STRONG_INLINE void ei_pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<Packet4i*>(to), from); }
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<Packet4i*>(to), from); }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<double>(double* to, const Packet2d& from) {
// Unaligned store of a packet of two doubles to `to`.
// The packet is written as two separate 64-bit stores: the low lane goes to to[0], the high lane to to[1].
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {
EIGEN_DEBUG_UNALIGNED_STORE
_mm_storel_pd((to), from);
_mm_storeh_pd((to+1), from);
}
template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((double*)to, _mm_castps_pd(from)); }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((double*)to, _mm_castsi128_pd(from)); }
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, _mm_castps_pd(from)); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, _mm_castsi128_pd(from)); }
template<> EIGEN_STRONG_INLINE void ei_prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void ei_prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void ei_prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
// Stores the scalar `a` into a whole float packet at `to`, going through pstore (the aligned store).
// Some compilers might be tempted to perform multiple moves instead of using a vector path,
// hence the explicit scalar-to-register move followed by a swizzle.
template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
{
// _mm_set_ss places `a` in the lowest lane and zeroes the others.
Packet4f pa = _mm_set_ss(a);
// presumably vec4f_swizzle1(pa,0,0,0,0) replicates lane 0 into all four lanes -- defined elsewhere, confirm.
pstore(to, vec4f_swizzle1(pa,0,0,0,0));
}
// Stores the scalar `a` into a whole double packet at `to`, going through pstore (the aligned store).
// Some compilers might be tempted to perform multiple moves instead of using a vector path,
// hence the explicit scalar-to-register move followed by a swizzle.
template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
{
// _mm_set_sd places `a` in the low lane and zeroes the high one.
Packet2d pa = _mm_set_sd(a);
// presumably vec2d_swizzle1(pa,0,0) replicates lane 0 into both lanes -- defined elsewhere, confirm.
pstore(to, vec2d_swizzle1(pa,0,0));
}
#if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64) && !defined(__INTEL_COMPILER)
// The temporary variable fixes an internal compilation error.
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
#if defined(_MSC_VER) && defined(_WIN64) && !defined(__INTEL_COMPILER)
// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
// Direct access to the struct members fixed bug #62.
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
#elif defined(_MSC_VER) && (_MSC_VER <= 1500) && !defined(__INTEL_COMPILER)
// The temporary variable fixes an internal compilation error.
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
#elif defined(_MSC_VER) && !defined(__INTEL_COMPILER)
// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
#else
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
#endif
template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a)
// Returns `a` with its four float lanes in reverse order.
// The shuffle immediate 0x1B is 0b00'01'10'11: result lanes 0..3 are taken from source lanes 3,2,1,0.
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
{ return _mm_shuffle_ps(a,a,0x1B); }
template<> EIGEN_STRONG_INLINE Packet2d ei_preverse(const Packet2d& a)
// Returns `a` with its two double lanes swapped.
// The shuffle immediate 0x1 picks the high lane of `a` for result lane 0 and the low lane for result lane 1.
template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
{ return _mm_shuffle_pd(a,a,0x1); }
template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a)
// Returns `a` with its four 32-bit integer lanes in reverse order.
// The shuffle immediate 0x1B is 0b00'01'10'11: result lanes 0..3 are taken from source lanes 3,2,1,0.
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
{ return _mm_shuffle_epi32(a,0x1B); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pabs(const Packet4f& a)
// Lane-wise absolute value of four floats, obtained by clearing each lane's sign bit.
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
{
  // 0x7FFFFFFF keeps every bit of a 32-bit lane except the top (sign) bit.
  const __m128i keep_all_but_sign = _mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF);
  return _mm_and_ps(a,_mm_castsi128_ps(keep_all_but_sign));
}
template<> EIGEN_STRONG_INLINE Packet2d ei_pabs(const Packet2d& a)
// Lane-wise absolute value of two doubles, obtained by clearing each lane's sign bit.
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a)
{
  // Per 64-bit lane (low 32-bit word first): keep the whole low word and all but the top bit of the high word.
  const __m128i keep_all_but_sign = _mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF);
  return _mm_and_pd(a,_mm_castsi128_pd(keep_all_but_sign));
}
template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a)
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
{
#ifdef EIGEN_VECTORIZE_SSSE3
return _mm_abs_epi32(a);
@@ -330,7 +367,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a)
#endif
}
EIGEN_STRONG_INLINE void ei_punpackp(Packet4f* vecs)
EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
{
vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
@@ -340,47 +377,47 @@ EIGEN_STRONG_INLINE void ei_punpackp(Packet4f* vecs)
#ifdef EIGEN_VECTORIZE_SSE3
// TODO implement SSE2 versions as well as integer versions
template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs)
// SSE3 variant: reduces four float packets at once; lane i of the result is the sum of the lanes of vecs[i].
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
{
  // First level of horizontal adds pairs up neighbouring lanes of two inputs each.
  const Packet4f pair_sums_01 = _mm_hadd_ps(vecs[0], vecs[1]);
  const Packet4f pair_sums_23 = _mm_hadd_ps(vecs[2], vecs[3]);
  // Second level collapses the pair sums into one total per input packet.
  return _mm_hadd_ps(pair_sums_01,pair_sums_23);
}
template<> EIGEN_STRONG_INLINE Packet2d ei_preduxp<Packet2d>(const Packet2d* vecs)
// SSE3 variant: reduces two double packets at once; lane i of the result is the sum of the lanes of vecs[i].
template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
{
  const Packet2d& first = vecs[0];
  const Packet2d& second = vecs[1];
  return _mm_hadd_pd(first, second);
}
// SSSE3 version:
// EIGEN_STRONG_INLINE Packet4i ei_preduxp(const Packet4i* vecs)
// EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs)
// {
// return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
// }
template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a)
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
{
Packet4f tmp0 = _mm_hadd_ps(a,a);
return ei_pfirst(_mm_hadd_ps(tmp0, tmp0));
return pfirst(_mm_hadd_ps(tmp0, tmp0));
}
template<> EIGEN_STRONG_INLINE double ei_predux<Packet2d>(const Packet2d& a) { return ei_pfirst(_mm_hadd_pd(a, a)); }
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return pfirst(_mm_hadd_pd(a, a)); }
// SSSE3 version:
// EIGEN_STRONG_INLINE float ei_predux(const Packet4i& a)
// EIGEN_STRONG_INLINE float predux(const Packet4i& a)
// {
// Packet4i tmp0 = _mm_hadd_epi32(a,a);
// return ei_pfirst(_mm_hadd_epi32(tmp0, tmp0));
// return pfirst(_mm_hadd_epi32(tmp0, tmp0));
// }
#else
// SSE2 versions
template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a)
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
{
Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
return ei_pfirst(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
return pfirst(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
}
template<> EIGEN_STRONG_INLINE double ei_predux<Packet2d>(const Packet2d& a)
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
{
return ei_pfirst(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
return pfirst(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
}
template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs)
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
{
Packet4f tmp0, tmp1, tmp2;
tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
@@ -394,19 +431,19 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vec
return _mm_add_ps(tmp0, tmp2);
}
template<> EIGEN_STRONG_INLINE Packet2d ei_preduxp<Packet2d>(const Packet2d* vecs)
// SSE2 variant: reduces two double packets at once; lane i of the result is the sum of the lanes of vecs[i].
template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
{
  // Gather the low lanes of both inputs in one register and the high lanes in another, then add them.
  const Packet2d low_lanes = _mm_unpacklo_pd(vecs[0], vecs[1]);
  const Packet2d high_lanes = _mm_unpackhi_pd(vecs[0], vecs[1]);
  return _mm_add_pd(low_lanes, high_lanes);
}
#endif // SSE3
template<> EIGEN_STRONG_INLINE int ei_predux<Packet4i>(const Packet4i& a)
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
{
Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
return ei_pfirst(tmp) + ei_pfirst(_mm_shuffle_epi32(tmp, 1));
return pfirst(tmp) + pfirst(_mm_shuffle_epi32(tmp, 1));
}
template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vecs)
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
{
Packet4i tmp0, tmp1, tmp2;
tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
@@ -423,69 +460,69 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vec
// Other reduction functions:
// mul
template<> EIGEN_STRONG_INLINE float ei_predux_mul<Packet4f>(const Packet4f& a)
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
{
Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
return ei_pfirst(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
return pfirst(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
}
template<> EIGEN_STRONG_INLINE double ei_predux_mul<Packet2d>(const Packet2d& a)
template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
{
return ei_pfirst(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
return pfirst(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
}
template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a)
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
{
// after some experiments, it seems this is the fastest way to implement it
// for GCC (eg., reusing ei_pmul is very slow !)
// for GCC (eg., reusing pmul is very slow !)
// TODO try to call _mm_mul_epu32 directly
EIGEN_ALIGN16 int aux[4];
ei_pstore(aux, a);
pstore(aux, a);
return (aux[0] * aux[1]) * (aux[2] * aux[3]);;
}
// min
template<> EIGEN_STRONG_INLINE float ei_predux_min<Packet4f>(const Packet4f& a)
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
{
Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
return ei_pfirst(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
return pfirst(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
}
template<> EIGEN_STRONG_INLINE double ei_predux_min<Packet2d>(const Packet2d& a)
template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
{
return ei_pfirst(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
return pfirst(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
}
template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a)
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
{
// after some experiments, it seems this is the fastest way to implement it
// for GCC (eg., it does not like using std::min after the ei_pstore !!)
// for GCC (eg., it does not like using std::min after the pstore !!)
EIGEN_ALIGN16 int aux[4];
ei_pstore(aux, a);
register int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
register int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
pstore(aux, a);
int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
return aux0<aux2 ? aux0 : aux2;
}
// max
template<> EIGEN_STRONG_INLINE float ei_predux_max<Packet4f>(const Packet4f& a)
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
{
Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
return ei_pfirst(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
return pfirst(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
}
template<> EIGEN_STRONG_INLINE double ei_predux_max<Packet2d>(const Packet2d& a)
template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
{
return ei_pfirst(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
return pfirst(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
}
template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
{
// after some experiments, it seems this is the fastest way to implement it
// for GCC (eg., it does not like using std::min after the ei_pstore !!)
// for GCC (eg., it does not like using std::min after the pstore !!)
EIGEN_ALIGN16 int aux[4];
ei_pstore(aux, a);
register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
pstore(aux, a);
int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
return aux0>aux2 ? aux0 : aux2;
}
#if (defined __GNUC__)
// template <> EIGEN_STRONG_INLINE Packet4f ei_pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
// template <> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
// {
// Packet4f res = b;
// asm("mulps %[a], %[b] \n\taddps %[c], %[b]" : [b] "+x" (res) : [a] "x" (a), [c] "x" (c));
@@ -502,7 +539,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
#ifdef EIGEN_VECTORIZE_SSSE3
// SSSE3 versions
template<int Offset>
struct ei_palign_impl<Offset,Packet4f>
struct palign_impl<Offset,Packet4f>
{
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
{
@@ -512,7 +549,7 @@ struct ei_palign_impl<Offset,Packet4f>
};
template<int Offset>
struct ei_palign_impl<Offset,Packet4i>
struct palign_impl<Offset,Packet4i>
{
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
{
@@ -522,7 +559,7 @@ struct ei_palign_impl<Offset,Packet4i>
};
template<int Offset>
struct ei_palign_impl<Offset,Packet2d>
struct palign_impl<Offset,Packet2d>
{
EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
{
@@ -533,7 +570,7 @@ struct ei_palign_impl<Offset,Packet2d>
#else
// SSE2 versions
template<int Offset>
struct ei_palign_impl<Offset,Packet4f>
struct palign_impl<Offset,Packet4f>
{
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
{
@@ -556,7 +593,7 @@ struct ei_palign_impl<Offset,Packet4f>
};
template<int Offset>
struct ei_palign_impl<Offset,Packet4i>
struct palign_impl<Offset,Packet4i>
{
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
{
@@ -579,7 +616,7 @@ struct ei_palign_impl<Offset,Packet4i>
};
template<int Offset>
struct ei_palign_impl<Offset,Packet2d>
struct palign_impl<Offset,Packet2d>
{
EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
{
@@ -592,4 +629,6 @@ struct ei_palign_impl<Offset,Packet2d>
};
#endif
} // end namespace internal
#endif // EIGEN_PACKET_MATH_SSE_H

View File

@@ -26,6 +26,8 @@
#ifndef EIGEN_COEFFBASED_PRODUCT_H
#define EIGEN_COEFFBASED_PRODUCT_H
namespace internal {
/*********************************************************************************
* Coefficient based product implementation.
* It is designed for the following use cases:
@@ -40,22 +42,22 @@
*/
template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl;
struct product_coeff_impl;
template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl;
struct product_packet_impl;
template<typename LhsNested, typename RhsNested, int NestingFlags>
struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
{
typedef MatrixXpr XprKind;
typedef typename ei_cleantype<LhsNested>::type _LhsNested;
typedef typename ei_cleantype<RhsNested>::type _RhsNested;
typedef typename ei_scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
typedef typename ei_promote_storage_type<typename ei_traits<_LhsNested>::StorageKind,
typename ei_traits<_RhsNested>::StorageKind>::ret StorageKind;
typedef typename ei_promote_index_type<typename ei_traits<_LhsNested>::Index,
typename ei_traits<_RhsNested>::Index>::type Index;
typedef typename remove_all<LhsNested>::type _LhsNested;
typedef typename remove_all<RhsNested>::type _RhsNested;
typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
typename traits<_RhsNested>::StorageKind>::ret StorageKind;
typedef typename promote_index_type<typename traits<_LhsNested>::Index,
typename traits<_RhsNested>::Index>::type Index;
enum {
LhsCoeffReadCost = _LhsNested::CoeffReadCost,
@@ -73,18 +75,18 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
LhsRowMajor = LhsFlags & RowMajorBit,
RhsRowMajor = RhsFlags & RowMajorBit,
SameType = ei_is_same_type<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::ret,
SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
&& (ColsAtCompileTime == Dynamic
|| ( (ColsAtCompileTime % ei_packet_traits<Scalar>::size) == 0
|| ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
&& (RhsFlags&AlignedBit)
)
),
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
&& (RowsAtCompileTime == Dynamic
|| ( (RowsAtCompileTime % ei_packet_traits<Scalar>::size) == 0
|| ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
&& (LhsFlags&AlignedBit)
)
),
@@ -96,6 +98,7 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
| (EvalToRowMajor ? RowMajorBit : 0)
| NestingFlags
| (LhsFlags & RhsFlags & AlignedBit)
// TODO enable vectorization for mixed types
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
@@ -113,13 +116,15 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
&& (!RhsRowMajor)
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
&& (LhsFlags & RhsFlags & AlignedBit)
&& (InnerSize % ei_packet_traits<Scalar>::size == 0)
&& (InnerSize % packet_traits<Scalar>::size == 0)
};
};
} // end namespace internal
template<typename LhsNested, typename RhsNested, int NestingFlags>
class CoeffBasedProduct
: ei_no_assignment_operator,
: internal::no_assignment_operator,
public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
{
public:
@@ -130,19 +135,19 @@ class CoeffBasedProduct
private:
typedef typename ei_traits<CoeffBasedProduct>::_LhsNested _LhsNested;
typedef typename ei_traits<CoeffBasedProduct>::_RhsNested _RhsNested;
typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
InnerSize = ei_traits<CoeffBasedProduct>::InnerSize,
PacketSize = internal::packet_traits<Scalar>::size,
InnerSize = internal::traits<CoeffBasedProduct>::InnerSize,
Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
CanVectorizeInner = ei_traits<CoeffBasedProduct>::CanVectorizeInner
CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
};
typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
Unroll ? InnerSize-1 : Dynamic,
_LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
Unroll ? InnerSize-1 : Dynamic,
_LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
@@ -158,9 +163,9 @@ class CoeffBasedProduct
{
// we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
// We still allow to mix T and complex<T>.
EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::RealScalar, typename Rhs::RealScalar>::ret),
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
ei_assert(lhs.cols() == rhs.rows()
eigen_assert(lhs.cols() == rhs.rows()
&& "invalid matrix product"
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
}
@@ -191,9 +196,9 @@ class CoeffBasedProduct
EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
{
PacketScalar res;
ei_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
Unroll ? InnerSize-1 : Dynamic,
_LhsNested, _RhsNested, PacketScalar, LoadMode>
internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
Unroll ? InnerSize-1 : Dynamic,
_LhsNested, _RhsNested, PacketScalar, LoadMode>
::run(row, col, m_lhs, m_rhs, res);
return res;
}
@@ -208,14 +213,14 @@ class CoeffBasedProduct
const _LhsNested& lhs() const { return m_lhs; }
const _RhsNested& rhs() const { return m_rhs; }
const Diagonal<LazyCoeffBasedProductType,0> diagonal() const
const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
template<int DiagonalIndex>
const Diagonal<LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
const Diagonal<LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
protected:
@@ -225,10 +230,12 @@ class CoeffBasedProduct
mutable PlainObject m_result;
};
namespace internal {
// here we need to overload the nested rule for products
// such that the nested type is a const reference to a plain matrix
template<typename Lhs, typename Rhs, int N, typename PlainObject>
struct ei_nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
{
typedef PlainObject const& type;
};
@@ -242,18 +249,18 @@ struct ei_nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssign
**************************************/
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
ei_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
}
};
template<typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
@@ -263,12 +270,12 @@ struct ei_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
};
template<typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
{
ei_assert(lhs.cols()>0 && "you are using a non initialized matrix");
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
for(Index i = 1; i < lhs.cols(); ++i)
res += lhs.coeff(row, i) * rhs.coeff(i, col);
@@ -280,44 +287,44 @@ struct ei_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
*******************************************/
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
struct ei_product_coeff_vectorized_unroller
struct product_coeff_vectorized_unroller
{
typedef typename Lhs::Index Index;
enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size };
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
{
ei_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
pres = ei_padd(pres, ei_pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
}
};
template<typename Lhs, typename Rhs, typename Packet>
struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
{
pres = ei_pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
}
};
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::PacketScalar Packet;
typedef typename Lhs::Index Index;
enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size };
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
Packet pres;
ei_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
ei_product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
res = ei_predux(pres);
product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
res = predux(pres);
}
};
template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
struct ei_product_coeff_vectorized_dyn_selector
struct product_coeff_vectorized_dyn_selector
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
@@ -329,7 +336,7 @@ struct ei_product_coeff_vectorized_dyn_selector
// NOTE the 3 following specializations exist because taking .col(0) on a vector is a bit slower
// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
template<typename Lhs, typename Rhs, int RhsCols>
struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
@@ -339,7 +346,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
};
template<typename Lhs, typename Rhs, int LhsRows>
struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
@@ -349,7 +356,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
};
template<typename Lhs, typename Rhs>
struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
@@ -359,12 +366,12 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
};
template<typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
}
};
@@ -373,71 +380,73 @@ struct ei_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetSca
*******************/
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
ei_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
res = ei_pmadd(ei_pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
}
};
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
ei_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
res = ei_pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), ei_pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
res = ei_pmul(ei_pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1<Packet>(rhs.coeff(0, col)));
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
{
ei_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = ei_pmul(ei_pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
for(Index i = 1; i < lhs.cols(); ++i)
res = ei_pmadd(ei_pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
{
ei_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1<Packet>(rhs.coeff(0, col)));
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
for(Index i = 1; i < lhs.cols(); ++i)
res = ei_pmadd(lhs.template packet<LoadMode>(row, i), ei_pset1<Packet>(rhs.coeff(i, col)), res);
res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
}
};
} // end namespace internal
#endif // EIGEN_COEFFBASED_PRODUCT_H

View File

@@ -25,70 +25,50 @@
#ifndef EIGEN_GENERAL_BLOCK_PANEL_H
#define EIGEN_GENERAL_BLOCK_PANEL_H
namespace internal {
template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
class ei_gebp_traits;
class gebp_traits;
/** \internal \returns \a a when it is strictly positive, and the fallback \a b otherwise. */
inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)
{
  // A non-positive first argument selects the fallback value.
  if(a>0)
    return a;
  return b;
}
/** \internal */
inline void ei_manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0)
inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0)
{
static std::ptrdiff_t m_l1CacheSize = 0;
static std::ptrdiff_t m_l2CacheSize = 0;
#ifdef _OPENMP
#pragma omp threadprivate(m_l1CacheSize,m_l2CacheSize)
#endif
if(m_l1CacheSize==0)
{
m_l1CacheSize = ei_queryL1CacheSize();
m_l2CacheSize = ei_queryTopLevelCacheSize();
if(m_l1CacheSize<=0) m_l1CacheSize = 8 * 1024;
if(m_l2CacheSize<=0) m_l2CacheSize = 1 * 1024 * 1024;
m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024);
m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024);
}
if(action==SetAction)
{
// set the cpu cache size and cache all block sizes from a global cache size in byte
ei_internal_assert(l1!=0 && l2!=0);
eigen_internal_assert(l1!=0 && l2!=0);
m_l1CacheSize = *l1;
m_l2CacheSize = *l2;
}
else if(action==GetAction)
{
ei_internal_assert(l1!=0 && l2!=0);
eigen_internal_assert(l1!=0 && l2!=0);
*l1 = m_l1CacheSize;
*l2 = m_l2CacheSize;
}
else
{
ei_internal_assert(false);
eigen_internal_assert(false);
}
}
/** \returns the level 1 cpu cache size, in bytes, that is currently registered
  * for estimating the ideal blocking size parameters.
  * \sa setCpuCacheSizes */
inline std::ptrdiff_t l1CacheSize()
{
  std::ptrdiff_t firstLevel;
  std::ptrdiff_t secondLevel;
  // GetAction requires both out-pointers to be non-null; the level 2 value is discarded.
  ei_manage_caching_sizes(GetAction, &firstLevel, &secondLevel);
  return firstLevel;
}
/** \returns the level 2 cpu cache size, in bytes, that is currently registered
  * for estimating the ideal blocking size parameters.
  * \sa setCpuCacheSizes */
inline std::ptrdiff_t l2CacheSize()
{
  std::ptrdiff_t firstLevel;
  std::ptrdiff_t secondLevel;
  // GetAction requires both out-pointers to be non-null; the level 1 value is discarded.
  ei_manage_caching_sizes(GetAction, &firstLevel, &secondLevel);
  return secondLevel;
}
/** Registers the cpu L1 and L2 cache sizes, both expressed in bytes.
  * Algorithms that work per blocks use these values to adjust
  * the size of their blocks.
  *
  * \sa computeProductBlockingSizes */
inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2)
{
  // The helper takes pointers, so hand it the addresses of the by-value copies.
  ei_manage_caching_sizes(SetAction, &l1, &l2);
}
/** \brief Computes the blocking parameters for a m x k times k x n matrix product
*
* \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
@@ -100,13 +80,14 @@ inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2)
* for matrix products and related algorithms. The blocking sizes depend on various
* parameters:
* - the L1 and L2 cache sizes,
* - the register level blocking sizes defined by ei_gebp_traits,
* - the register level blocking sizes defined by gebp_traits,
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */
template<typename LhsScalar, typename RhsScalar, int KcFactor>
void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
{
EIGEN_UNUSED_VARIABLE(n);
// Explanations:
// Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
// mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed
@@ -116,19 +97,18 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd
// stay in L1 cache.
std::ptrdiff_t l1, l2;
typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits;
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
enum {
kdiv = KcFactor * 2 * Traits::nr
* Traits::RhsProgress * sizeof(RhsScalar),
mr = ei_gebp_traits<LhsScalar,RhsScalar>::mr,
mr = gebp_traits<LhsScalar,RhsScalar>::mr,
mr_mask = (0xffffffff/mr)*mr
};
ei_manage_caching_sizes(GetAction, &l1, &l2);
manage_caching_sizes(GetAction, &l1, &l2);
k = std::min<std::ptrdiff_t>(k, l1/kdiv);
std::ptrdiff_t _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
if(_m<m) m = _m & mr_mask;
n = n;
}
template<typename LhsScalar, typename RhsScalar>
@@ -143,28 +123,28 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
// FIXME (a bit overkill maybe ?)
template<typename CJ, typename A, typename B, typename C, typename T> struct ei_gebp_madd_selector {
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
{
c = cj.pmadd(a,b,c);
}
};
template<typename CJ, typename T> struct ei_gebp_madd_selector<CJ,T,T,T,T> {
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, T& a, T& b, T& c, T& t)
template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
{
t = b; t = cj.pmul(a,t); c = ei_padd(c,t);
t = b; t = cj.pmul(a,t); c = padd(c,t);
}
};
template<typename CJ, typename A, typename B, typename C, typename T>
EIGEN_STRONG_INLINE void ei_gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t)
EIGEN_STRONG_INLINE void gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t)
{
ei_gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t);
gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t);
}
#define MADD(CJ,A,B,C,T) ei_gebp_madd(CJ,A,B,C,T);
// #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = ei_padd(C,T);
#define MADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T);
// #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = padd(C,T);
#endif
/* Vectorization logic
@@ -178,20 +158,20 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
* real*cplx : load lhs as (a0,a0,a1,a1), and mul as usual
*/
template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs, bool _ConjRhs>
class ei_gebp_traits
class gebp_traits
{
public:
typedef _LhsScalar LhsScalar;
typedef _RhsScalar RhsScalar;
typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
ConjLhs = _ConjLhs,
ConjRhs = _ConjRhs,
Vectorizable = ei_packet_traits<LhsScalar>::Vectorizable && ei_packet_traits<RhsScalar>::Vectorizable,
LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1,
RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1,
ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1,
Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable,
LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
@@ -207,67 +187,67 @@ public:
RhsProgress = RhsPacketSize
};
typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket;
typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket;
typedef typename ei_packet_traits<ResScalar>::type _ResPacket;
typedef typename packet_traits<LhsScalar>::type _LhsPacket;
typedef typename packet_traits<RhsScalar>::type _RhsPacket;
typedef typename packet_traits<ResScalar>::type _ResPacket;
typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
typedef ResPacket AccPacket;
EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
{
p = ei_pset1<ResPacket>(ResScalar(0));
p = pset1<ResPacket>(ResScalar(0));
}
EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b)
{
for(DenseIndex k=0; k<n; k++)
ei_pstore(&b[k*RhsPacketSize], ei_pset1<RhsPacket>(rhs[k]));
pstore1<RhsPacket>(&b[k*RhsPacketSize], rhs[k]);
}
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
{
dest = ei_pload<RhsPacket>(b);
dest = pload<RhsPacket>(b);
}
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
{
dest = ei_pload<LhsPacket>(a);
dest = pload<LhsPacket>(a);
}
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, AccPacket& tmp) const
{
tmp = b; tmp = ei_pmul(a,tmp); c = ei_padd(c,tmp);
tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);
}
EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
{
r = ei_pmadd(c,alpha,r);
r = pmadd(c,alpha,r);
}
protected:
// ei_conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
// ei_conj_helper<LhsPacket,RhsPacket,ConjLhs,ConjRhs> pcj;
// conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
// conj_helper<LhsPacket,RhsPacket,ConjLhs,ConjRhs> pcj;
};
template<typename RealScalar, bool _ConjLhs>
class ei_gebp_traits<std::complex<RealScalar>, RealScalar, _ConjLhs, false>
class gebp_traits<std::complex<RealScalar>, RealScalar, _ConjLhs, false>
{
public:
typedef std::complex<RealScalar> LhsScalar;
typedef RealScalar RhsScalar;
typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
ConjLhs = _ConjLhs,
ConjRhs = false,
Vectorizable = ei_packet_traits<LhsScalar>::Vectorizable && ei_packet_traits<RhsScalar>::Vectorizable,
LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1,
RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1,
ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1,
Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable,
LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
nr = NumberOfRegisters/4,
@@ -278,48 +258,48 @@ public:
RhsProgress = RhsPacketSize
};
typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket;
typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket;
typedef typename ei_packet_traits<ResScalar>::type _ResPacket;
typedef typename packet_traits<LhsScalar>::type _LhsPacket;
typedef typename packet_traits<RhsScalar>::type _RhsPacket;
typedef typename packet_traits<ResScalar>::type _ResPacket;
typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
typedef ResPacket AccPacket;
EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
{
p = ei_pset1<ResPacket>(ResScalar(0));
p = pset1<ResPacket>(ResScalar(0));
}
EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b)
{
for(DenseIndex k=0; k<n; k++)
ei_pstore(&b[k*RhsPacketSize], ei_pset1<RhsPacket>(rhs[k]));
pstore1<RhsPacket>(&b[k*RhsPacketSize], rhs[k]);
}
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
{
dest = ei_pload<RhsPacket>(b);
dest = pload<RhsPacket>(b);
}
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
{
dest = ei_pload<LhsPacket>(a);
dest = pload<LhsPacket>(a);
}
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const
{
madd_impl(a, b, c, tmp, typename ei_meta_if<Vectorizable,ei_meta_true,ei_meta_false>::ret());
madd_impl(a, b, c, tmp, typename conditional<Vectorizable,true_type,false_type>::type());
}
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const ei_meta_true&) const
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
{
tmp = b; tmp = ei_pmul(a.v,tmp); c.v = ei_padd(c.v,tmp);
tmp = b; tmp = pmul(a.v,tmp); c.v = padd(c.v,tmp);
}
EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const ei_meta_false&) const
EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const false_type&) const
{
c += a * b;
}
@@ -330,11 +310,11 @@ public:
}
protected:
ei_conj_helper<ResPacket,ResPacket,ConjLhs,false> cj;
conj_helper<ResPacket,ResPacket,ConjLhs,false> cj;
};
template<typename RealScalar, bool _ConjLhs, bool _ConjRhs>
class ei_gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, _ConjLhs, _ConjRhs >
class gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, _ConjLhs, _ConjRhs >
{
public:
typedef std::complex<RealScalar> Scalar;
@@ -345,10 +325,10 @@ public:
enum {
ConjLhs = _ConjLhs,
ConjRhs = _ConjRhs,
Vectorizable = ei_packet_traits<RealScalar>::Vectorizable
&& ei_packet_traits<Scalar>::Vectorizable,
RealPacketSize = Vectorizable ? ei_packet_traits<RealScalar>::size : 1,
ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1,
Vectorizable = packet_traits<RealScalar>::Vectorizable
&& packet_traits<Scalar>::Vectorizable,
RealPacketSize = Vectorizable ? packet_traits<RealScalar>::size : 1,
ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
nr = 2,
mr = 2 * ResPacketSize,
@@ -358,25 +338,25 @@ public:
RhsProgress = Vectorizable ? 2*ResPacketSize : 1
};
typedef typename ei_packet_traits<RealScalar>::type RealPacket;
typedef typename ei_packet_traits<Scalar>::type ScalarPacket;
typedef typename packet_traits<RealScalar>::type RealPacket;
typedef typename packet_traits<Scalar>::type ScalarPacket;
struct DoublePacket
{
RealPacket first;
RealPacket second;
};
typedef typename ei_meta_if<Vectorizable,RealPacket, Scalar>::ret LhsPacket;
typedef typename ei_meta_if<Vectorizable,DoublePacket,Scalar>::ret RhsPacket;
typedef typename ei_meta_if<Vectorizable,ScalarPacket,Scalar>::ret ResPacket;
typedef typename ei_meta_if<Vectorizable,DoublePacket,Scalar>::ret AccPacket;
typedef typename conditional<Vectorizable,RealPacket, Scalar>::type LhsPacket;
typedef typename conditional<Vectorizable,DoublePacket,Scalar>::type RhsPacket;
typedef typename conditional<Vectorizable,ScalarPacket,Scalar>::type ResPacket;
typedef typename conditional<Vectorizable,DoublePacket,Scalar>::type AccPacket;
EIGEN_STRONG_INLINE void initAcc(Scalar& p) { p = Scalar(0); }
EIGEN_STRONG_INLINE void initAcc(DoublePacket& p)
{
p.first = ei_pset1<RealPacket>(RealScalar(0));
p.second = ei_pset1<RealPacket>(RealScalar(0));
p.first = pset1<RealPacket>(RealScalar(0));
p.second = pset1<RealPacket>(RealScalar(0));
}
/* Unpack the rhs coeff such that each complex coefficient is spread into
@@ -389,8 +369,8 @@ public:
{
if(Vectorizable)
{
ei_pstore((RealScalar*)&b[k*ResPacketSize*2+0], ei_pset1<RealPacket>(ei_real(rhs[k])));
ei_pstore((RealScalar*)&b[k*ResPacketSize*2+ResPacketSize], ei_pset1<RealPacket>(ei_imag(rhs[k])));
pstore1<RealPacket>((RealScalar*)&b[k*ResPacketSize*2+0], real(rhs[k]));
pstore1<RealPacket>((RealScalar*)&b[k*ResPacketSize*2+ResPacketSize], imag(rhs[k]));
}
else
b[k] = rhs[k];
@@ -401,20 +381,20 @@ public:
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, DoublePacket& dest) const
{
dest.first = ei_pload<RealPacket>((const RealScalar*)b);
dest.second = ei_pload<RealPacket>((const RealScalar*)(b+ResPacketSize));
dest.first = pload<RealPacket>((const RealScalar*)b);
dest.second = pload<RealPacket>((const RealScalar*)(b+ResPacketSize));
}
// nothing special here
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
{
dest = ei_pload<LhsPacket>((const typename ei_unpacket_traits<LhsPacket>::type*)(a));
dest = pload<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a));
}
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, DoublePacket& c, RhsPacket& /*tmp*/) const
{
c.first = ei_padd(ei_pmul(a,b.first), c.first);
c.second = ei_padd(ei_pmul(a,b.second),c.second);
c.first = padd(pmul(a,b.first), c.first);
c.second = padd(pmul(a,b.second),c.second);
}
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, ResPacket& c, RhsPacket& /*tmp*/) const
@@ -430,34 +410,34 @@ public:
ResPacket tmp;
if((!ConjLhs)&&(!ConjRhs))
{
tmp = ei_pcplxflip(ei_pconj(ResPacket(c.second)));
tmp = ei_padd(ResPacket(c.first),tmp);
tmp = pcplxflip(pconj(ResPacket(c.second)));
tmp = padd(ResPacket(c.first),tmp);
}
else if((!ConjLhs)&&(ConjRhs))
{
tmp = ei_pconj(ei_pcplxflip(ResPacket(c.second)));
tmp = ei_padd(ResPacket(c.first),tmp);
tmp = pconj(pcplxflip(ResPacket(c.second)));
tmp = padd(ResPacket(c.first),tmp);
}
else if((ConjLhs)&&(!ConjRhs))
{
tmp = ei_pcplxflip(ResPacket(c.second));
tmp = ei_padd(ei_pconj(ResPacket(c.first)),tmp);
tmp = pcplxflip(ResPacket(c.second));
tmp = padd(pconj(ResPacket(c.first)),tmp);
}
else if((ConjLhs)&&(ConjRhs))
{
tmp = ei_pcplxflip(ResPacket(c.second));
tmp = ei_psub(ei_pconj(ResPacket(c.first)),tmp);
tmp = pcplxflip(ResPacket(c.second));
tmp = psub(pconj(ResPacket(c.first)),tmp);
}
r = ei_pmadd(tmp,alpha,r);
r = pmadd(tmp,alpha,r);
}
protected:
ei_conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
};
template<typename RealScalar, bool _ConjRhs>
class ei_gebp_traits<RealScalar, std::complex<RealScalar>, false, _ConjRhs >
class gebp_traits<RealScalar, std::complex<RealScalar>, false, _ConjRhs >
{
public:
typedef std::complex<RealScalar> Scalar;
@@ -468,11 +448,11 @@ public:
enum {
ConjLhs = false,
ConjRhs = _ConjRhs,
Vectorizable = ei_packet_traits<RealScalar>::Vectorizable
&& ei_packet_traits<Scalar>::Vectorizable,
LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1,
RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1,
ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1,
Vectorizable = packet_traits<RealScalar>::Vectorizable
&& packet_traits<Scalar>::Vectorizable,
LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
nr = 4,
@@ -483,48 +463,48 @@ public:
RhsProgress = ResPacketSize
};
typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket;
typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket;
typedef typename ei_packet_traits<ResScalar>::type _ResPacket;
typedef typename packet_traits<LhsScalar>::type _LhsPacket;
typedef typename packet_traits<RhsScalar>::type _RhsPacket;
typedef typename packet_traits<ResScalar>::type _ResPacket;
typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
typedef ResPacket AccPacket;
EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
{
p = ei_pset1<ResPacket>(ResScalar(0));
p = pset1<ResPacket>(ResScalar(0));
}
EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b)
{
for(DenseIndex k=0; k<n; k++)
ei_pstore(&b[k*RhsPacketSize], ei_pset1<RhsPacket>(rhs[k]));
pstore1<RhsPacket>(&b[k*RhsPacketSize], rhs[k]);
}
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
{
dest = ei_pload<RhsPacket>(b);
dest = pload<RhsPacket>(b);
}
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
{
dest = ei_ploaddup<LhsPacket>(a);
dest = ploaddup<LhsPacket>(a);
}
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const
{
madd_impl(a, b, c, tmp, typename ei_meta_if<Vectorizable,ei_meta_true,ei_meta_false>::ret());
madd_impl(a, b, c, tmp, typename conditional<Vectorizable,true_type,false_type>::type());
}
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const ei_meta_true&) const
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
{
tmp = b; tmp.v = ei_pmul(a,tmp.v); c = ei_padd(c,tmp);
tmp = b; tmp.v = pmul(a,tmp.v); c = padd(c,tmp);
}
EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const ei_meta_false&) const
EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const false_type&) const
{
c += a * b;
}
@@ -535,7 +515,7 @@ public:
}
protected:
ei_conj_helper<ResPacket,ResPacket,false,ConjRhs> cj;
conj_helper<ResPacket,ResPacket,false,ConjRhs> cj;
};
/* optimized GEneral packed Block * packed Panel product kernel
@@ -546,9 +526,9 @@ protected:
* |cplx |real | easy vectorization
*/
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
struct ei_gebp_kernel
struct gebp_kernel
{
typedef ei_gebp_traits<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> Traits;
typedef gebp_traits<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> Traits;
typedef typename Traits::ResScalar ResScalar;
typedef typename Traits::LhsPacket LhsPacket;
typedef typename Traits::RhsPacket RhsPacket;
@@ -570,8 +550,8 @@ struct ei_gebp_kernel
if(strideA==-1) strideA = depth;
if(strideB==-1) strideB = depth;
ei_conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
// ei_conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
// conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
Index packet_cols = (cols/nr) * nr;
const Index peeled_mc = (rows/mr)*mr;
// FIXME:
@@ -592,7 +572,7 @@ struct ei_gebp_kernel
for(Index i=0; i<peeled_mc; i+=mr)
{
const LhsScalar* blA = &blockA[i*strideA+offsetA*mr];
ei_prefetch(&blA[0]);
prefetch(&blA[0]);
// gets res block as register
AccPacket C0, C1, C2, C3, C4, C5, C6, C7;
@@ -610,10 +590,10 @@ struct ei_gebp_kernel
ResScalar* r2 = r1 + resStride;
ResScalar* r3 = r2 + resStride;
ei_prefetch(r0+16);
ei_prefetch(r1+16);
ei_prefetch(r2+16);
ei_prefetch(r3+16);
prefetch(r0+16);
prefetch(r1+16);
prefetch(r2+16);
prefetch(r3+16);
// performs "inner" product
// TODO let's check whether the following peeled loop could not be
@@ -780,42 +760,64 @@ EIGEN_ASM_COMMENT("mybegin4");
blA += mr;
}
ResPacket R0, R1, R2, R3, R4, R5, R6, R7;
ResPacket alphav = ei_pset1<ResPacket>(alpha);
if(nr==4)
{
ResPacket R0, R1, R2, R3, R4, R5, R6;
ResPacket alphav = pset1<ResPacket>(alpha);
R0 = ei_ploadu<ResPacket>(r0);
R1 = ei_ploadu<ResPacket>(r1);
if(nr==4) R2 = ei_ploadu<ResPacket>(r2);
if(nr==4) R3 = ei_ploadu<ResPacket>(r3);
R4 = ei_ploadu<ResPacket>(r0 + ResPacketSize);
R5 = ei_ploadu<ResPacket>(r1 + ResPacketSize);
if(nr==4) R6 = ei_ploadu<ResPacket>(r2 + ResPacketSize);
if(nr==4) R7 = ei_ploadu<ResPacket>(r3 + ResPacketSize);
R0 = ploadu<ResPacket>(r0);
R1 = ploadu<ResPacket>(r1);
R2 = ploadu<ResPacket>(r2);
R3 = ploadu<ResPacket>(r3);
R4 = ploadu<ResPacket>(r0 + ResPacketSize);
R5 = ploadu<ResPacket>(r1 + ResPacketSize);
R6 = ploadu<ResPacket>(r2 + ResPacketSize);
traits.acc(C0, alphav, R0);
pstoreu(r0, R0);
R0 = ploadu<ResPacket>(r3 + ResPacketSize);
traits.acc(C0, alphav, R0);
traits.acc(C1, alphav, R1);
if(nr==4) traits.acc(C2, alphav, R2);
if(nr==4) traits.acc(C3, alphav, R3);
traits.acc(C4, alphav, R4);
traits.acc(C5, alphav, R5);
if(nr==4) traits.acc(C6, alphav, R6);
if(nr==4) traits.acc(C7, alphav, R7);
traits.acc(C1, alphav, R1);
traits.acc(C2, alphav, R2);
traits.acc(C3, alphav, R3);
traits.acc(C4, alphav, R4);
traits.acc(C5, alphav, R5);
traits.acc(C6, alphav, R6);
traits.acc(C7, alphav, R0);
pstoreu(r1, R1);
pstoreu(r2, R2);
pstoreu(r3, R3);
pstoreu(r0 + ResPacketSize, R4);
pstoreu(r1 + ResPacketSize, R5);
pstoreu(r2 + ResPacketSize, R6);
pstoreu(r3 + ResPacketSize, R0);
}
else
{
ResPacket R0, R1, R4;
ResPacket alphav = pset1<ResPacket>(alpha);
ei_pstoreu(r0, R0);
ei_pstoreu(r1, R1);
if(nr==4) ei_pstoreu(r2, R2);
if(nr==4) ei_pstoreu(r3, R3);
ei_pstoreu(r0 + ResPacketSize, R4);
ei_pstoreu(r1 + ResPacketSize, R5);
if(nr==4) ei_pstoreu(r2 + ResPacketSize, R6);
if(nr==4) ei_pstoreu(r3 + ResPacketSize, R7);
R0 = ploadu<ResPacket>(r0);
R1 = ploadu<ResPacket>(r1);
R4 = ploadu<ResPacket>(r0 + ResPacketSize);
traits.acc(C0, alphav, R0);
pstoreu(r0, R0);
R0 = ploadu<ResPacket>(r1 + ResPacketSize);
traits.acc(C1, alphav, R1);
traits.acc(C4, alphav, R4);
traits.acc(C5, alphav, R0);
pstoreu(r1, R1);
pstoreu(r0 + ResPacketSize, R4);
pstoreu(r1 + ResPacketSize, R0);
}
}
if(rows-peeled_mc>=LhsProgress)
{
Index i = peeled_mc;
const LhsScalar* blA = &blockA[i*strideA+offsetA*LhsProgress];
ei_prefetch(&blA[0]);
prefetch(&blA[0]);
// gets res block as register
AccPacket C0, C1, C2, C3;
@@ -939,32 +941,32 @@ EIGEN_ASM_COMMENT("mybegin4");
}
ResPacket R0, R1, R2, R3;
ResPacket alphav = ei_pset1<ResPacket>(alpha);
ResPacket alphav = pset1<ResPacket>(alpha);
ResScalar* r0 = &res[(j2+0)*resStride + i];
ResScalar* r1 = r0 + resStride;
ResScalar* r2 = r1 + resStride;
ResScalar* r3 = r2 + resStride;
R0 = ei_ploadu<ResPacket>(r0);
R1 = ei_ploadu<ResPacket>(r1);
if(nr==4) R2 = ei_ploadu<ResPacket>(r2);
if(nr==4) R3 = ei_ploadu<ResPacket>(r3);
R0 = ploadu<ResPacket>(r0);
R1 = ploadu<ResPacket>(r1);
if(nr==4) R2 = ploadu<ResPacket>(r2);
if(nr==4) R3 = ploadu<ResPacket>(r3);
traits.acc(C0, alphav, R0);
traits.acc(C1, alphav, R1);
if(nr==4) traits.acc(C2, alphav, R2);
if(nr==4) traits.acc(C3, alphav, R3);
ei_pstoreu(r0, R0);
ei_pstoreu(r1, R1);
if(nr==4) ei_pstoreu(r2, R2);
if(nr==4) ei_pstoreu(r3, R3);
pstoreu(r0, R0);
pstoreu(r1, R1);
if(nr==4) pstoreu(r2, R2);
if(nr==4) pstoreu(r3, R3);
}
for(Index i=peeled_mc2; i<rows; i++)
{
const LhsScalar* blA = &blockA[i*strideA+offsetA];
ei_prefetch(&blA[0]);
prefetch(&blA[0]);
// gets a 1 x nr res block as registers
ResScalar C0(0), C1(0), C2(0), C3(0);
@@ -1013,17 +1015,12 @@ EIGEN_ASM_COMMENT("mybegin4");
for(Index j2=packet_cols; j2<cols; j2++)
{
// unpack B
{
traits.unpackRhs(depth, &blockB[j2*strideB+offsetB], unpackedB);
// const RhsScalar* blB = &blockB[j2*strideB+offsetB];
// for(Index k=0; k<depth; k++)
// ei_pstore(&unpackedB[k*RhsPacketSize], ei_pset1<RhsPacket>(blB[k]));
}
traits.unpackRhs(depth, &blockB[j2*strideB+offsetB], unpackedB);
for(Index i=0; i<peeled_mc; i+=mr)
{
const LhsScalar* blA = &blockA[i*strideA+offsetA*mr];
ei_prefetch(&blA[0]);
prefetch(&blA[0]);
// TODO move the res loads to the stores
@@ -1049,24 +1046,24 @@ EIGEN_ASM_COMMENT("mybegin4");
blA += 2*LhsProgress;
}
ResPacket R0, R4;
ResPacket alphav = ei_pset1<ResPacket>(alpha);
ResPacket alphav = pset1<ResPacket>(alpha);
ResScalar* r0 = &res[(j2+0)*resStride + i];
R0 = ei_ploadu<ResPacket>(r0);
R4 = ei_ploadu<ResPacket>(r0+ResPacketSize);
R0 = ploadu<ResPacket>(r0);
R4 = ploadu<ResPacket>(r0+ResPacketSize);
traits.acc(C0, alphav, R0);
traits.acc(C4, alphav, R4);
ei_pstoreu(r0, R0);
ei_pstoreu(r0+ResPacketSize, R4);
pstoreu(r0, R0);
pstoreu(r0+ResPacketSize, R4);
}
if(rows-peeled_mc>=LhsProgress)
{
Index i = peeled_mc;
const LhsScalar* blA = &blockA[i*strideA+offsetA*LhsProgress];
ei_prefetch(&blA[0]);
prefetch(&blA[0]);
AccPacket C0;
traits.initAcc(C0);
@@ -1083,15 +1080,15 @@ EIGEN_ASM_COMMENT("mybegin4");
blA += LhsProgress;
}
ResPacket alphav = ei_pset1<ResPacket>(alpha);
ResPacket R0 = ei_ploadu<ResPacket>(&res[(j2+0)*resStride + i]);
ResPacket alphav = pset1<ResPacket>(alpha);
ResPacket R0 = ploadu<ResPacket>(&res[(j2+0)*resStride + i]);
traits.acc(C0, alphav, R0);
ei_pstoreu(&res[(j2+0)*resStride + i], R0);
pstoreu(&res[(j2+0)*resStride + i], R0);
}
for(Index i=peeled_mc2; i<rows; i++)
{
const LhsScalar* blA = &blockA[i*strideA+offsetA];
ei_prefetch(&blA[0]);
prefetch(&blA[0]);
// gets a 1 x 1 res block as registers
ResScalar C0(0);
@@ -1126,15 +1123,15 @@ EIGEN_ASM_COMMENT("mybegin4");
// 32 33 34 35 ...
// 36 37 38 39 ...
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
struct ei_gemm_pack_lhs
struct gemm_pack_lhs
{
void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
Index stride=0, Index offset=0)
{
// enum { PacketSize = ei_packet_traits<Scalar>::size };
ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
ei_const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
// enum { PacketSize = packet_traits<Scalar>::size };
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
Index count = 0;
Index peeled_mc = (rows/Pack1)*Pack1;
for(Index i=0; i<peeled_mc; i+=Pack1)
@@ -1172,15 +1169,15 @@ struct ei_gemm_pack_lhs
// 8 9 10 11 20 21 22 23 26 29
// . . . . . . . . . .
template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
{
typedef typename ei_packet_traits<Scalar>::type Packet;
enum { PacketSize = ei_packet_traits<Scalar>::size };
typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size };
void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
Index stride=0, Index offset=0)
{
ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
Index packet_cols = (cols/nr) * nr;
Index count = 0;
for(Index j2=0; j2<packet_cols; j2+=nr)
@@ -1220,14 +1217,14 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
// this version is optimized for row major matrices
template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
{
enum { PacketSize = ei_packet_traits<Scalar>::size };
enum { PacketSize = packet_traits<Scalar>::size };
void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
Index stride=0, Index offset=0)
{
ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
Index packet_cols = (cols/nr) * nr;
Index count = 0;
for(Index j2=0; j2<packet_cols; j2+=nr)
@@ -1261,4 +1258,34 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
}
};
} // end namespace internal
/** \returns the level 1 CPU cache size (in bytes) that is currently used to estimate
  * the ideal blocking size parameters.
  * \sa setCpuCacheSizes */
inline std::ptrdiff_t l1CacheSize()
{
  std::ptrdiff_t l1Bytes, l2Bytes;
  // The helper handles both sizes as a pair; only the L1 one is reported here.
  internal::manage_caching_sizes(GetAction, &l1Bytes, &l2Bytes);
  return l1Bytes;
}
/** \returns the level 2 CPU cache size (in bytes) that is currently used to estimate
  * the ideal blocking size parameters.
  * \sa setCpuCacheSizes */
inline std::ptrdiff_t l2CacheSize()
{
  std::ptrdiff_t l1Bytes, l2Bytes;
  // The helper handles both sizes as a pair; only the L2 one is reported here.
  internal::manage_caching_sizes(GetAction, &l1Bytes, &l2Bytes);
  return l2Bytes;
}
/** Sets the CPU L1 and L2 cache sizes (in bytes).
* These values are used to adjust the size of the blocks
* for the algorithms working per blocks.
*
* \param l1 level 1 cache size, in bytes
* \param l2 level 2 cache size, in bytes
*
* \sa computeProductBlockingSizes, l1CacheSize, l2CacheSize */
inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2)
{
// Both sizes are handed over together, through pointers to the local copies.
internal::manage_caching_sizes(SetAction, &l1, &l2);
}
#endif // EIGEN_GENERAL_BLOCK_PANEL_H

View File

@@ -25,27 +25,29 @@
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
#define EIGEN_GENERAL_MATRIX_MATRIX_H
template<typename _LhsScalar, typename _RhsScalar> class ei_level3_blocking;
namespace internal {
template<typename _LhsScalar, typename _RhsScalar> class level3_blocking;
/* Specialization for a row-major destination matrix => simple transposition of the product */
template<
typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct ei_general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
{
typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(
Index rows, Index cols, Index depth,
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsStride,
ResScalar* res, Index resStride,
ResScalar alpha,
ei_level3_blocking<RhsScalar,LhsScalar>& blocking,
level3_blocking<RhsScalar,LhsScalar>& blocking,
GemmParallelInfo<Index>* info = 0)
{
// transpose the product such that the result is column major
ei_general_matrix_matrix_product<Index,
general_matrix_matrix_product<Index,
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
ColMajor>
@@ -59,29 +61,29 @@ template<
typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct ei_general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
{
typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static void run(Index rows, Index cols, Index depth,
const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsStride,
ResScalar* res, Index resStride,
ResScalar alpha,
ei_level3_blocking<LhsScalar,RhsScalar>& blocking,
level3_blocking<LhsScalar,RhsScalar>& blocking,
GemmParallelInfo<Index>* info = 0)
{
ei_const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
ei_const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits;
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = std::min(rows,blocking.mc()); // cache block size along the M direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
//Index nc = blocking.nc(); // cache block size along the N direction
ei_gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
ei_gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
ei_gebp_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
gebp_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
#ifdef EIGEN_HAS_OPENMP
if(info)
@@ -89,17 +91,19 @@ static void run(Index rows, Index cols, Index depth,
// this is the parallel version!
Index tid = omp_get_thread_num();
Index threads = omp_get_num_threads();
LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, kc*mc);
std::size_t sizeA = kc*mc;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
RhsScalar* w = ei_aligned_stack_new(RhsScalar, sizeW);
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, 0);
ei_declare_aligned_stack_constructed_variable(RhsScalar, w, sizeW, 0);
RhsScalar* blockB = blocking.blockB();
ei_internal_assert(blockB!=0);
eigen_internal_assert(blockB!=0);
// For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
for(Index k=0; k<depth; k+=kc)
{
const Index actual_kc = std::min(k+kc,depth)-k; // => rows of B', and cols of the A'
const Index actual_kc = (std::min)(k+kc,depth)-k; // => rows of B', and cols of the A'
// In order to reduce the chance that a thread has to wait for the other,
// let's start by packing A'.
@@ -114,7 +118,7 @@ static void run(Index rows, Index cols, Index depth,
while(info[tid].users!=0) {}
info[tid].users += threads;
pack_rhs(blockB+info[tid].rhs_start*kc, &rhs(k,info[tid].rhs_start), rhsStride, actual_kc, info[tid].rhs_length);
pack_rhs(blockB+info[tid].rhs_start*actual_kc, &rhs(k,info[tid].rhs_start), rhsStride, actual_kc, info[tid].rhs_length);
// Notify the other threads that the part B'_j is ready to go.
info[tid].sync = k;
@@ -130,13 +134,13 @@ static void run(Index rows, Index cols, Index depth,
if(shift>0)
while(info[j].sync!=k) {}
gebp(res+info[j].rhs_start*resStride, resStride, blockA, blockB+info[j].rhs_start*kc, mc, actual_kc, info[j].rhs_length, alpha, -1,-1,0,0, w);
gebp(res+info[j].rhs_start*resStride, resStride, blockA, blockB+info[j].rhs_start*actual_kc, mc, actual_kc, info[j].rhs_length, alpha, -1,-1,0,0, w);
}
// Then keep going as usual with the remaining A'
for(Index i=mc; i<rows; i+=mc)
{
const Index actual_mc = std::min(i+mc,rows)-i;
const Index actual_mc = (std::min)(i+mc,rows)-i;
// pack A_i,k to A'
pack_lhs(blockA, &lhs(i,k), lhsStride, actual_kc, actual_mc);
@@ -151,9 +155,6 @@ static void run(Index rows, Index cols, Index depth,
#pragma omp atomic
--(info[j].users);
}
ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
ei_aligned_stack_delete(RhsScalar, w, sizeW);
}
else
#endif // EIGEN_HAS_OPENMP
@@ -164,15 +165,16 @@ static void run(Index rows, Index cols, Index depth,
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
LhsScalar *blockA = blocking.blockA()==0 ? ei_aligned_stack_new(LhsScalar, sizeA) : blocking.blockA();
RhsScalar *blockB = blocking.blockB()==0 ? ei_aligned_stack_new(RhsScalar, sizeB) : blocking.blockB();
RhsScalar *blockW = blocking.blockW()==0 ? ei_aligned_stack_new(RhsScalar, sizeW) : blocking.blockW();
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockW, sizeW, blocking.blockW());
// For each horizontal panel of the rhs, and corresponding panel of the lhs...
// (==GEMM_VAR1)
for(Index k2=0; k2<depth; k2+=kc)
{
const Index actual_kc = std::min(k2+kc,depth)-k2;
const Index actual_kc = (std::min)(k2+kc,depth)-k2;
// OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
// => Pack rhs's panel into a sequential chunk of memory (L2 caching)
@@ -185,7 +187,7 @@ static void run(Index rows, Index cols, Index depth,
// (==GEPP_VAR1)
for(Index i2=0; i2<rows; i2+=mc)
{
const Index actual_mc = std::min(i2+mc,rows)-i2;
const Index actual_mc = (std::min)(i2+mc,rows)-i2;
// We pack the lhs's block into a sequential chunk of memory (L1 caching)
// Note that this block will be read a very high number of times, which is equal to the number of
@@ -197,10 +199,6 @@ static void run(Index rows, Index cols, Index depth,
}
}
if(blocking.blockA()==0) ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
if(blocking.blockB()==0) ei_aligned_stack_delete(RhsScalar, blockB, sizeB);
if(blocking.blockW()==0) ei_aligned_stack_delete(RhsScalar, blockW, sizeW);
}
}
@@ -208,18 +206,18 @@ static void run(Index rows, Index cols, Index depth,
/*********************************************************************************
* Specialization of GeneralProduct<> for "large" GEMM, i.e.,
* implementation of the high level wrapper to ei_general_matrix_matrix_product
* implementation of the high level wrapper to general_matrix_matrix_product
**********************************************************************************/
template<typename Lhs, typename Rhs>
struct ei_traits<GeneralProduct<Lhs,Rhs,GemmProduct> >
: ei_traits<ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs> >
struct traits<GeneralProduct<Lhs,Rhs,GemmProduct> >
: traits<ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs> >
{};
template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
struct ei_gemm_functor
struct gemm_functor
{
ei_gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, Scalar actualAlpha,
gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, Scalar actualAlpha,
BlockingType& blocking)
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
{}
@@ -235,8 +233,8 @@ struct ei_gemm_functor
cols = m_rhs.cols();
Gemm::run(rows, cols, m_lhs.cols(),
/*(const Scalar*)*/&(m_lhs.const_cast_derived().coeffRef(row,0)), m_lhs.outerStride(),
/*(const Scalar*)*/&(m_rhs.const_cast_derived().coeffRef(0,col)), m_rhs.outerStride(),
/*(const Scalar*)*/&m_lhs.coeffRef(row,0), m_lhs.outerStride(),
/*(const Scalar*)*/&m_rhs.coeffRef(0,col), m_rhs.outerStride(),
(Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
m_actualAlpha, m_blocking, info);
}
@@ -250,10 +248,10 @@ struct ei_gemm_functor
};
template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth,
bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class ei_gemm_blocking_space;
bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space;
template<typename _LhsScalar, typename _RhsScalar>
class ei_level3_blocking
class level3_blocking
{
typedef _LhsScalar LhsScalar;
typedef _RhsScalar RhsScalar;
@@ -269,7 +267,7 @@ class ei_level3_blocking
public:
ei_level3_blocking()
level3_blocking()
: m_blockA(0), m_blockB(0), m_blockW(0), m_mc(0), m_nc(0), m_kc(0)
{}
@@ -283,19 +281,19 @@ class ei_level3_blocking
};
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth>
class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, true>
: public ei_level3_blocking<
typename ei_meta_if<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::ret,
typename ei_meta_if<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::ret>
class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, true>
: public level3_blocking<
typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
{
enum {
Transpose = StorageOrder==RowMajor,
ActualRows = Transpose ? MaxCols : MaxRows,
ActualCols = Transpose ? MaxRows : MaxCols
};
typedef typename ei_meta_if<Transpose,_RhsScalar,_LhsScalar>::ret LhsScalar;
typedef typename ei_meta_if<Transpose,_LhsScalar,_RhsScalar>::ret RhsScalar;
typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits;
typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
enum {
SizeA = ActualRows * MaxDepth,
SizeB = ActualCols * MaxDepth,
@@ -308,7 +306,7 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
public:
ei_gemm_blocking_space(DenseIndex /*rows*/, DenseIndex /*cols*/, DenseIndex /*depth*/)
gemm_blocking_space(DenseIndex /*rows*/, DenseIndex /*cols*/, DenseIndex /*depth*/)
{
this->m_mc = ActualRows;
this->m_nc = ActualCols;
@@ -325,17 +323,17 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
};
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth>
class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, false>
: public ei_level3_blocking<
typename ei_meta_if<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::ret,
typename ei_meta_if<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::ret>
class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, false>
: public level3_blocking<
typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
{
enum {
Transpose = StorageOrder==RowMajor
};
typedef typename ei_meta_if<Transpose,_RhsScalar,_LhsScalar>::ret LhsScalar;
typedef typename ei_meta_if<Transpose,_LhsScalar,_RhsScalar>::ret RhsScalar;
typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits;
typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
DenseIndex m_sizeA;
DenseIndex m_sizeB;
@@ -343,7 +341,7 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
public:
ei_gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth)
gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth)
{
this->m_mc = Transpose ? cols : rows;
this->m_nc = Transpose ? rows : cols;
@@ -358,19 +356,19 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
void allocateA()
{
if(this->m_blockA==0)
this->m_blockA = ei_aligned_new<LhsScalar>(m_sizeA);
this->m_blockA = aligned_new<LhsScalar>(m_sizeA);
}
void allocateB()
{
if(this->m_blockB==0)
this->m_blockB = ei_aligned_new<RhsScalar>(m_sizeB);
this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
}
void allocateW()
{
if(this->m_blockW==0)
this->m_blockW = ei_aligned_new<RhsScalar>(m_sizeW);
this->m_blockW = aligned_new<RhsScalar>(m_sizeW);
}
void allocateAll()
@@ -380,14 +378,16 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
allocateW();
}
~ei_gemm_blocking_space()
~gemm_blocking_space()
{
ei_aligned_delete(this->m_blockA, m_sizeA);
ei_aligned_delete(this->m_blockB, m_sizeB);
ei_aligned_delete(this->m_blockW, m_sizeW);
aligned_delete(this->m_blockA, m_sizeA);
aligned_delete(this->m_blockB, m_sizeB);
aligned_delete(this->m_blockW, m_sizeW);
}
};
} // end namespace internal
template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, GemmProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs>
@@ -404,13 +404,13 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{
typedef ei_scalar_product_op<LhsScalar,RhsScalar> BinOp;
typedef internal::scalar_product_op<LhsScalar,RhsScalar> BinOp;
EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
}
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
@@ -418,12 +418,12 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
typedef ei_gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
typedef ei_gemm_functor<
typedef internal::gemm_functor<
Scalar, Index,
ei_general_matrix_matrix_product<
internal::general_matrix_matrix_product<
Index,
LhsScalar, (_ActualLhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
RhsScalar, (_ActualRhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
@@ -432,7 +432,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
BlockingType blocking(dst.rows(), dst.cols(), lhs.cols());
ei_parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit);
internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit);
}
};

View File

@@ -0,0 +1,225 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
namespace internal {
/**********************************************************************
* This file implements a general A * B product while
* evaluating only one triangular part of the product.
* This is a more general version of the self-adjoint product (C += A A^T)
* performed by the level 3 BLAS routine SYRK.
**********************************************************************/
// Forward declaration of the triangular block * block kernel. It is used by
// the column-major product driver below and defined at the end of this file.
// mr/nr are the register-block sizes handed to the underlying gebp kernel and
// UpLo selects which triangular half of the destination gets updated.
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
struct tribb_kernel;
/* Optimized matrix-matrix product evaluating only one triangular half.
 *
 * Primary template: it is only declared here. The implementations are the two
 * partial specializations on ResStorageOrder (RowMajor and ColMajor) that
 * follow. UpLo selects the triangular half of the result that is computed.
 */
template <typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder, int UpLo>
struct general_matrix_matrix_triangular_product;
// Row-major result: the product is evaluated in transposed form by the
// column-major implementation. The two operands are swapped, their storage
// orders are flipped, and the requested triangular half is mirrored.
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo>
{
  typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;

  // Forwards every argument to the column-major specialization, with the
  // (pointer, stride) pairs of lhs and rhs exchanged.
  static EIGEN_STRONG_INLINE void run(Index size, Index depth, const LhsScalar* lhs, Index lhsStride,
                                      const RhsScalar* rhs, Index rhsStride,
                                      ResScalar* res, Index resStride, ResScalar alpha)
  {
    // Compile-time parameters of the transposed problem.
    enum {
      FlippedLhsOrder = (LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      FlippedRhsOrder = (RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      MirroredUpLo    = (UpLo==Lower) ? Upper : Lower
    };

    typedef general_matrix_matrix_triangular_product<
              Index,
              RhsScalar, FlippedRhsOrder, ConjugateRhs,
              LhsScalar, FlippedLhsOrder, ConjugateLhs,
              ColMajor, MirroredUpLo> TransposedProduct;

    TransposedProduct::run(size, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
  }
};
// Column-major result: blocked evaluation of the UpLo triangular part of a
// size x size product. From the indexing below, lhs is addressed as
// (row in [0,size), k in [0,depth)) and rhs as (k in [0,depth), col in [0,size)).
// Off-diagonal parts are handed to gebp, the blocks overlapping the diagonal
// to tribb_kernel.
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
// size:      number of rows and of columns of the result
// depth:     inner dimension of the product
// _lhs/_rhs: raw operand storage with their strides (wrapped in data mappers below)
// res:       destination, addressed here as res[row + col*resStride]
// alpha:     scaling factor forwarded unchanged to the kernels
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
{
const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
// Start from the full problem size; computeProductBlockingSizes takes these
// by name and presumably shrinks them to cache-friendly values (confirm
// against its definition).
Index kc = depth; // cache block size along the K direction
Index mc = size; // cache block size along the M direction
Index nc = size; // cache block size along the N direction
computeProductBlockingSizes<LhsScalar,RhsScalar>(kc, mc, nc);
// !!! mc must be a multiple of nr:
// NOTE(review): the diagonal kernel below is given blockB + actual_kc*i2,
// where i2 advances by mc; presumably this offset has to fall on a boundary
// of the nr-wide packed rhs panels -- confirm against gemm_pack_rhs.
if(mc > Traits::nr)
mc = (mc/Traits::nr)*Traits::nr;
// allocatedBlockB = [ workspace of sizeW scalars | packed rhs of kc*size scalars ]
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*size;
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(RhsScalar, allocatedBlockB, sizeB, 0);
// the packed rhs starts right after the workspace
RhsScalar* blockB = allocatedBlockB + sizeW;
gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
gebp_kernel <LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, UpLo> sybb;
// outer loop: slices of at most kc along the inner dimension
for(Index k2=0; k2<depth; k2+=kc)
{
// the last slice may be shorter than kc
const Index actual_kc = (std::min)(k2+kc,depth)-k2;
// note that the actual rhs is the transpose/adjoint of mat
// pack the current slice of rhs (actual_kc rows, all size columns) once per k2
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, size);
// inner loop: horizontal panels of at most mc rows of the result
for(Index i2=0; i2<size; i2+=mc)
{
// the last panel may be shorter than mc
const Index actual_mc = (std::min)(i2+mc,size)-i2;
pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
// the selected actual_mc * size panel of res is split into three different part:
// 1 - before the diagonal => processed with gebp or skipped
// 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel
// 3 - after the diagonal => processed with gebp or skipped
// part 1: columns [0, i2) of the panel, only needed for the lower half
// (the trailing -1, -1, 0, 0 arguments are presumably gebp's default
// strides/offsets -- confirm against gebp_kernel)
if (UpLo==Lower)
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, (std::min)(size,i2), alpha,
-1, -1, 0, 0, allocatedBlockB);
// part 2: the block on the diagonal, starting at (i2, i2)
sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB);
// part 3: columns [j2, size) of the panel, only needed for the upper half
if (UpLo==Upper)
{
Index j2 = i2+actual_mc;
gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, (std::max)(Index(0), size-j2), alpha,
-1, -1, 0, 0, allocatedBlockB);
}
}
}
}
};
// Optimized packed Block * packed Block product kernel evaluating only one given triangular part
// This kernel is built on top of the gebp kernel:
// - the current destination block is processed per panel of actual_mc x BlockSize
// where BlockSize is set to the minimal value allowing gebp to be as fast as possible
// - then, as usual, each panel is split into three parts along the diagonal,
// the sub blocks above and below the diagonal are processed as usual,
// while the triangular block overlapping the diagonal is evaluated into a
// small temporary buffer which is then accumulated into the result using a
// triangular traversal.
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
struct tribb_kernel
{
typedef gebp_traits<LhsScalar,RhsScalar,ConjLhs,ConjRhs> Traits;
typedef typename Traits::ResScalar ResScalar;
// Width of the column panels (and side of the temporary diagonal buffer);
// presumably the larger of the two register-block sizes mr and nr.
enum {
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
};
// res/resStride: top-left corner of the size x size destination block,
//                addressed as res[row + col*resStride]
// blockA:        packed lhs, indexed here in steps of depth scalars per row
// blockB:        packed rhs, indexed here in steps of depth scalars per column
// size, depth:   side of the destination block and inner dimension
// alpha:         scaling factor forwarded to gebp
// workspace:     scratch pointer forwarded unchanged to gebp
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, ResScalar alpha, RhsScalar* workspace)
{
gebp_kernel<LhsScalar, RhsScalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer;
// let's process the block per panel of actual_mc x BlockSize,
// again, each is split into three parts, etc.
for (Index j=0; j<size; j+=BlockSize)
{
// the last panel may be narrower than BlockSize
Index actualBlockSize = std::min<Index>(BlockSize,size - j);
// packed rhs columns [j, j+actualBlockSize)
const RhsScalar* actual_b = blockB+j*depth;
// upper half: the j rows above the diagonal micro block are a plain product
if(UpLo==Upper)
gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha,
-1, -1, 0, 0, workspace);
// selfadjoint micro block
{
Index i = j;
// the buffer is cleared first, so after the gebp call it holds only the
// contribution of this micro block
buffer.setZero();
// 1 - apply the kernel on the temporary buffer
gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha,
-1, -1, 0, 0, workspace);
// 2 - triangular accumulation
// for column j1 only rows [j1, actualBlockSize) (Lower) or [0, j1] (Upper)
// of the buffer are added to the destination, diagonal included
for(Index j1=0; j1<actualBlockSize; ++j1)
{
ResScalar* r = res + (j+j1)*resStride + i;
for(Index i1=UpLo==Lower ? j1 : 0;
UpLo==Lower ? i1<actualBlockSize : i1<=j1; ++i1)
r[i1] += buffer(i1,j1);
}
}
// lower half: the size-i rows below the diagonal micro block are a plain product
if(UpLo==Lower)
{
Index i = j+actualBlockSize;
gebp_kernel(res+j*resStride+i, resStride, blockA+depth*i, actual_b, size-i, depth, actualBlockSize, alpha,
-1, -1, 0, 0, workspace);
}
}
}
};
} // end namespace internal
// high level API
/** \internal
  * Evaluates the product \a prod, scaled by \a alpha, into the UpLo triangular
  * part of the viewed matrix by forwarding to
  * internal::general_matrix_matrix_triangular_product.
  *
  * Both operands are first stripped of the decorations handled by
  * internal::blas_traits (their scalar factors are folded into the alpha passed
  * to the kernel, conjugation is passed as a template flag), so that the kernel
  * works directly on raw pointers and outer strides.
  *
  * \param prod  the product expression to evaluate
  * \param alpha additional scaling factor applied to the product
  * \returns a reference to \c *this
  */
template<typename MatrixType, unsigned int UpLo>
template<typename ProductDerived, typename _Lhs, typename _Rhs>
TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha)
{
  // The kernel below works on raw pointers and takes its size from
  // m_matrix.cols(): a destination whose dimensions do not match the product
  // would make it read or write out of bounds, so check the dimensions first.
  eigen_assert(m_matrix.rows() == prod.rows() && m_matrix.cols() == prod.cols());

  typedef typename internal::remove_all<typename ProductDerived::LhsNested>::type Lhs;
  typedef internal::blas_traits<Lhs> LhsBlasTraits;
  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
  typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
  const ActualLhs actualLhs = LhsBlasTraits::extract(prod.lhs());

  typedef typename internal::remove_all<typename ProductDerived::RhsNested>::type Rhs;
  typedef internal::blas_traits<Rhs> RhsBlasTraits;
  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
  typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
  const ActualRhs actualRhs = RhsBlasTraits::extract(prod.rhs());

  // fold the scalar factors carried by both operands into a single alpha
  typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());

  // size = m_matrix.cols(), depth = actualLhs.cols()
  internal::general_matrix_matrix_triangular_product<Index,
    typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
    typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
    MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
    ::run(m_matrix.cols(), actualLhs.cols(),
          &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
          const_cast<Scalar*>(m_matrix.data()), m_matrix.outerStride(), actualAlpha);

  return *this;
}
#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H

View File

@@ -25,6 +25,8 @@
#ifndef EIGEN_GENERAL_MATRIX_VECTOR_H
#define EIGEN_GENERAL_MATRIX_VECTOR_H
namespace internal {
/* Optimized col-major matrix * vector product:
* This algorithm processes 4 columns at once, which allows to both reduce
* the number of load/stores of the result by a factor 4 and to reduce
@@ -39,25 +41,25 @@
* |cplx |real |real | optimal case, vectorization possible via real-cplx mul
*/
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct ei_general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
{
typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
Vectorizable = ei_packet_traits<LhsScalar>::Vectorizable && ei_packet_traits<RhsScalar>::Vectorizable
&& int(ei_packet_traits<LhsScalar>::size)==int(ei_packet_traits<RhsScalar>::size),
LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1,
RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1,
ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1
Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
&& int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
};
typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket;
typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket;
typedef typename ei_packet_traits<ResScalar>::type _ResPacket;
typedef typename packet_traits<LhsScalar>::type _LhsPacket;
typedef typename packet_traits<RhsScalar>::type _RhsPacket;
typedef typename packet_traits<ResScalar>::type _ResPacket;
typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
EIGEN_DONT_INLINE static void run(
Index rows, Index cols,
@@ -69,23 +71,23 @@ EIGEN_DONT_INLINE static void run(
#endif
, RhsScalar alpha)
{
ei_internal_assert(resIncr==1);
eigen_internal_assert(resIncr==1);
#ifdef _EIGEN_ACCUMULATE_PACKETS
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
#endif
#define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) \
ei_pstore(&res[j], \
ei_padd(ei_pload<ResPacket>(&res[j]), \
ei_padd( \
ei_padd(pcj.pmul(EIGEN_CAT(ei_ploa , A0)<LhsPacket>(&lhs0[j]), ptmp0), \
pcj.pmul(EIGEN_CAT(ei_ploa , A13)<LhsPacket>(&lhs1[j]), ptmp1)), \
ei_padd(pcj.pmul(EIGEN_CAT(ei_ploa , A2)<LhsPacket>(&lhs2[j]), ptmp2), \
pcj.pmul(EIGEN_CAT(ei_ploa , A13)<LhsPacket>(&lhs3[j]), ptmp3)) )))
pstore(&res[j], \
padd(pload<ResPacket>(&res[j]), \
padd( \
padd(pcj.pmul(EIGEN_CAT(ploa , A0)<LhsPacket>(&lhs0[j]), ptmp0), \
pcj.pmul(EIGEN_CAT(ploa , A13)<LhsPacket>(&lhs1[j]), ptmp1)), \
padd(pcj.pmul(EIGEN_CAT(ploa , A2)<LhsPacket>(&lhs2[j]), ptmp2), \
pcj.pmul(EIGEN_CAT(ploa , A13)<LhsPacket>(&lhs3[j]), ptmp3)) )))
ei_conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
ei_conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
if(ConjugateRhs)
alpha = ei_conj(alpha);
alpha = conj(alpha);
enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned };
const Index columnsAtOnce = 4;
@@ -97,7 +99,7 @@ EIGEN_DONT_INLINE static void run(
// How many coeffs of the result do we have to skip to be aligned.
// Here we assume data are at least aligned on the base scalar type.
Index alignedStart = ei_first_aligned(res,size);
Index alignedStart = first_aligned(res,size);
Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
@@ -107,7 +109,7 @@ EIGEN_DONT_INLINE static void run(
: FirstAligned;
// we cannot assume the first element is aligned because of sub-matrices
const Index lhsAlignmentOffset = ei_first_aligned(lhs,size);
const Index lhsAlignmentOffset = first_aligned(lhs,size);
// find how many columns do we have to skip to be aligned with the result (if possible)
Index skipColumns = 0;
@@ -119,7 +121,7 @@ EIGEN_DONT_INLINE static void run(
}
else if (LhsPacketSize>1)
{
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize);
eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize);
while (skipColumns<LhsPacketSize &&
alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%LhsPacketSize))
@@ -132,11 +134,11 @@ EIGEN_DONT_INLINE static void run(
}
else
{
skipColumns = std::min(skipColumns,cols);
skipColumns = (std::min)(skipColumns,cols);
// note that the skiped columns are processed later.
}
ei_internal_assert( (alignmentPattern==NoneAligned)
eigen_internal_assert( (alignmentPattern==NoneAligned)
|| (skipColumns + columnsAtOnce >= cols)
|| LhsPacketSize > size
|| (size_t(lhs+alignedStart+lhsStride*skipColumns)%sizeof(LhsPacket))==0);
@@ -154,10 +156,10 @@ EIGEN_DONT_INLINE static void run(
Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
{
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]),
ptmp1 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset1)*rhsIncr]),
ptmp2 = ei_pset1<RhsPacket>(alpha*rhs[(i+2)*rhsIncr]),
ptmp3 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset3)*rhsIncr]);
RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs[i*rhsIncr]),
ptmp1 = pset1<RhsPacket>(alpha*rhs[(i+offset1)*rhsIncr]),
ptmp2 = pset1<RhsPacket>(alpha*rhs[(i+2)*rhsIncr]),
ptmp3 = pset1<RhsPacket>(alpha*rhs[(i+offset3)*rhsIncr]);
// this helps a lot generating better binary code
const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
@@ -169,10 +171,10 @@ EIGEN_DONT_INLINE static void run(
// process initial unaligned coeffs
for (Index j=0; j<alignedStart; ++j)
{
res[j] = cj.pmadd(lhs0[j], ei_pfirst(ptmp0), res[j]);
res[j] = cj.pmadd(lhs1[j], ei_pfirst(ptmp1), res[j]);
res[j] = cj.pmadd(lhs2[j], ei_pfirst(ptmp2), res[j]);
res[j] = cj.pmadd(lhs3[j], ei_pfirst(ptmp3), res[j]);
res[j] = cj.pmadd(lhs0[j], pfirst(ptmp0), res[j]);
res[j] = cj.pmadd(lhs1[j], pfirst(ptmp1), res[j]);
res[j] = cj.pmadd(lhs2[j], pfirst(ptmp2), res[j]);
res[j] = cj.pmadd(lhs3[j], pfirst(ptmp3), res[j]);
}
if (alignedSize>alignedStart)
@@ -193,32 +195,32 @@ EIGEN_DONT_INLINE static void run(
LhsPacket A00, A01, A02, A03, A10, A11, A12, A13;
ResPacket T0, T1;
A01 = ei_pload<LhsPacket>(&lhs1[alignedStart-1]);
A02 = ei_pload<LhsPacket>(&lhs2[alignedStart-2]);
A03 = ei_pload<LhsPacket>(&lhs3[alignedStart-3]);
A01 = pload<LhsPacket>(&lhs1[alignedStart-1]);
A02 = pload<LhsPacket>(&lhs2[alignedStart-2]);
A03 = pload<LhsPacket>(&lhs3[alignedStart-3]);
for (Index j = alignedStart; j<peeledSize; j+=peels*ResPacketSize)
{
A11 = ei_pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); ei_palign<1>(A01,A11);
A12 = ei_pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); ei_palign<2>(A02,A12);
A13 = ei_pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); ei_palign<3>(A03,A13);
A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11);
A12 = pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); palign<2>(A02,A12);
A13 = pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); palign<3>(A03,A13);
A00 = ei_pload<LhsPacket>(&lhs0[j]);
A10 = ei_pload<LhsPacket>(&lhs0[j+LhsPacketSize]);
T0 = pcj.pmadd(A00, ptmp0, ei_pload<ResPacket>(&res[j]));
T1 = pcj.pmadd(A10, ptmp0, ei_pload<ResPacket>(&res[j+ResPacketSize]));
A00 = pload<LhsPacket>(&lhs0[j]);
A10 = pload<LhsPacket>(&lhs0[j+LhsPacketSize]);
T0 = pcj.pmadd(A00, ptmp0, pload<ResPacket>(&res[j]));
T1 = pcj.pmadd(A10, ptmp0, pload<ResPacket>(&res[j+ResPacketSize]));
T0 = pcj.pmadd(A01, ptmp1, T0);
A01 = ei_pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); ei_palign<1>(A11,A01);
A01 = pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); palign<1>(A11,A01);
T0 = pcj.pmadd(A02, ptmp2, T0);
A02 = ei_pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); ei_palign<2>(A12,A02);
A02 = pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); palign<2>(A12,A02);
T0 = pcj.pmadd(A03, ptmp3, T0);
ei_pstore(&res[j],T0);
A03 = ei_pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); ei_palign<3>(A13,A03);
pstore(&res[j],T0);
A03 = pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); palign<3>(A13,A03);
T1 = pcj.pmadd(A11, ptmp1, T1);
T1 = pcj.pmadd(A12, ptmp2, T1);
T1 = pcj.pmadd(A13, ptmp3, T1);
ei_pstore(&res[j+ResPacketSize],T1);
pstore(&res[j+ResPacketSize],T1);
}
}
for (Index j = peeledSize; j<alignedSize; j+=ResPacketSize)
@@ -235,10 +237,10 @@ EIGEN_DONT_INLINE static void run(
/* process remaining coeffs (or all if there is no explicit vectorization) */
for (Index j=alignedSize; j<size; ++j)
{
res[j] = cj.pmadd(lhs0[j], ei_pfirst(ptmp0), res[j]);
res[j] = cj.pmadd(lhs1[j], ei_pfirst(ptmp1), res[j]);
res[j] = cj.pmadd(lhs2[j], ei_pfirst(ptmp2), res[j]);
res[j] = cj.pmadd(lhs3[j], ei_pfirst(ptmp3), res[j]);
res[j] = cj.pmadd(lhs0[j], pfirst(ptmp0), res[j]);
res[j] = cj.pmadd(lhs1[j], pfirst(ptmp1), res[j]);
res[j] = cj.pmadd(lhs2[j], pfirst(ptmp2), res[j]);
res[j] = cj.pmadd(lhs3[j], pfirst(ptmp3), res[j]);
}
}
@@ -249,7 +251,7 @@ EIGEN_DONT_INLINE static void run(
{
for (Index k=start; k<end; ++k)
{
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[k*rhsIncr]);
RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs[k*rhsIncr]);
const LhsScalar* lhs0 = lhs + k*lhsStride;
if (Vectorizable)
@@ -257,19 +259,19 @@ EIGEN_DONT_INLINE static void run(
/* explicit vectorization */
// process first unaligned result's coeffs
for (Index j=0; j<alignedStart; ++j)
res[j] += cj.pmul(lhs0[j], ei_pfirst(ptmp0));
res[j] += cj.pmul(lhs0[j], pfirst(ptmp0));
// process aligned result's coeffs
if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0)
for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
ei_pstore(&res[i], pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[i]), ptmp0, ei_pload<ResPacket>(&res[i])));
pstore(&res[i], pcj.pmadd(ploadu<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
else
for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
ei_pstore(&res[i], pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[i]), ptmp0, ei_pload<ResPacket>(&res[i])));
pstore(&res[i], pcj.pmadd(ploadu<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
}
// process remaining scalars (or all if no explicit vectorization)
for (Index i=alignedSize; i<size; ++i)
res[i] += cj.pmul(lhs0[i], ei_pfirst(ptmp0));
res[i] += cj.pmul(lhs0[i], pfirst(ptmp0));
}
if (skipColumns)
{
@@ -295,25 +297,25 @@ EIGEN_DONT_INLINE static void run(
* - no vectorization
*/
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct ei_general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
{
typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
Vectorizable = ei_packet_traits<LhsScalar>::Vectorizable && ei_packet_traits<RhsScalar>::Vectorizable
&& int(ei_packet_traits<LhsScalar>::size)==int(ei_packet_traits<RhsScalar>::size),
LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1,
RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1,
ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1
Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
&& int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
};
typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket;
typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket;
typedef typename ei_packet_traits<ResScalar>::type _ResPacket;
typedef typename packet_traits<LhsScalar>::type _LhsPacket;
typedef typename packet_traits<RhsScalar>::type _RhsPacket;
typedef typename packet_traits<ResScalar>::type _ResPacket;
typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
EIGEN_DONT_INLINE static void run(
Index rows, Index cols,
@@ -323,20 +325,20 @@ EIGEN_DONT_INLINE static void run(
ResScalar alpha)
{
EIGEN_UNUSED_VARIABLE(rhsIncr);
ei_internal_assert(rhsIncr==1);
eigen_internal_assert(rhsIncr==1);
#ifdef _EIGEN_ACCUMULATE_PACKETS
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
#endif
#define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) {\
RhsPacket b = ei_pload<RhsPacket>(&rhs[j]); \
ptmp0 = pcj.pmadd(EIGEN_CAT(ei_ploa,A0) <LhsPacket>(&lhs0[j]), b, ptmp0); \
ptmp1 = pcj.pmadd(EIGEN_CAT(ei_ploa,A13)<LhsPacket>(&lhs1[j]), b, ptmp1); \
ptmp2 = pcj.pmadd(EIGEN_CAT(ei_ploa,A2) <LhsPacket>(&lhs2[j]), b, ptmp2); \
ptmp3 = pcj.pmadd(EIGEN_CAT(ei_ploa,A13)<LhsPacket>(&lhs3[j]), b, ptmp3); }
RhsPacket b = pload<RhsPacket>(&rhs[j]); \
ptmp0 = pcj.pmadd(EIGEN_CAT(ploa,A0) <LhsPacket>(&lhs0[j]), b, ptmp0); \
ptmp1 = pcj.pmadd(EIGEN_CAT(ploa,A13)<LhsPacket>(&lhs1[j]), b, ptmp1); \
ptmp2 = pcj.pmadd(EIGEN_CAT(ploa,A2) <LhsPacket>(&lhs2[j]), b, ptmp2); \
ptmp3 = pcj.pmadd(EIGEN_CAT(ploa,A13)<LhsPacket>(&lhs3[j]), b, ptmp3); }
ei_conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
ei_conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 };
const Index rowsAtOnce = 4;
@@ -349,7 +351,7 @@ EIGEN_DONT_INLINE static void run(
// How many coeffs of the result do we have to skip to be aligned.
// Here we assume data are at least aligned on the base scalar type
// if that's not the case then vectorization is discarded, see below.
Index alignedStart = ei_first_aligned(rhs, depth);
Index alignedStart = first_aligned(rhs, depth);
Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
@@ -359,7 +361,7 @@ EIGEN_DONT_INLINE static void run(
: FirstAligned;
// we cannot assume the first element is aligned because of sub-matrices
const Index lhsAlignmentOffset = ei_first_aligned(lhs,depth);
const Index lhsAlignmentOffset = first_aligned(lhs,depth);
// find how many rows do we have to skip to be aligned with rhs (if possible)
Index skipRows = 0;
@@ -371,7 +373,7 @@ EIGEN_DONT_INLINE static void run(
}
else if (LhsPacketSize>1)
{
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize);
eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize);
while (skipRows<LhsPacketSize &&
alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%LhsPacketSize))
@@ -384,10 +386,10 @@ EIGEN_DONT_INLINE static void run(
}
else
{
skipRows = std::min(skipRows,Index(rows));
skipRows = (std::min)(skipRows,Index(rows));
// note that the skiped columns are processed later.
}
ei_internal_assert( alignmentPattern==NoneAligned
eigen_internal_assert( alignmentPattern==NoneAligned
|| LhsPacketSize==1
|| (skipRows + rowsAtOnce >= rows)
|| LhsPacketSize > depth
@@ -416,8 +418,8 @@ EIGEN_DONT_INLINE static void run(
if (Vectorizable)
{
/* explicit vectorization */
ResPacket ptmp0 = ei_pset1<ResPacket>(ResScalar(0)), ptmp1 = ei_pset1<ResPacket>(ResScalar(0)),
ptmp2 = ei_pset1<ResPacket>(ResScalar(0)), ptmp3 = ei_pset1<ResPacket>(ResScalar(0));
ResPacket ptmp0 = pset1<ResPacket>(ResScalar(0)), ptmp1 = pset1<ResPacket>(ResScalar(0)),
ptmp2 = pset1<ResPacket>(ResScalar(0)), ptmp3 = pset1<ResPacket>(ResScalar(0));
// process initial unaligned coeffs
// FIXME this loop get vectorized by the compiler !
@@ -450,27 +452,27 @@ EIGEN_DONT_INLINE static void run(
* than basic unaligned loads.
*/
LhsPacket A01, A02, A03, A11, A12, A13;
A01 = ei_pload<LhsPacket>(&lhs1[alignedStart-1]);
A02 = ei_pload<LhsPacket>(&lhs2[alignedStart-2]);
A03 = ei_pload<LhsPacket>(&lhs3[alignedStart-3]);
A01 = pload<LhsPacket>(&lhs1[alignedStart-1]);
A02 = pload<LhsPacket>(&lhs2[alignedStart-2]);
A03 = pload<LhsPacket>(&lhs3[alignedStart-3]);
for (Index j = alignedStart; j<peeledSize; j+=peels*RhsPacketSize)
{
RhsPacket b = ei_pload<RhsPacket>(&rhs[j]);
A11 = ei_pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); ei_palign<1>(A01,A11);
A12 = ei_pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); ei_palign<2>(A02,A12);
A13 = ei_pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); ei_palign<3>(A03,A13);
RhsPacket b = pload<RhsPacket>(&rhs[j]);
A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11);
A12 = pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); palign<2>(A02,A12);
A13 = pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); palign<3>(A03,A13);
ptmp0 = pcj.pmadd(ei_pload<LhsPacket>(&lhs0[j]), b, ptmp0);
ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j]), b, ptmp0);
ptmp1 = pcj.pmadd(A01, b, ptmp1);
A01 = ei_pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); ei_palign<1>(A11,A01);
A01 = pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); palign<1>(A11,A01);
ptmp2 = pcj.pmadd(A02, b, ptmp2);
A02 = ei_pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); ei_palign<2>(A12,A02);
A02 = pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); palign<2>(A12,A02);
ptmp3 = pcj.pmadd(A03, b, ptmp3);
A03 = ei_pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); ei_palign<3>(A13,A03);
A03 = pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); palign<3>(A13,A03);
b = ei_pload<RhsPacket>(&rhs[j+RhsPacketSize]);
ptmp0 = pcj.pmadd(ei_pload<LhsPacket>(&lhs0[j+LhsPacketSize]), b, ptmp0);
b = pload<RhsPacket>(&rhs[j+RhsPacketSize]);
ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j+LhsPacketSize]), b, ptmp0);
ptmp1 = pcj.pmadd(A11, b, ptmp1);
ptmp2 = pcj.pmadd(A12, b, ptmp2);
ptmp3 = pcj.pmadd(A13, b, ptmp3);
@@ -484,10 +486,10 @@ EIGEN_DONT_INLINE static void run(
_EIGEN_ACCUMULATE_PACKETS(du,du,du);
break;
}
tmp0 += ei_predux(ptmp0);
tmp1 += ei_predux(ptmp1);
tmp2 += ei_predux(ptmp2);
tmp3 += ei_predux(ptmp3);
tmp0 += predux(ptmp0);
tmp1 += predux(ptmp1);
tmp2 += predux(ptmp2);
tmp3 += predux(ptmp3);
}
} // end explicit vectorization
@@ -513,7 +515,7 @@ EIGEN_DONT_INLINE static void run(
for (Index i=start; i<end; ++i)
{
EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0);
ResPacket ptmp0 = ei_pset1<ResPacket>(tmp0);
ResPacket ptmp0 = pset1<ResPacket>(tmp0);
const LhsScalar* lhs0 = lhs + i*lhsStride;
// process first unaligned result's coeffs
// FIXME this loop get vectorized by the compiler !
@@ -525,11 +527,11 @@ EIGEN_DONT_INLINE static void run(
// process aligned rhs coeffs
if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0)
for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
ptmp0 = pcj.pmadd(ei_pload<LhsPacket>(&lhs0[j]), ei_pload<RhsPacket>(&rhs[j]), ptmp0);
ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j]), pload<RhsPacket>(&rhs[j]), ptmp0);
else
for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
ptmp0 = pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[j]), ei_pload<RhsPacket>(&rhs[j]), ptmp0);
tmp0 += ei_predux(ptmp0);
ptmp0 = pcj.pmadd(ploadu<LhsPacket>(&lhs0[j]), pload<RhsPacket>(&rhs[j]), ptmp0);
tmp0 += predux(ptmp0);
}
// process remaining scalars
@@ -552,4 +554,6 @@ EIGEN_DONT_INLINE static void run(
}
};
} // end namespace internal
#endif // EIGEN_GENERAL_MATRIX_VECTOR_H

View File

@@ -25,19 +25,21 @@
#ifndef EIGEN_PARALLELIZER_H
#define EIGEN_PARALLELIZER_H
namespace internal {
/** \internal */
inline void ei_manage_multi_threading(Action action, int* v)
inline void manage_multi_threading(Action action, int* v)
{
static int m_maxThreads = -1;
static EIGEN_UNUSED int m_maxThreads = -1;
if(action==SetAction)
{
ei_internal_assert(v!=0);
eigen_internal_assert(v!=0);
m_maxThreads = *v;
}
else if(action==GetAction)
{
ei_internal_assert(v!=0);
eigen_internal_assert(v!=0);
#ifdef EIGEN_HAS_OPENMP
if(m_maxThreads>0)
*v = m_maxThreads;
@@ -49,7 +51,7 @@ inline void ei_manage_multi_threading(Action action, int* v)
}
else
{
ei_internal_assert(false);
eigen_internal_assert(false);
}
}
@@ -58,7 +60,7 @@ inline void ei_manage_multi_threading(Action action, int* v)
inline int nbThreads()
{
int ret;
ei_manage_multi_threading(GetAction, &ret);
manage_multi_threading(GetAction, &ret);
return ret;
}
@@ -66,7 +68,7 @@ inline int nbThreads()
* \sa nbThreads */
inline void setNbThreads(int v)
{
ei_manage_multi_threading(SetAction, &v);
manage_multi_threading(SetAction, &v);
}
template<typename Index> struct GemmParallelInfo
@@ -81,7 +83,7 @@ template<typename Index> struct GemmParallelInfo
};
template<bool Condition, typename Functor, typename Index>
void ei_parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
{
#ifndef EIGEN_HAS_OPENMP
// FIXME the transpose variable is only needed to properly split
@@ -122,7 +124,7 @@ void ei_parallelize_gemm(const Functor& func, Index rows, Index cols, bool trans
Index blockCols = (cols / threads) & ~Index(0x3);
Index blockRows = (rows / threads) & ~Index(0x7);
GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads];
#pragma omp parallel for schedule(static,1) num_threads(threads)
@@ -147,4 +149,6 @@ void ei_parallelize_gemm(const Functor& func, Index rows, Index cols, bool trans
#endif
}
} // end namespace internal
#endif // EIGEN_PARALLELIZER_H

View File

@@ -25,12 +25,14 @@
#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
namespace internal {
// pack a selfadjoint block diagonal for use with the gebp_kernel
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
struct ei_symm_pack_lhs
struct symm_pack_lhs
{
template<int BlockRows> inline
void pack(Scalar* blockA, const ei_const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
{
// normal copy
for(Index k=0; k<i; k++)
@@ -41,9 +43,9 @@ struct ei_symm_pack_lhs
for(Index k=i; k<i+BlockRows; k++)
{
for(Index w=0; w<h; w++)
blockA[count++] = ei_conj(lhs(k, i+w)); // transposed
blockA[count++] = conj(lhs(k, i+w)); // transposed
blockA[count++] = ei_real(lhs(k,k)); // real (diagonal)
blockA[count++] = real(lhs(k,k)); // real (diagonal)
for(Index w=h+1; w<BlockRows; w++)
blockA[count++] = lhs(i+w, k); // normal
@@ -52,11 +54,11 @@ struct ei_symm_pack_lhs
// transposed copy
for(Index k=i+BlockRows; k<cols; k++)
for(Index w=0; w<BlockRows; w++)
blockA[count++] = ei_conj(lhs(k, i+w)); // transposed
blockA[count++] = conj(lhs(k, i+w)); // transposed
}
void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
{
ei_const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
Index count = 0;
Index peeled_mc = (rows/Pack1)*Pack1;
for(Index i=0; i<peeled_mc; i+=Pack1)
@@ -76,23 +78,23 @@ struct ei_symm_pack_lhs
for(Index k=0; k<i; k++)
blockA[count++] = lhs(i, k); // normal
blockA[count++] = ei_real(lhs(i, i)); // real (diagonal)
blockA[count++] = real(lhs(i, i)); // real (diagonal)
for(Index k=i+1; k<cols; k++)
blockA[count++] = ei_conj(lhs(k, i)); // transposed
blockA[count++] = conj(lhs(k, i)); // transposed
}
}
};
template<typename Scalar, typename Index, int nr, int StorageOrder>
struct ei_symm_pack_rhs
struct symm_pack_rhs
{
enum { PacketSize = ei_packet_traits<Scalar>::size };
enum { PacketSize = packet_traits<Scalar>::size };
void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
{
Index end_k = k2 + rows;
Index count = 0;
ei_const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
Index packet_cols = (cols/nr)*nr;
// first part: normal case
@@ -112,18 +114,18 @@ struct ei_symm_pack_rhs
}
// second part: diagonal block
for(Index j2=k2; j2<std::min(k2+rows,packet_cols); j2+=nr)
for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
{
// again we can split vertically in three different parts (transpose, symmetric, normal)
// transpose
for(Index k=k2; k<j2; k++)
{
blockB[count+0] = ei_conj(rhs(j2+0,k));
blockB[count+1] = ei_conj(rhs(j2+1,k));
blockB[count+0] = conj(rhs(j2+0,k));
blockB[count+1] = conj(rhs(j2+1,k));
if (nr==4)
{
blockB[count+2] = ei_conj(rhs(j2+2,k));
blockB[count+3] = ei_conj(rhs(j2+3,k));
blockB[count+2] = conj(rhs(j2+2,k));
blockB[count+3] = conj(rhs(j2+3,k));
}
count += nr;
}
@@ -135,11 +137,11 @@ struct ei_symm_pack_rhs
for (Index w=0 ; w<h; ++w)
blockB[count+w] = rhs(k,j2+w);
blockB[count+h] = ei_real(rhs(k,k));
blockB[count+h] = real(rhs(k,k));
// transpose
for (Index w=h+1 ; w<nr; ++w)
blockB[count+w] = ei_conj(rhs(j2+w,k));
blockB[count+w] = conj(rhs(j2+w,k));
count += nr;
++h;
}
@@ -162,12 +164,12 @@ struct ei_symm_pack_rhs
{
for(Index k=k2; k<end_k; k++)
{
blockB[count+0] = ei_conj(rhs(j2+0,k));
blockB[count+1] = ei_conj(rhs(j2+1,k));
blockB[count+0] = conj(rhs(j2+0,k));
blockB[count+1] = conj(rhs(j2+1,k));
if (nr==4)
{
blockB[count+2] = ei_conj(rhs(j2+2,k));
blockB[count+3] = ei_conj(rhs(j2+3,k));
blockB[count+2] = conj(rhs(j2+2,k));
blockB[count+3] = conj(rhs(j2+3,k));
}
count += nr;
}
@@ -177,16 +179,16 @@ struct ei_symm_pack_rhs
for(Index j2=packet_cols; j2<cols; ++j2)
{
// transpose
Index half = std::min(end_k,j2);
Index half = (std::min)(end_k,j2);
for(Index k=k2; k<half; k++)
{
blockB[count] = ei_conj(rhs(j2,k));
blockB[count] = conj(rhs(j2,k));
count += 1;
}
if(half==j2 && half<k2+rows)
{
blockB[count] = ei_real(rhs(j2,j2));
blockB[count] = real(rhs(j2,j2));
count += 1;
}
else
@@ -209,12 +211,12 @@ template <typename Scalar, typename Index,
int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
int ResStorageOrder>
struct ei_product_selfadjoint_matrix;
struct product_selfadjoint_matrix;
template <typename Scalar, typename Index,
int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
{
static EIGEN_STRONG_INLINE void run(
@@ -224,7 +226,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint
Scalar* res, Index resStride,
Scalar alpha)
{
ei_product_selfadjoint_matrix<Scalar, Index,
product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
@@ -237,7 +239,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint
template <typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
{
static EIGEN_DONT_INLINE void run(
@@ -249,32 +251,32 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
{
Index size = rows;
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
typedef ei_gebp_traits<Scalar,Scalar> Traits;
typedef gebp_traits<Scalar,Scalar> Traits;
Index kc = size; // cache block size along the K direction
Index mc = rows; // cache block size along the M direction
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
// kc must be smaller than mc
kc = std::min(kc,mc);
kc = (std::min)(kc,mc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
Scalar* blockB = allocatedBlockB + sizeW;
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
ei_symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
for(Index k2=0; k2<size; k2+=kc)
{
const Index actual_kc = std::min(k2+kc,size)-k2;
const Index actual_kc = (std::min)(k2+kc,size)-k2;
// we have selected one row panel of rhs and one column panel of lhs
// pack rhs's panel into a sequential chunk of memory
@@ -287,7 +289,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
// 3 - the panel below the diagonal block => generic packed copy
for(Index i2=0; i2<k2; i2+=mc)
{
const Index actual_mc = std::min(i2+mc,k2)-i2;
const Index actual_mc = (std::min)(i2+mc,k2)-i2;
// transposed packed copy
pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);
@@ -295,7 +297,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
}
// the block diagonal
{
const Index actual_mc = std::min(k2+kc,size)-k2;
const Index actual_mc = (std::min)(k2+kc,size)-k2;
// symmetric packed copy
pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
@@ -304,16 +306,13 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
for(Index i2=k2+kc; i2<size; i2+=mc)
{
const Index actual_mc = std::min(i2+mc,size)-i2;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
const Index actual_mc = (std::min)(i2+mc,size)-i2;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
}
ei_aligned_stack_delete(Scalar, blockA, kc*mc);
ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
}
};
@@ -321,7 +320,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
template <typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
{
static EIGEN_DONT_INLINE void run(
@@ -333,54 +332,54 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
{
Index size = cols;
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
typedef ei_gebp_traits<Scalar,Scalar> Traits;
typedef gebp_traits<Scalar,Scalar> Traits;
Index kc = size; // cache block size along the K direction
Index mc = rows; // cache block size along the M direction
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
Scalar* blockB = allocatedBlockB + sizeW;
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
ei_symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
for(Index k2=0; k2<size; k2+=kc)
{
const Index actual_kc = std::min(k2+kc,size)-k2;
const Index actual_kc = (std::min)(k2+kc,size)-k2;
pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
// => GEPP
for(Index i2=0; i2<rows; i2+=mc)
{
const Index actual_mc = std::min(i2+mc,rows)-i2;
const Index actual_mc = (std::min)(i2+mc,rows)-i2;
pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
}
ei_aligned_stack_delete(Scalar, blockA, kc*mc);
ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
}
};
} // end namespace internal
/***************************************************************************
* Wrapper to ei_product_selfadjoint_matrix
* Wrapper to product_selfadjoint_matrix
***************************************************************************/
namespace internal {
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct ei_traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
: ei_traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
: traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
{};
}
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
@@ -399,7 +398,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
@@ -407,18 +406,18 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
ei_product_selfadjoint_matrix<Scalar, Index,
internal::product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(LhsIsUpper,
ei_traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
EIGEN_LOGICAL_XOR(RhsIsUpper,
ei_traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
ei_traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
::run(
lhs.rows(), rhs.cols(), // sizes
&lhs.coeff(0,0), lhs.outerStride(), // lhs info
&rhs.coeff(0,0), rhs.outerStride(), // rhs info
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
&dst.coeffRef(0,0), dst.outerStride(), // result info
actualAlpha // alpha
);

View File

@@ -25,19 +25,23 @@
#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H
#define EIGEN_SELFADJOINT_MATRIX_VECTOR_H
namespace internal {
/* Optimized selfadjoint matrix * vector product:
* This algorithm processes 2 columns at once, which both reduces
* the number of loads/stores of the result by a factor of 2 and reduces
* the instruction dependency.
*/
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
static EIGEN_DONT_INLINE void product_selfadjoint_vector(
Index size,
const Scalar* lhs, Index lhsStride,
const Scalar* _rhs, Index rhsIncr,
Scalar* res, Scalar alpha)
Scalar* res,
Scalar alpha)
{
typedef typename ei_packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::type Packet;
typedef typename NumTraits<Scalar>::Real RealScalar;
const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
enum {
@@ -46,70 +50,70 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
FirstTriangular = IsRowMajor == IsLower
};
ei_conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0;
ei_conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0;
conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex, ConjugateRhs> cjd;
ei_conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0;
ei_conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;
conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0;
conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;
Scalar cjAlpha = ConjugateRhs ? ei_conj(alpha) : alpha;
Scalar cjAlpha = ConjugateRhs ? conj(alpha) : alpha;
// FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed.
// if the rhs is not sequentially stored in memory we copy it to a temporary buffer,
// this is because we need to extract packets
const Scalar* EIGEN_RESTRICT rhs = _rhs;
ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ? const_cast<Scalar*>(_rhs) : 0);
if (rhsIncr!=1)
{
Scalar* r = ei_aligned_stack_new(Scalar, size);
const Scalar* it = _rhs;
for (Index i=0; i<size; ++i, it+=rhsIncr)
r[i] = *it;
rhs = r;
rhs[i] = *it;
}
Index bound = std::max(Index(0),size-8) & 0xfffffffe;
Index bound = (std::max)(Index(0),size-8) & 0xfffffffe;
if (FirstTriangular)
bound = size - bound;
for (Index j=FirstTriangular ? bound : 0;
j<(FirstTriangular ? size : bound);j+=2)
{
register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
Scalar t0 = cjAlpha * rhs[j];
Packet ptmp0 = ei_pset1<Packet>(t0);
Packet ptmp0 = pset1<Packet>(t0);
Scalar t1 = cjAlpha * rhs[j+1];
Packet ptmp1 = ei_pset1<Packet>(t1);
Packet ptmp1 = pset1<Packet>(t1);
Scalar t2 = 0;
Packet ptmp2 = ei_pset1<Packet>(t2);
Packet ptmp2 = pset1<Packet>(t2);
Scalar t3 = 0;
Packet ptmp3 = ei_pset1<Packet>(t3);
Packet ptmp3 = pset1<Packet>(t3);
size_t starti = FirstTriangular ? 0 : j+2;
size_t endi = FirstTriangular ? j : size;
size_t alignedEnd = starti;
size_t alignedStart = (starti) + ei_first_aligned(&res[starti], endi-starti);
alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
size_t alignedStart = (starti) + first_aligned(&res[starti], endi-starti);
size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
res[j] += cj0.pmul(A0[j], t0);
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
res[j] += cjd.pmul(internal::real(A0[j]), t0);
res[j+1] += cjd.pmul(internal::real(A1[j+1]), t1);
if(FirstTriangular)
{
res[j+1] += cj0.pmul(A1[j+1], t1);
res[j] += cj0.pmul(A1[j], t1);
t3 += cj1.pmul(A1[j], rhs[j]);
}
else
{
res[j+1] += cj0.pmul(A0[j+1],t0) + cj0.pmul(A1[j+1],t1);
res[j+1] += cj0.pmul(A0[j+1],t0);
t2 += cj1.pmul(A0[j+1], rhs[j+1]);
}
for (size_t i=starti; i<alignedStart; ++i)
{
res[i] += t0 * A0[i] + t1 * A1[i];
t2 += ei_conj(A0[i]) * rhs[i];
t3 += ei_conj(A1[i]) * rhs[i];
t2 += conj(A0[i]) * rhs[i];
t3 += conj(A1[i]) * rhs[i];
}
// Yes, this is an optimization for gcc 4.3 and 4.4 (=> huge speed up)
// gcc 4.2 does this optimization automatically.
@@ -119,15 +123,15 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
Scalar* EIGEN_RESTRICT resIt = res + alignedStart;
for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize)
{
Packet A0i = ei_ploadu<Packet>(a0It); a0It += PacketSize;
Packet A1i = ei_ploadu<Packet>(a1It); a1It += PacketSize;
Packet Bi = ei_ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases
Packet Xi = ei_pload <Packet>(resIt);
Packet A0i = ploadu<Packet>(a0It); a0It += PacketSize;
Packet A1i = ploadu<Packet>(a1It); a1It += PacketSize;
Packet Bi = ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases
Packet Xi = pload <Packet>(resIt);
Xi = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi));
ptmp2 = pcj1.pmadd(A0i, Bi, ptmp2);
ptmp3 = pcj1.pmadd(A1i, Bi, ptmp3);
ei_pstore(resIt,Xi); resIt += PacketSize;
pstore(resIt,Xi); resIt += PacketSize;
}
for (size_t i=alignedEnd; i<endi; i++)
{
@@ -136,35 +140,38 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
t3 += cj1.pmul(A1[i], rhs[i]);
}
res[j] += alpha * (t2 + ei_predux(ptmp2));
res[j+1] += alpha * (t3 + ei_predux(ptmp3));
res[j] += alpha * (t2 + predux(ptmp2));
res[j+1] += alpha * (t3 + predux(ptmp3));
}
for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
{
register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
Scalar t1 = cjAlpha * rhs[j];
Scalar t2 = 0;
res[j] += cj0.pmul(A0[j],t1);
for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++) {
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
res[j] += cjd.pmul(internal::real(A0[j]), t1);
for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
{
res[i] += cj0.pmul(A0[i], t1);
t2 += cj1.pmul(A0[i], rhs[i]);
}
res[j] += alpha * t2;
}
if(rhsIncr!=1)
ei_aligned_stack_delete(Scalar, const_cast<Scalar*>(rhs), size);
}
} // end namespace internal
/***************************************************************************
* Wrapper to ei_product_selfadjoint_vector
* Wrapper to product_selfadjoint_vector
***************************************************************************/
namespace internal {
template<typename Lhs, int LhsMode, typename Rhs>
struct ei_traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> >
: ei_traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> >
struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> >
: traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> >
{};
}
template<typename Lhs, int LhsMode, typename Rhs>
struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
@@ -178,9 +185,13 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
{
ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
typedef typename Dest::Scalar ResScalar;
typedef typename Base::RhsScalar RhsScalar;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
@@ -188,23 +199,59 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
ei_assert(dst.innerStride()==1 && "not implemented yet");
enum {
EvalToDest = (Dest::InnerStrideAtCompileTime==1),
UseRhs = (_ActualRhsType::InnerStrideAtCompileTime==1)
};
ei_product_selfadjoint_vector<Scalar, Index, (ei_traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>
internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;
internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs;
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
EvalToDest ? dest.data() : static_dest.data());
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(),
UseRhs ? const_cast<RhsScalar*>(rhs.data()) : static_rhs.data());
if(!EvalToDest)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
MappedDest(actualDestPtr, dest.size()) = dest;
}
if(!UseRhs)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = rhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
}
internal::product_selfadjoint_vector<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>
(
lhs.rows(), // size
&lhs.coeff(0,0), lhs.outerStride(), // lhs info
&rhs.coeff(0), rhs.innerStride(), // rhs info
&dst.coeffRef(0), // result info
actualAlpha // scale factor
lhs.rows(), // size
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
actualRhsPtr, 1, // rhs info
actualDestPtr, // result info
actualAlpha // scale factor
);
if(!EvalToDest)
dest = MappedDest(actualDestPtr, dest.size());
}
};
namespace internal {
template<typename Lhs, typename Rhs, int RhsMode>
struct ei_traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> >
: ei_traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> >
struct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> >
: traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> >
{};
}
template<typename Lhs, typename Rhs, int RhsMode>
struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
@@ -218,28 +265,12 @@ struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
{
ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
ei_assert(dst.innerStride()==1 && "not implemented yet");
// transpose the product
ei_product_selfadjoint_vector<Scalar, Index, (ei_traits<_ActualRhsType>::Flags&RowMajorBit) ? ColMajor : RowMajor, int(RhsUpLo)==Upper ? Lower : Upper,
bool(RhsBlasTraits::NeedToConjugate), bool(LhsBlasTraits::NeedToConjugate)>
(
rhs.rows(), // size
&rhs.coeff(0,0), rhs.outerStride(), // lhs info
&lhs.coeff(0), lhs.innerStride(), // rhs info
&dst.coeffRef(0), // result info
actualAlpha // scale factor
);
// let's simply transpose the product
Transpose<Dest> destT(dest);
SelfadjointProductMatrix<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false,
Transpose<const Lhs>, 0, true>(m_rhs.transpose(), m_lhs.transpose()).scaleAndAddTo(destT, alpha);
}
};

Some files were not shown because too many files have changed in this diff Show More