Compare commits

...

778 Commits

Author SHA1 Message Date
Benoit Jacob
4931a719f4 bump 2011-03-14 14:10:05 -04:00
Jitse Niesen
27f34269d5 Document EIGEN_DEFAULT_DENSE_INDEX_TYPE.
Also, expand description of EIGEN_DONT_ALIGN.
2011-03-11 11:15:44 +00:00
Jitse Niesen
e7d2376688 Change int to Index in equalsIdentity().
This fixes compilation errors in nullary test on 64-bits machines.
2011-03-11 11:06:13 +00:00
Benoit Jacob
dc36efbb8f fix bug #219: Map Flags AlignedBit was miscomputed, didn't account for EIGEN_ALIGN 2011-03-10 10:17:17 -05:00
Benoit Jacob
9a47fb289b add test for EIGEN_DONT_ALIGN and EIGEN_DONT_ALIGN_STATICALLY, cf recent bugs (214 etc) and changeset 56818d907e 2011-03-10 09:44:59 -05:00
Jitse Niesen
151e3294cf Fix equalsIdentity() for rectangular matrices. 2011-03-10 13:49:06 +00:00
Oliver Ruepp
5d1263e7c5 bug #37: fix resizing when the destination sparse matrix is row major 2011-03-08 16:37:59 +01:00
Gael Guennebaud
c6c6c34909 repeat nullary tests, and fix some tests 2011-03-07 16:41:59 +01:00
Jitse Niesen
931edea57d Tweak geo_quaternion test to squash intermittent failures. 2011-03-07 11:42:55 +00:00
Benoit Jacob
bfcad536e8 * bug #206: correctly forward computationOptions and work towards avoiding mallocs after preallocation, with unit test.
* added EIGEN_RUNTIME_NO_MALLOC and new set_is_malloc_allowed() function to implement that test
2011-03-06 20:59:25 -05:00
Benoit Jacob
b464fc19bc try to fix a ICC 11.1 compiler error (bug #217) 2011-03-06 19:27:31 -05:00
Benoit Jacob
c541d0a62e disable ICC 12 warning 279 - controlling expression is constant 2011-03-06 19:06:44 -05:00
Benoit Jacob
b43d92a5a2 The Eigen2 intrusive std::vector hack really can't be supported in eigen3 (bug #215) 2011-03-04 10:24:41 -05:00
Benoit Jacob
56818d907e Make EIGEN_ALIGN16 always align to fix crashes with EIGEN_DONT_ALIGN_STATICALLY. New macro EIGEN_USER_ALIGN16 had the old behavior i.e. honors user preference. 2011-03-04 09:57:49 -05:00
Sameer Sheorey
e9868f438b Changed debug/gdb/printers.py to correctly display variable sized matrices.
There is no python error now.
2011-03-02 10:47:54 -06:00
Gael Guennebaud
4f0909b5f0 fix bug #212 (installation of Eigen2Support/Geometry) 2011-03-04 14:16:58 +01:00
Jitse Niesen
6cac61ca3e Copy fix of unit test when GSL is enabled to eigen2 test suite. 2011-03-04 11:04:07 +00:00
Jitse Niesen
1180ede36d Escape hash character in docs as required by doxygen. 2011-03-03 15:19:11 +00:00
Jitse Niesen
99fa279ed1 Use copy_bool() workaround in Eigen2 test suite.
See bug #89 and changeset 59596efdf7.
2011-03-03 14:17:23 +00:00
Jitse Niesen
dbab12d6b0 Fix bug #205: eigen2_adjoint_5 test fails. 2011-03-02 22:00:48 +00:00
Gael Guennebaud
dc727d86f1 extend unit tests of Transform * MatrixBase and Transform * Homogeneous 2011-03-02 19:34:39 +01:00
Gael Guennebaud
5cec29162b fix compilation in the case of 1D Transform 2011-03-02 19:29:55 +01:00
Gael Guennebaud
703c8a0cc6 fix compilation when mixing CompactAffine with Homogeneous objects 2011-03-02 19:27:13 +01:00
Gael Guennebaud
d30f0c0953 fix transform * matrix products: in particular it now truly considers the rhs as a set of (homogeneous) points and does not neglect the homogeneous coordinates in the case of affine transform 2011-03-02 19:26:38 +01:00
Gael Guennebaud
adacacb285 fix bug #204: limit integer values to numbers which are representable using float 2011-03-02 14:24:26 +01:00
Gael Guennebaud
c8e1b679fa re-enable fast pset1-pstore by introducing a new higher level pstore1 function 2011-03-02 10:55:44 +01:00
Gael Guennebaud
951e238430 now fixing "unsupported" "legacy" code... 2011-03-01 16:45:46 +01:00
Benoit Jacob
9c5c8d8916 Added tag 3.0-beta4 for changeset 77fc6a9914 2011-02-28 00:55:59 -05:00
Benoit Jacob
77fc6a9914 bump 2011-02-28 00:55:52 -05:00
Benoit Jacob
eef03525b8 fix bug #203: revert to using _mm_set1_p[sd] 2011-02-28 00:04:05 -05:00
Benoit Jacob
31621ff0ef relax condition in matrix_exponential test for clang 2011-02-27 23:25:14 -05:00
Benoit Jacob
0b44893b4e fix umeyama test 2011-02-27 23:20:45 -05:00
Benoit Jacob
8cad73072e fix stable_norm test: the |small| value was 0 on clang with complex<float>. 2011-02-27 22:35:49 -05:00
Benoit Jacob
9be2712bf7 remove now-useless comments 2011-02-27 22:35:17 -05:00
Benoit Jacob
0612768c1c fix bug #201: Clang too has intrinsics bugs preventing us to use custom unaligned loads 2011-02-27 21:59:07 -05:00
Benoit Jacob
32025a2510 disable BVH test on Clang++. Looks like there's a good reason why BVH is unsupported. It seems to have a very weird usage pattern, relying on an externally defined bounding_box function in a naive way. 2011-02-27 21:37:34 -05:00
Benoit Jacob
771e64200f fix compilation of unit tests with clang 2011-02-27 20:33:58 -05:00
Benoit Jacob
4846c76d9d shut up a stupid clang 2.8 warning 2011-02-27 20:18:03 -05:00
Benoit Jacob
afc9efca15 fix compilation with clang 2.8 2011-02-27 20:17:47 -05:00
Benoit Jacob
ea7d872181 documentation fixes 2011-02-27 17:43:10 -05:00
Benoit Jacob
b6299c974f add option to build in 32bit mode 2011-02-27 17:27:23 -05:00
Benoit Jacob
b3544ce2ae bug #195 - fix this once and for all: just never use _mm_load_sd on gcc/i386, it generates redundant x87 ops 2011-02-27 17:26:59 -05:00
Jitse Niesen
a8f5ef9388 Document (non)sorting of eigenvalues.
Also, update docs for (Generalized)SelfAdjointEigenSolver to reflect that these
two classes were split apart.
2011-02-27 14:06:55 +00:00
Jitse Niesen
58abf0eb98 Use absolute error to test sum in which cancellation may occur. 2011-02-25 08:56:37 +00:00
Gael Guennebaud
ef73265987 to ease debugging let's catch invalid template options in Transform 2011-02-25 09:03:24 +01:00
Gael Guennebaud
4fbd78d993 fix compilation with gcc 3.4 2011-02-25 09:02:15 +01:00
Benoit Jacob
5dfae4524b fix bug #195: fast unaligned load for integer using _mm_load_sd failed when the value interpreted as a NaN 2011-02-24 10:31:57 -05:00
Hauke Heibel
2064c59878 Improved docs of PlainObjectBase::conservativeResize methods. 2011-02-24 15:48:41 +01:00
Gael Guennebaud
bb9a465c5a fix AltiVec ploaddup 2011-02-24 00:23:50 +03:00
Gael Guennebaud
28d17c5390 bounds the range of random integers for AltiVec 2011-02-24 00:22:53 +03:00
Gael Guennebaud
4bfe38eda2 extend testing of ploaddup 2011-02-24 00:22:10 +03:00
Gael Guennebaud
23aae0d63e fix pset1 for complex 2011-02-23 21:24:47 +03:00
Gael Guennebaud
0dfea7fce4 improve packetmath unit test 2011-02-23 21:24:26 +03:00
Gael Guennebaud
c121e6f390 implement ploaddup for complex and SSE/NEON even though they are not used in practice 2011-02-23 16:31:42 +01:00
Gael Guennebaud
955c099eb5 implement ploaddup for altivec and add respective unit test 2011-02-23 18:20:55 +03:00
Gael Guennebaud
a00aaf7f7e fix overflow in packetmath unit test 2011-02-23 17:57:18 +03:00
Gael Guennebaud
6e01780541 fix a couple of issues with pcplxflip 2011-02-23 17:51:40 +03:00
Gael Guennebaud
939f0327b6 mention reverse and replicate in the quick ref 2011-02-23 15:31:16 +01:00
Gael Guennebaud
78e1a62c54 implement pcplxflip for altivec 2011-02-23 14:20:58 +01:00
Gael Guennebaud
59eeb67187 add unit test for pcplxflip 2011-02-23 14:20:33 +01:00
Gael Guennebaud
b8374aec00 implement workarounds for MSVC IDEs and the Experimental target 2011-02-23 11:53:20 +01:00
Gael Guennebaud
7dc18b20bb same for neon 2011-02-23 09:41:55 +01:00
Gael Guennebaud
32e7dae776 Altivec: fix infinite loop (ei_ -> internal:: change) 2011-02-23 09:41:02 +01:00
Gael Guennebaud
9ab503903e suppress unused warning 2011-02-23 09:32:55 +01:00
Gael Guennebaud
14b164b00e do not try to use Eigen's blas/lapack if they cannot be compiled 2011-02-23 09:25:32 +01:00
Gael Guennebaud
c78b5fd9aa fix no newline warning 2011-02-23 09:23:11 +01:00
Gael Guennebaud
2fb5567e08 add missing AlignedOnScalar 2011-02-22 21:25:47 +01:00
Benoit Jacob
3df134dec2 fix icc warning #68 2011-02-22 10:11:03 -05:00
Benoit Jacob
c58a2ff03a add EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS non-default option. Use it in our own CMakeLists. also add a include-guard-like mechanism to prevent doing unmatched #pragma warning push/pop. 2011-02-22 10:05:41 -05:00
Benoit Jacob
9e1127619c merge 2011-02-22 09:33:01 -05:00
Benoit Jacob
720767ae40 ICC 12 / linux only defined __INTEL_COMPILER, not __intel_compiler 2011-02-22 09:32:39 -05:00
Benoit Jacob
d8e97aee89 shut up stupid ICC warnings 2011-02-22 09:31:22 -05:00
Benoit Jacob
625814464e fix legitimate ICC 12 warning 2011-02-22 09:30:54 -05:00
Gael Guennebaud
39b27fb656 altivec compilation fix 2011-02-22 15:26:28 +01:00
Benoit Jacob
25579df2d4 'fix' a couple of clang -Wconstant-logical-operand warnings (still not convinced about the pertinence of that warning) 2011-02-22 08:54:55 -05:00
Benoit Jacob
3884308da7 __attribute__((flatten)) seems to be recognized by neither clang nor icc despite these compilers defining __GNUC__. 2011-02-22 08:40:37 -05:00
Gael Guennebaud
68631e28d4 also test non_projective_only with row major transformations 2011-02-22 14:26:32 +01:00
Benoit Jacob
39d3bc2394 fix bug #190: directly pass Transform Options to Matrix, allowing to use RowMajor. Fix issues in Transform with non-default Options. 2011-02-22 08:14:38 -05:00
Gael Guennebaud
659c97ee49 gcc 4.4 also defines float32_t as a special type 2011-02-22 10:04:09 +01:00
Gael Guennebaud
769eeac35e disable output compression since this feature seems to be broken 2011-02-21 21:19:38 +01:00
Gael Guennebaud
51da67f211 more compilation fixes for altivec 2011-02-21 20:36:20 +01:00
Gael Guennebaud
05545d0197 fix compilation 2011-02-21 17:47:31 +01:00
Gael Guennebaud
8bee573a78 workaround ICC aggressive optimization 2011-02-21 16:17:58 +01:00
Gael Guennebaud
fb1a29fed5 fix ICE and warning with gcc 4.2.4 2011-02-21 16:11:18 +01:00
Gael Guennebaud
e129e985c3 link to blas/lapack only when needed, and use the static versions to hopefully workaround weird linking issues to gfortranbegin (see jitse dashboard) 2011-02-21 15:48:37 +01:00
Gael Guennebaud
2d5ea82807 fix bug #176 (workaround a too aggressive optimization made by ICC) 2011-02-21 11:00:07 +01:00
Gael Guennebaud
3c00e3da03 enable some tests that have been commented out 2011-02-18 18:08:58 +01:00
Gael Guennebaud
434817164e fix umfpack with complexes 2011-02-18 18:07:59 +01:00
Gael Guennebaud
2c1ac23c62 remove unused code 2011-02-18 17:54:48 +01:00
Gael Guennebaud
a0e5b00280 forgot that one, again 2011-02-18 17:50:36 +01:00
Gael Guennebaud
6456b74a89 merge 2011-02-18 17:40:31 +01:00
Gael Guennebaud
86ca05b324 remove largeEps in adjoint unit test and use a more accurate test_isApproxWithRef test. 2011-02-18 17:39:04 +01:00
Gael Guennebaud
8f8c67b8bd fix bug #186 (in 32 bits mode, gcc 4.3 messed up with pfirst for complex<float>) 2011-02-18 15:47:17 +01:00
Benoit Jacob
aa966ca319 fix bug #187: stable norm test was quite broken 2011-02-18 09:46:49 -05:00
Gael Guennebaud
f7cd63b964 fix bug #189 (issue with fortran conventions to return COMPLEX values) 2011-02-18 15:11:31 +01:00
Gael Guennebaud
69cecc45e5 extend mapstride unit test to test unaligned configurations 2011-02-18 14:41:40 +01:00
Gael Guennebaud
abce49ea21 fix a segfault in "slice vectorization" when the destination might not be aligned on a scalar (complex<double>) 2011-02-18 14:20:36 +01:00
Gael Guennebaud
d271ad38ce back to brute force linking to sparse libraries (fix cmake when these libs are not found) 2011-02-18 11:35:45 +01:00
Gael Guennebaud
3e2314dd67 forgot to include this file in previous commit (needed for lapack) 2011-02-18 11:32:39 +01:00
Gael Guennebaud
444c1bc55b now cholmod, umfpack, and superlu uses our own BLAS and LAPACK libs 2011-02-18 11:26:31 +01:00
Gael Guennebaud
390724b4b6 add lapack interface to real symmetric eigenvalue dec and enable building of the lapack shared library 2011-02-18 11:25:04 +01:00
Gael Guennebaud
d8ca948148 it is now up to user of these Find* module to find and link to BLAS and/or LAPACK 2011-02-18 11:23:27 +01:00
Gael Guennebaud
3345ea0ddd clean a bit SuperLU declarations 2011-02-18 10:23:32 +01:00
Gael Guennebaud
9195a224f3 fix division by zero if the matrix is exactly zero 2011-02-17 19:39:57 +01:00
Gael Guennebaud
b8ef48c46d for consistency forward declare tan, asin, acos functors 2011-02-17 18:23:04 +01:00
Gael Guennebaud
a53a7d6e6a use C linkage for umfpack (might fix some linking issues) 2011-02-17 18:19:28 +01:00
Gael Guennebaud
eda59ffc1b mention std::ptr_fun in the quickref guide 2011-02-17 18:07:21 +01:00
Gael Guennebaud
6f86c12339 typo 2011-02-17 17:48:16 +01:00
Gael Guennebaud
aea630a98a factorize implementation of standard real unary math functions, and add acos, asin 2011-02-17 17:37:11 +01:00
Gael Guennebaud
2ba55e90db make check no test everything - also rm the EigenTesting cmake sub-project 2011-02-17 16:58:18 +01:00
Benoit Jacob
d0b8ce8f2a fix unused var warning 2011-02-17 09:41:17 -05:00
Gael Guennebaud
1c4e85ac7e forgot to include this file in one pretty old commit (missing EXCLUDE_FROM_ALL) 2011-02-17 15:33:35 +01:00
Jitse Niesen
78fa34e8ff Add blas tests for buildtests target. 2011-02-17 13:53:20 +00:00
Benoit Jacob
8fb27fad36 remove #include <iostream> at the wrong place 2011-02-17 07:47:05 -05:00
Jitse Niesen
be224d93f4 Include necessary header files when working around bug #89.
Fixes bug #188.
2011-02-17 11:51:48 +00:00
Benoit Jacob
11402edfd3 with old gcc (bug #89), only include iostream in debug mode 2011-02-16 12:01:47 -05:00
Gael Guennebaud
fe8a710a21 properly report OpenGL as a disabled backend 2011-02-16 18:01:06 +01:00
Gael Guennebaud
03d86ea736 fix installation of unsupported modules 2011-02-16 17:59:35 +01:00
Benoit Jacob
13a5582835 undo debugging change 2011-02-16 09:18:48 -05:00
Benoit Jacob
59596efdf7 Fix bug #89: on GCC <= 4.3, use a custom assert implementation to work around a compiler bug 2011-02-16 08:50:19 -05:00
Jitse Niesen
6db8fa7d04 Replace unset() by set() with no value specified; this does the same.
unset() was introduced in CMake 2.6.3 but we require only 2.6.2.
2011-02-16 10:16:47 +00:00
Gael Guennebaud
2f15f74218 CTEST_CUSTOM_* parameters have to be put in a CTestCustom.cmake file which itself has to be in the build directory 2011-02-15 12:39:45 +01:00
Gael Guennebaud
578d6f7ced now ctest does compile the test even though they are not in the "all" target 2011-02-15 11:40:43 +01:00
Gael Guennebaud
a1d7e9051e fix bug #184 (warning) 2011-02-14 15:41:00 +01:00
Gael Guennebaud
8e0a42350d fix stupid warning (bug #185) 2011-02-14 15:33:26 +01:00
Hauke Heibel
ac465a0891 Improve the Transform interface in order to prevent T.rotation() = R from compiling. 2011-02-14 12:00:47 +01:00
Jitse Niesen
211e1f8044 Improve documentation of plugins. 2011-02-13 22:50:57 +00:00
Benoit Jacob
d09b94e2ad Added tag 3.0-beta3 for changeset 58986ac832 2011-02-12 18:57:10 -05:00
Benoit Jacob
58986ac832 bump 2011-02-12 18:57:04 -05:00
Jitse Niesen
8bca23bbec Mention comma initializer can be used to concatenate vectors
(inspired by a question on IRC)
2011-02-12 23:17:31 +00:00
Hauke Heibel
1a6597b8e4 MSVC does not like using uninitialized SSE variables, so we have to pass all zeros. 2011-02-12 21:29:16 +01:00
Hauke Heibel
509ca63543 Merge 2011-02-12 18:50:53 +01:00
Hauke Heibel
beb03032b7 Disabled warning regarding the use of uninitialized variables on MSVC. 2011-02-12 18:48:57 +01:00
Jitse Niesen
9ac68e40a0 Write topic page for storage orders. 2011-02-12 17:43:29 +00:00
Hauke Heibel
7015aa00a9 Added configuration file for the 'eol' extension. 2011-02-12 18:38:56 +01:00
Gael Guennebaud
9d2bf35a05 implement optimized ploadu for MSVC10: this also fix bad code generation in gebp_kernel :) 2011-02-12 16:40:09 +01:00
Gael Guennebaud
ec7409b16e since gebp_kernel handled the scaling by alpha it used too many packets, this patch fix that. 2011-02-12 14:17:52 +01:00
Benoit Jacob
f7e4602a40 doc fixes 2011-02-11 09:55:54 -05:00
Hauke Heibel
bf79a3199c Reduced error traces when mixing matrices with different scalar types. 2011-02-11 09:41:48 +01:00
Gael Guennebaud
fe70113fab fix Transform documentation regarding Mode 2011-02-10 18:58:37 +01:00
Benoit Jacob
f3b81302cd fix typo 2011-02-10 11:06:01 -05:00
Benoit Jacob
57b22204db document the eigen2 support stages 2011-02-10 10:55:22 -05:00
Benoit Jacob
6a5a13e394 The pfirst hack is needed also on msvc 2010 as it gets completely nuts, even though it doesnt segfault as msvc 2008 did 2011-02-09 15:13:23 -05:00
Benoit Jacob
63626bb966 remove debug #error 2011-02-09 14:37:52 -05:00
Benoit Jacob
85f9fab003 back out changeset efdf2e4056. It turns out that the SSE3 header is always included, even without any SSE enabled, so it was making us wrongly use SSE3 paths. Backing this out fixes msvc related crashes, at least bug #165.
2011-02-09 14:01:26 -05:00
Gael Guennebaud
d6c4ca4845 fix redundancy 2011-02-09 13:44:05 +01:00
Gael Guennebaud
c0d5131435 workaround gcc 4.2.1 ICE (fix bug #145) 2011-02-09 13:04:35 +01:00
Gael Guennebaud
40526e24b4 fix memory leak (when conservatively resizing vectors of dynamically allocated scalar types such as bignums) 2011-02-07 19:52:16 +01:00
Benoit Jacob
ba9f6a2c3b now random<integer types> spans over 0..RAND_MAX, or -RAND_MAX/2..RAND_MAX/2 for signed types, or the most significant bits for smaller integer types. 2011-02-07 10:55:41 -05:00
Benoit Jacob
3386a946f8 fix unit tests for integer types in preparation for next changeset making random<int> span over a much bigger range 2011-02-07 10:54:50 -05:00
Benoit Jacob
68a2e04a96 fix fuzzy compares for integer types, using a selector 2011-02-07 10:53:17 -05:00
Gael Guennebaud
c5c8efa575 workaround gcc 4.2 and 4.3 compilation issue with NEON 2011-02-07 16:41:21 +01:00
Benoit Jacob
9105e62d0a introduce EIGEN_MAKING_DOCS to tell whether we're compiling the docs examples 2011-02-06 12:51:42 -05:00
Benoit Jacob
02ee26a3a5 fix build of class Block examples 2011-02-06 12:43:01 -05:00
Benoit Jacob
182ed9ba6c merge 2011-02-06 11:57:31 -05:00
Benoit Jacob
bc6625ab87 fix const correctness in Diagonal::coeffRef (fix found by failtests) 2011-02-06 11:57:04 -05:00
Benoit Jacob
dab4e583cb fix EIGEN_STATIC_ASSERT_LVALUE (fix found by failtests) 2011-02-06 11:56:33 -05:00
Benoit Jacob
80500b693c add more failtests 2011-02-06 11:55:51 -05:00
Hauke Heibel
d975b82105 Removed internal::as_argument. This fixes the alignment issues of bug #165. 2011-02-06 17:33:04 +01:00
Hauke Heibel
7ea6ac79a3 Exposed failtests publicly. 2011-02-06 13:43:08 +01:00
Gael Guennebaud
ea99880760 fix under- and overflow 2011-02-06 08:23:10 +01:00
Benoit Jacob
9ce08b352f add more failtests 2011-02-06 01:44:51 -05:00
Benoit Jacob
9b13e9aece failtest: a new cmake-based test suite for testing stuff that should fail to build. This first batch imports some const correctness checks from bug #54. 2011-02-05 18:57:29 -05:00
Hauke Heibel
8aee724274 Made MatrixBase::BasisReturnType const. 2011-02-05 15:53:17 +01:00
Hauke Heibel
6c3dc0d243 Fix Diagonal related const correctness issues. 2011-02-05 14:19:53 +01:00
Hauke Heibel
e20f1a44bb Fixed hidden const correctness issue. 2011-02-05 13:52:18 +01:00
Jitse Niesen
e2d46eac42 Remove all references to EIGEN_TUNE_CPU_CACHE_SIZE.
This macro is no longer used as of revision 0212eec23f.
2011-02-04 22:33:53 +01:00
Thomas Capricelli
0b555a4a3d fix misc warnings 2011-02-04 13:55:12 +01:00
Thomas Capricelli
0ed604583f turnaround for a compiler bug in gcc 3.4.6 2011-02-04 12:09:30 +01:00
Gael Guennebaud
aee4e950d3 extend ctest script for SSSE3 and above 2011-02-03 18:04:43 +01:00
Gael Guennebaud
5887a086cf fix SSE3 issue (infinite loop after the ei_ => internal change) - this fix bug #174 2011-02-03 17:55:24 +01:00
Gael Guennebaud
1526de96a0 fix compilation with MSVC 2011-02-03 17:23:33 +01:00
Benoit Jacob
4489c56c9e add Map static methods taking Strides, add test checking for compilation errors 2011-02-03 10:05:45 -05:00
Gael Guennebaud
2e2614b0fd fix MSVC8 compilation 2011-02-03 15:40:48 +01:00
Gael Guennebaud
2f71277105 add global tan function 2011-02-03 14:45:21 +01:00
Jason Newton
d028262e06 add tan function in Array world 2011-02-03 14:34:40 +01:00
Gael Guennebaud
1eae6d0fb9 an even more stable procedure 2011-02-03 11:25:34 +01:00
Gael Guennebaud
5beb2f4f0d slightly more stable eigen vector computation 2011-02-03 10:31:45 +01:00
Gael Guennebaud
a617d7f2ad fix compilation with MSVC2005 (strange, stupid fixes for MSVC9 confuse MSVC8....) 2011-02-02 17:47:48 +01:00
Gael Guennebaud
52e0a44034 implement GBMV 2011-02-02 11:39:13 +01:00
Gael Guennebaud
d5f6819761 split BandMatrix to a base and a wrapper class 2011-02-02 11:38:08 +01:00
Gael Guennebaud
8915d5bd22 fix 168 : now TriangularView::solve returns by value making TriangularView::solveInPlace less important.
Also fix the very outdated documentation of this function.
2011-02-01 17:21:20 +01:00
Gael Guennebaud
59af20b390 extend nomalloc test 2011-02-01 16:46:35 +01:00
Gael Guennebaud
ffc8386fdb mark the packet access methods as internal 2011-02-01 16:14:53 +01:00
Gael Guennebaud
a486d5590a implement optimized path for selfadjoint rank 1 update (safe regarding dynamic alloc) 2011-02-01 15:49:10 +01:00
Benoit Jacob
3eb74cf9fc forgot hg add 2011-02-01 07:51:55 -05:00
Gael Guennebaud
fa32ce0fc5 fix alignment issue 2011-02-01 13:51:56 +01:00
Benoit Jacob
2d09b11a97 relax Matrix/Array(Index) ctors to allow size 0, add test. 2011-02-01 07:46:02 -05:00
Gael Guennebaud
faa1284c12 fix compilation of snippets 2011-02-01 13:28:14 +01:00
Gael Guennebaud
4cb9d0f943 notify the creation of manual temporaries 2011-02-01 11:41:52 +01:00
Gael Guennebaud
c60818fca8 fix trmv regarding strided vectors and static allocation of temporaries 2011-02-01 11:38:46 +01:00
Gael Guennebaud
0fdd01fe24 operator(int) and the likes are not only fine for linear storage 2011-02-01 11:09:02 +01:00
Gael Guennebaud
f4a7679904 fix packing criterion 2011-02-01 10:41:12 +01:00
Gael Guennebaud
f46ace61d3 fix dynamic allocation for fixed size objects in matrix-vector product 2011-01-31 21:30:27 +01:00
Benoit Jacob
5ca407de54 update .hgignore 2011-01-31 09:21:31 -05:00
Benoit Jacob
dc22ae101f kill stage 15, it's useless 2011-01-31 09:18:49 -05:00
Benoit Jacob
df06f0be31 eigen2 support: pass remaining 2 tests 2011-01-31 08:55:38 -05:00
Benoit Jacob
7032ec80ae eigen2support: disable sparse tests, and do not require to define YES_I_KNOW_NOT_STABLE 2011-01-31 08:44:49 -05:00
Benoit Jacob
374deaed5f make eigen2 eigensolver test pass 2011-01-31 08:36:14 -05:00
Gael Guennebaud
e2642ed620 clean the script to generate the plots 2011-01-31 12:45:18 +01:00
Gael Guennebaud
3874e6a72b include cblas.h header file to ease configuration 2011-01-31 11:02:59 +01:00
Gael Guennebaud
476cb4c65c fix name collision 2011-01-31 10:54:21 +01:00
Gael Guennebaud
9a73bfeb85 add GOTO2 and clean a bit the cmake macros 2011-01-31 10:45:03 +01:00
Gael Guennebaud
6e67d15795 now gemv supports strides 2011-01-30 08:17:46 +01:00
Hauke Heibel
157a5040d5 Added the /bigobj flag in order to enable compilation with MSVC when EIGEN_SPLIT_LARGE_TESTS is not set. 2011-01-29 14:35:24 +01:00
Benoit Jacob
a1f5ea8954 make eigen2 cholesky test pass 2011-01-28 13:04:23 -05:00
Benoit Jacob
e001db2a15 fix bug in triangular matrix-vector product found by eigen2 tests! 2011-01-28 13:04:11 -05:00
Gael Guennebaud
852077fbc9 still test fftw even if the binary for long double is not available 2011-01-28 16:54:01 +01:00
Gael Guennebaud
c478e0039e disable broken determinant for complexes and SuperLU 2011-01-28 16:30:21 +01:00
Benoit Jacob
6f2ba1f52b typo reported by Don Lorenzo 2011-01-28 10:00:34 -05:00
Gael Guennebaud
817d86cbaf really fix permute_symm_to_symm for sparse complex matrix 2011-01-28 15:51:55 +01:00
Gael Guennebaud
6ec660ca7e fix crash in autodiff 2011-01-28 15:30:33 +01:00
Gael Guennebaud
af712e80e6 fix bug #73: weird compilation error in HouseholderSequence where double and float were mixed. Hopefully this also solves bug #91... 2011-01-28 12:35:32 +01:00
Gael Guennebaud
d76ed18a9f rm useless ctor 2011-01-28 11:25:11 +01:00
Gael Guennebaud
1731a432e7 fix BTL cholesky action and output errors if the factorization failed 2011-01-28 11:24:18 +01:00
Gael Guennebaud
837f1ae59c fix compilation with old gcc 2011-01-28 11:23:02 +01:00
Gael Guennebaud
ddfd288dc9 start nightly builds at 00:00:00 UTC 2011-01-28 10:33:02 +01:00
Gael Guennebaud
42d512d33c fix compilation with gcc 4.2 and older 2011-01-28 10:26:05 +01:00
Gael Guennebaud
97801e5e0e Eigen/Eigen should not include Sparse until it is API stable 2011-01-28 10:04:02 +01:00
Gael Guennebaud
736d00ab87 typo 2011-01-28 09:57:35 +01:00
Gael Guennebaud
162d29e696 fix compilation of sparse module with ICC 2011-01-28 09:55:32 +01:00
Thomas Capricelli
22db1a6e82 fix fftw test 2011-01-27 18:25:41 +01:00
Benoit Jacob
b2b8c6a89c dot() now always uses eigen3 convention, even in eigen2 support mode, even stage 10. Didn't have a choice as lots of eigen code is using it. 2011-01-27 12:04:26 -05:00
Gael Guennebaud
e761ba68f7 merge 2011-01-27 18:03:13 +01:00
Gael Guennebaud
3d8e179aa2 fix MaxCols in ComplexEigenSolver which was causing memory allocation instead of static allocation in the nomalloc test. Uncomment commented parts of the nomalloc test since now matrix-matrix products are safe. 2011-01-27 18:02:49 +01:00
Gael Guennebaud
32124bc64a EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET must be defined to use Eigen/Sparse 2011-01-27 17:36:58 +01:00
Benoit Jacob
52fed69baa add test for geometry with eigen2_ prefixes. fix that stuff. 2011-01-27 11:21:38 -05:00
Gael Guennebaud
955e096277 add an Options template parameter to Hyperplane and ParametrizedLine 2011-01-27 17:17:06 +01:00
Hauke Heibel
d5e81d866a Added regression tests for bug #148. 2011-01-27 16:37:06 +01:00
Benoit Jacob
fd400ffffb reverse order of testing for eigen2 support stages. Higher stages now have priority. So if your whole project builds with say stage 10, you can manually enable stage 20 for selected files. 2011-01-27 10:34:44 -05:00
Benoit Jacob
b69b6a9db2 add Threshold API to FullPivHouseholderQR 2011-01-27 10:17:52 -05:00
Gael Guennebaud
a954a0fbd5 Add an Options template parameter to Transform to enable/disable alignment 2011-01-27 16:07:33 +01:00
Jakob Schwendner
e3306953ef test case for unaligned quaternion 2011-01-27 09:14:30 -05:00
Christoph Hertzberg
0aa752fc4f add quaternion Options, add unaligned possibility 2011-01-27 09:14:22 -05:00
Gael Guennebaud
9ccd16609c fix twisted selfadjoint to selfadjoint (conjugation issue) 2011-01-27 14:39:01 +01:00
Gael Guennebaud
f5d0f115b4 EigenSolver is now in the Eigenvalues modules, not QR !
: Enter commit message.  Lines beginning with 'HG:' are removed.
2011-01-27 13:56:03 +01:00
Gael Guennebaud
255f2a1379 fix various compilations issues 2011-01-27 13:51:39 +01:00
Gael Guennebaud
999678c3f0 fix mixingtypes unit test 2011-01-27 13:51:17 +01:00
Eamon Nerbonne
40998f5e86 fix const-related compiler error on MSC. 2011-01-27 07:43:07 -05:00
Gael Guennebaud
5f03cbd44f fix many missing const in return types 2011-01-27 12:12:24 +01:00
Gael Guennebaud
e8d6a5ca87 fix cross product for complexes and add support for mixed real-complex cross products 2011-01-27 11:33:37 +01:00
Gael Guennebaud
0bfb78c824 allow mixed complex-real and real-complex dot products 2011-01-27 09:59:19 +01:00
Benoit Jacob
fe3bb545e0 allow matrix[index] in EIGEN2_SUPPORT 2011-01-26 20:22:33 -05:00
Gael Guennebaud
c90d0c363b improve automatic handling of gotoblas and atlas 2011-01-26 19:39:10 +01:00
Gael Guennebaud
0e8a532f87 always link to gfortran for gotoblas, it seems to be harmless for 1.x but needed for 2.x 2011-01-26 19:16:06 +01:00
Gael Guennebaud
240bfdd142 finish the move to Eigen3 in BTL, and let's use our own FindEigen3.cmake script 2011-01-26 19:12:35 +01:00
Gael Guennebaud
86acb46518 pass to eigen3 ;) 2011-01-26 18:41:06 +01:00
Gael Guennebaud
faeae169dd fix compilation 2011-01-26 17:58:17 +01:00
Gael Guennebaud
210a280daf update FindMKL to match the default installation behavior of MKL 11 2011-01-26 17:58:01 +01:00
Gael Guennebaud
1eb85b4cf1 allow the possibility to automatically call or not the ctors on a per scalar type basis, and disable automatic initialization of std::complex<> 2011-01-26 17:56:49 +01:00
Gael Guennebaud
4783748953 do not include reference lapack files if they are not there 2011-01-26 17:10:05 +01:00
Benoit Jacob
162cb8ff42 import back LeastSquares into eigen2support. Pass most of eigen2's 'regression' test, except for regression_4 which is about complex numbers. 2011-01-26 11:05:41 -05:00
Gael Guennebaud
98285ba81c merge 2011-01-26 16:36:07 +01:00
Gael Guennebaud
7ef9d82b39 add a minimalistic lapack wrapper 2011-01-26 16:34:45 +01:00
Gael Guennebaud
15ef62ca43 extend PermutationMatrix and Transpositions to support arbitrary integer types and to support the Map/Wrapper model via base and derived classes 2011-01-26 16:33:23 +01:00
Benoit Jacob
76c630d185 eigen2 support: import SVD back, pass SVD tests 2011-01-26 10:33:03 -05:00
Benoit Jacob
313eea8f10 fix the remainder of bug #159 2011-01-26 10:01:18 -05:00
Benoit Jacob
f88ca0ac79 fix the eigen3 part of bug #159 - build issue with selfadjointview 2011-01-26 09:49:06 -05:00
Benoit Jacob
9a5ded3e1d fix bug #160 - forgot hg add 2011-01-25 21:31:27 -05:00
Benoit Jacob
c350f6f12c fix bug #161 2011-01-25 21:28:20 -05:00
Benoit Jacob
39536d44da fix build 2011-01-25 21:24:31 -05:00
Benoit Jacob
1d98cc5e5d eigen2 support: implement part<SelfAdjoint>, mimic eigen2 behavior braindeadness-for-braindeadness 2011-01-25 21:22:04 -05:00
Benoit Jacob
4fbadfd230 merge 2011-01-25 11:19:54 -05:00
Benoit Jacob
07e3ef4f38 eigen2: pass QR decomposition and hyperplane tests 2011-01-25 11:19:26 -05:00
Gael Guennebaud
6896cab5b9 one more const missing 2011-01-25 16:52:40 +01:00
Gael Guennebaud
28d6e84150 fix compilation after recent const change in return types 2011-01-25 16:33:02 +01:00
Benoit Jacob
b1d6a9945c eigen2: pass the inverse test 2011-01-25 10:05:29 -05:00
Benoit Jacob
09d1923f61 eigen2: pass lu test 2011-01-25 10:02:36 -05:00
Benoit Jacob
3e2469f951 eigen2: split tests 2011-01-25 09:02:59 -05:00
Benoit Jacob
b04591fbb4 disable eigen2_first_aligned test, it's completely internal stuff 2011-01-25 08:38:22 -05:00
Benoit Jacob
acd2c82655 fix eigen2_bug_132 test 2011-01-25 08:37:32 -05:00
Benoit Jacob
8acd43bbdb let eigen2 tests use the same ei_add_test macro, which required to prefix them with eigen2_ ; rename buildtests_eigen2 to eigen2_buildtests, etc. 2011-01-25 08:37:18 -05:00
Benoit Jacob
dcfb58f529 eigen2: fix USING_PART_OF_NAMESPACE_EIGEN 2011-01-25 08:03:12 -05:00
Gael Guennebaud
84448b058c fix USING_PART_OF_NAMESPACE_EIGEN to export ei_ prefixed math functions 2011-01-25 09:35:49 +01:00
Gael Guennebaud
7dd4aaba9f fix missing const qualifier in cwiseEqual 2011-01-24 18:49:18 +01:00
Benoit Jacob
bd12ac4ffc import eigen2 Geometry module into Eigen2Support.
fix build of geometry tests
2011-01-24 11:21:58 -05:00
Benoit Jacob
5bfde30e48 fix compilation of array tests 2011-01-24 09:38:50 -05:00
Benoit Jacob
9089488210 fix compilation of Eigen/Geometry with EIGEN2_SUPPORT: was including non-existent header 2011-01-24 08:59:47 -05:00
Benoit Jacob
c3a4f6b5c5 const-qualify template parameters representing const arguments to expressions.
needed to fix docs compile issue.
2011-01-24 08:27:06 -05:00
Benoit Jacob
5331fa3033 fix compilation of LU class example 2011-01-24 07:41:47 -05:00
Benoit Jacob
1dabd133cc pass eigen2's triangular test 2011-01-23 21:53:28 -05:00
Benoit Jacob
5c82fd7f40 Move part() to EIGEN2_SUPPORT (had been deprecated for a long time) 2011-01-23 18:49:36 -05:00
Benoit Jacob
1cf4996d3c make eigen2 visitor test pass 2011-01-23 18:34:30 -05:00
Benoit Jacob
8df5bca979 rename build stages to multiples of 10; old stage 2 becomes stage 15, while stage 20 generates errors (instead of warnings) on conflicting API. 2011-01-23 18:22:18 -05:00
Benoit Jacob
cc1f70abc3 make eigen2 dynalloc test pass (add to eigen2 support some internal stuff that some users may have been relying on) 2011-01-21 10:47:31 -05:00
Benoit Jacob
30de1651d3 relax Map const correctness in eigen2 support stages <= 3
introduce new 'strict' stage 4
2011-01-21 10:42:19 -05:00
Benoit Jacob
54dfcdf86e remove eigen2 vectorization_logic test, it's not an API test 2011-01-21 10:29:43 -05:00
Benoit Jacob
5be269db88 make eigen2 submatrices test pass 2011-01-21 10:24:59 -05:00
Benoit Jacob
cc2b7a5397 introduce the 3 stages of eigen2 support, writing to the mailing list about that in Eigen2 to Eigen3 Migration Path thread 2011-01-21 09:51:03 -05:00
Benoit Jacob
34d93686db lots more EIGEN2_SUPPORT fixes. Now several of the most important core tests build and succeed. 2011-01-20 10:36:32 -05:00
Benoit Jacob
66a2ffa9bd Completely disable Eigen/Array in Eigen3; completely enable in EIGEN2_SUPPORT. 2011-01-20 08:12:24 -05:00
Benoit Jacob
96f08213f7 big eigen2support fix, aimed at users who relied on internal eigen2 stuff: now we dont need customizations in test/eigen2/main.h anymore.
These tests already build:
eigen2_basicstuff
eigen2_adjoint
eigen2_linearstructure
eigen2_prec_inverse_4x4
2011-01-19 11:01:07 -05:00
Benoit Jacob
bf0cffa897 restore the behavior of defaulting to Release build type 2011-01-19 10:15:36 -05:00
Benoit Jacob
1f6bd2915d import eigen2 test suite. enable by defining EIGEN_TEST_EIGEN2
only test_prec_inverse4x4 is fixed at the moment. now need to go over all those tests.
2011-01-19 10:10:54 -05:00
Benoit Jacob
604afc9aca fix bug #155, const-related compilation error 2011-01-18 09:14:14 -05:00
Hauke Heibel
9b2546fea8 Added remaining const coeffRef accessors to Array- and MatrixWrapper. 2011-01-18 13:19:13 +01:00
Benoit Jacob
c7eaca50a0 __cpuidex is not (always) present in VS 2008 + SP1, it seems 2011-01-17 11:17:45 -05:00
hamelin.philippe
5e28f34005 Replace CMAKE_SOURCE_DIR with PROJECT_SOURCE_DIR to allow the cmake project to be included by a root project. 2011-01-17 09:59:40 -05:00
Gael Guennebaud
5010033d88 do not stop the factorization if one pivot is exactly 0, and return the
index of the first zero pivot if any
2011-01-17 11:11:22 +01:00
Gael Guennebaud
ef3e690a0c return the index of the first non positive diagonal entry (more useful than simply true or false) 2011-01-17 11:09:03 +01:00
Gael Guennebaud
8b6c1caa3e fix compilation of rowmajor sparse time diagonal 2011-01-14 20:29:55 +01:00
Thomas Capricelli
dcbf091e60 fix EIGEN_TEST_NOQT (reported by Philippe Hamelin) 2011-01-14 14:30:06 +01:00
Jose Luis Blanco
cbfab7204f Update of CPUID macros to fix segfaults in amd64 code. 2011-01-05 02:43:43 +01:00
Benoit Jacob
98f0274305 third pass of const-correctness fixes (bug #54), hopefully the last one... 2011-01-07 05:16:01 -05:00
Gael Guennebaud
c7baf07a3e add plugin mechanism to sparse objects 2011-01-07 15:53:02 +01:00
Jitse Niesen
9111d73017 Fix compilation error in HouseholderSequence introduced in my previous commit. 2011-01-07 13:46:23 +00:00
Romain Bossart
4abb772b52 Fix bug #38
* address of temporaries were passed to umfpack_zi_* functions. It is ok with g++-4.4 or 4.5, but not with the -std=c++0x in both versions. This patch makes it work for c++98 and c++0x versions
2011-01-07 10:27:22 +01:00
Jitse Niesen
2cc75f4922 Make HouseholderSequence::setTrans() protected (cf. bug #50).
Users can call .transpose() instead.
2011-01-06 11:30:19 +00:00
Manuel Yguel
934720c4ba Decrease the degree of the polynomials being tested to reduce time spent during the tests. 2011-01-05 19:49:13 +01:00
Hauke Heibel
4ba0ec5e0e Fixed #148 where a const-accessor for coefficients was missing in the MatrixWrapper. 2011-01-04 15:35:50 +01:00
Gael Guennebaud
d7e1eeaece fix compilation when defaulting to row major 2011-01-04 14:40:06 +01:00
Gael Guennebaud
3a4d56171d fix openglsupport unit test when defaulting to row major 2011-01-04 14:34:17 +01:00
Gael Guennebaud
64356a622d fix vectorization_logic unit test when defaulting to row major 2011-01-04 14:18:07 +01:00
Jitse Niesen
004488a31d Fix bug in symmetric rank-2 update for row-major matrices (bug #144). 2011-01-04 10:35:39 +00:00
Jitse Niesen
fb023b871f Const-correctness fix for gemv_selector<OnTheRight,ColMajor,true> (bug #144). 2011-01-04 10:35:10 +00:00
Benoit Jacob
fd4e366d7e fix severe perf bug: coeff-based matrix products were not considered aligned, typically preventing vectorization.
added unit test.
2011-01-02 12:07:39 -05:00
Jitse Niesen
47a9d2ed54 Document HouseholderSequence.
Incomplete: I did not explain the difference between OnTheLeft and OnTheRight,
and there is only one example.
2011-01-02 16:59:44 +00:00
Gael Guennebaud
583f963517 make the table fit within 80 characters 2011-01-01 12:02:55 +01:00
Gael Guennebaud
e7318148b5 an attempt to fix a compilation issue with -std=c++0x 2011-01-01 11:40:30 +01:00
Jose Luis Blanco
7feb644620 Switched "MESSAGE(" -> "MESSAGE(STATUS " in CMake script, since otherwise they may look like errors to the user. 2010-12-29 22:02:01 +01:00
Gael Guennebaud
902af035d3 merge 2010-12-31 17:26:48 +01:00
Gael Guennebaud
25efcdd042 fix sparse time dense product with a rowmajor lhs 2010-12-31 17:11:17 +01:00
David J. Luitz
11e253bc10 [Sparse] Added regression tests for the two bugfixes, the code passes all sparse_product tests 2010-12-30 15:16:23 +01:00
Benoit Jacob
13867c15cc fix compilation of code using e.g. Transpose<const Foo>::data() non-const-qualified. Same problem existed for coeffRef() and also in MapBase.h. 2010-12-30 07:47:51 -05:00
Benoit Jacob
26c2afd55a fix compile errors in Tridiagonalization and in doc examples 2010-12-30 04:52:20 -05:00
Benoit Jacob
dbd9c5fd50 fix HouseholderSequence API, bug #50:
* remove ctors taking more than 2 ints
 * rename actualVectors to length
 * add length/shift/trans accessors/mutators
2010-12-30 04:18:40 -05:00
Trevor Irons
e112ad8124 In QuickRefPage LinSpaced is improperly documented. 2010-12-29 10:08:41 -07:00
Jitse Niesen
d6a5ba5a08 Rename EIGEN_DENSESTORAGEBASE_PLUGIN to EIGEN_PLAINOBJECTBASE_PLUGIN. 2010-12-29 19:12:39 +00:00
Jose Luis Blanco
3ca31a8b74 fixed msvc9 build errors. 2010-12-29 19:42:01 +01:00
Jitse Niesen
d84b135ed3 Enable GSL tests (reverts part of changeset 6628534eb5).
2010-12-29 17:45:18 +00:00
Jose Luis Blanco
97c54ad220 fix MSVC warnings, bug #143 2010-12-29 06:15:41 -05:00
Thomas Capricelli
7a29ae0b5c fix preprocessor checks for availability of cpuid 2010-12-28 13:46:39 +01:00
Jitse Niesen
657013c974 Mention ptr_fun in docs for .unaryExpr() 2010-12-27 16:35:25 +00:00
Jitse Niesen
265e1ef4ef Extend doc page on preprocessor directives. 2010-12-27 16:34:58 +00:00
Jitse Niesen
8db9acbc16 Move doxygen comments for EIGEN_NO_DEBUG from source to I14.
This reverts changeset 76fbe94279. Benoit and I agree that my
approach there (to use doxygen comments) pollutes the code too much.
2010-12-27 15:07:11 +00:00
Jitse Niesen
840c4e1ab5 Move section on preprocessor directives from I00 to its own page. 2010-12-27 15:07:07 +00:00
Jitse Niesen
42a050dc68 Finish doc page on aliasing. 2010-12-27 15:06:55 +00:00
Benoit Jacob
dc3618a557 move BandMatrix and TridiagonalMatrix to the internal:: namespace 2010-12-25 17:17:10 -05:00
Benoit Jacob
8d2a10c5c1 more renaming to make this file matrix-or-array-agnostic 2010-12-25 17:04:36 -05:00
Benoit Jacob
e8768251db rename macro 2010-12-25 17:01:01 -05:00
Benoit Jacob
86d3711fb7 remove EIGEN_REF_TO_TEMPORARY, clarify docs 2010-12-25 16:45:25 -05:00
Benoit Jacob
75b7d98665 bug #54 - really fix const correctness except in Sparse 2010-12-22 17:45:37 -05:00
Hauke Heibel
3b6d97b51a Re-enabled the BLAS compilation on non-MSVC systems. 2010-12-17 10:52:57 +01:00
Hauke Heibel
5e46f7a499 Switched back to the old behaviour where EIGEN_SPLIT_LARGE_TESTS was ON per default on MSVC systems.
Without splitting these tests, some do not compile
2010-12-17 09:42:17 +01:00
Gael Guennebaud
a21d56b766 disable blas if C++ compiler is MSVC 2010-12-16 20:51:44 +01:00
Hauke Heibel
efdf2e4056 Added automatic SSE3/4.1/4.2 support for MSVC. 2010-12-16 20:08:22 +01:00
Hauke Heibel
b31e1246e1 Re-enabled the missing tests, again... 2010-12-16 19:07:23 +01:00
Hauke Heibel
83e3c4582f Improved the array unit test - internal::isApprox needs to use the same precision as VERIFY_IS_NOT_APPROX.
Removed debug code from test_isApprox.
2010-12-16 18:53:02 +01:00
Hauke Heibel
2d0dfe5d60 Uups - re-enabled subtests 1 to 5. 2010-12-16 17:36:10 +01:00
Hauke Heibel
f578dc7aff Fixed compound subtraction in ArrayBase where the assignment needs to be carried out on the derived type.
Added unit tests for map based component wise arithmetic.
2010-12-16 17:34:13 +01:00
Hauke Heibel
dbfb53e8ef Added unit test for matrix creation from const raw data. 2010-12-15 15:28:43 +01:00
Hauke Heibel
6f5c45ceff Fixed ctor from const raw data for Matrices and added the missing implementation for Arrays.
Fixed a warning regarding the conversion from int to bool in MapBase.
2010-12-15 15:19:51 +01:00
Gael Guennebaud
6a9a6bbc78 fix warning 2010-12-13 10:18:33 +01:00
Gael Guennebaud
68fe80861c Fix bug #133: remove the EIGEN_RESTRICT which was useless here anyway 2010-12-13 09:56:13 +01:00
Jitse Niesen
f2c18f2e37 merge 2010-12-12 21:24:24 +00:00
Jitse Niesen
4a5ebcd1ce Fix compilation of Tridiagonalization_diagonal example.
After changeset 0d63212257, matrixT() is a real matrix even if the matrix
which is decomposed is complex.
2010-12-12 13:53:42 +00:00
Gael Guennebaud
c7f01157dd enforce compilation of blas unit tests when running ctest 2010-12-12 13:10:00 +01:00
Jitse Niesen
9cd4f67e7f Specify root namespace for fftw_plan from FFTW3 library.
After changeset 4716040703 (the ei_ --> internal:: change), there are two symbols
called fftw_plan, one from the FFTW3 library and one from Eigen.
2010-12-12 11:44:30 +00:00
Konstantinos Margaritis
e05c79cbd8 Fixed NEON compilation errors, changed float-abi back to softfp (which is the most used right now).
Some complex tests appear to segfault, needs a more careful look.
2010-12-10 20:27:46 +02:00
Benoit Jacob
b11343e15c fix intermittent failure of schur_real test: there only is an iterative process if size>2 2010-12-10 02:10:03 -05:00
Benoit Jacob
74cc42b22f bug #54 - The big Map const-correctness changes 2010-12-10 02:09:58 -05:00
Gael Guennebaud
e736df3edd suppress stupid warning 2010-12-10 15:53:13 +01:00
Gael Guennebaud
79cc86f701 fix compilation 2010-12-10 13:52:47 +01:00
Gael Guennebaud
67c28570e3 fix compilation with ICC (template keyword on a non template method) 2010-12-10 10:05:52 +01:00
Gael Guennebaud
5bc21c25c5 fix ICE with gcc 3.4 and 4.0.1 2010-12-10 09:59:44 +01:00
Gael Guennebaud
bacd531862 fix bug #128 : tridiagonalization failed for 1x1 matrices 2010-12-09 19:56:20 +01:00
Gael Guennebaud
17de59278b simplification 2010-12-09 19:47:02 +01:00
Gael Guennebaud
147a63c4b5 compilation fix 2010-12-09 19:46:26 +01:00
Gael Guennebaud
0b32c5bdda fix compilation of sparse_basic for DynamicSparseMatrix 2010-12-09 19:39:15 +01:00
Benoit Jacob
aec0782719 fix the build of eigensolver_complex test.
it was calling the .value() method on an inner product, and that was blocked in bad zero-sized case.

fixed by adding the .value() method to DenseBase for all 1x1 expressions, and allowing coeff accessors in ProductBase for 1x1 expressions.
2010-12-09 03:47:35 -05:00
Benoit Jacob
1be6449f2e fix bug #127. our product selection logic was flawed in that it used the Max-sized to determine whether the size is 1.
+ test.
2010-12-09 02:38:07 -05:00
Benoit Jacob
819bcbed19 fix comment 2010-12-07 02:17:15 -05:00
Eamon Nerbonne
7a7ca99a31 [mq]: Mingw32 fix
intrin.h is not required nor supported by mingw32.  It is present (and supported) on mingw-w64 builds, even those for 32-bit systems, but here too it's not required on 32-bit systems.  So if we're on mingw, and it's 64-bit, then and only then is the intrin.h inclusion necessary.
2010-12-03 23:24:06 +01:00
Gael Guennebaud
c49c013c47 add main ei_* functions into Eigen2Support 2010-12-03 11:22:35 +01:00
Gael Guennebaud
14208eb478 add a word about the ei_ prefix change in Eigen2 -> Eigen3 doc page. 2010-12-03 10:54:16 +01:00
Hauke Heibel
a289065c73 Applied a fix to our std::vector specialization which prevents the usage of workaround_msvc_stl_support when T is not a class. 2010-12-02 12:33:15 +01:00
Benoit Jacob
59b944cb50 add is_const 2010-12-01 09:22:54 -05:00
Benoit Jacob
46387cc180 remove makeconst_return_type 2010-12-01 09:22:50 -05:00
Hauke Heibel
f0ba513f41 Fixed compilation of tridiagonalization related unit tests. 2010-11-27 15:41:46 +01:00
Hauke Heibel
3899857e08 Removed remove_const_on_value_type since the meaning is unclear and it is in fact unused.
Extended the meta unit tests.
2010-11-26 18:06:08 +01:00
Hauke Heibel
60a544c879 Added STL like (add|remove)_const. Fixed add_const_on_value_type for "const T* const". 2010-11-26 16:56:03 +01:00
Hauke Heibel
bf9d25ce58 Postfixed add_const and remove_const by _on_value_type to express the differences to the STL. 2010-11-26 16:30:45 +01:00
Benoit Jacob
139392488d dos2unix 2010-11-26 10:10:26 -05:00
Jitse Niesen
e868b6736a Merge. 2010-11-26 14:37:58 +00:00
Gael Guennebaud
d551e99644 make HessenbergDecompositionMatrixHReturnType internal 2010-11-26 15:39:01 +01:00
Gael Guennebaud
e06c6553e0 make TridiagonalizationMatrixTReturnType internal and only export a public MatrixTReturnType typedef 2010-11-26 15:36:29 +01:00
Gael Guennebaud
0d63212257 add a TridiagonalizationMatrixTReturnType class to make Tridiagonalization::matrixT() more efficient and future proof. 2010-11-26 15:31:47 +01:00
Jitse Niesen
9bad7c7edb Compilation fix in case EIGEN_DEBUG_ASSERTS is defined. 2010-11-26 14:21:57 +00:00
Gael Guennebaud
421b2b5ff7 fix a couple of issues with TridiagonalMatrix 2010-11-26 13:04:20 +01:00
Gael Guennebaud
d8b26cfeec s/id/p to avoid name clash 2010-11-26 08:36:16 +01:00
Gael Guennebaud
156a31b0e9 fully implement scalar_fuzzy_impl<bool> as, e.g., the missing isMuchSmallerThan is convenient to filter out false values. 2010-11-25 18:00:30 +01:00
Jitse Niesen
010ed9510b Remove parentheses for compatibility with cmake 2.6.2 2010-11-24 22:26:13 +00:00
Benoit Jacob
cd1225ef14 make example compile 2010-11-24 09:18:49 -05:00
Benoit Jacob
f84cbba52a minor fixes 2010-11-24 09:16:30 -05:00
Benoit Jacob
07f2406dc1 some dox tweaks 2010-11-24 08:23:17 -05:00
Gael Guennebaud
f1690fb9fa fix bug #122 : rank 2 update test and scalar multiple extraction were both wrong 2010-11-23 19:19:04 +01:00
Benoit Jacob
0ab9a0a2f7 make UpperBidiagonalization internal: don't want to support it, it's not used.
Keeping it because it tests BandMatrix.
2010-11-23 11:12:42 -05:00
Benoit Jacob
ee38dbf1e6 Rework nested<> to be cleaner, see bug #76. 2010-11-23 11:11:40 -05:00
Frederic Gosselin
4c5932f8f5 Improves the filter for hidden files in "Eigen" and "Eigen/src".
This generic solution prevents cmake from raising an error on .svn folders when the source folder is under Subversion.
2010-11-22 10:47:07 -05:00
Gael Guennebaud
5a65d7970a now the full blas folder requires a fortran compiler 2010-11-22 19:07:29 +01:00
Gael Guennebaud
3976a66889 fix bug #120 : compilation issue of trsolve unit test 2010-11-22 18:59:56 +01:00
Gael Guennebaud
f5f288b741 split level 1 and 2 implementation files into smaller ones and fix a couple of numerical and tricky issues discovered by the lapack test suite 2010-11-22 18:49:12 +01:00
Gael Guennebaud
a6f483e86b import reference BLAS routines which are not already implemented in Eigen : modified givens rotations, and packed and banded storages 2010-11-22 18:05:09 +01:00
Gael Guennebaud
7213dd1e6b this product still badly read the imaginary part on the diagonal 2010-11-22 18:00:47 +01:00
Benoit Jacob
a3f214ade9 holy crap, i had disabled all static asserts in 71f023de3e 2010-11-22 08:21:30 -05:00
Gael Guennebaud
d8396a8da0 fix compilation of product_mmtr 2010-11-21 10:23:06 +01:00
Gael Guennebaud
fb6d9ca951 add missing non const data() method to MapBase 2010-11-21 10:17:25 +01:00
Gael Guennebaud
0020ea544a implement HEMV level2 blas routine 2010-11-21 10:09:33 +01:00
Gael Guennebaud
12bfe5e718 make sure our internal selfadjoint*vector product does not use the imaginary part of the diagonal entries 2010-11-21 10:08:48 +01:00
Gael Guennebaud
e88901daf4 implement SYMV level2 blas routines 2010-11-21 09:34:41 +01:00
Gael Guennebaud
1ac9124fac implements TRMV level 2 blas routine 2010-11-20 23:29:20 +01:00
Gael Guennebaud
d72a8f1e50 make trmv uses direct access 2010-11-20 22:42:24 +01:00
Gael Guennebaud
437dff80ee fix issue 114: workaround cmake enable_language bug 2010-11-20 12:01:17 +01:00
Gael Guennebaud
86474115f5 IBM XL C compiler supports __attribute__((aligned(n))) syntax 2010-11-19 17:33:51 +01:00
Gael Guennebaud
8ad1f64e0a some cleaning in blas level 2 2010-11-19 17:22:43 +01:00
Thomas Capricelli
94f59a92cb fix typo 2010-11-19 17:16:28 +01:00
Gael Guennebaud
ed1ecb24d2 implement GERC and GERU blas routines 2010-11-19 17:05:24 +01:00
Gael Guennebaud
458637f097 implement GER blas routine 2010-11-19 17:02:24 +01:00
Gael Guennebaud
68f8519327 implement HER and HER2 blas routines 2010-11-19 16:51:52 +01:00
Gael Guennebaud
5ce199b1dd update rank 2 update doc 2010-11-19 16:50:49 +01:00
Gael Guennebaud
f369b5a711 makes rank 2 update function conformant to BLAS HER2 2010-11-19 16:50:15 +01:00
Gael Guennebaud
e14f14642d implement SYR and SYR2 2010-11-19 16:09:25 +01:00
Gael Guennebaud
661ef6c127 add regression unit test 2010-11-19 15:38:37 +01:00
Gael Guennebaud
3f24dbf6f5 fix compilation of transform * scaling 2010-11-19 14:45:45 +01:00
Gael Guennebaud
3e99356b59 clean a bit AMD and SimplicialCholesky and add support for partly stored selfadjoint matrices 2010-11-18 10:30:52 +01:00
Gael Guennebaud
1618df55df Add support for sparse symmetric permutations 2010-11-18 10:28:39 +01:00
Gael Guennebaud
fb71b737e4 update blas lib wrt recent change of general_matrix_matrix_triangular_product 2010-11-16 19:19:33 +01:00
Jitse Niesen
e54c8d20cb Docs: aliasing and component-wise operations. 2010-11-16 17:28:59 +00:00
Gael Guennebaud
da05b6af0e fix some remaining issues with ei_ -> internal change 2010-11-16 15:54:48 +01:00
Gael Guennebaud
9a3ec637ff new feature: copy from a sparse selfadjoint view to a full sparse matrix 2010-11-15 14:14:05 +01:00
Gael Guennebaud
5a3a229550 fix return type of rightHouseholderSequence() 2010-11-15 11:11:22 +01:00
Jitse Niesen
cad73d9cdc Correct std::map fix (two commits ago); copy fix to aligned_allocator doc. 2010-11-12 12:06:24 +00:00
Thomas Capricelli
d64e68c8bc fix doc compilation 2010-11-12 11:33:09 +01:00
Jose Luis Blanco
9ba15cd63c Docs: correct declaration of aligned std::map in TopicStlContainers. 2010-11-12 10:05:41 +00:00
Gael Guennebaud
b4fa8261b1 properly use nested types 2010-11-10 19:06:20 +01:00
Gael Guennebaud
05ed9be639 prevent warning 2010-11-10 18:59:16 +01:00
Gael Guennebaud
2577ef90c0 generalize our internal rank K update routine to support more general A*B product while evaluating only one triangular part and make it available via, e.g.:
R.triangularView<Lower>() += s * A * B;
2010-11-10 18:58:55 +01:00
Gael Guennebaud
c810d14d4d add missing specialization 2010-11-09 12:03:20 +01:00
Gael Guennebaud
39477e697a extend unit test to cover previous bug 2010-11-05 14:37:42 +01:00
Gael Guennebaud
572b5585e3 fix Eigen's trsv for complexes 2010-11-05 14:36:34 +01:00
Gael Guennebaud
0e30c4ae3f blas level2: gemv and trsv are green 2010-11-05 14:14:50 +01:00
Gael Guennebaud
3fdea699b8 trsv: simplifications/cleaning 2010-11-05 12:54:32 +01:00
Gael Guennebaud
0e6c1170ab trsv: add support for inner-stride!=1, reduce code instantiation, move implementation to a new products/XX.h file 2010-11-05 12:43:14 +01:00
Gael Guennebaud
fe1353080e fix error handling of level 1 routines 2010-11-04 22:25:59 +01:00
Gael Guennebaud
15e8ad686c add a minimum degree ordering routine based on CSparse (LGPL) and a new built-in sparse cholesky decomposition 2010-11-04 09:58:22 +01:00
Gael Guennebaud
5a4f77716d fix bug #107: SelfAdjointEigenSolver and RowMajor (and add unit test) 2010-11-04 09:33:05 +01:00
Gael Guennebaud
20fcef9656 fixes related to ei_ -> internal change 2010-11-04 08:38:16 +01:00
Gael Guennebaud
62a51184d7 merge 2010-11-04 08:32:52 +01:00
Gael Guennebaud
fd88d721d2 implement proper error handling in level 3 routines 2010-11-03 22:03:12 +01:00
Gael Guennebaud
a8fb6b0ad3 improve detection of errors 2010-11-03 22:02:44 +01:00
Gael Guennebaud
1eea88bff7 fix matrix product bug with OpenMP 2010-11-03 16:12:37 +01:00
Gael Guennebaud
8d27f55eb3 rm auto normalization in favor of clamping 2010-11-03 15:32:40 +01:00
Hauke Heibel
d204ec491d Additional fix to force the compiler to use the correct pruning method. 2010-11-02 14:33:33 +01:00
Hauke Heibel
3a3f163e31 Fix bug #65.
In order to prevent compilation errors, the default functor "struct func" must not be defined inside the function scope. I just moved it into a private section of SparseMatrix.
2010-11-02 14:32:41 +01:00
Hauke Heibel
b3007db131 Added a comment on why is_arithmetic is used in DenseCoeffsBase. 2010-11-02 10:11:22 +01:00
Hauke Heibel
96e4a4b59c Fixed compilation due to lacking Transform definitions. 2010-11-01 16:53:39 +01:00
Gael Guennebaud
d2e257cb5d oops (rm commented code) 2010-11-01 09:40:33 +01:00
Gael Guennebaud
c7eda0d866 Let's be safe: enable auto normalization in the quaternion to angle-axis code since a slight numerical issue may trigger NaN. The overhead is small and I doubt the perf of this function could be critical for any application! 2010-10-31 23:26:01 +01:00
Benoit Jacob
006c9a5105 implement VERIFY in a function so it doesn't get compiled thousands of times. 2010-10-29 10:27:20 -04:00
Benoit Jacob
7d441260db on test failure, abort instead of exit, so we can get a stack trace 2010-10-29 10:07:30 -04:00
Benoit Jacob
99ccb26cfe add eigen2support Transform typedefs, add Eigen2To3 section on Transform 2010-10-29 09:00:35 -04:00
Benoit Jacob
bd249d1121 fix bug #92 - we were doing stupid things when passing the list of libraries to link to. 2010-10-28 10:44:20 -04:00
Benoit Jacob
868f753d10 document LvalueBit better 2010-10-28 09:40:20 -04:00
Gael Guennebaud
1d4e80f09d generalize the prune function 2010-10-28 11:39:31 +02:00
Gael Guennebaud
02c8b6af82 fix sparse rankUpdate and triangularView iterator 2010-10-27 15:13:03 +02:00
Gael Guennebaud
241e5ee3e7 add the possibility to solve for sparse rhs with Cholmod 2010-10-27 14:31:23 +02:00
Hauke Heibel
5d4ff3f99c Fixed bug #95 by changing _M_IX64 to _M_X64 as proposed by Jan Schlicht. 2010-10-27 11:07:38 +02:00
Hauke Heibel
3efff8c69e Merge 2010-10-26 16:48:12 +02:00
Gael Guennebaud
f4a6a8e295 rm the useless SparseSolverBase class and provide more compile time traits 2010-10-26 16:47:47 +02:00
Hauke Heibel
c738cd56eb Renamed cleantype to remove_all since it is close to remove_{const|pointer|reference}. 2010-10-26 16:47:01 +02:00
Gael Guennebaud
2fbb9932b0 fix compilation (bad internal:: stuff) 2010-10-26 16:38:51 +02:00
Gael Guennebaud
5e95ee6662 fix compilation and unit test of adolc 2010-10-26 16:26:20 +02:00
Gael Guennebaud
92044fcc2b fix bug #94: add #include src/misc/Solve.h in SparseExtra 2010-10-26 15:51:06 +02:00
Gael Guennebaud
666c16cf63 add new API for Cholmod preserving the legacy one for now 2010-10-26 15:48:33 +02:00
Hauke Heibel
7bc8e3ac09 Initial fixes for bug #85.
Renamed meta_{true|false} to {true|false}_type, meta_if to conditional, is_same_type to is_same, un{ref|pointer|const} to remove_{reference|pointer|const} and makeconst to add_const.
Changed boolean type 'ret' member to 'value'.
Changed 'ret' members referring to types to 'type'.
Adapted all code occurrences.
2010-10-25 22:13:49 +02:00
Hauke Heibel
597b2745e1 Allow unset ${CMAKE_BUILD_TYPE} which is required for some targets and corresponding to using default values. 2010-10-25 18:49:39 +02:00
Benoit Jacob
724af13540 make polynomialsolver test compile faster 2010-10-25 10:15:22 -04:00
Benoit Jacob
a94f216487 error out on bad build type 2010-10-25 10:15:22 -04:00
Benoit Jacob
fdaa3f311a adapt mpreal to eigen3 mathfunctions system 2010-10-25 10:15:22 -04:00
Benoit Jacob
4716040703 bug #86 : use internal:: namespace instead of ei_ prefix 2010-10-25 10:15:22 -04:00
Benoit Jacob
ca85a1f6c5 remove build type tweaking 2010-10-23 10:00:43 -04:00
Jitse Niesen
dbdf7ee942 Use 'Release' as default when build type is not specified.
Otherwise, "cmake /path/to/eigen/" in an empty build directory, as specified
on the CMake page on the wiki, yields a fatal error.
2010-10-22 12:23:35 +01:00
Benoit Jacob
bfd46eacad don't change the build type, fatal error if bad build type 2010-10-21 08:55:48 -04:00
Hauke Heibel
969518f99d Improved I13_FunctionsTakingEigenTypes.dox.
Removed the r-value reference part and focused on EIGEN_REF_TO_TEMPORARY only.
2010-10-21 10:14:23 +02:00
Hauke Heibel
ba86d3ef65 Fixed bug #84. 2010-10-21 10:13:17 +02:00
Hauke Heibel
9bbaff6b41 Fixed the unit test splitting for MSVC. 2010-10-21 07:39:06 +02:00
Benoit Jacob
ee60fc2062 fix typo and rephrase sentence 2010-10-20 09:43:16 -04:00
Benoit Jacob
8c17fab8f5 renaming: ei_matrix_storage -> DenseStorage
DenseStorageBase  -> PlainObjectBase
2010-10-20 09:34:13 -04:00
Hauke Heibel
9cf748757e Improved the fixed size array display. 2010-10-20 11:56:29 +02:00
Benoit Jacob
e259f71477 rename PlanarRotation -> JacobiRotation 2010-10-19 21:56:26 -04:00
Benoit Jacob
9044c98cff work around stupid msvc error when constructing at compile time an expression
that involves a division by zero, even if the numeric type has floating point
2010-10-19 21:56:11 -04:00
Gael Guennebaud
e5073746f3 allows blocks of code to be larger than the page body (like tables) 2010-10-19 16:55:49 +02:00
Gael Guennebaud
e19c6b89f5 update the position of the owl 2010-10-19 16:07:04 +02:00
Gael Guennebaud
54814eb05b factorize CSS code, make use of the "manual" class when appropriate, clean the style of the big linear algebra table 2010-10-19 15:25:00 +02:00
Benoit Jacob
70f95ef80d increase css max-width 2010-10-19 09:40:23 -04:00
Benoit Jacob
b1604ea553 merge 2010-10-19 09:32:19 -04:00
Benoit Jacob
b8dfc62f3c specify max-width in em not px 2010-10-19 09:31:22 -04:00
Gael Guennebaud
6d8e7d68e4 factorize CSS code, make use of the "manual" class when appropriate, clean the style of the big linear algebra table 2010-10-19 15:25:00 +02:00
Benoit Jacob
9e3005d552 css update: max-width and margins 2010-10-19 09:18:06 -04:00
Benoit Jacob
9fa54d4cc9 move tables from class "tutorial_code" to "example"
also remove a align="center" in the Aliasing page -- it doesn't make sense to have 1 centered table page when all others are left aligned.
2010-10-19 08:42:49 -04:00
Gael Guennebaud
ca4bd5851c update style of the quick ref guide 2010-10-19 11:59:11 +02:00
Gael Guennebaud
f66fe2663f update CSS to doxygen 1.7.2, new CSS and cleaning of the tutorial 2010-10-19 11:40:49 +02:00
Hauke Heibel
9f8b6ad43e Fixed bug #79. 2010-10-19 09:43:54 +02:00
Benoit Jacob
3481f10e7a re-fix the broken msvc warning in JacobiSVD 2010-10-18 09:46:22 -04:00
Benoit Jacob
3404d5fb14 improvements in pages 5 and 7 of the tutorial. 2010-10-18 09:09:30 -04:00
Benoit Jacob
1c15a6d96f improvements in tutorial page 4 : block operations 2010-10-18 08:44:27 -04:00
Benoit Jacob
4b0fb968ea fixed table html 2010-10-18 07:23:48 -04:00
Benoit Jacob
597bb61c23 fix stupid msvc warning in jacobisvd 2010-10-18 06:54:11 -04:00
Benoit Jacob
6628534eb5 fix bug i just introduced in ei_add_test_internal 2010-10-17 11:47:59 -04:00
Benoit Jacob
19ae4362bd ah ok, we want to build this even without GSL.
so the bug is in FindGSL.cmake.
2010-10-17 11:31:58 -04:00
Benoit Jacob
4e3feb023d more unsupported/ CMake fixes 2010-10-17 11:21:10 -04:00
Benoit Jacob
1e3a035275 Fix general linking issue for tests linking to multiple libs, and explicitly link mpfr_real test to GMP. 2010-10-17 11:04:43 -04:00
Benoit Jacob
8356bc8d06 add jacobiSvd() method, update test & docs 2010-10-17 09:40:52 -04:00
Hauke Heibel
cd3a9d1ccb Fixed bug #74. 2010-10-17 12:33:47 +02:00
Hauke Heibel
c19b965730 Added stddeque unit test derived from the stdlist test. 2010-10-16 10:45:30 +02:00
Benoit Jacob
6f6400e488 Added tag 3.0-beta2 for changeset 3f79884f03 2010-10-15 09:46:45 -04:00
Benoit Jacob
3f79884f03 bump to 2.92.0 2010-10-15 09:46:20 -04:00
Benoit Jacob
26129229ec doc updates/improvements 2010-10-15 09:44:43 -04:00
Benoit Jacob
fcee1903be update the porting guide 2010-10-15 08:48:44 -04:00
Benoit Jacob
6dc478fd77 doc typo 2010-10-14 10:19:46 -04:00
Benoit Jacob
65c01e2bf7 JacobiSVD doc fix 2010-10-14 10:17:40 -04:00
Benoit Jacob
8f0e80fe30 JacobiSVD:
* fix preallocating constructors, allocate U and V of the right size for computation options
  * complete documentation and internal comments
  * improve unit test, test inf/nan values
2010-10-14 10:14:43 -04:00
Gael Guennebaud
e85a3857f0 import BLAS test suite 2010-10-14 13:46:01 +02:00
Gael Guennebaud
47197065da compilation fix 2010-10-14 10:19:55 +02:00
Benoit Jacob
bcb9068268 fix bug #44: use VERIFY_IS_APPROX instead of exact comparison to please x87 extended precision 2010-10-13 09:40:57 -04:00
Benoit Jacob
c8ecc897c0 add EIGEN_TEST_X87 option 2010-10-13 09:04:59 -04:00
Gael Guennebaud
3a2bb7f782 fix compilation and warnings with gcc 4.0.1 2010-10-13 10:21:28 +02:00
Gael Guennebaud
bf402dd9b8 add the possibility to disable OpenGL testing 2010-10-12 20:23:52 +02:00
Benoit Jacob
8eb0fc1e72 remove SVD class (was bad code taken from elsewhere)
Use JacobiSVD for now.
We do plan to reintroduce a bidiagonalizing SVD asap.
2010-10-12 10:19:59 -04:00
Benoit Jacob
dbedc70012 Jacobi improvements:
* add fixed-size vectorized path
  * add missing restrict keywords
  * use innerStride()
  * allow vectorization even if innerStride()>1, if PacketSize==1
    (think of the case of rows of std::complex<double>)
2010-10-12 09:58:53 -04:00
Benoit Jacob
12a152031d fix the Jacobi bug, expand unit test 2010-10-12 09:43:40 -04:00
Benoit Jacob
75e60121f4 add Jacobi unit test. jacobi_5 fails, exposing bug #39. 2010-10-12 09:12:36 -04:00
Gael Guennebaud
0308f64515 add support for uniform of double 2010-10-12 11:04:19 +02:00
Gael Guennebaud
fb30bb9e59 uncomment commented line for debug 2010-10-12 10:40:42 +02:00
Gael Guennebaud
20be8ad91e add support for uniforms 2010-10-12 10:39:28 +02:00
Benoit Jacob
b8bb804007 set ColPivHouseholderQR as default preconditioner for JacobiSVD 2010-10-11 21:00:42 -04:00
Benoit Jacob
5c3d21693b implement JacobiSVD::solve() and expand the unit test 2010-10-11 15:36:04 -04:00
Gael Guennebaud
0cae73d1eb add the prototype of all level2 functions 2010-10-08 23:31:57 +02:00
Gael Guennebaud
eb105cace8 compilation fix 2010-10-08 22:51:10 +02:00
Benoit Jacob
d229f99ba2 adapt Quaternion to JacobiSVD API changes. 2010-10-08 10:42:41 -04:00
Benoit Jacob
8ba8d90063 add option to compute thin U/V.
By default nothing is computed. You have to ask explicitly for thin/full U/V if you want them.
2010-10-08 10:42:40 -04:00
Benoit Jacob
6fad2eb97b Rework JacobiSVD api / template parameters.
There is now an integer QRPreconditioner template parameter, defaulting to full-piv QR.
Since we have to special-case each QR dec anyway, a template template parameter didn't add much value here.
There is an option NoQRPreconditioner if you know your matrices are already square (auto-detected for fixed-size matrices).
2010-10-08 10:42:32 -04:00
Benoit Jacob
58e0cce0f7 merge backout 2010-10-08 10:42:25 -04:00
Benoit Jacob
4a98cada26 Backed out changeset 2334291157
Sorry Thomas, these doc fixes are no longer relevant with the JacobiSVD API changes, and they are preventing me from applying my patches cleanly.
2010-10-08 10:42:06 -04:00
Gael Guennebaud
a76ce042e6 MSVC for windows mobile does not have the errno.h file 2010-10-07 18:09:15 +02:00
Gael Guennebaud
af22364988 an attempt to fix compilation on windows mobile 2010-10-07 17:54:46 +02:00
Gael Guennebaud
d9c131de5b remove the Taucs backend : Taucs is not maintained anymore and the backend was crap anyway 2010-10-06 17:42:17 +02:00
Gael Guennebaud
423f88aa1e improve FindCholmod 2010-10-06 17:38:02 +02:00
Romain Bossart
c6503e03eb Updates to the Sparse unsupported solvers module.
* change Sparse* specialization's signatures from <..., int Backend> to <..., typename Backend>. Update SparseExtra accordingly to use structs instead of the SparseBackend enum.
* add SparseLDLT Cholmod specialization
* for Cholmod and UmfPack, SparseLU, SparseLLT and SparseLDLT now use ei_solve_retval and have the new solve() method (to be closer to the 3.0 API).

* fix doc
2010-10-04 20:56:54 +02:00
Gael Guennebaud
e3d01f85b2 extend OpenGL support module with true unit tests and support for Transform, Translation, etc. 2010-10-06 13:28:13 +02:00
Gael Guennebaud
b5f32830fd fix geometry tutorial regarding the need to specify the "mode" 2010-10-06 13:27:14 +02:00
Gael Guennebaud
01fad14d78 mark LLT/LDLT solveInPlace func internal and rm their boolean returned value 2010-10-05 15:56:50 +02:00
Thomas Capricelli
2334291157 fix doc 2010-10-04 04:08:32 +02:00
Benoit Jacob
71f023de3e fix compilation on ubuntu 9.04's version of gcc 4.3 (yes, wtf) 2010-09-27 09:57:57 -04:00
Radu Bogdan Rusu
94ea1eed9a fix warning 2010-09-27 09:56:54 -04:00
Hauke Heibel
327ed3d1d3 Added a note to the Gram Schmidt code and improved some formatting. 2010-09-25 14:15:35 +02:00
Hauke Heibel
72d4d45133 Merge. 2010-09-24 17:34:49 +02:00
Hauke Heibel
316dadc8e4 Fixed some SVD issues.
Make the SVD's output unitary.
Improved unit tests.
Added an assert to the SVD ctor to check whether rows>=cols.
2010-09-24 17:32:44 +02:00
Hauke Heibel
053261de88 Make the SVD's output unitary and improved unit tests. 2010-09-24 16:28:20 +02:00
Benoit Jacob
1c54514bfc merge 2010-09-23 09:53:21 -04:00
Benoit Jacob
c253cc3d53 SVD:
* fix unit test for rectangular matrices.
 * enforce that rows >= cols since various places in the code assume that.
2010-09-23 09:51:08 -04:00
Hauke Heibel
947f84633b Fixed bad memory access in the SVD. 2010-09-23 11:15:36 +02:00
Hauke Heibel
62bf04b339 Fixed bad memory access in the SVD. 2010-09-23 11:15:36 +02:00
Gael Guennebaud
82e4a16759 remove superfluous #ifdef 2010-09-15 15:24:21 +02:00
Benoit Jacob
77c943670e add cmakelists for 2 subdirs and make sure all subdirs are installed (GLOB) 2010-09-14 04:11:15 -04:00
Gael Guennebaud
91e9344be9 fix vectorization logic and code of cross3 which was never enabled.. 2010-09-08 14:10:01 +02:00
Gael Guennebaud
f9123df772 fix uninitialized quaternion 2010-09-08 12:57:33 +02:00
Gael Guennebaud
d591b0466d add a bench to compare various transformation methods 2010-09-07 18:21:36 +02:00
Gael Guennebaud
9bb75937cc fix += return by value like operations 2010-09-06 11:51:42 +02:00
Gael Guennebaud
62eb4dc99b noalias was wrongly skipping automatic transposition 2010-09-02 19:18:34 +02:00
Gael Guennebaud
4824db6444 add the possibility to extend QuaternionBase 2010-09-02 17:28:07 +02:00
Eamon Nerbonne
d17bb02ccd Fixes mingw32 compile issues 2010-09-02 10:38:23 +02:00
Gael Guennebaud
e0ea25fc21 add missing copyrights 2010-09-01 12:59:38 +02:00
Gael Guennebaud
b49dde01dc fix bad mat * mat * scalar when the implicit conversion operator to a Matrix is used 2010-08-31 09:54:38 +02:00
Hauke Heibel
dd94f10442 Docs: Improved the docs for writing functions taking Eigen types.
- Removed the wrong statement about the MSVC compiler.
- Reformulated "simple functions" usage.
- Reformulated the summary paragraph about writable parameters.
2010-08-27 08:19:09 +02:00
Gael Guennebaud
dcff9ba785 fix bad "using typename" 2010-08-25 13:34:35 +02:00
Gael Guennebaud
cb7a72d5b0 Fix Sun CC parsing of Eigen/Core. In particular,
I moved all the block related methods to a plugin file. This also
significantly reduce code verbosity.
2010-08-25 13:09:56 +02:00
Benoit Jacob
e17d17cea3 didn't want to commit that bench change. 2010-08-24 10:57:22 -04:00
Benoit Jacob
bd8d06033d make a couple of typedefs public so stuff compiles 2010-08-24 10:53:33 -04:00
Gael Guennebaud
a47bbf664c fix 4x4 SSE inversion when storage orders don't match 2010-08-24 13:00:59 +02:00
Gael Guennebaud
548ecc2fe5 update inverse unit test to highlight another bug in SSE 4x4 inversion code 2010-08-24 12:38:20 +02:00
Gael Guennebaud
ad9a7c69bc fix inversion of 4x4 unaligned matrices 2010-08-24 12:28:42 +02:00
Benoit Jacob
6924d4eec5 update this test to build against current eigen.
remove the 'normal' path as it was not compiling anymore and I couldn't see the point of it (?)
2010-08-23 23:21:25 -04:00
Gael Guennebaud
6261f4629f add TriangularMatrix::conjugate to be consistent since we have adjoint 2010-08-23 23:38:35 +02:00
Jitse Niesen
474c2996bd Docs: add section on resolving the aliasing issue. 2010-08-23 17:23:30 +01:00
Jitse Niesen
d1111d625c Docs: Typos in ArrayBase doxygen comments 2010-08-23 11:44:51 +01:00
Jitse Niesen
103b9351fd Docs: Add references to TopicClassHierarchy 2010-08-22 18:28:19 +01:00
Jitse Niesen
a6da803873 Document DenseCoeffsBase 2010-08-22 17:30:31 +01:00
Hauke Heibel
60aad09878 Fixed DiagonalMatrix assignment. 2010-08-21 16:34:46 +02:00
Hauke Heibel
92b1674c79 Fixed typos. 2010-08-19 20:11:06 +02:00
Hauke Heibel
610d79e686 Simplified the product templates to a minimum of template parameters.
Removed the ei_is_any_projective helper and added ei_transform_traits.
2010-08-19 20:02:46 +02:00
Hauke Heibel
a64aabf73c Removed unused code. 2010-08-19 19:33:13 +02:00
Hauke Heibel
55c7848877 Matrix product refactoring (rhs products only).
Added strong inlines required for MSVC for proper inlining.
Added specializations for DiagonalMatrix products to RotationBase.
Added left- and right-hand-side products with DiagonalMatrix to Transform.
RHS Transform products now return Matrix objects only.
Split the geo_transformations unit test. Some tests were not made for projectivities.
Removed unused variables from main.h that caused warnings.
2010-08-19 19:25:35 +02:00
Gael Guennebaud
d4b664c4cd fix ugly conversion from double[2] to complex 2010-08-19 14:47:58 +02:00
Gael Guennebaud
5354ffbb4f add missing specialization for vector * selfadjoint 2010-08-19 14:05:21 +02:00
Gael Guennebaud
6264755dd3 merge 2010-08-18 15:34:55 +02:00
Gael Guennebaud
ab41c18d60 quickly mention how to solve a sparse problem 2010-08-18 15:33:58 +02:00
Benoit Jacob
216c9125e9 disable NonLinearOptimization test until it's fixed 2010-08-18 09:11:01 -04:00
Gael Guennebaud
ddbbd7065d * disable unalignment detection when vectorization is not enabled
* revert MapBase unalignment detection
2010-08-18 09:35:55 +02:00
Hauke Heibel
85fdcdf055 Fixed Geometry module failures.
Removed default parameter from Transform.
Removed the TransformXX typedefs.
Removed references to TransformXX from unit tests and docs.
Assigning Transforms to a sub-group is now forbidden at compile time.
Products should now properly support the Isometry flag.
Fixed alignment checks in MapBase.
2010-08-17 20:03:50 +02:00
Benoit Jacob
87aafc9169 fix Transform() constructor taking a Transform with other mode.
Not really tested as the geometry tests are currently busted.
2010-08-16 12:30:33 -04:00
Benoit Jacob
19d9c835e0 fix warnings 2010-08-16 11:11:43 -04:00
Gael Guennebaud
b37551f62a further improve compilation error message for array+=matrix 2010-08-16 11:13:02 +02:00
Gael Guennebaud
c625a6a85b improve compilation error message for array+=matrix and the likes 2010-08-16 11:07:17 +02:00
Gael Guennebaud
453d54325e fix declaration of AffineTransformType in Translation 2010-08-16 10:44:27 +02:00
Gael Guennebaud
ba212aeaa9 fix misdetection of GLUT 2010-08-16 09:50:24 +02:00
Gael Guennebaud
aa2b46aa91 allow vectorization of mat44.col() by adding a InnerPanel boolean
template parameter to Block
2010-07-23 16:29:29 +02:00
Gael Guennebaud
853c0e15df slightly generalize the alignment assert in MapBase 2010-08-16 09:41:07 +02:00
Gael Guennebaud
8566ef805b remove the aligned bit flag for non vectorizable types 2010-08-16 09:38:49 +02:00
Benoit Jacob
3a30a2bc3e forgot to remove a #endif 2010-08-13 14:03:38 -04:00
Benoit Jacob
b80d9dd42e fix determination of number of registers on sse:
__i386__ was not defined by MSVC 2010.
fixed as (2*sizeof(void*)).
also move that to SSE/ and let the default for unknown arch's be just 8.
2010-08-13 13:55:28 -04:00
Benoit Jacob
8bbe556e35 merge the backout 2010-08-11 00:06:31 -04:00
Benoit Jacob
97ced33b33 Backed out changeset 40f6e26a24
See thread on mailing list: "InnerPanel change mis-detects alignment?"
2010-08-11 00:04:06 -04:00
Jitse Niesen
76fbe94279 Document EIGEN_NO_DEBUG macro.
I needed some doxygen tricks to get this to work, so it may not be worth it.
2010-08-10 11:37:23 +01:00
Jitse Niesen
530b328769 Aliasing doc: explain that some cases are detected, reverse order examples. 2010-08-08 21:20:14 +01:00
Hauke Heibel
3dd8225862 Added more detailed docs to the QR decompositions classes. 2010-08-05 08:56:19 +02:00
Benoit Jacob
976d7c19e8 some small improvements to the page on functions taking eigen objects.
- make the beginning more precise
 - make the first example be a full selfcontained compiled example, no need for all the others, but having the first one doesn't hurt.
2010-08-04 21:42:32 -04:00
Hauke Heibel
5c7cb3c05c Added more examples to the function writing tutorial including EigenBase, DenseBase, etc. 2010-08-04 17:50:46 +02:00
Hauke Heibel
d558e84f0b Fixed some typos and reformulated a few sentences. 2010-08-04 16:40:33 +02:00
Hauke Heibel
224dd66e10 Added a tutorial on writing functions taking Eigen types. 2010-08-04 12:01:19 +02:00
Benoit Jacob
d90d7a006f fix warnings. The one in Reverse was potentially serious: coeff() methods should return CoeffReturnType, not "Scalar", if the expression is potentially a Lvalue. 2010-08-03 10:38:48 -04:00
Hauke Heibel
cc25edd5de Fixed Affine transform typedef. 2010-08-02 21:33:48 +02:00
Jitse Niesen
508b51cb62 Add page giving an overview of the class hierarchy.
This is mostly copied from the wiki, which in turn copies Benoit's email at
http://listengine.tuxfamily.org/lists.tuxfamily.org/eigen/2010/06/msg00576.html
I used ASCII art for the inheritance diagrams for now, but I don't mind
moving to GraphViz/dot as discussed earlier.
2010-08-02 11:36:44 +01:00
Jitse Niesen
a9fe75efc4 Documentation: Start special topic page on aliasing. 2010-07-31 21:37:29 +01:00
Hauke Heibel
7cefa75901 Added static method Identity() to the Translation class. 2010-07-29 17:30:37 +02:00
Hauke Heibel
e92993d7b9 Safeguarded some Transform functions with compile time asserts.
Added missing static Identity() to Rotation2D, AngleAxis.
2010-07-29 16:17:42 +02:00
Hauke Heibel
6b89ee0095 Transform is now per default Projective.
Improved invert() in the Transform class.
RotationBase offers matrix() to conform to Transform's naming scheme.
Added Translation::translation() to conform to Transform's naming scheme.
2010-07-29 15:54:32 +02:00
Hauke Heibel
2f0e8904f1 Removed debug outputs. 2010-07-28 10:47:58 +02:00
Kenneth Riddile
b038a4bb71 * added EIGEN_ALIGNED_ALLOCATOR macro to allow specifying a different aligned allocator
* attempted to add support for std::deque by copying and modifying the std::vector implementation...MSVC still fails to compile with the std::deque::resize() "will not be aligned" error...probably missing something simple but I'm not sure how to make it work
2010-07-26 19:06:47 -04:00
Jitse Niesen
1420f8b3a1 Several changes in comments to keep Doxygen happy. 2010-07-25 20:29:07 +01:00
Jitse Niesen
3d9764ee24 Add some more examples for the API documentation.
The only missing examples now are for homogeneous() and hnormalized();
I don't know what they're used for ...
2010-07-24 16:43:07 +01:00
Jitse Niesen
425444428c Add examples for API documentation of block methods in DenseBase. 2010-07-23 22:20:00 +01:00
Jitse Niesen
2b5a0060b4 Add examples for API documentation of MatrixBase::cwiseXxx() methods. 2010-07-23 20:32:33 +01:00
Jitse Niesen
072ee3c07d Set Doxygen config variable INCLUDE_PATH to plugins directory.
This is necessary to get functions like MatrixBase::cwiseAbs() documented;
otherwise doxygen can't find the include file in which they are defined.
2010-07-23 19:57:21 +01:00
Jitse Niesen
ae8425c74c Tutorial page 7: more typical example for .all(), minor copy-editing. 2010-07-23 19:20:10 +01:00
User Martin Senst
145830e067 Add newline at the end of Dense. 2010-07-23 19:00:02 +02:00
Gael Guennebaud
40f6e26a24 allow vectorization of mat44.col() by adding a InnerPanel boolean
template parameter to Block
2010-07-23 16:29:29 +02:00
Jitse Niesen
d0f6b1c21f Tutorial page 6: Fix typo, add table of contents. 2010-07-22 21:52:04 +01:00
Gael Guennebaud
9daa66f262 fix merge conflicts 2010-07-22 17:23:11 +02:00
Gael Guennebaud
5d98fa235d merge with complex branch 2010-07-22 16:57:14 +02:00
Jitse Niesen
403e672587 Extend tutorial page 5: Advanced initialization. 2010-07-22 15:53:21 +01:00
Gael Guennebaud
7020f30da3 sync with default branch 2010-07-22 16:29:35 +02:00
Gael Guennebaud
b9edd6fb85 oops 2010-07-22 16:24:01 +02:00
Gael Guennebaud
96ba7cd655 add an OpenGL module simplifying the way you can pass Eigen's objects to GL 2010-07-22 16:08:58 +02:00
Gael Guennebaud
fa6d36e0f7 fix SparseView: clean the nested matrix type 2010-07-22 15:57:01 +02:00
Hauke Heibel
734469e43f Unified LinSpaced in order to be conform with other setter methods as e.g. Constant. 2010-07-22 14:04:00 +02:00
Gael Guennebaud
c7f40e522e merge 2010-07-22 13:21:06 +02:00
Gael Guennebaud
06250a154c add matlab-like mixed product 2010-07-22 13:19:09 +02:00
Gael Guennebaud
bec3f9bfe4 rename indices to a common scheme 2010-07-22 13:17:39 +02:00
Gael Guennebaud
0916d69ca5 fix inner vectorization logic 2010-07-22 13:17:12 +02:00
Gael Guennebaud
0dfc5b296b fix strict aliasing issue 2010-07-22 13:16:53 +02:00
Gael Guennebaud
8a96b0080d now that we properly support mixing real-complex: clean mixingtypes test 2010-07-22 13:15:49 +02:00
Thomas Capricelli
8e21cef80a fix typo 2010-07-22 13:15:15 +02:00
Gael Guennebaud
4393f20fea fix compilation of quaternion demo 2010-07-21 17:34:32 +02:00
Gael Guennebaud
f1104a3b0f fix mandelbrot compilation, and make it use Array instead of Matrix 2010-07-21 17:13:02 +02:00
Gael Guennebaud
35f0bc70d8 fix a strict aliasing issue with gcc 4.3 2010-07-20 22:43:55 +02:00
Gael Guennebaud
b5f2b7d087 fix storage order request 2010-07-20 22:08:48 +02:00
Gael Guennebaud
7dbbc6ffd1 fix static allocation of workspace 2010-07-20 17:06:14 +02:00
Gael Guennebaud
ced1a45f82 add NEON ploaddup and pcplxflip functions 2010-07-20 14:24:01 +02:00
Gael Guennebaud
193eedbfe2 one more fix for openmp 2010-07-20 14:19:00 +02:00
Gael Guennebaud
d7fa09bf05 improve block-size heuristic 2010-07-20 13:23:50 +02:00
Gael Guennebaud
4824ac1363 fix openmp version 2010-07-20 13:23:19 +02:00
Gael Guennebaud
b551a2d77a fix declaration of pack_lhs in trsm 2010-07-20 12:58:22 +02:00
Gael Guennebaud
10a7668035 uncomment commented code for debug 2010-07-20 12:57:46 +02:00
Gael Guennebaud
7b23fad4c9 report a true assert when not checking for an assertion 2010-07-20 12:54:53 +02:00
Gael Guennebaud
44cb1e4802 it appears only the "on the left" case was tested 2010-07-20 10:32:56 +02:00
Gael Guennebaud
872523844a fix trmm and symm wrt lhs packing 2010-07-20 10:06:41 +02:00
Gael Guennebaud
76eb9c9fd9 fix compilation by including file in correct order 2010-07-19 23:32:13 +02:00
Gael Guennebaud
70b1ce11c6 * fix SelfCwiseBinaryOp traits and handling of mixed types
* improve compilation error in case of type mismatch
2010-07-19 23:31:08 +02:00
Gael Guennebaud
8b0b121c9e explicitly disable vectorization for mixed coeff based products 2010-07-19 23:28:57 +02:00
Gael Guennebaud
08c841eb87 fix lhs packing in the case of real * complex products 2010-07-19 23:16:03 +02:00
Gael Guennebaud
1ed4233fd2 port Jacobi to new ei_pset1/ei_pload API 2010-07-19 16:51:38 +02:00
Gael Guennebaud
c2ee454df4 * fix compilation of mixed scalar product
* optimize mixed scalar products
2010-07-19 16:49:09 +02:00
Gael Guennebaud
6e157dd7c6 * fix a couple of remaining issues with previous commit,
* merge ei_product_blocking_traits into ei_gepb_traits
2010-07-19 15:45:13 +02:00
Gael Guennebaud
f8aae7a908 * _mm_loaddup_pd is slow
* optimize SSE ei_ploaddup<Packet4f>
2010-07-19 15:43:27 +02:00
Gael Guennebaud
cd0e5dca9b wip: extend the gebp kernel to optimize complex and mixed products 2010-07-19 08:50:59 +02:00
Gael Guennebaud
45362f4eae update mixing type test 2010-07-15 08:40:09 +02:00
Gael Guennebaud
3f532edc6d update unit test for new API 2010-07-15 08:38:31 +02:00
Gael Guennebaud
1dc9aaaf36 add support for mixing type in trsv 2010-07-13 16:03:49 +02:00
Gael Guennebaud
36d9b51a44 optimize non fused MADD, and add a flatten attribute macro to enforce
inlining within a function
2010-07-13 15:16:34 +02:00
Gael Guennebaud
b72b7ab76f matrix product: move the alpha factor to gebp instead of the packing,
clean some temporaries, etc.
2010-07-12 16:31:46 +02:00
Gael Guennebaud
f8678272a4 mixing types step 3:
- improve support of colmajor by vector and matrix - matrix
- now all configurations are well handled, but the perf are not always very good
2010-07-11 23:57:23 +02:00
Gael Guennebaud
8e3c4283f5 make colmaj * vector uses pointers only 2010-07-11 16:01:48 +02:00
Gael Guennebaud
ff96c94043 mixing types in product step 2:
* pload* and pset1 are now templated on the packet type
* gemv routines are now embedded into a structure with
  a consistent API with respect to gemm
* some configurations of vector * matrix and matrix * matrix works fine,
  some need more work...
2010-07-11 15:48:30 +02:00
Gael Guennebaud
4161b8be67 sync 2010-07-10 22:58:51 +02:00
Gael Guennebaud
e5bc9526f1 * generalize rowmajor by vector
* fix weird compilation error when constructing a matrix with a row by matrix product
2010-07-10 22:53:27 +02:00
Gael Guennebaud
c4ef69b5bd fix compilation: make the check_coordinates* functions const 2010-07-10 22:37:16 +02:00
Benoit Jacob
6dcd373b9d let ei_pset1 use _mm_loaddup_pd. Not a significant speed improvement, but also not a speed regression, and replaces 3 instructions by 1 single instruction. 2010-07-09 18:51:17 -04:00
Konstantinos Margaritis
6ad3f1ab1f Added NEON/Complex.h, ~3.5x faster than scalar std::complex<float>
minor fix in AltiVec Complex.h
2010-07-10 00:09:29 +03:00
Gael Guennebaud
96f9015807 disable MSVC optimization when the underlying compiler is ICC 2010-07-09 19:33:43 +02:00
Gael Guennebaud
b2effa2b2c move ei_conj_if to a more appropriate file 2010-07-09 18:05:57 +02:00
Konstantinos Margaritis
642cc27eb1 forgot to commit ei_p4f_FORWARD; 2010-07-09 18:08:18 +03:00
Konstantinos Margaritis
f6bd508351 forgot to add the Complex.h include for AltiVec. 2010-07-09 17:56:53 +03:00
Konstantinos Margaritis
d9e134c73c Altivec port of Complex.h.
Note: For some reason g++ 4.4 is >200% slower than g++ 4.3 on altivec code.
The same benchmark (bench_gemm) was tested, on the same hardware/OS (G4/Debian testing),
with same CFLAGS. With some code reorganizing I managed to get some minor gain
on 4.4, but I just could not reach 4.3 speed. This is most likely a bug, but I'm waiting
to see if it's fixed on 4.5. I'll look into this a bit more.
2010-07-09 17:54:41 +03:00
Jitse Niesen
26cfe5a958 Be consistent in how the tutorial pages link together. 2010-07-09 11:59:29 +01:00
Jitse Niesen
2c03ca3325 Small changes to tutorial page 2 (matrix arithmetic):
* slightly more extensive discussion of aliasing
* layout: put example code and output side-by-side
* add some links, etc
2010-07-09 11:46:07 +01:00
Gael Guennebaud
b1a17dbfe4 fix a few weird issues with gcc 4.3 32bits and complex<float> 2010-07-09 08:27:58 +02:00
Thomas Capricelli
551cb9b7b4 bench: use of Eigen/Array is deprecated + fix includes for iostream 2010-07-09 03:59:36 +02:00
Gael Guennebaud
504d3a3586 fix SliceVectorizedTraversal for packetsize==1 2010-07-08 23:31:14 +02:00
Gael Guennebaud
51ec188da0 extend vectorization_logic 2010-07-08 23:30:16 +02:00
Carlos Becker
951da96f14 Added more redux types/examples in tutorial and fixed some display issues 2010-07-08 18:16:39 +01:00
Carlos Becker
cb3aad1d91 Reductions/Broadcasting/Visitor Tutorial added to index 2010-07-08 17:45:25 +01:00
Carlos Becker
9852e7b9cb Reductions/Broadcasting/Visitor Tutorial added 2010-07-08 17:42:23 +01:00
Gael Guennebaud
300a226ffa scalars fitting in a single packet requires more work, step 1
* add an Alignable trait
* update LinearVectorization assignment
2010-07-08 14:27:47 +02:00
Gael Guennebaud
2a1500915a compilation fix 2010-07-08 14:26:00 +02:00
Gael Guennebaud
2066ed91de enabling aligned loads/store for complex<double> is much more tricky,
so the temporary fix is to always perform unaligned load/store
2010-07-07 22:50:19 +02:00
Gael Guennebaud
d89925e6de an attempt to fix wrong unaligned store 2010-07-07 22:35:06 +02:00
Gael Guennebaud
02fd3acd81 update to support mixing types 2010-07-07 19:49:48 +02:00
Gael Guennebaud
31a36aa9c4 support for real * complex matrix product - step 1 (works for some special cases) 2010-07-07 19:49:09 +02:00
Gael Guennebaud
fc3fd8ab57 mention that array = matrix is fine too 2010-07-07 18:10:11 +02:00
Gael Guennebaud
861962c55f sync 2010-07-07 16:44:05 +02:00
Gael Guennebaud
0f2d480af0 add support for complex 2010-07-07 16:41:29 +02:00
Gael Guennebaud
a2415388ef optimized conjugate products for SSE3 2010-07-07 16:37:20 +02:00
Gael Guennebaud
65257f6b29 optimize for SSE3 => significant speed up !! 2010-07-07 15:34:46 +02:00
Gael Guennebaud
dd18b22f0b optimize pmul for complex<double> 2010-07-07 15:29:04 +02:00
Gael Guennebaud
845994f18f optimize gemv for complex<double> and fix gcc alignment issue in 32bits 2010-07-07 15:28:41 +02:00
Gael Guennebaud
e07c0f6bb5 cleaning 2010-07-07 11:41:29 +02:00
Gael Guennebaud
3a7f16a655 typo 2010-07-07 11:13:30 +02:00
Gael Guennebaud
b0896382a3 s/IsVectorized/Vectorizable 2010-07-07 11:10:46 +02:00
Gael Guennebaud
74cf12cbe0 add a compile time error if someone call packet on Diagonal (instead of infinite runtime loop) 2010-07-07 11:07:12 +02:00
Gael Guennebaud
d5e0efaf69 fix vectorization rule of diagonal-product 2010-07-07 11:06:31 +02:00
Gael Guennebaud
c851044eae fix row cwise-prod column in coeff based products...
I really don't know why this worked so far...
2010-07-07 10:52:59 +02:00
Gael Guennebaud
55495dcbae extend product unit tests 2010-07-07 10:50:40 +02:00
Gael Guennebaud
e38fc9692d add a conj_product functor and optimize dot products 2010-07-07 10:00:08 +02:00
Gael Guennebaud
f8d3b4c060 fix mixing types in DiagonalProduct 2010-07-07 09:43:29 +02:00
Gael Guennebaud
bfa606d16f * add a IsVectorized mechanism (instead of packet-size>1...)
* vectorize complex<double>
2010-07-06 23:36:00 +02:00
Gael Guennebaud
38d0a0d5d6 add a unit test for previous bug 2010-07-06 20:54:35 +02:00
Gael Guennebaud
2dba4b7ce7 add a unit test for conj_helper and ei_pconj 2010-07-06 20:54:14 +02:00
Gael Guennebaud
bc57c68cf5 bug fix forgot to conjugate the scalar factor when needed 2010-07-06 20:53:48 +02:00
Gael Guennebaud
e04c3f2cc0 reduce code generation and minor speed up 2010-07-06 19:15:02 +02:00
Gael Guennebaud
d6454788d9 add support for vectorized conjugated products 2010-07-06 19:10:24 +02:00
Gael Guennebaud
291fef5760 fix range 2010-07-06 19:09:31 +02:00
Jitse Niesen
49747fa4a9 Various documentation improvements.
* Add short documentation for Array class
* Put all classes explicitly in Core module (where applicable)
* Section on Modules in Quick Reference Guide
* Put Page 7 after Page 6 in Contents :)
2010-07-06 13:10:08 +01:00
Jitse Niesen
3428d80d20 Small changes to tutorial page 1. 2010-07-06 10:48:25 +01:00
Jens Mueller
d849bc4401 Avoid calling resizeLike, if EIGEN_NO_AUTOMATIC_RESIZING is defined 2010-07-06 10:11:18 +02:00
Jens Mueller
5322b670c8 Add all unsupported modules and fix header file paths 2010-07-06 10:25:52 +02:00
Gael Guennebaud
7d23e7f9f1 indentation 2010-07-06 11:02:01 +02:00
Benoit Jacob
d1243b393e Added tag 3.0-beta1 for changeset 8cfbf33f60 2010-07-06 00:50:30 -04:00
Gael Guennebaud
c69a226192 * extend the Has* packet traits and makes all functor use it
* extend the packing routines to support conjugation
2010-07-05 23:27:54 +02:00
Gael Guennebaud
e1eccfad3f add initial support for the vectorization of complex<float> 2010-07-05 16:18:09 +02:00
Manoj Rajagopalan
c64c0f382f Examples for DenseBase::middle{Rows,Cols}() 2010-06-30 11:26:31 -04:00
Manoj Rajagopalan
5c58582a08 Renamed DenseBase::{row,col}Range() to DenseBase::middle{Rows,Cols}() 2010-06-29 14:31:39 -04:00
Manoj Rajagopalan
6e5bed69dc Included tests for middleRows() and middleCols() 2010-06-29 12:39:58 -04:00
Manoj Rajagopalan
464fc297cf Included definitions for rowRange() and colRange() member functions of DenseBase 2010-06-26 17:37:17 -04:00
Martin Senst
4b474fdb34 Relax assertion to allow for matrices with cols() == 0 and/or rows() == 0. 2010-07-20 21:25:43 +02:00
Gael Guennebaud
95f2e7f3f5 introduce a new LvalueBit flag and split DenseCoeffBase into three level of accessors 2010-07-21 10:57:01 +02:00
Jitse Niesen
3abbdfd621 Add (set)LinSpaced to quick reference guide. 2010-07-20 21:55:22 +01:00
Jitse Niesen
abd5faf784 Require at least MPFR version 2.3.0, because we use mpfr_signbit.
Code in FindMPFR.cmake is taken from FindEigen2.cmake .
2010-07-19 12:26:52 +01:00
Gael Guennebaud
cac147ba10 add support for determinant on empty matrix 2010-07-19 10:45:06 +02:00
Gael Guennebaud
78d3c54631 add a small bench demoing the possibilities of a direct 3x3 eigen decomposition 2010-07-18 17:26:06 +02:00
Gael Guennebaud
ea27678153 fix compilation of ei_tridiagonalization_inplace_selector for 1x1 matrix 2010-07-18 17:10:11 +02:00
Gael Guennebaud
2a820d41df finish/fix level1 blas, all test pass 2010-07-17 13:49:43 +02:00
Gael Guennebaud
dd27e10360 fix level3 blas: it now passes all computational tests 2010-07-17 11:59:09 +02:00
Gael Guennebaud
2d78023815 fix hemm to not use the imaginary part of the diagonal entries 2010-07-17 11:57:54 +02:00
Gael Guennebaud
cbd6fe323c fix a couple of issues with blas (new TRMM api, and enforce column major) 2010-07-16 23:30:06 +02:00
Gael Guennebaud
f59226e901 fix compilation of blas lib 2010-07-16 22:27:24 +02:00
Gael Guennebaud
4c19024fbf re-enable writing to reversed objects 2010-07-16 22:26:07 +02:00
Gael Guennebaud
fb041c260c fix for empty matrices 2010-07-16 22:25:35 +02:00
Gael Guennebaud
883a8cbb2c disable the optimized 3x3 path for complexes which was not working at all 2010-07-16 18:22:00 +02:00
Gael Guennebaud
6ab9e8632f fix bad fuzzy comparison in 3x3 tridiagonalization 2010-07-16 16:38:58 +02:00
Gael Guennebaud
044424b0e2 fix sum()/prod() on empty matrix making sure this does not affect fixed sized object, extend related unit tests including partial reduction 2010-07-16 14:02:20 +02:00
Gael Guennebaud
6a370f50c7 MPRealSupport was missing 2010-07-15 20:45:45 +02:00
Gael Guennebaud
b08c26aefa merge 2010-07-15 20:41:33 +02:00
Gael Guennebaud
84fdbded4d add support for strictly triangular matrix in trmm though it is not really useful 2010-07-15 20:39:20 +02:00
Gael Guennebaud
87e89fea4e add a support module for MPFR C++ with basic unit testing 2010-07-15 16:29:04 +02:00
Gael Guennebaud
bfbe61454e merge 2010-07-15 09:54:31 +02:00
Gael Guennebaud
cf9edd9958 fix compilation for non trivial types 2010-07-14 23:31:38 +02:00
Gael Guennebaud
b6fac91998 merge 2010-07-14 22:51:53 +02:00
Gael Guennebaud
d4d4382b18 use dummy_precision by default instead of 0 2010-07-14 22:50:03 +02:00
Gael Guennebaud
90d6fc0e28 fix ei_aligned_delete for null pointers and non trivial dtors 2010-07-14 22:49:34 +02:00
Jitse Niesen
b0bd1cfa05 Tutorial page 4: add some text, diversify examples.
Use \verbinclude for output text to disable syntax highlighting.
Give tables consistent look.
2010-07-14 10:16:12 +01:00
Gael Guennebaud
e4f3759c4d add a bench for quaternion multiplication 2010-07-13 13:29:35 +02:00
Jitse Niesen
c36316f284 Change EXPAND_AS_DEFINED doxygen configuration option.
Add macros so that MatrixBase::cwiseProduct() and ArrayBase::min() are
documented, and remove one macro which is no longer used.
2010-07-13 10:14:58 +01:00
Jitse Niesen
140ad0908d Tutorial page 3: add more cwise operations, condense rest. 2010-07-12 22:45:57 +01:00
Christoph Hertzberg
6ba5d2c90c Implemented SSE optimized double-precision Quaternion multiplication 2010-07-12 23:30:47 +02:00
Jitse Niesen
8e776c94c1 Tutorial page 1: Put code and output side-by-side. 2010-07-12 12:02:31 +01:00
Gael Guennebaud
19a70ae939 fix doc compilation on non 32bits systems 2010-07-11 11:01:17 +02:00
Gael Guennebaud
850c6d8a2b fix unused warning 2010-07-11 10:58:58 +02:00
762 changed files with 84360 additions and 14713 deletions

3
.hgeol Normal file
View File

@@ -0,0 +1,3 @@
[patterns]
**.* = native
eigen_autoexp_part.dat = CRLF

View File

@@ -23,5 +23,10 @@ tags
activity.png activity.png
*.out *.out
*.php* *.php*
eigen_gen_credits.log *.log
*.orig
*.rej
log
patch
a
a.*

View File

@@ -2,19 +2,32 @@ project(Eigen)
cmake_minimum_required(VERSION 2.6.2) cmake_minimum_required(VERSION 2.6.2)
# guard against in-source builds # guard against in-source builds
if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. (you may need to remove CMakeCache.txt ") message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. You may need to remove CMakeCache.txt. ")
endif() endif()
# guard against bad build-type strings
if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release")
endif()
string(TOLOWER "${CMAKE_BUILD_TYPE}" cmake_build_type_tolower)
if( NOT cmake_build_type_tolower STREQUAL "debug"
AND NOT cmake_build_type_tolower STREQUAL "release"
AND NOT cmake_build_type_tolower STREQUAL "relwithdebinfo")
message(FATAL_ERROR "Unknown build type \"${CMAKE_BUILD_TYPE}\". Allowed values are Debug, Release, RelWithDebInfo (case-insensitive).")
endif()
############################################################################# #############################################################################
# retrieve version infomation # # retrieve version infomation #
############################################################################# #############################################################################
# automatically parse the version number # automatically parse the version number
file(READ "${CMAKE_SOURCE_DIR}/Eigen/src/Core/util/Macros.h" _eigen_version_header) file(READ "${PROJECT_SOURCE_DIR}/Eigen/src/Core/util/Macros.h" _eigen_version_header)
string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen_world_version_match "${_eigen_version_header}") string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen_world_version_match "${_eigen_version_header}")
set(EIGEN_WORLD_VERSION "${CMAKE_MATCH_1}") set(EIGEN_WORLD_VERSION "${CMAKE_MATCH_1}")
string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen_major_version_match "${_eigen_version_header}") string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen_major_version_match "${_eigen_version_header}")
@@ -81,13 +94,6 @@ endif(NOT WIN32)
set(CMAKE_INCLUDE_CURRENT_DIR ON) set(CMAKE_INCLUDE_CURRENT_DIR ON)
string(TOLOWER "${CMAKE_BUILD_TYPE}" cmake_build_type_tolower)
if(cmake_build_type_tolower STREQUAL "debug")
set(CMAKE_BUILD_TYPE "Debug")
else()
set(CMAKE_BUILD_TYPE "Release")
endif()
option(EIGEN_SPLIT_LARGE_TESTS "Split large tests into smaller executables" ON) option(EIGEN_SPLIT_LARGE_TESTS "Split large tests into smaller executables" ON)
option(EIGEN_DEFAULT_TO_ROW_MAJOR "Use row-major as default matrix storage order" OFF) option(EIGEN_DEFAULT_TO_ROW_MAJOR "Use row-major as default matrix storage order" OFF)
@@ -95,6 +101,8 @@ if(EIGEN_DEFAULT_TO_ROW_MAJOR)
add_definitions("-DEIGEN_DEFAULT_TO_ROW_MAJOR") add_definitions("-DEIGEN_DEFAULT_TO_ROW_MAJOR")
endif() endif()
add_definitions("-DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS")
if(CMAKE_COMPILER_IS_GNUCXX) if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fexceptions -fno-check-new -fno-common -fstrict-aliasing") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fexceptions -fno-check-new -fno-common -fstrict-aliasing")
set(CMAKE_CXX_FLAGS_DEBUG "-g3") set(CMAKE_CXX_FLAGS_DEBUG "-g3")
@@ -115,43 +123,43 @@ if(CMAKE_COMPILER_IS_GNUCXX)
option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF) option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
if(EIGEN_TEST_SSE2) if(EIGEN_TEST_SSE2)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2")
message("Enabling SSE2 in tests/examples") message(STATUS "Enabling SSE2 in tests/examples")
endif() endif()
option(EIGEN_TEST_SSE3 "Enable/Disable SSE3 in tests/examples" OFF) option(EIGEN_TEST_SSE3 "Enable/Disable SSE3 in tests/examples" OFF)
if(EIGEN_TEST_SSE3) if(EIGEN_TEST_SSE3)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
message("Enabling SSE3 in tests/examples") message(STATUS "Enabling SSE3 in tests/examples")
endif() endif()
option(EIGEN_TEST_SSSE3 "Enable/Disable SSSE3 in tests/examples" OFF) option(EIGEN_TEST_SSSE3 "Enable/Disable SSSE3 in tests/examples" OFF)
if(EIGEN_TEST_SSSE3) if(EIGEN_TEST_SSSE3)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mssse3") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mssse3")
message("Enabling SSSE3 in tests/examples") message(STATUS "Enabling SSSE3 in tests/examples")
endif() endif()
option(EIGEN_TEST_SSE4_1 "Enable/Disable SSE4.1 in tests/examples" OFF) option(EIGEN_TEST_SSE4_1 "Enable/Disable SSE4.1 in tests/examples" OFF)
if(EIGEN_TEST_SSE4_1) if(EIGEN_TEST_SSE4_1)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
message("Enabling SSE4.1 in tests/examples") message(STATUS "Enabling SSE4.1 in tests/examples")
endif() endif()
option(EIGEN_TEST_SSE4_2 "Enable/Disable SSE4.2 in tests/examples" OFF) option(EIGEN_TEST_SSE4_2 "Enable/Disable SSE4.2 in tests/examples" OFF)
if(EIGEN_TEST_SSE4_2) if(EIGEN_TEST_SSE4_2)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")
message("Enabling SSE4.2 in tests/examples") message(STATUS "Enabling SSE4.2 in tests/examples")
endif() endif()
option(EIGEN_TEST_ALTIVEC "Enable/Disable AltiVec in tests/examples" OFF) option(EIGEN_TEST_ALTIVEC "Enable/Disable AltiVec in tests/examples" OFF)
if(EIGEN_TEST_ALTIVEC) if(EIGEN_TEST_ALTIVEC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec")
message("Enabling AltiVec in tests/examples") message(STATUS "Enabling AltiVec in tests/examples")
endif() endif()
option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF) option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
if(EIGEN_TEST_NEON) if(EIGEN_TEST_NEON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp -mfpu=neon -mcpu=cortex-a8") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp -mfpu=neon -mcpu=cortex-a8")
message("Enabling NEON in tests/examples") message(STATUS "Enabling NEON in tests/examples")
endif() endif()
check_cxx_compiler_flag("-fopenmp" COMPILER_SUPPORT_OPENMP) check_cxx_compiler_flag("-fopenmp" COMPILER_SUPPORT_OPENMP)
@@ -159,7 +167,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
option(EIGEN_TEST_OPENMP "Enable/Disable OpenMP in tests/examples" OFF) option(EIGEN_TEST_OPENMP "Enable/Disable OpenMP in tests/examples" OFF)
if(EIGEN_TEST_OPENMP) if(EIGEN_TEST_OPENMP)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
message("Enabling OpenMP in tests/examples") message(STATUS "Enabling OpenMP in tests/examples")
endif() endif()
endif() endif()
@@ -183,7 +191,7 @@ if(MSVC)
option(EIGEN_TEST_OPENMP "Enable/Disable OpenMP in tests/examples" OFF) option(EIGEN_TEST_OPENMP "Enable/Disable OpenMP in tests/examples" OFF)
if(EIGEN_TEST_OPENMP) if(EIGEN_TEST_OPENMP)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp")
message("Enabling OpenMP in tests/examples") message(STATUS "Enabling OpenMP in tests/examples")
endif() endif()
endif() endif()
@@ -193,20 +201,42 @@ if(MSVC)
# arch is not supported on 64 bit systems, SSE is enabled automatically. # arch is not supported on 64 bit systems, SSE is enabled automatically.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:SSE2") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:SSE2")
endif(NOT CMAKE_CL_64) endif(NOT CMAKE_CL_64)
message("Enabling SSE2 in tests/examples") message(STATUS "Enabling SSE2 in tests/examples")
endif(EIGEN_TEST_SSE2) endif(EIGEN_TEST_SSE2)
endif(MSVC) endif(MSVC)
option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF) option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF)
option(EIGEN_TEST_X87 "Force using X87 instructions. Implies no vectorization." OFF)
option(EIGEN_TEST_32BIT "Force generating 32bit code." OFF)
if(EIGEN_TEST_X87)
set(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION ON)
if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpmath=387")
message(STATUS "Forcing use of x87 instructions in tests/examples")
else()
message(STATUS "EIGEN_TEST_X87 ignored on your compiler")
endif()
endif()
if(EIGEN_TEST_32BIT)
if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
message(STATUS "Forcing generation of 32-bit code in tests/examples")
else()
message(STATUS "EIGEN_TEST_32BIT ignored on your compiler")
endif()
endif()
if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION) if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
add_definitions(-DEIGEN_DONT_VECTORIZE=1) add_definitions(-DEIGEN_DONT_VECTORIZE=1)
message("Disabling vectorization in tests/examples") message(STATUS "Disabling vectorization in tests/examples")
endif() endif()
option(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT "Disable explicit alignment (hence vectorization) in tests/examples" OFF) option(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT "Disable explicit alignment (hence vectorization) in tests/examples" OFF)
if(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT) if(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT)
add_definitions(-DEIGEN_DONT_ALIGN=1) add_definitions(-DEIGEN_DONT_ALIGN=1)
message("Disabling alignment in tests/examples") message(STATUS "Disabling alignment in tests/examples")
endif() endif()
option(EIGEN_TEST_C++0x "Enables all C++0x features." OFF) option(EIGEN_TEST_C++0x "Enables all C++0x features." OFF)
@@ -259,23 +289,66 @@ add_subdirectory(Eigen)
add_subdirectory(doc EXCLUDE_FROM_ALL) add_subdirectory(doc EXCLUDE_FROM_ALL)
add_custom_target(buildtests)
add_custom_target(check COMMAND "ctest")
add_dependencies(check buildtests)
# CMake/CTest does not allow us to change the build command,
# so we have to work around this by directly editing the generated DartConfiguration.tcl file
# save CMAKE_MAKE_PROGRAM
set(CMAKE_MAKE_PROGRAM_SAVE ${CMAKE_MAKE_PROGRAM})
# and set a fake one
set(CMAKE_MAKE_PROGRAM "@EIGEN_MAKECOMMAND_PLACEHOLDER@")
include(CTest) include(CTest)
enable_testing() # must be called from the root CMakeLists, see man page enable_testing() # must be called from the root CMakeLists, see man page
include(EigenTesting) include(EigenTesting)
ei_init_testing() ei_init_testing()
# overwrite default DartConfiguration.tcl
# The workarounds are different for each version of the MSVC IDE
if(MSVC_IDE)
if(MSVC_VERSION EQUAL 1600) # MSVC 2010
set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} buildtests.vcxproj /p:Configuration=\${CTEST_CONFIGURATION_TYPE} \n # ")
else() # MSVC 2008 (TODO check MSVC 2005)
set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} /project buildtests")
endif()
else()
# for make and nmake
set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} buildtests")
endif()
configure_file(${CMAKE_BINARY_DIR}/DartConfiguration.tcl ${CMAKE_BINARY_DIR}/DartConfiguration.tcl)
# restore default CMAKE_MAKE_PROGRAM
set(CMAKE_MAKE_PROGRAM ${CMAKE_MAKE_PROGRAM_SAVE})
# un-set temporary variables so that it is like they never existed.
# CMake 2.6.3 introduces the more logical unset() syntax for this.
set(CMAKE_MAKE_PROGRAM_SAVE)
set(EIGEN_MAKECOMMAND_PLACEHOLDER)
configure_file(${CMAKE_SOURCE_DIR}/CTestCustom.cmake.in ${CMAKE_BINARY_DIR}/CTestCustom.cmake)
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest
else() else()
add_subdirectory(test EXCLUDE_FROM_ALL) add_subdirectory(test EXCLUDE_FROM_ALL)
endif() endif()
if(NOT MSVC)
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
add_subdirectory(blas)
add_subdirectory(lapack)
else()
add_subdirectory(blas EXCLUDE_FROM_ALL)
add_subdirectory(lapack EXCLUDE_FROM_ALL)
endif()
endif(NOT MSVC)
add_subdirectory(unsupported) add_subdirectory(unsupported)
add_subdirectory(demos EXCLUDE_FROM_ALL) add_subdirectory(demos EXCLUDE_FROM_ALL)
add_subdirectory(blas EXCLUDE_FROM_ALL)
# must be after test and unsupported, for configuring buildtests.in # must be after test and unsupported, for configuring buildtests.in
add_subdirectory(scripts EXCLUDE_FROM_ALL) add_subdirectory(scripts EXCLUDE_FROM_ALL)
@@ -286,30 +359,35 @@ endif(EIGEN_BUILD_BTL)
ei_testing_print_summary() ei_testing_print_summary()
message("") message(STATUS "")
message("Configured Eigen ${EIGEN_VERSION_NUMBER}") message(STATUS "Configured Eigen ${EIGEN_VERSION_NUMBER}")
message("") message(STATUS "")
option(EIGEN_FAILTEST "Enable failtests." OFF)
if(EIGEN_FAILTEST)
add_subdirectory(failtest)
endif()
string(TOLOWER "${CMAKE_GENERATOR}" cmake_generator_tolower) string(TOLOWER "${CMAKE_GENERATOR}" cmake_generator_tolower)
if(cmake_generator_tolower MATCHES "makefile") if(cmake_generator_tolower MATCHES "makefile")
message("Some things you can do now:") message(STATUS "Some things you can do now:")
message("--------------+----------------------------------------------------------------") message(STATUS "--------------+--------------------------------------------------------------")
message("Command | Description") message(STATUS "Command | Description")
message("--------------+----------------------------------------------------------------") message(STATUS "--------------+--------------------------------------------------------------")
message("make install | Install to ${CMAKE_INSTALL_PREFIX}. To change that:") message(STATUS "make install | Install to ${CMAKE_INSTALL_PREFIX}. To change that:")
message(" | cmake . -DCMAKE_INSTALL_PREFIX=yourpath") message(STATUS " | cmake . -DCMAKE_INSTALL_PREFIX=yourpath")
message(" | Eigen headers will then be installed to:") message(STATUS " | Eigen headers will then be installed to:")
message(" | ${INCLUDE_INSTALL_DIR}") message(STATUS " | ${INCLUDE_INSTALL_DIR}")
message(" | To install Eigen headers to a separate location, do:") message(STATUS " | To install Eigen headers to a separate location, do:")
message(" | cmake . -DEIGEN_INCLUDE_INSTALL_DIR=yourpath") message(STATUS " | cmake . -DEIGEN_INCLUDE_INSTALL_DIR=yourpath")
message("make doc | Generate the API documentation, requires Doxygen & LaTeX") message(STATUS "make doc | Generate the API documentation, requires Doxygen & LaTeX")
message("make check | Build and run the unit-tests. Read this page:") message(STATUS "make check | Build and run the unit-tests. Read this page:")
message(" | http://eigen.tuxfamily.org/index.php?title=Tests") message(STATUS " | http://eigen.tuxfamily.org/index.php?title=Tests")
message("make blas | Build BLAS library (not the same thing as Eigen)") message(STATUS "make blas | Build BLAS library (not the same thing as Eigen)")
message("--------------+----------------------------------------------------------------") message(STATUS "--------------+--------------------------------------------------------------")
else() else()
message("To build/run the unit tests, read this page:") message(STATUS "To build/run the unit tests, read this page:")
message(" http://eigen.tuxfamily.org/index.php?title=Tests") message(STATUS " http://eigen.tuxfamily.org/index.php?title=Tests")
endif() endif()
message("") message(STATUS "")

View File

@@ -5,13 +5,9 @@
## ENABLE_TESTING() ## ENABLE_TESTING()
## INCLUDE(CTest) ## INCLUDE(CTest)
set(CTEST_PROJECT_NAME "Eigen") set(CTEST_PROJECT_NAME "Eigen")
set(CTEST_NIGHTLY_START_TIME "06:00:00 UTC") set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC")
set(CTEST_DROP_METHOD "http") set(CTEST_DROP_METHOD "http")
set(CTEST_DROP_SITE "eigen.tuxfamily.org") set(CTEST_DROP_SITE "eigen.tuxfamily.org")
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen") set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen")
set(CTEST_DROP_SITE_CDASH TRUE) set(CTEST_DROP_SITE_CDASH TRUE)
## A tribute to Dynamic!
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "33331")
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "33331")

4
CTestCustom.cmake.in Normal file
View File

@@ -0,0 +1,4 @@
## A tribute to Dynamic!
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "33331")
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "33331")

View File

@@ -1,14 +1,11 @@
#ifndef EIGEN_ARRAY_MODULE_H #ifndef EIGEN_ARRAY_MODULE_H
#define EIGEN_ARRAY_MODULE_H #define EIGEN_ARRAY_MODULE_H
#ifdef _MSC_VER // include Core first to handle Eigen2 support macros
#pragma message("The inclusion of Eigen/Array is deprecated. \
The array module is available as soon as Eigen/Core is included.")
#elif __GNUC__
#warning "The inclusion of Eigen/Array is deprecated. \
The array module is available as soon as Eigen/Core is included."
#endif
#include "Core" #include "Core"
#ifndef EIGEN2_SUPPORT
#error The Eigen/Array header does no longer exist in Eigen3. All that functionality has moved to Eigen/Core.
#endif
#endif // EIGEN_ARRAY_MODULE_H #endif // EIGEN_ARRAY_MODULE_H

View File

@@ -1,6 +1,12 @@
include(RegexUtils)
test_escape_string_as_regex()
file(GLOB Eigen_directory_files "*") file(GLOB Eigen_directory_files "*")
escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
foreach(f ${Eigen_directory_files}) foreach(f ${Eigen_directory_files})
if(NOT f MATCHES ".txt" AND NOT f MATCHES "${CMAKE_CURRENT_SOURCE_DIR}/src") if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/src")
list(APPEND Eigen_directory_files_to_install ${f}) list(APPEND Eigen_directory_files_to_install ${f})
endif() endif()
endforeach(f ${Eigen_directory_files}) endforeach(f ${Eigen_directory_files})

View File

@@ -3,7 +3,7 @@
#include "Core" #include "Core"
#include "src/Core/util/DisableMSVCWarnings.h" #include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen { namespace Eigen {
@@ -27,7 +27,7 @@ namespace Eigen {
} // namespace Eigen } // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h" #include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_CHOLESKY_MODULE_H #endif // EIGEN_CHOLESKY_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */ /* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -2,7 +2,7 @@
// for linear algebra. // for linear algebra.
// //
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com> // Copyright (C) 2007-2011 Benoit Jacob <jacob.benoit.1@gmail.com>
// //
// Eigen is free software; you can redistribute it and/or // Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public // modify it under the terms of the GNU Lesser General Public
@@ -26,8 +26,8 @@
#ifndef EIGEN_CORE_H #ifndef EIGEN_CORE_H
#define EIGEN_CORE_H #define EIGEN_CORE_H
// first thing Eigen does: prevent MSVC from committing suicide // first thing Eigen does: stop the compiler from committing suicide
#include "src/Core/util/DisableMSVCWarnings.h" #include "src/Core/util/DisableStupidWarnings.h"
// then include this file where all our macros are defined. It's really important to do it first because // then include this file where all our macros are defined. It's really important to do it first because
// it's where we do all the alignment settings (platform detection and honoring the user's will if he // it's where we do all the alignment settings (platform detection and honoring the user's will if he
@@ -88,20 +88,29 @@
// include files // include files
#include <emmintrin.h> // This extern "C" works around a MINGW-w64 compilation issue
#include <xmmintrin.h> // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354
#ifdef EIGEN_VECTORIZE_SSE3 // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).
// However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations
// with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;
// so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too.
// notice that since these are C headers, the extern "C" is theoretically needed anyways.
extern "C" {
#include <emmintrin.h>
#include <xmmintrin.h>
#ifdef EIGEN_VECTORIZE_SSE3
#include <pmmintrin.h> #include <pmmintrin.h>
#endif #endif
#ifdef EIGEN_VECTORIZE_SSSE3 #ifdef EIGEN_VECTORIZE_SSSE3
#include <tmmintrin.h> #include <tmmintrin.h>
#endif #endif
#ifdef EIGEN_VECTORIZE_SSE4_1 #ifdef EIGEN_VECTORIZE_SSE4_1
#include <smmintrin.h> #include <smmintrin.h>
#endif #endif
#ifdef EIGEN_VECTORIZE_SSE4_2 #ifdef EIGEN_VECTORIZE_SSE4_2
#include <nmmintrin.h> #include <nmmintrin.h>
#endif #endif
} // end extern "C"
#elif defined __ALTIVEC__ #elif defined __ALTIVEC__
#define EIGEN_VECTORIZE #define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_ALTIVEC #define EIGEN_VECTORIZE_ALTIVEC
@@ -126,7 +135,14 @@
#include <omp.h> #include <omp.h>
#endif #endif
// MSVC for windows mobile does not have the errno.h file
#if !(defined(_MSC_VER) && defined(_WIN32_WCE))
#define EIGEN_HAS_ERRNO
#endif
#ifdef EIGEN_HAS_ERRNO
#include <cerrno> #include <cerrno>
#endif
#include <cstdlib> #include <cstdlib>
#include <cmath> #include <cmath>
#include <complex> #include <complex>
@@ -136,16 +152,17 @@
#include <cstring> #include <cstring>
#include <string> #include <string>
#include <limits> #include <limits>
#include <climits> // for CHAR_BIT
// for min/max: // for min/max:
#include <algorithm> #include <algorithm>
// for outputting debug info // for outputting debug info
#ifdef EIGEN_DEBUG_ASSIGN #ifdef EIGEN_DEBUG_ASSIGN
#include<iostream> #include <iostream>
#endif #endif
// required for __cpuid, needs to be included after cmath // required for __cpuid, needs to be included after cmath
#ifdef _MSC_VER #if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64))
#include <intrin.h> #include <intrin.h>
#endif #endif
@@ -189,6 +206,32 @@ inline static const char *SimdInstructionSetsInUse(void) {
#endif #endif
} }
#define STAGE10_FULL_EIGEN2_API 10
#define STAGE20_RESOLVE_API_CONFLICTS 20
#define STAGE30_FULL_EIGEN3_API 30
#define STAGE40_FULL_EIGEN3_STRICTNESS 40
#define STAGE99_NO_EIGEN2_SUPPORT 99
#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS
#define EIGEN2_SUPPORT
#define EIGEN2_SUPPORT_STAGE STAGE40_FULL_EIGEN3_STRICTNESS
#elif defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API
#define EIGEN2_SUPPORT
#define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API
#elif defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS
#define EIGEN2_SUPPORT
#define EIGEN2_SUPPORT_STAGE STAGE20_RESOLVE_API_CONFLICTS
#elif defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API
#define EIGEN2_SUPPORT
#define EIGEN2_SUPPORT_STAGE STAGE10_FULL_EIGEN2_API
#elif defined EIGEN2_SUPPORT
// default to stage 3, that's what it's always meant
#define EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API
#define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API
#else
#define EIGEN2_SUPPORT_STAGE STAGE99_NO_EIGEN2_SUPPORT
#endif
#ifdef EIGEN2_SUPPORT #ifdef EIGEN2_SUPPORT
#undef minor #undef minor
#endif #endif
@@ -221,10 +264,13 @@ using std::size_t;
#if defined EIGEN_VECTORIZE_SSE #if defined EIGEN_VECTORIZE_SSE
#include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/PacketMath.h"
#include "src/Core/arch/SSE/MathFunctions.h" #include "src/Core/arch/SSE/MathFunctions.h"
#include "src/Core/arch/SSE/Complex.h"
#elif defined EIGEN_VECTORIZE_ALTIVEC #elif defined EIGEN_VECTORIZE_ALTIVEC
#include "src/Core/arch/AltiVec/PacketMath.h" #include "src/Core/arch/AltiVec/PacketMath.h"
#include "src/Core/arch/AltiVec/Complex.h"
#elif defined EIGEN_VECTORIZE_NEON #elif defined EIGEN_VECTORIZE_NEON
#include "src/Core/arch/NEON/PacketMath.h" #include "src/Core/arch/NEON/PacketMath.h"
#include "src/Core/arch/NEON/Complex.h"
#endif #endif
#include "src/Core/arch/Default/Settings.h" #include "src/Core/arch/Default/Settings.h"
@@ -241,18 +287,19 @@ using std::size_t;
#endif #endif
#include "src/Core/util/BlasUtil.h" #include "src/Core/util/BlasUtil.h"
#include "src/Core/MatrixStorage.h" #include "src/Core/DenseStorage.h"
#include "src/Core/NestByValue.h" #include "src/Core/NestByValue.h"
#include "src/Core/ForceAlignedAccess.h" #include "src/Core/ForceAlignedAccess.h"
#include "src/Core/ReturnByValue.h" #include "src/Core/ReturnByValue.h"
#include "src/Core/NoAlias.h" #include "src/Core/NoAlias.h"
#include "src/Core/DenseStorageBase.h" #include "src/Core/PlainObjectBase.h"
#include "src/Core/Matrix.h" #include "src/Core/Matrix.h"
#include "src/Core/SelfCwiseBinaryOp.h" #include "src/Core/Array.h"
#include "src/Core/CwiseBinaryOp.h" #include "src/Core/CwiseBinaryOp.h"
#include "src/Core/CwiseUnaryOp.h" #include "src/Core/CwiseUnaryOp.h"
#include "src/Core/CwiseNullaryOp.h" #include "src/Core/CwiseNullaryOp.h"
#include "src/Core/CwiseUnaryView.h" #include "src/Core/CwiseUnaryView.h"
#include "src/Core/SelfCwiseBinaryOp.h"
#include "src/Core/Dot.h" #include "src/Core/Dot.h"
#include "src/Core/StableNorm.h" #include "src/Core/StableNorm.h"
#include "src/Core/MapBase.h" #include "src/Core/MapBase.h"
@@ -283,6 +330,7 @@ using std::size_t;
#include "src/Core/products/GeneralBlockPanelKernel.h" #include "src/Core/products/GeneralBlockPanelKernel.h"
#include "src/Core/products/GeneralMatrixVector.h" #include "src/Core/products/GeneralMatrixVector.h"
#include "src/Core/products/GeneralMatrixMatrix.h" #include "src/Core/products/GeneralMatrixMatrix.h"
#include "src/Core/products/GeneralMatrixMatrixTriangular.h"
#include "src/Core/products/SelfadjointMatrixVector.h" #include "src/Core/products/SelfadjointMatrixVector.h"
#include "src/Core/products/SelfadjointMatrixMatrix.h" #include "src/Core/products/SelfadjointMatrixMatrix.h"
#include "src/Core/products/SelfadjointProduct.h" #include "src/Core/products/SelfadjointProduct.h"
@@ -290,6 +338,7 @@ using std::size_t;
#include "src/Core/products/TriangularMatrixVector.h" #include "src/Core/products/TriangularMatrixVector.h"
#include "src/Core/products/TriangularMatrixMatrix.h" #include "src/Core/products/TriangularMatrixMatrix.h"
#include "src/Core/products/TriangularSolverMatrix.h" #include "src/Core/products/TriangularSolverMatrix.h"
#include "src/Core/products/TriangularSolverVector.h"
#include "src/Core/BandMatrix.h" #include "src/Core/BandMatrix.h"
#include "src/Core/BooleanRedux.h" #include "src/Core/BooleanRedux.h"
@@ -300,13 +349,12 @@ using std::size_t;
#include "src/Core/Reverse.h" #include "src/Core/Reverse.h"
#include "src/Core/ArrayBase.h" #include "src/Core/ArrayBase.h"
#include "src/Core/ArrayWrapper.h" #include "src/Core/ArrayWrapper.h"
#include "src/Core/Array.h"
} // namespace Eigen } // namespace Eigen
#include "src/Core/GlobalFunctions.h" #include "src/Core/GlobalFunctions.h"
#include "src/Core/util/EnableMSVCWarnings.h" #include "src/Core/util/ReenableStupidWarnings.h"
#ifdef EIGEN2_SUPPORT #ifdef EIGEN2_SUPPORT
#include "Eigen2Support" #include "Eigen2Support"

View File

@@ -4,4 +4,4 @@
#include "QR" #include "QR"
#include "SVD" #include "SVD"
#include "Geometry" #include "Geometry"
#include "Eigenvalues" #include "Eigenvalues"

View File

@@ -1,2 +1,2 @@
#include "Dense" #include "Dense"
#include "Sparse" //#include "Sparse"

View File

@@ -29,7 +29,7 @@
#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header #error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header
#endif #endif
#include "src/Core/util/DisableMSVCWarnings.h" #include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen { namespace Eigen {
@@ -43,6 +43,9 @@ namespace Eigen {
* *
*/ */
#include "src/Eigen2Support/Macros.h"
#include "src/Eigen2Support/Memory.h"
#include "src/Eigen2Support/Meta.h"
#include "src/Eigen2Support/Lazy.h" #include "src/Eigen2Support/Lazy.h"
#include "src/Eigen2Support/Cwise.h" #include "src/Eigen2Support/Cwise.h"
#include "src/Eigen2Support/CwiseOperators.h" #include "src/Eigen2Support/CwiseOperators.h"
@@ -50,11 +53,12 @@ namespace Eigen {
#include "src/Eigen2Support/Block.h" #include "src/Eigen2Support/Block.h"
#include "src/Eigen2Support/VectorBlock.h" #include "src/Eigen2Support/VectorBlock.h"
#include "src/Eigen2Support/Minor.h" #include "src/Eigen2Support/Minor.h"
#include "src/Eigen2Support/MathFunctions.h"
} // namespace Eigen } // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h" #include "src/Core/util/ReenableStupidWarnings.h"
// Eigen2 used to include iostream // Eigen2 used to include iostream
#include<iostream> #include<iostream>

View File

@@ -3,7 +3,7 @@
#include "Core" #include "Core"
#include "src/Core/util/DisableMSVCWarnings.h" #include "src/Core/util/DisableStupidWarnings.h"
#include "Cholesky" #include "Cholesky"
#include "Jacobi" #include "Jacobi"
@@ -38,7 +38,7 @@ namespace Eigen {
} // namespace Eigen } // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h" #include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_EIGENVALUES_MODULE_H #endif // EIGEN_EIGENVALUES_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */ /* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -3,7 +3,7 @@
#include "Core" #include "Core"
#include "src/Core/util/DisableMSVCWarnings.h" #include "src/Core/util/DisableStupidWarnings.h"
#include "SVD" #include "SVD"
#include "LU" #include "LU"
@@ -33,27 +33,34 @@ namespace Eigen {
*/ */
#include "src/Geometry/OrthoMethods.h" #include "src/Geometry/OrthoMethods.h"
#include "src/Geometry/Homogeneous.h"
#include "src/Geometry/RotationBase.h"
#include "src/Geometry/Rotation2D.h"
#include "src/Geometry/Quaternion.h"
#include "src/Geometry/AngleAxis.h"
#include "src/Geometry/EulerAngles.h" #include "src/Geometry/EulerAngles.h"
#include "src/Geometry/Transform.h"
#include "src/Geometry/Translation.h"
#include "src/Geometry/Scaling.h"
#include "src/Geometry/Hyperplane.h"
#include "src/Geometry/ParametrizedLine.h"
#include "src/Geometry/AlignedBox.h"
#include "src/Geometry/Umeyama.h"
#if defined EIGEN_VECTORIZE_SSE #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
#include "src/Geometry/arch/Geometry_SSE.h" #include "src/Geometry/Homogeneous.h"
#include "src/Geometry/RotationBase.h"
#include "src/Geometry/Rotation2D.h"
#include "src/Geometry/Quaternion.h"
#include "src/Geometry/AngleAxis.h"
#include "src/Geometry/Transform.h"
#include "src/Geometry/Translation.h"
#include "src/Geometry/Scaling.h"
#include "src/Geometry/Hyperplane.h"
#include "src/Geometry/ParametrizedLine.h"
#include "src/Geometry/AlignedBox.h"
#include "src/Geometry/Umeyama.h"
#if defined EIGEN_VECTORIZE_SSE
#include "src/Geometry/arch/Geometry_SSE.h"
#endif
#endif
#ifdef EIGEN2_SUPPORT
#include "src/Eigen2Support/Geometry/All.h"
#endif #endif
} // namespace Eigen } // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h" #include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_GEOMETRY_MODULE_H #endif // EIGEN_GEOMETRY_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */ /* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -3,7 +3,7 @@
#include "Core" #include "Core"
#include "src/Core/util/DisableMSVCWarnings.h" #include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen { namespace Eigen {
@@ -21,7 +21,7 @@ namespace Eigen {
} // namespace Eigen } // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h" #include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_HOUSEHOLDER_MODULE_H #endif // EIGEN_HOUSEHOLDER_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */ /* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -3,7 +3,7 @@
#include "Core" #include "Core"
#include "src/Core/util/DisableMSVCWarnings.h" #include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen { namespace Eigen {
@@ -23,7 +23,7 @@ namespace Eigen {
} // namespace Eigen } // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h" #include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_JACOBI_MODULE_H #endif // EIGEN_JACOBI_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */ /* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -3,7 +3,7 @@
#include "Core" #include "Core"
#include "src/Core/util/DisableMSVCWarnings.h" #include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen { namespace Eigen {
@@ -30,9 +30,13 @@ namespace Eigen {
#include "src/LU/arch/Inverse_SSE.h" #include "src/LU/arch/Inverse_SSE.h"
#endif #endif
#ifdef EIGEN2_SUPPORT
#include "src/Eigen2Support/LU.h"
#endif
} // namespace Eigen } // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h" #include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_LU_MODULE_H #endif // EIGEN_LU_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */ /* vim: set filetype=cpp et sw=2 ts=2 ai: */

36
Eigen/LeastSquares Normal file
View File

@@ -0,0 +1,36 @@
#ifndef EIGEN_REGRESSION_MODULE_H
#define EIGEN_REGRESSION_MODULE_H
#ifndef EIGEN2_SUPPORT
#error LeastSquares is only available in Eigen2 support mode (define EIGEN2_SUPPORT)
#endif
// exclude from normal eigen3-only documentation
#ifdef EIGEN2_SUPPORT
#include "Core"
#include "src/Core/util/DisableStupidWarnings.h"
#include "Eigenvalues"
#include "Geometry"
namespace Eigen {
/** \defgroup LeastSquares_Module LeastSquares module
* This module provides linear regression and related features.
*
* \code
* #include <Eigen/LeastSquares>
* \endcode
*/
#include "src/Eigen2Support/LeastSquares.h"
} // namespace Eigen
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN2_SUPPORT
#endif // EIGEN_REGRESSION_MODULE_H

View File

@@ -3,7 +3,7 @@
#include "Core" #include "Core"
#include "src/Core/util/DisableMSVCWarnings.h" #include "src/Core/util/DisableStupidWarnings.h"
#include "Cholesky" #include "Cholesky"
#include "Jacobi" #include "Jacobi"
@@ -29,13 +29,17 @@ namespace Eigen {
#include "src/QR/FullPivHouseholderQR.h" #include "src/QR/FullPivHouseholderQR.h"
#include "src/QR/ColPivHouseholderQR.h" #include "src/QR/ColPivHouseholderQR.h"
#ifdef EIGEN2_SUPPORT
#include "src/Eigen2Support/QR.h"
#endif
} // namespace Eigen } // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h" #include "src/Core/util/ReenableStupidWarnings.h"
// FIXME for compatibility we include Eigenvalues here: #ifdef EIGEN2_SUPPORT
#include "Eigenvalues" #include "Eigenvalues"
#endif
#endif // EIGEN_QR_MODULE_H #endif // EIGEN_QR_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */ /* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -6,27 +6,27 @@
#if (!EIGEN_MALLOC_ALREADY_ALIGNED) #if (!EIGEN_MALLOC_ALREADY_ALIGNED)
#include "src/Core/util/DisableMSVCWarnings.h" #include "src/Core/util/DisableStupidWarnings.h"
void *qMalloc(size_t size) void *qMalloc(size_t size)
{ {
return Eigen::ei_aligned_malloc(size); return Eigen::internal::aligned_malloc(size);
} }
void qFree(void *ptr) void qFree(void *ptr)
{ {
Eigen::ei_aligned_free(ptr); Eigen::internal::aligned_free(ptr);
} }
void *qRealloc(void *ptr, size_t size) void *qRealloc(void *ptr, size_t size)
{ {
void* newPtr = Eigen::ei_aligned_malloc(size); void* newPtr = Eigen::internal::aligned_malloc(size);
memcpy(newPtr, ptr, size); memcpy(newPtr, ptr, size);
Eigen::ei_aligned_free(ptr); Eigen::internal::aligned_free(ptr);
return newPtr; return newPtr;
} }
#include "src/Core/util/EnableMSVCWarnings.h" #include "src/Core/util/ReenableStupidWarnings.h"
#endif #endif

View File

@@ -5,7 +5,7 @@
#include "Householder" #include "Householder"
#include "Jacobi" #include "Jacobi"
#include "src/Core/util/DisableMSVCWarnings.h" #include "src/Core/util/DisableStupidWarnings.h"
namespace Eigen { namespace Eigen {
@@ -23,13 +23,16 @@ namespace Eigen {
*/ */
#include "src/misc/Solve.h" #include "src/misc/Solve.h"
#include "src/SVD/SVD.h"
#include "src/SVD/JacobiSVD.h" #include "src/SVD/JacobiSVD.h"
#include "src/SVD/UpperBidiagonalization.h" #include "src/SVD/UpperBidiagonalization.h"
#ifdef EIGEN2_SUPPORT
#include "src/Eigen2Support/SVD.h"
#endif
} // namespace Eigen } // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h" #include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SVD_MODULE_H #endif // EIGEN_SVD_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */ /* vim: set filetype=cpp et sw=2 ts=2 ai: */

View File

@@ -3,7 +3,7 @@
#include "Core" #include "Core"
#include "src/Core/util/DisableMSVCWarnings.h" #include "src/Core/util/DisableStupidWarnings.h"
#include <vector> #include <vector>
#include <map> #include <map>
@@ -11,6 +11,14 @@
#include <cstring> #include <cstring>
#include <algorithm> #include <algorithm>
// Builds that define EIGEN2_SUPPORT opt in to the sparse module automatically:
// the acknowledgement token is defined on their behalf.
#ifdef EIGEN2_SUPPORT
#define EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET
#endif
// All other builds must define the acknowledgement token themselves before
// including this header; otherwise compilation is stopped with the #error below.
#ifndef EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET
#error The sparse module API is not stable yet. To use it anyway, please define the EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET preprocessor token.
#endif
namespace Eigen { namespace Eigen {
/** \defgroup Sparse_Module Sparse module /** \defgroup Sparse_Module Sparse module
@@ -55,7 +63,7 @@ struct Sparse {};
} // namespace Eigen } // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h" #include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SPARSE_MODULE_H #endif // EIGEN_SPARSE_MODULE_H

42
Eigen/StdDeque Normal file
View File

@@ -0,0 +1,42 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2009 Hauke Heibel <hauke.heibel@googlemail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
// Module header for using Eigen types with std::deque.
// Include guard: the body below is processed at most once per translation unit.
#ifndef EIGEN_STDDEQUE_MODULE_H
#define EIGEN_STDDEQUE_MODULE_H
#include "Core"
#include <deque>
// On 64-bit MSVC builds the specialization macro is defined to expand to nothing
// and the support header is not included at all.
#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */
#define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...)
#else
// Every other configuration pulls in the support header.
// NOTE(review): presumably that header supplies the non-empty
// EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION and the std::deque specialization - confirm.
#include "src/StlSupport/StdDeque.h"
#endif
#endif // EIGEN_STDDEQUE_MODULE_H

View File

@@ -1,12 +1,7 @@
ADD_SUBDIRECTORY(Core) file(GLOB Eigen_src_subdirectories "*")
ADD_SUBDIRECTORY(LU) escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
ADD_SUBDIRECTORY(QR) foreach(f ${Eigen_src_subdirectories})
ADD_SUBDIRECTORY(SVD) if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" )
ADD_SUBDIRECTORY(Cholesky) add_subdirectory(${f})
ADD_SUBDIRECTORY(Geometry) endif()
ADD_SUBDIRECTORY(Sparse) endforeach()
ADD_SUBDIRECTORY(Jacobi)
ADD_SUBDIRECTORY(Householder)
ADD_SUBDIRECTORY(Eigenvalues)
ADD_SUBDIRECTORY(misc)
ADD_SUBDIRECTORY(plugins)

View File

@@ -27,7 +27,9 @@
#ifndef EIGEN_LDLT_H #ifndef EIGEN_LDLT_H
#define EIGEN_LDLT_H #define EIGEN_LDLT_H
namespace internal {
template<typename MatrixType, int UpLo> struct LDLT_Traits; template<typename MatrixType, int UpLo> struct LDLT_Traits;
}
/** \ingroup cholesky_Module /** \ingroup cholesky_Module
* *
@@ -74,7 +76,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType; typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType; typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;
typedef LDLT_Traits<MatrixType,UpLo> Traits; typedef internal::LDLT_Traits<MatrixType,UpLo> Traits;
/** \brief Default Constructor. /** \brief Default Constructor.
* *
@@ -108,14 +110,14 @@ template<typename _MatrixType, int _UpLo> class LDLT
/** \returns a view of the upper triangular matrix U */ /** \returns a view of the upper triangular matrix U */
inline typename Traits::MatrixU matrixU() const inline typename Traits::MatrixU matrixU() const
{ {
ei_assert(m_isInitialized && "LDLT is not initialized."); eigen_assert(m_isInitialized && "LDLT is not initialized.");
return Traits::getU(m_matrix); return Traits::getU(m_matrix);
} }
/** \returns a view of the lower triangular matrix L */ /** \returns a view of the lower triangular matrix L */
inline typename Traits::MatrixL matrixL() const inline typename Traits::MatrixL matrixL() const
{ {
ei_assert(m_isInitialized && "LDLT is not initialized."); eigen_assert(m_isInitialized && "LDLT is not initialized.");
return Traits::getL(m_matrix); return Traits::getL(m_matrix);
} }
@@ -123,28 +125,35 @@ template<typename _MatrixType, int _UpLo> class LDLT
*/ */
inline const TranspositionType& transpositionsP() const inline const TranspositionType& transpositionsP() const
{ {
ei_assert(m_isInitialized && "LDLT is not initialized."); eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_transpositions; return m_transpositions;
} }
/** \returns the coefficients of the diagonal matrix D */ /** \returns the coefficients of the diagonal matrix D */
inline Diagonal<MatrixType,0> vectorD(void) const inline Diagonal<const MatrixType> vectorD(void) const
{ {
ei_assert(m_isInitialized && "LDLT is not initialized."); eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_matrix.diagonal(); return m_matrix.diagonal();
} }
/** \returns true if the matrix is positive (semidefinite) */ /** \returns true if the matrix is positive (semidefinite) */
inline bool isPositive(void) const inline bool isPositive(void) const
{ {
ei_assert(m_isInitialized && "LDLT is not initialized."); eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_sign == 1; return m_sign == 1;
} }
// Only compiled when EIGEN2_SUPPORT is defined.
#ifdef EIGEN2_SUPPORT
/** \returns the same value as isPositive(); this method only forwards to it.
  * NOTE(review): isPositive() is documented as "positive (semidefinite)", so despite
  * its name this does not by itself establish strict positive definiteness - confirm
  * that callers do not rely on the stricter meaning.
  */
inline bool isPositiveDefinite() const
{
return isPositive();
}
#endif
/** \returns true if the matrix is negative (semidefinite) */ /** \returns true if the matrix is negative (semidefinite) */
inline bool isNegative(void) const inline bool isNegative(void) const
{ {
ei_assert(m_isInitialized && "LDLT is not initialized."); eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_sign == -1; return m_sign == -1;
} }
@@ -155,15 +164,24 @@ template<typename _MatrixType, int _UpLo> class LDLT
* \sa solveInPlace(), MatrixBase::ldlt() * \sa solveInPlace(), MatrixBase::ldlt()
*/ */
template<typename Rhs> template<typename Rhs>
inline const ei_solve_retval<LDLT, Rhs> inline const internal::solve_retval<LDLT, Rhs>
solve(const MatrixBase<Rhs>& b) const solve(const MatrixBase<Rhs>& b) const
{ {
ei_assert(m_isInitialized && "LDLT is not initialized."); eigen_assert(m_isInitialized && "LDLT is not initialized.");
ei_assert(m_matrix.rows()==b.rows() eigen_assert(m_matrix.rows()==b.rows()
&& "LDLT::solve(): invalid number of rows of the right hand side matrix b"); && "LDLT::solve(): invalid number of rows of the right hand side matrix b");
return ei_solve_retval<LDLT, Rhs>(*this, b.derived()); return internal::solve_retval<LDLT, Rhs>(*this, b.derived());
} }
// Only compiled when EIGEN2_SUPPORT is defined.
#ifdef EIGEN2_SUPPORT
/** Out-parameter overload of solve(): evaluates solve(b) and assigns it to \c *result.
  *
  * \param b right-hand side, forwarded unchanged to the single-argument solve()
  * \param result destination of the solution; dereferenced unconditionally, so it must not be null
  * \returns always \c true (no failure is reported by this overload)
  */
template<typename OtherDerived, typename ResultType>
bool solve(const MatrixBase<OtherDerived>& b, ResultType *result) const
{
*result = this->solve(b);
return true;
}
#endif
template<typename Derived> template<typename Derived>
bool solveInPlace(MatrixBase<Derived> &bAndX) const; bool solveInPlace(MatrixBase<Derived> &bAndX) const;
@@ -175,7 +193,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
*/ */
inline const MatrixType& matrixLDLT() const inline const MatrixType& matrixLDLT() const
{ {
ei_assert(m_isInitialized && "LDLT is not initialized."); eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_matrix; return m_matrix;
} }
@@ -199,9 +217,11 @@ template<typename _MatrixType, int _UpLo> class LDLT
bool m_isInitialized; bool m_isInitialized;
}; };
template<int UpLo> struct ei_ldlt_inplace; namespace internal {
template<> struct ei_ldlt_inplace<Lower> template<int UpLo> struct ldlt_inplace;
template<> struct ldlt_inplace<Lower>
{ {
template<typename MatrixType, typename TranspositionType, typename Workspace> template<typename MatrixType, typename TranspositionType, typename Workspace>
static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0) static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0)
@@ -209,14 +229,14 @@ template<> struct ei_ldlt_inplace<Lower>
typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar; typedef typename MatrixType::RealScalar RealScalar;
typedef typename MatrixType::Index Index; typedef typename MatrixType::Index Index;
ei_assert(mat.rows()==mat.cols()); eigen_assert(mat.rows()==mat.cols());
const Index size = mat.rows(); const Index size = mat.rows();
if (size <= 1) if (size <= 1)
{ {
transpositions.setIdentity(); transpositions.setIdentity();
if(sign) if(sign)
*sign = ei_real(mat.coeff(0,0))>0 ? 1:-1; *sign = real(mat.coeff(0,0))>0 ? 1:-1;
return true; return true;
} }
@@ -234,10 +254,10 @@ template<> struct ei_ldlt_inplace<Lower>
// The biggest overall is the point of reference to which further diagonals // The biggest overall is the point of reference to which further diagonals
// are compared; if any diagonal is negligible compared // are compared; if any diagonal is negligible compared
// to the largest overall, the algorithm bails. // to the largest overall, the algorithm bails.
cutoff = ei_abs(NumTraits<Scalar>::epsilon() * biggest_in_corner); cutoff = abs(NumTraits<Scalar>::epsilon() * biggest_in_corner);
if(sign) if(sign)
*sign = ei_real(mat.diagonal().coeff(index_of_biggest_in_corner)) > 0 ? 1 : -1; *sign = real(mat.diagonal().coeff(index_of_biggest_in_corner)) > 0 ? 1 : -1;
} }
// Finish early if the matrix is not full rank. // Finish early if the matrix is not full rank.
@@ -259,11 +279,11 @@ template<> struct ei_ldlt_inplace<Lower>
for(int i=k+1;i<index_of_biggest_in_corner;++i) for(int i=k+1;i<index_of_biggest_in_corner;++i)
{ {
Scalar tmp = mat.coeffRef(i,k); Scalar tmp = mat.coeffRef(i,k);
mat.coeffRef(i,k) = ei_conj(mat.coeffRef(index_of_biggest_in_corner,i)); mat.coeffRef(i,k) = conj(mat.coeffRef(index_of_biggest_in_corner,i));
mat.coeffRef(index_of_biggest_in_corner,i) = ei_conj(tmp); mat.coeffRef(index_of_biggest_in_corner,i) = conj(tmp);
} }
if(NumTraits<Scalar>::IsComplex) if(NumTraits<Scalar>::IsComplex)
mat.coeffRef(index_of_biggest_in_corner,k) = ei_conj(mat.coeff(index_of_biggest_in_corner,k)); mat.coeffRef(index_of_biggest_in_corner,k) = conj(mat.coeff(index_of_biggest_in_corner,k));
} }
// partition the matrix: // partition the matrix:
@@ -282,7 +302,7 @@ template<> struct ei_ldlt_inplace<Lower>
if(rs>0) if(rs>0)
A21.noalias() -= A20 * temp.head(k); A21.noalias() -= A20 * temp.head(k);
} }
if((rs>0) && (ei_abs(mat.coeffRef(k,k)) > cutoff)) if((rs>0) && (abs(mat.coeffRef(k,k)) > cutoff))
A21 /= mat.coeffRef(k,k); A21 /= mat.coeffRef(k,k);
} }
@@ -290,13 +310,13 @@ template<> struct ei_ldlt_inplace<Lower>
} }
}; };
template<> struct ei_ldlt_inplace<Upper> template<> struct ldlt_inplace<Upper>
{ {
template<typename MatrixType, typename TranspositionType, typename Workspace> template<typename MatrixType, typename TranspositionType, typename Workspace>
static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0) static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0)
{ {
Transpose<MatrixType> matt(mat); Transpose<MatrixType> matt(mat);
return ei_ldlt_inplace<Lower>::unblocked(matt, transpositions, temp, sign); return ldlt_inplace<Lower>::unblocked(matt, transpositions, temp, sign);
} }
}; };
@@ -316,12 +336,14 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
inline static MatrixU getU(const MatrixType& m) { return m; } inline static MatrixU getU(const MatrixType& m) { return m; }
}; };
} // end namespace internal
/** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \a matrix /** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \a matrix
*/ */
template<typename MatrixType, int _UpLo> template<typename MatrixType, int _UpLo>
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a) LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
{ {
ei_assert(a.rows()==a.cols()); eigen_assert(a.rows()==a.cols());
const Index size = a.rows(); const Index size = a.rows();
m_matrix = a; m_matrix = a;
@@ -330,22 +352,23 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
m_isInitialized = false; m_isInitialized = false;
m_temporary.resize(size); m_temporary.resize(size);
ei_ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, &m_sign); internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, &m_sign);
m_isInitialized = true; m_isInitialized = true;
return *this; return *this;
} }
namespace internal {
template<typename _MatrixType, int _UpLo, typename Rhs> template<typename _MatrixType, int _UpLo, typename Rhs>
struct ei_solve_retval<LDLT<_MatrixType,_UpLo>, Rhs> struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
: ei_solve_retval_base<LDLT<_MatrixType,_UpLo>, Rhs> : solve_retval_base<LDLT<_MatrixType,_UpLo>, Rhs>
{ {
typedef LDLT<_MatrixType,_UpLo> LDLTType; typedef LDLT<_MatrixType,_UpLo> LDLTType;
EIGEN_MAKE_SOLVE_HELPERS(LDLTType,Rhs) EIGEN_MAKE_SOLVE_HELPERS(LDLTType,Rhs)
template<typename Dest> void evalTo(Dest& dst) const template<typename Dest> void evalTo(Dest& dst) const
{ {
ei_assert(rhs().rows() == dec().matrixLDLT().rows()); eigen_assert(rhs().rows() == dec().matrixLDLT().rows());
// dst = P b // dst = P b
dst = dec().transpositionsP() * rhs(); dst = dec().transpositionsP() * rhs();
@@ -362,8 +385,11 @@ struct ei_solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
dst = dec().transpositionsP().transpose() * dst; dst = dec().transpositionsP().transpose() * dst;
} }
}; };
}
/** This is the \em in-place version of solve(). /** \internal use x = ldlt_object.solve(x);
*
* This is the \em in-place version of solve().
* *
* \param bAndX represents both the right-hand side matrix b and result x. * \param bAndX represents both the right-hand side matrix b and result x.
* *
@@ -378,9 +404,9 @@ template<typename MatrixType,int _UpLo>
template<typename Derived> template<typename Derived>
bool LDLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const bool LDLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
{ {
ei_assert(m_isInitialized && "LDLT is not initialized."); eigen_assert(m_isInitialized && "LDLT is not initialized.");
const Index size = m_matrix.rows(); const Index size = m_matrix.rows();
ei_assert(size == bAndX.rows()); eigen_assert(size == bAndX.rows());
bAndX = this->solve(bAndX); bAndX = this->solve(bAndX);
@@ -393,7 +419,7 @@ bool LDLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
template<typename MatrixType, int _UpLo> template<typename MatrixType, int _UpLo>
MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
{ {
ei_assert(m_isInitialized && "LDLT is not initialized."); eigen_assert(m_isInitialized && "LDLT is not initialized.");
const Index size = m_matrix.rows(); const Index size = m_matrix.rows();
MatrixType res(size,size); MatrixType res(size,size);

View File

@@ -25,7 +25,9 @@
#ifndef EIGEN_LLT_H #ifndef EIGEN_LLT_H
#define EIGEN_LLT_H #define EIGEN_LLT_H
namespace internal{
template<typename MatrixType, int UpLo> struct LLT_Traits; template<typename MatrixType, int UpLo> struct LLT_Traits;
}
/** \ingroup cholesky_Module /** \ingroup cholesky_Module
* *
@@ -68,19 +70,19 @@ template<typename _MatrixType, int _UpLo> class LLT
typedef typename MatrixType::Index Index; typedef typename MatrixType::Index Index;
enum { enum {
PacketSize = ei_packet_traits<Scalar>::size, PacketSize = internal::packet_traits<Scalar>::size,
AlignmentMask = int(PacketSize)-1, AlignmentMask = int(PacketSize)-1,
UpLo = _UpLo UpLo = _UpLo
}; };
typedef LLT_Traits<MatrixType,UpLo> Traits; typedef internal::LLT_Traits<MatrixType,UpLo> Traits;
/** /**
* \brief Default Constructor. * \brief Default Constructor.
* *
* The default constructor is useful in cases in which the user intends to * The default constructor is useful in cases in which the user intends to
* perform decompositions via LLT::compute(const MatrixType&). * perform decompositions via LLT::compute(const MatrixType&).
*/ */
LLT() : m_matrix(), m_isInitialized(false) {} LLT() : m_matrix(), m_isInitialized(false) {}
/** \brief Default Constructor with memory preallocation /** \brief Default Constructor with memory preallocation
@@ -102,14 +104,14 @@ template<typename _MatrixType, int _UpLo> class LLT
/** \returns a view of the upper triangular matrix U */ /** \returns a view of the upper triangular matrix U */
inline typename Traits::MatrixU matrixU() const inline typename Traits::MatrixU matrixU() const
{ {
ei_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_isInitialized && "LLT is not initialized.");
return Traits::getU(m_matrix); return Traits::getU(m_matrix);
} }
/** \returns a view of the lower triangular matrix L */ /** \returns a view of the lower triangular matrix L */
inline typename Traits::MatrixL matrixL() const inline typename Traits::MatrixL matrixL() const
{ {
ei_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_isInitialized && "LLT is not initialized.");
return Traits::getL(m_matrix); return Traits::getL(m_matrix);
} }
@@ -124,17 +126,28 @@ template<typename _MatrixType, int _UpLo> class LLT
* \sa solveInPlace(), MatrixBase::llt() * \sa solveInPlace(), MatrixBase::llt()
*/ */
template<typename Rhs> template<typename Rhs>
inline const ei_solve_retval<LLT, Rhs> inline const internal::solve_retval<LLT, Rhs>
solve(const MatrixBase<Rhs>& b) const solve(const MatrixBase<Rhs>& b) const
{ {
ei_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_isInitialized && "LLT is not initialized.");
ei_assert(m_matrix.rows()==b.rows() eigen_assert(m_matrix.rows()==b.rows()
&& "LLT::solve(): invalid number of rows of the right hand side matrix b"); && "LLT::solve(): invalid number of rows of the right hand side matrix b");
return ei_solve_retval<LLT, Rhs>(*this, b.derived()); return internal::solve_retval<LLT, Rhs>(*this, b.derived());
} }
// Only compiled when EIGEN2_SUPPORT is defined.
#ifdef EIGEN2_SUPPORT
/** Out-parameter overload of solve(): evaluates solve(b) and assigns it to \c *result.
  *
  * \param b right-hand side, forwarded unchanged to the single-argument solve()
  * \param result destination of the solution; dereferenced unconditionally, so it must not be null
  * \returns always \c true (no failure is reported by this overload)
  */
template<typename OtherDerived, typename ResultType>
bool solve(const MatrixBase<OtherDerived>& b, ResultType *result) const
{
*result = this->solve(b);
return true;
}
/** \returns \c true unconditionally; no member state is consulted.
  * NOTE(review): this does not reflect whether the factorization actually succeeded.
  * The class also exposes info(), which returns the stored ComputationInfo - confirm
  * whether callers of this method should be using that instead.
  */
bool isPositiveDefinite() const { return true; }
#endif
template<typename Derived> template<typename Derived>
bool solveInPlace(MatrixBase<Derived> &bAndX) const; void solveInPlace(MatrixBase<Derived> &bAndX) const;
LLT& compute(const MatrixType& matrix); LLT& compute(const MatrixType& matrix);
@@ -144,7 +157,7 @@ template<typename _MatrixType, int _UpLo> class LLT
*/ */
inline const MatrixType& matrixLLT() const inline const MatrixType& matrixLLT() const
{ {
ei_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_isInitialized && "LLT is not initialized.");
return m_matrix; return m_matrix;
} }
@@ -158,7 +171,7 @@ template<typename _MatrixType, int _UpLo> class LLT
*/ */
ComputationInfo info() const ComputationInfo info() const
{ {
ei_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_isInitialized && "LLT is not initialized.");
return m_info; return m_info;
} }
@@ -175,17 +188,20 @@ template<typename _MatrixType, int _UpLo> class LLT
ComputationInfo m_info; ComputationInfo m_info;
}; };
template<int UpLo> struct ei_llt_inplace; namespace internal {
template<> struct ei_llt_inplace<Lower> template<int UpLo> struct llt_inplace;
template<> struct llt_inplace<Lower>
{ {
template<typename MatrixType> template<typename MatrixType>
static bool unblocked(MatrixType& mat) static typename MatrixType::Index unblocked(MatrixType& mat)
{ {
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar; typedef typename MatrixType::RealScalar RealScalar;
typedef typename MatrixType::Index Index;
ei_assert(mat.rows()==mat.cols()); eigen_assert(mat.rows()==mat.cols());
const Index size = mat.rows(); const Index size = mat.rows();
for(Index k = 0; k < size; ++k) for(Index k = 0; k < size; ++k)
{ {
@@ -195,22 +211,22 @@ template<> struct ei_llt_inplace<Lower>
Block<MatrixType,1,Dynamic> A10(mat,k,0,1,k); Block<MatrixType,1,Dynamic> A10(mat,k,0,1,k);
Block<MatrixType,Dynamic,Dynamic> A20(mat,k+1,0,rs,k); Block<MatrixType,Dynamic,Dynamic> A20(mat,k+1,0,rs,k);
RealScalar x = ei_real(mat.coeff(k,k)); RealScalar x = real(mat.coeff(k,k));
if (k>0) x -= mat.row(k).head(k).squaredNorm(); if (k>0) x -= A10.squaredNorm();
if (x<=RealScalar(0)) if (x<=RealScalar(0))
return false; return k;
mat.coeffRef(k,k) = x = ei_sqrt(x); mat.coeffRef(k,k) = x = sqrt(x);
if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint(); if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint();
if (rs>0) A21 *= RealScalar(1)/x; if (rs>0) A21 *= RealScalar(1)/x;
} }
return true; return -1;
} }
template<typename MatrixType> template<typename MatrixType>
static bool blocked(MatrixType& m) static typename MatrixType::Index blocked(MatrixType& m)
{ {
typedef typename MatrixType::Index Index; typedef typename MatrixType::Index Index;
ei_assert(m.rows()==m.cols()); eigen_assert(m.rows()==m.cols());
Index size = m.rows(); Index size = m.rows();
if(size<32) if(size<32)
return unblocked(m); return unblocked(m);
@@ -231,27 +247,28 @@ template<> struct ei_llt_inplace<Lower>
Block<MatrixType,Dynamic,Dynamic> A21(m,k+bs,k, rs,bs); Block<MatrixType,Dynamic,Dynamic> A21(m,k+bs,k, rs,bs);
Block<MatrixType,Dynamic,Dynamic> A22(m,k+bs,k+bs,rs,rs); Block<MatrixType,Dynamic,Dynamic> A22(m,k+bs,k+bs,rs,rs);
if(!unblocked(A11)) return false; Index ret;
if((ret=unblocked(A11))>=0) return k+ret;
if(rs>0) A11.adjoint().template triangularView<Upper>().template solveInPlace<OnTheRight>(A21); if(rs>0) A11.adjoint().template triangularView<Upper>().template solveInPlace<OnTheRight>(A21);
if(rs>0) A22.template selfadjointView<Lower>().rankUpdate(A21,-1); // bottleneck if(rs>0) A22.template selfadjointView<Lower>().rankUpdate(A21,-1); // bottleneck
} }
return true; return -1;
} }
}; };
template<> struct ei_llt_inplace<Upper> template<> struct llt_inplace<Upper>
{ {
template<typename MatrixType> template<typename MatrixType>
static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat) static EIGEN_STRONG_INLINE typename MatrixType::Index unblocked(MatrixType& mat)
{ {
Transpose<MatrixType> matt(mat); Transpose<MatrixType> matt(mat);
return ei_llt_inplace<Lower>::unblocked(matt); return llt_inplace<Lower>::unblocked(matt);
} }
template<typename MatrixType> template<typename MatrixType>
static EIGEN_STRONG_INLINE bool blocked(MatrixType& mat) static EIGEN_STRONG_INLINE typename MatrixType::Index blocked(MatrixType& mat)
{ {
Transpose<MatrixType> matt(mat); Transpose<MatrixType> matt(mat);
return ei_llt_inplace<Lower>::blocked(matt); return llt_inplace<Lower>::blocked(matt);
} }
}; };
@@ -262,7 +279,7 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Lower>
inline static MatrixL getL(const MatrixType& m) { return m; } inline static MatrixL getL(const MatrixType& m) { return m; }
inline static MatrixU getU(const MatrixType& m) { return m.adjoint(); } inline static MatrixU getU(const MatrixType& m) { return m.adjoint(); }
static bool inplace_decomposition(MatrixType& m) static bool inplace_decomposition(MatrixType& m)
{ return ei_llt_inplace<Lower>::blocked(m); } { return llt_inplace<Lower>::blocked(m)==-1; }
}; };
template<typename MatrixType> struct LLT_Traits<MatrixType,Upper> template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
@@ -272,9 +289,11 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
inline static MatrixL getL(const MatrixType& m) { return m.adjoint(); } inline static MatrixL getL(const MatrixType& m) { return m.adjoint(); }
inline static MatrixU getU(const MatrixType& m) { return m; } inline static MatrixU getU(const MatrixType& m) { return m; }
static bool inplace_decomposition(MatrixType& m) static bool inplace_decomposition(MatrixType& m)
{ return ei_llt_inplace<Upper>::blocked(m); } { return llt_inplace<Upper>::blocked(m)==-1; }
}; };
} // end namespace internal
/** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix /** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix
* *
* *
@@ -295,9 +314,10 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
return *this; return *this;
} }
namespace internal {
template<typename _MatrixType, int UpLo, typename Rhs> template<typename _MatrixType, int UpLo, typename Rhs>
struct ei_solve_retval<LLT<_MatrixType, UpLo>, Rhs> struct solve_retval<LLT<_MatrixType, UpLo>, Rhs>
: ei_solve_retval_base<LLT<_MatrixType, UpLo>, Rhs> : solve_retval_base<LLT<_MatrixType, UpLo>, Rhs>
{ {
typedef LLT<_MatrixType,UpLo> LLTType; typedef LLT<_MatrixType,UpLo> LLTType;
EIGEN_MAKE_SOLVE_HELPERS(LLTType,Rhs) EIGEN_MAKE_SOLVE_HELPERS(LLTType,Rhs)
@@ -308,8 +328,11 @@ struct ei_solve_retval<LLT<_MatrixType, UpLo>, Rhs>
dec().solveInPlace(dst); dec().solveInPlace(dst);
} }
}; };
}
/** This is the \em in-place version of solve(). /** \internal use x = llt_object.solve(x);
*
* This is the \em in-place version of solve().
* *
* \param bAndX represents both the right-hand side matrix b and result x. * \param bAndX represents both the right-hand side matrix b and result x.
* *
@@ -322,13 +345,12 @@ struct ei_solve_retval<LLT<_MatrixType, UpLo>, Rhs>
*/ */
template<typename MatrixType, int _UpLo> template<typename MatrixType, int _UpLo>
template<typename Derived> template<typename Derived>
bool LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const void LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
{ {
ei_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_isInitialized && "LLT is not initialized.");
ei_assert(m_matrix.rows()==bAndX.rows()); eigen_assert(m_matrix.rows()==bAndX.rows());
matrixL().solveInPlace(bAndX); matrixL().solveInPlace(bAndX);
matrixU().solveInPlace(bAndX); matrixU().solveInPlace(bAndX);
return true;
} }
/** \returns the matrix represented by the decomposition, /** \returns the matrix represented by the decomposition,
@@ -337,7 +359,7 @@ bool LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
template<typename MatrixType, int _UpLo> template<typename MatrixType, int _UpLo>
MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
{ {
ei_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_isInitialized && "LLT is not initialized.");
return matrixL() * matrixL().adjoint().toDenseMatrix(); return matrixL() * matrixL().adjoint().toDenseMatrix();
} }

View File

@@ -25,20 +25,39 @@
#ifndef EIGEN_ARRAY_H #ifndef EIGEN_ARRAY_H
#define EIGEN_ARRAY_H #define EIGEN_ARRAY_H
/** \class Array
* \ingroup Core_Module
*
* \brief General-purpose arrays with easy API for coefficient-wise operations
*
* The %Array class is very similar to the Matrix class. It provides
* general-purpose one- and two-dimensional arrays. The difference between the
* %Array and the %Matrix class is primarily in the API: the API for the
* %Array class provides easy access to coefficient-wise operations, while the
* API for the %Matrix class provides easy access to linear-algebra
* operations.
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
*
* \sa \ref TutorialArrayClass, \ref TopicClassHierarchy
*/
namespace internal {
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols> template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct ei_traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : ei_traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{ {
typedef ArrayXpr XprKind; typedef ArrayXpr XprKind;
typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase; typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
}; };
}
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols> template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
class Array class Array
: public DenseStorageBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{ {
public: public:
typedef DenseStorageBase<Array> Base; typedef PlainObjectBase<Array> Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Array) EIGEN_DENSE_PUBLIC_INTERFACE(Array)
enum { Options = _Options }; enum { Options = _Options };
@@ -46,7 +65,7 @@ class Array
protected: protected:
template <typename Derived, typename OtherDerived, bool IsVector> template <typename Derived, typename OtherDerived, bool IsVector>
friend struct ei_conservative_resize_like_impl; friend struct internal::conservative_resize_like_impl;
using Base::m_storage; using Base::m_storage;
public: public:
@@ -112,8 +131,8 @@ class Array
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
// FIXME is it still needed ?? // FIXME is it still needed ??
/** \internal */ /** \internal */
Array(ei_constructor_without_unaligned_array_assert) Array(internal::constructor_without_unaligned_array_assert)
: Base(ei_constructor_without_unaligned_array_assert()) : Base(internal::constructor_without_unaligned_array_assert())
{ {
Base::_check_template_params(); Base::_check_template_params();
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
@@ -131,8 +150,8 @@ class Array
{ {
Base::_check_template_params(); Base::_check_template_params();
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Array) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Array)
ei_assert(dim > 0); eigen_assert(dim >= 0);
ei_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim); eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
} }
@@ -214,7 +233,7 @@ class Array
* data pointers. * data pointers.
*/ */
template<typename OtherDerived> template<typename OtherDerived>
void swap(ArrayBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other) void swap(ArrayBase<OtherDerived> const & other)
{ this->_swap(other.derived()); } { this->_swap(other.derived()); }
inline Index innerStride() const { return 1; } inline Index innerStride() const { return 1; }
@@ -227,10 +246,11 @@ class Array
private: private:
template<typename MatrixType, typename OtherDerived, bool SwapPointers> template<typename MatrixType, typename OtherDerived, bool SwapPointers>
friend struct ei_matrix_swap_impl; friend struct internal::matrix_swap_impl;
}; };
/** \defgroup arraytypedefs Global array typedefs /** \defgroup arraytypedefs Global array typedefs
* \ingroup Core_Module
* *
* Eigen defines several typedef shortcuts for most common 1D and 2D array types. * Eigen defines several typedef shortcuts for most common 1D and 2D array types.
* *
@@ -251,7 +271,7 @@ class Array
#define EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \ #define EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \
/** \ingroup arraytypedefs */ \ /** \ingroup arraytypedefs */ \
typedef Array<Type, Size, Size> Array##SizeSuffix##SizeSuffix##TypeSuffix; \ typedef Array<Type, Size, Size> Array##SizeSuffix##SizeSuffix##TypeSuffix; \
/** \ingroup matrixtypedefs */ \ /** \ingroup arraytypedefs */ \
typedef Array<Type, Size, 1> Array##SizeSuffix##TypeSuffix; typedef Array<Type, Size, 1> Array##SizeSuffix##TypeSuffix;
#define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \ #define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \

View File

@@ -28,6 +28,7 @@
template<typename ExpressionType> class MatrixWrapper; template<typename ExpressionType> class MatrixWrapper;
/** \class ArrayBase /** \class ArrayBase
* \ingroup Core_Module
* *
* \brief Base class for all 1D and 2D array, and related expressions * \brief Base class for all 1D and 2D array, and related expressions
* *
@@ -41,9 +42,12 @@ template<typename ExpressionType> class MatrixWrapper;
* *
* This class is the base that is inherited by all array expression types. * This class is the base that is inherited by all array expression types.
* *
* \param Derived is the derived type, e.g., an array or an expression type. * \tparam Derived is the derived type, e.g., an array or an expression type.
* *
* \sa class MatrixBase * This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN.
*
* \sa class MatrixBase, \ref TopicClassHierarchy
*/ */
template<typename Derived> class ArrayBase template<typename Derived> class ArrayBase
: public DenseBase<Derived> : public DenseBase<Derived>
@@ -52,16 +56,16 @@ template<typename Derived> class ArrayBase
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
/** The base class for a given storage type. */ /** The base class for a given storage type. */
typedef ArrayBase StorageBaseType; typedef ArrayBase StorageBaseType;
typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl; typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl;
using ei_special_scalar_op_base<Derived,typename ei_traits<Derived>::Scalar, using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
typename NumTraits<typename ei_traits<Derived>::Scalar>::Real>::operator*; typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
typedef typename ei_traits<Derived>::StorageKind StorageKind; typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index; typedef typename internal::traits<Derived>::Index Index;
typedef typename ei_traits<Derived>::Scalar Scalar; typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar; typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
typedef DenseBase<Derived> Base; typedef DenseBase<Derived> Base;
@@ -90,6 +94,7 @@ template<typename Derived> class ArrayBase
using Base::operator/=; using Base::operator/=;
typedef typename Base::CoeffReturnType CoeffReturnType; typedef typename Base::CoeffReturnType CoeffReturnType;
#endif // not EIGEN_PARSED_BY_DOXYGEN #endif // not EIGEN_PARSED_BY_DOXYGEN
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -98,17 +103,17 @@ template<typename Derived> class ArrayBase
* reference to a matrix, not a matrix! It is however guaranteed that the return type of eval() is either * reference to a matrix, not a matrix! It is however guaranteed that the return type of eval() is either
* PlainObject or const PlainObject&. * PlainObject or const PlainObject&.
*/ */
typedef Array<typename ei_traits<Derived>::Scalar, typedef Array<typename internal::traits<Derived>::Scalar,
ei_traits<Derived>::RowsAtCompileTime, internal::traits<Derived>::RowsAtCompileTime,
ei_traits<Derived>::ColsAtCompileTime, internal::traits<Derived>::ColsAtCompileTime,
AutoAlign | (ei_traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor), AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
ei_traits<Derived>::MaxRowsAtCompileTime, internal::traits<Derived>::MaxRowsAtCompileTime,
ei_traits<Derived>::MaxColsAtCompileTime internal::traits<Derived>::MaxColsAtCompileTime
> PlainObject; > PlainObject;
/** \internal Represents a matrix with all coefficients equal to one another*/ /** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ConstantReturnType; typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
#endif // not EIGEN_PARSED_BY_DOXYGEN #endif // not EIGEN_PARSED_BY_DOXYGEN
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase
@@ -128,7 +133,7 @@ template<typename Derived> class ArrayBase
*/ */
Derived& operator=(const ArrayBase& other) Derived& operator=(const ArrayBase& other)
{ {
return ei_assign_selector<Derived,Derived>::run(derived(), other.derived()); return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
} }
Derived& operator+=(const Scalar& scalar) Derived& operator+=(const Scalar& scalar)
@@ -166,6 +171,13 @@ template<typename Derived> class ArrayBase
explicit ArrayBase(Index); explicit ArrayBase(Index);
ArrayBase(Index,Index); ArrayBase(Index,Index);
template<typename OtherDerived> explicit ArrayBase(const ArrayBase<OtherDerived>&); template<typename OtherDerived> explicit ArrayBase(const ArrayBase<OtherDerived>&);
protected:
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator+=(const MatrixBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator-=(const MatrixBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
}; };
/** replaces \c *this by \c *this - \a other. /** replaces \c *this by \c *this - \a other.
@@ -177,8 +189,8 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived & EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other) ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
{ {
SelfCwiseBinaryOp<ei_scalar_difference_op<Scalar>, Derived> tmp(derived()); SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other; tmp = other.derived();
return derived(); return derived();
} }
@@ -191,7 +203,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived & EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other) ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
{ {
SelfCwiseBinaryOp<ei_scalar_sum_op<Scalar>, Derived> tmp(derived()); SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other.derived(); tmp = other.derived();
return derived(); return derived();
} }
@@ -205,7 +217,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived & EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other) ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
{ {
SelfCwiseBinaryOp<ei_scalar_product_op<Scalar>, Derived> tmp(derived()); SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other.derived(); tmp = other.derived();
return derived(); return derived();
} }
@@ -219,7 +231,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived & EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other) ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
{ {
SelfCwiseBinaryOp<ei_scalar_quotient_op<Scalar>, Derived> tmp(derived()); SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other.derived(); tmp = other.derived();
return derived(); return derived();
} }

View File

@@ -26,6 +26,7 @@
#define EIGEN_ARRAYWRAPPER_H #define EIGEN_ARRAYWRAPPER_H
/** \class ArrayWrapper /** \class ArrayWrapper
* \ingroup Core_Module
* *
* \brief Expression of a mathematical vector or matrix as an array object * \brief Expression of a mathematical vector or matrix as an array object
* *
@@ -34,12 +35,15 @@
* *
* \sa MatrixBase::array(), class MatrixWrapper * \sa MatrixBase::array(), class MatrixWrapper
*/ */
namespace internal {
template<typename ExpressionType> template<typename ExpressionType>
struct ei_traits<ArrayWrapper<ExpressionType> > struct traits<ArrayWrapper<ExpressionType> >
: public ei_traits<typename ei_cleantype<typename ExpressionType::Nested>::type > : public traits<typename remove_all<typename ExpressionType::Nested>::type >
{ {
typedef ArrayXpr XprKind; typedef ArrayXpr XprKind;
}; };
}
template<typename ExpressionType> template<typename ExpressionType>
class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> > class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
@@ -49,7 +53,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper) EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)
typedef typename ei_nested<ExpressionType>::type NestedExpressionType; typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
inline ArrayWrapper(const ExpressionType& matrix) : m_expression(matrix) {} inline ArrayWrapper(const ExpressionType& matrix) : m_expression(matrix) {}
@@ -68,6 +72,11 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
return m_expression.const_cast_derived().coeffRef(row, col); return m_expression.const_cast_derived().coeffRef(row, col);
} }
inline const Scalar& coeffRef(Index row, Index col) const
{
return m_expression.const_cast_derived().coeffRef(row, col);
}
inline const CoeffReturnType coeff(Index index) const inline const CoeffReturnType coeff(Index index) const
{ {
return m_expression.coeff(index); return m_expression.coeff(index);
@@ -78,6 +87,11 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
return m_expression.const_cast_derived().coeffRef(index); return m_expression.const_cast_derived().coeffRef(index);
} }
inline const Scalar& coeffRef(Index index) const
{
return m_expression.const_cast_derived().coeffRef(index);
}
template<int LoadMode> template<int LoadMode>
inline const PacketScalar packet(Index row, Index col) const inline const PacketScalar packet(Index row, Index col) const
{ {
@@ -110,6 +124,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
}; };
/** \class MatrixWrapper /** \class MatrixWrapper
* \ingroup Core_Module
* *
* \brief Expression of an array as a mathematical vector or matrix * \brief Expression of an array as a mathematical vector or matrix
* *
@@ -119,12 +134,14 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
* \sa MatrixBase::matrix(), class ArrayWrapper * \sa MatrixBase::matrix(), class ArrayWrapper
*/ */
namespace internal {
template<typename ExpressionType> template<typename ExpressionType>
struct ei_traits<MatrixWrapper<ExpressionType> > struct traits<MatrixWrapper<ExpressionType> >
: public ei_traits<typename ei_cleantype<typename ExpressionType::Nested>::type > : public traits<typename remove_all<typename ExpressionType::Nested>::type >
{ {
typedef MatrixXpr XprKind; typedef MatrixXpr XprKind;
}; };
}
template<typename ExpressionType> template<typename ExpressionType>
class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> > class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
@@ -134,7 +151,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper) EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper)
typedef typename ei_nested<ExpressionType>::type NestedExpressionType; typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
inline MatrixWrapper(const ExpressionType& matrix) : m_expression(matrix) {} inline MatrixWrapper(const ExpressionType& matrix) : m_expression(matrix) {}
@@ -153,6 +170,11 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
return m_expression.const_cast_derived().coeffRef(row, col); return m_expression.const_cast_derived().coeffRef(row, col);
} }
inline const Scalar& coeffRef(Index row, Index col) const
{
return m_expression.derived().coeffRef(row, col);
}
inline const CoeffReturnType coeff(Index index) const inline const CoeffReturnType coeff(Index index) const
{ {
return m_expression.coeff(index); return m_expression.coeff(index);
@@ -163,6 +185,11 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
return m_expression.const_cast_derived().coeffRef(index); return m_expression.const_cast_derived().coeffRef(index);
} }
inline const Scalar& coeffRef(Index index) const
{
return m_expression.const_cast_derived().coeffRef(index);
}
template<int LoadMode> template<int LoadMode>
inline const PacketScalar packet(Index row, Index col) const inline const PacketScalar packet(Index row, Index col) const
{ {

View File

@@ -27,19 +27,21 @@
#ifndef EIGEN_ASSIGN_H #ifndef EIGEN_ASSIGN_H
#define EIGEN_ASSIGN_H #define EIGEN_ASSIGN_H
namespace internal {
/*************************************************************************** /***************************************************************************
* Part 1 : the logic deciding a strategy for traversal and unrolling * * Part 1 : the logic deciding a strategy for traversal and unrolling *
***************************************************************************/ ***************************************************************************/
template <typename Derived, typename OtherDerived> template <typename Derived, typename OtherDerived>
struct ei_assign_traits struct assign_traits
{ {
public: public:
enum { enum {
DstIsAligned = Derived::Flags & AlignedBit, DstIsAligned = Derived::Flags & AlignedBit,
DstHasDirectAccess = Derived::Flags & DirectAccessBit, DstHasDirectAccess = Derived::Flags & DirectAccessBit,
SrcIsAligned = OtherDerived::Flags & AlignedBit, SrcIsAligned = OtherDerived::Flags & AlignedBit,
JointAlignment = DstIsAligned && SrcIsAligned ? Aligned : Unaligned JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned
}; };
private: private:
@@ -51,7 +53,7 @@ private:
: int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime) : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
: int(Derived::MaxRowsAtCompileTime), : int(Derived::MaxRowsAtCompileTime),
MaxSizeAtCompileTime = Derived::SizeAtCompileTime, MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
PacketSize = ei_packet_traits<typename Derived::Scalar>::size PacketSize = packet_traits<typename Derived::Scalar>::size
}; };
enum { enum {
@@ -104,9 +106,9 @@ public:
: int(NoUnrolling) : int(NoUnrolling)
) )
: int(Traversal) == int(LinearVectorizedTraversal) : int(Traversal) == int(LinearVectorizedTraversal)
? ( int(MayUnrollCompletely) && int(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) ) ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
: int(Traversal) == int(LinearTraversal) : int(Traversal) == int(LinearTraversal)
? ( int(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) ) ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) )
: int(NoUnrolling) : int(NoUnrolling)
}; };
@@ -143,7 +145,7 @@ public:
************************/ ************************/
template<typename Derived1, typename Derived2, int Index, int Stop> template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_DefaultTraversal_CompleteUnrolling struct assign_DefaultTraversal_CompleteUnrolling
{ {
enum { enum {
outer = Index / Derived1::InnerSizeAtCompileTime, outer = Index / Derived1::InnerSizeAtCompileTime,
@@ -153,28 +155,28 @@ struct ei_assign_DefaultTraversal_CompleteUnrolling
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{ {
dst.copyCoeffByOuterInner(outer, inner, src); dst.copyCoeffByOuterInner(outer, inner, src);
ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src); assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
} }
}; };
template<typename Derived1, typename Derived2, int Stop> template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop> struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{ {
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {} EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
}; };
template<typename Derived1, typename Derived2, int Index, int Stop> template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_DefaultTraversal_InnerUnrolling struct assign_DefaultTraversal_InnerUnrolling
{ {
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer) EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
{ {
dst.copyCoeffByOuterInner(outer, Index, src); dst.copyCoeffByOuterInner(outer, Index, src);
ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer); assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
} }
}; };
template<typename Derived1, typename Derived2, int Stop> template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop> struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{ {
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {} EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
}; };
@@ -184,17 +186,17 @@ struct ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
***********************/ ***********************/
template<typename Derived1, typename Derived2, int Index, int Stop> template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_LinearTraversal_CompleteUnrolling struct assign_LinearTraversal_CompleteUnrolling
{ {
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{ {
dst.copyCoeff(Index, src); dst.copyCoeff(Index, src);
ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src); assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
} }
}; };
template<typename Derived1, typename Derived2, int Stop> template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop> struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{ {
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {} EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
}; };
@@ -204,41 +206,41 @@ struct ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Sto
**************************/ **************************/
template<typename Derived1, typename Derived2, int Index, int Stop> template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_innervec_CompleteUnrolling struct assign_innervec_CompleteUnrolling
{ {
enum { enum {
outer = Index / Derived1::InnerSizeAtCompileTime, outer = Index / Derived1::InnerSizeAtCompileTime,
inner = Index % Derived1::InnerSizeAtCompileTime, inner = Index % Derived1::InnerSizeAtCompileTime,
JointAlignment = ei_assign_traits<Derived1,Derived2>::JointAlignment JointAlignment = assign_traits<Derived1,Derived2>::JointAlignment
}; };
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{ {
dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src); dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src);
ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, assign_innervec_CompleteUnrolling<Derived1, Derived2,
Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src); Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
} }
}; };
template<typename Derived1, typename Derived2, int Stop> template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop> struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{ {
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {} EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
}; };
template<typename Derived1, typename Derived2, int Index, int Stop> template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_innervec_InnerUnrolling struct assign_innervec_InnerUnrolling
{ {
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer) EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
{ {
dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src); dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
ei_assign_innervec_InnerUnrolling<Derived1, Derived2, assign_innervec_InnerUnrolling<Derived1, Derived2,
Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, outer); Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, outer);
} }
}; };
template<typename Derived1, typename Derived2, int Stop> template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop> struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{ {
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {} EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
}; };
@@ -248,16 +250,22 @@ struct ei_assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
***************************************************************************/ ***************************************************************************/
template<typename Derived1, typename Derived2, template<typename Derived1, typename Derived2,
int Traversal = ei_assign_traits<Derived1, Derived2>::Traversal, int Traversal = assign_traits<Derived1, Derived2>::Traversal,
int Unrolling = ei_assign_traits<Derived1, Derived2>::Unrolling> int Unrolling = assign_traits<Derived1, Derived2>::Unrolling>
struct ei_assign_impl; struct assign_impl;
/************************ /************************
*** Default traversal *** *** Default traversal ***
************************/ ************************/
template<typename Derived1, typename Derived2, int Unrolling>
struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling>
{
inline static void run(Derived1 &, const Derived2 &) { }
};
template<typename Derived1, typename Derived2> template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling> struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
{ {
typedef typename Derived1::Index Index; typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src) inline static void run(Derived1 &dst, const Derived2 &src)
@@ -271,24 +279,24 @@ struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
}; };
template<typename Derived1, typename Derived2> template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling> struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
{ {
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{ {
ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime> assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src); ::run(dst, src);
} }
}; };
template<typename Derived1, typename Derived2> template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling> struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
{ {
typedef typename Derived1::Index Index; typedef typename Derived1::Index Index;
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{ {
const Index outerSize = dst.outerSize(); const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer) for(Index outer = 0; outer < outerSize; ++outer)
ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime> assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
::run(dst, src, outer); ::run(dst, src, outer);
} }
}; };
@@ -298,7 +306,7 @@ struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
***********************/ ***********************/
template<typename Derived1, typename Derived2> template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling> struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
{ {
typedef typename Derived1::Index Index; typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src) inline static void run(Derived1 &dst, const Derived2 &src)
@@ -310,11 +318,11 @@ struct ei_assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
}; };
template<typename Derived1, typename Derived2> template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling> struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
{ {
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{ {
ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime> assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src); ::run(dst, src);
} }
}; };
@@ -324,14 +332,14 @@ struct ei_assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
**************************/ **************************/
template<typename Derived1, typename Derived2> template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling> struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
{ {
typedef typename Derived1::Index Index; typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src) inline static void run(Derived1 &dst, const Derived2 &src)
{ {
const Index innerSize = dst.innerSize(); const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize(); const Index outerSize = dst.outerSize();
const Index packetSize = ei_packet_traits<typename Derived1::Scalar>::size; const Index packetSize = packet_traits<typename Derived1::Scalar>::size;
for(Index outer = 0; outer < outerSize; ++outer) for(Index outer = 0; outer < outerSize; ++outer)
for(Index inner = 0; inner < innerSize; inner+=packetSize) for(Index inner = 0; inner < innerSize; inner+=packetSize)
dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, inner, src); dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, inner, src);
@@ -339,24 +347,24 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
}; };
template<typename Derived1, typename Derived2> template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling> struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling>
{ {
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{ {
ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime> assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src); ::run(dst, src);
} }
}; };
template<typename Derived1, typename Derived2> template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling> struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling>
{ {
typedef typename Derived1::Index Index; typedef typename Derived1::Index Index;
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{ {
const Index outerSize = dst.outerSize(); const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer) for(Index outer = 0; outer < outerSize; ++outer)
ei_assign_innervec_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime> assign_innervec_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
::run(dst, src, outer); ::run(dst, src, outer);
} }
}; };
@@ -366,14 +374,14 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolli
***************************/ ***************************/
template <bool IsAligned = false> template <bool IsAligned = false>
struct ei_unaligned_assign_impl struct unaligned_assign_impl
{ {
template <typename Derived, typename OtherDerived> template <typename Derived, typename OtherDerived>
static EIGEN_STRONG_INLINE void run(const Derived&, OtherDerived&, typename Derived::Index, typename Derived::Index) {} static EIGEN_STRONG_INLINE void run(const Derived&, OtherDerived&, typename Derived::Index, typename Derived::Index) {}
}; };
template <> template <>
struct ei_unaligned_assign_impl<false> struct unaligned_assign_impl<false>
{ {
// MSVC must not inline these functions. If it does, it fails to optimize the // MSVC must not inline these functions. If it does, it fails to optimize the
// packet access path. // packet access path.
@@ -391,40 +399,45 @@ struct ei_unaligned_assign_impl<false>
}; };
template<typename Derived1, typename Derived2> template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling> struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
{ {
typedef typename Derived1::Index Index; typedef typename Derived1::Index Index;
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{ {
const Index size = dst.size(); const Index size = dst.size();
const Index packetSize = ei_packet_traits<typename Derived1::Scalar>::size; typedef packet_traits<typename Derived1::Scalar> PacketTraits;
const Index alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0 enum {
: ei_first_aligned(&dst.coeffRef(0), size); packetSize = PacketTraits::size,
dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
};
const Index alignedStart = assign_traits<Derived1,Derived2>::DstIsAligned ? 0
: first_aligned(&dst.coeffRef(0), size);
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
ei_unaligned_assign_impl<ei_assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart); unaligned_assign_impl<assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);
for(Index index = alignedStart; index < alignedEnd; index += packetSize) for(Index index = alignedStart; index < alignedEnd; index += packetSize)
{ {
dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::JointAlignment>(index, src); dst.template copyPacket<Derived2, dstAlignment, srcAlignment>(index, src);
} }
ei_unaligned_assign_impl<>::run(src,dst,alignedEnd,size); unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
} }
}; };
template<typename Derived1, typename Derived2> template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling> struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
{ {
typedef typename Derived1::Index Index; typedef typename Derived1::Index Index;
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{ {
enum { size = Derived1::SizeAtCompileTime, enum { size = Derived1::SizeAtCompileTime,
packetSize = ei_packet_traits<typename Derived1::Scalar>::size, packetSize = packet_traits<typename Derived1::Scalar>::size,
alignedSize = (size/packetSize)*packetSize }; alignedSize = (size/packetSize)*packetSize };
ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src); assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src);
ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src); assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
} }
}; };
@@ -433,18 +446,24 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnr
***************************/ ***************************/
template<typename Derived1, typename Derived2> template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling> struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
{ {
typedef typename Derived1::Index Index; typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src) inline static void run(Derived1 &dst, const Derived2 &src)
{ {
const Index packetSize = ei_packet_traits<typename Derived1::Scalar>::size; typedef packet_traits<typename Derived1::Scalar> PacketTraits;
enum {
packetSize = PacketTraits::size,
alignable = PacketTraits::AlignedOnScalar,
dstAlignment = alignable ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
};
const Index packetAlignedMask = packetSize - 1; const Index packetAlignedMask = packetSize - 1;
const Index innerSize = dst.innerSize(); const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize(); const Index outerSize = dst.outerSize();
const Index alignedStep = (packetSize - dst.outerStride() % packetSize) & packetAlignedMask; const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
Index alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0 Index alignedStart = ((!alignable) || assign_traits<Derived1,Derived2>::DstIsAligned) ? 0
: ei_first_aligned(&dst.coeffRef(0,0), innerSize); : first_aligned(&dst.coeffRef(0,0), innerSize);
for(Index outer = 0; outer < outerSize; ++outer) for(Index outer = 0; outer < outerSize; ++outer)
{ {
@@ -455,7 +474,7 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
// do the vectorizable part of the assignment // do the vectorizable part of the assignment
for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize) for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
dst.template copyPacketByOuterInner<Derived2, Aligned, Unaligned>(outer, inner, src); dst.template copyPacketByOuterInner<Derived2, dstAlignment, Unaligned>(outer, inner, src);
// do the non-vectorizable part of the assignment // do the non-vectorizable part of the assignment
for(Index inner = alignedEnd; inner<innerSize ; ++inner) for(Index inner = alignedEnd; inner<innerSize ; ++inner)
@@ -466,6 +485,8 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
} }
}; };
} // end namespace internal
/*************************************************************************** /***************************************************************************
* Part 4 : implementation of DenseBase methods * Part 4 : implementation of DenseBase methods
***************************************************************************/ ***************************************************************************/
@@ -475,20 +496,28 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived> EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
::lazyAssign(const DenseBase<OtherDerived>& other) ::lazyAssign(const DenseBase<OtherDerived>& other)
{ {
enum{
SameType = internal::is_same<typename Derived::Scalar,typename OtherDerived::Scalar>::value
};
EIGEN_STATIC_ASSERT_LVALUE(Derived)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived) EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
EIGEN_STATIC_ASSERT((ei_is_same_type<typename Derived::Scalar, typename OtherDerived::Scalar>::ret), EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
#ifdef EIGEN_DEBUG_ASSIGN #ifdef EIGEN_DEBUG_ASSIGN
ei_assign_traits<Derived, OtherDerived>::debug(); internal::assign_traits<Derived, OtherDerived>::debug();
#endif #endif
ei_assert(rows() == other.rows() && cols() == other.cols()); eigen_assert(rows() == other.rows() && cols() == other.cols());
ei_assign_impl<Derived, OtherDerived>::run(derived(),other.derived()); internal::assign_impl<Derived, OtherDerived, int(SameType) ? int(internal::assign_traits<Derived, OtherDerived>::Traversal)
: int(InvalidTraversal)>::run(derived(),other.derived());
#ifndef EIGEN_NO_DEBUG #ifndef EIGEN_NO_DEBUG
checkTransposeAliasing(other.derived()); checkTransposeAliasing(other.derived());
#endif #endif
return derived(); return derived();
} }
namespace internal {
template<typename Derived, typename OtherDerived, template<typename Derived, typename OtherDerived,
bool EvalBeforeAssigning = (int(OtherDerived::Flags) & EvalBeforeAssigningBit) != 0, bool EvalBeforeAssigning = (int(OtherDerived::Flags) & EvalBeforeAssigningBit) != 0,
bool NeedToTranspose = Derived::IsVectorAtCompileTime bool NeedToTranspose = Derived::IsVectorAtCompileTime
@@ -498,49 +527,51 @@ template<typename Derived, typename OtherDerived,
// revert to || as soon as not needed anymore. // revert to || as soon as not needed anymore.
(int(Derived::ColsAtCompileTime) == 1 && int(OtherDerived::RowsAtCompileTime) == 1)) (int(Derived::ColsAtCompileTime) == 1 && int(OtherDerived::RowsAtCompileTime) == 1))
&& int(Derived::SizeAtCompileTime) != 1> && int(Derived::SizeAtCompileTime) != 1>
struct ei_assign_selector; struct assign_selector;
template<typename Derived, typename OtherDerived> template<typename Derived, typename OtherDerived>
struct ei_assign_selector<Derived,OtherDerived,false,false> { struct assign_selector<Derived,OtherDerived,false,false> {
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); } EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
}; };
template<typename Derived, typename OtherDerived> template<typename Derived, typename OtherDerived>
struct ei_assign_selector<Derived,OtherDerived,true,false> { struct assign_selector<Derived,OtherDerived,true,false> {
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); } EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
}; };
template<typename Derived, typename OtherDerived> template<typename Derived, typename OtherDerived>
struct ei_assign_selector<Derived,OtherDerived,false,true> { struct assign_selector<Derived,OtherDerived,false,true> {
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); } EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
}; };
template<typename Derived, typename OtherDerived> template<typename Derived, typename OtherDerived>
struct ei_assign_selector<Derived,OtherDerived,true,true> { struct assign_selector<Derived,OtherDerived,true,true> {
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); } EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
}; };
} // end namespace internal
template<typename Derived> template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other) EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
{ {
return ei_assign_selector<Derived,OtherDerived>::run(derived(), other.derived()); return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
} }
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other) EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
{ {
return ei_assign_selector<Derived,Derived>::run(derived(), other.derived()); return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
} }
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other) EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
{ {
return ei_assign_selector<Derived,Derived>::run(derived(), other.derived()); return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
} }
template<typename Derived> template<typename Derived>
template <typename OtherDerived> template <typename OtherDerived>
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other) EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
{ {
return ei_assign_selector<Derived,OtherDerived>::run(derived(), other.derived()); return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
} }
template<typename Derived> template<typename Derived>

View File

@@ -25,111 +25,82 @@
#ifndef EIGEN_BANDMATRIX_H #ifndef EIGEN_BANDMATRIX_H
#define EIGEN_BANDMATRIX_H #define EIGEN_BANDMATRIX_H
/** namespace internal {
* \class BandMatrix
*
* \brief Represents a rectangular matrix with a banded storage
*
* \param _Scalar Numeric type, i.e. float, double, int
* \param Rows Number of rows, or \b Dynamic
* \param Cols Number of columns, or \b Dynamic
* \param Supers Number of super diagonal
* \param Subs Number of sub diagonal
* \param _Options A combination of either \b RowMajor or \b ColMajor, and of \b SelfAdjoint
* The former controls storage order, and defaults to column-major. The latter controls
* whether the matrix represent a selfadjoint matrix in which case either Supers of Subs
* have to be null.
*
* \sa class TridiagonalMatrix
*/
template<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options>
struct ei_traits<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
{
typedef _Scalar Scalar;
typedef Dense StorageKind;
typedef DenseIndex Index;
enum {
CoeffReadCost = NumTraits<Scalar>::ReadCost,
RowsAtCompileTime = Rows,
ColsAtCompileTime = Cols,
MaxRowsAtCompileTime = Rows,
MaxColsAtCompileTime = Cols,
Flags = 0
};
};
template<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options>
class BandMatrix : public EigenBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> > template<typename Derived>
class BandMatrixBase : public EigenBase<Derived>
{ {
public: public:
enum { enum {
Flags = ei_traits<BandMatrix>::Flags, Flags = internal::traits<Derived>::Flags,
CoeffReadCost = ei_traits<BandMatrix>::CoeffReadCost, CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
RowsAtCompileTime = ei_traits<BandMatrix>::RowsAtCompileTime, RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
ColsAtCompileTime = ei_traits<BandMatrix>::ColsAtCompileTime, ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
MaxRowsAtCompileTime = ei_traits<BandMatrix>::MaxRowsAtCompileTime, MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = ei_traits<BandMatrix>::MaxColsAtCompileTime MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
Supers = internal::traits<Derived>::Supers,
Subs = internal::traits<Derived>::Subs,
Options = internal::traits<Derived>::Options
}; };
typedef typename ei_traits<BandMatrix>::Scalar Scalar; typedef typename internal::traits<Derived>::Scalar Scalar;
typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> DenseMatrixType; typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> DenseMatrixType;
typedef typename DenseMatrixType::Index Index; typedef typename DenseMatrixType::Index Index;
typedef typename internal::traits<Derived>::CoefficientsType CoefficientsType;
typedef EigenBase<Derived> Base;
protected: protected:
enum { enum {
DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic))
? 1 + Supers + Subs ? 1 + Supers + Subs
: Dynamic, : Dynamic,
SizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(Rows,Cols) SizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime)
}; };
typedef Matrix<Scalar,DataRowsAtCompileTime,ColsAtCompileTime,Options&RowMajor?RowMajor:ColMajor> DataType;
public: public:
inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs) using Base::derived;
: m_data(1+supers+subs,cols), using Base::rows;
m_rows(rows), m_supers(supers), m_subs(subs) using Base::cols;
{
//m_data.setConstant(666);
}
/** \returns the number of columns */
inline Index rows() const { return m_rows.value(); }
/** \returns the number of rows */
inline Index cols() const { return m_data.cols(); }
/** \returns the number of super diagonals */ /** \returns the number of super diagonals */
inline Index supers() const { return m_supers.value(); } inline Index supers() const { return derived().supers(); }
/** \returns the number of sub diagonals */ /** \returns the number of sub diagonals */
inline Index subs() const { return m_subs.value(); } inline Index subs() const { return derived().subs(); }
/** \returns an expression of the underlying coefficient matrix */
inline const CoefficientsType& coeffs() const { return derived().coeffs(); }
/** \returns an expression of the underlying coefficient matrix */
inline CoefficientsType& coeffs() { return derived().coeffs(); }
/** \returns a vector expression of the \a i -th column, /** \returns a vector expression of the \a i -th column,
* only the meaningful part is returned. * only the meaningful part is returned.
* \warning the internal storage must be column major. */ * \warning the internal storage must be column major. */
inline Block<DataType,Dynamic,1> col(Index i) inline Block<CoefficientsType,Dynamic,1> col(Index i)
{ {
EIGEN_STATIC_ASSERT((Options&RowMajor)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); EIGEN_STATIC_ASSERT((Options&RowMajor)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
Index start = 0; Index start = 0;
Index len = m_data.rows(); Index len = coeffs().rows();
if (i<=supers()) if (i<=supers())
{ {
start = supers()-i; start = supers()-i;
len = std::min(rows(),std::max<Index>(0,m_data.rows() - (supers()-i))); len = std::min(rows(),std::max<Index>(0,coeffs().rows() - (supers()-i)));
} }
else if (i>=rows()-subs()) else if (i>=rows()-subs())
len = std::max<Index>(0,m_data.rows() - (i + 1 - rows() + subs())); len = std::max<Index>(0,coeffs().rows() - (i + 1 - rows() + subs()));
return Block<DataType,Dynamic,1>(m_data, start, i, len, 1); return Block<CoefficientsType,Dynamic,1>(coeffs(), start, i, len, 1);
} }
/** \returns a vector expression of the main diagonal */ /** \returns a vector expression of the main diagonal */
inline Block<DataType,1,SizeAtCompileTime> diagonal() inline Block<CoefficientsType,1,SizeAtCompileTime> diagonal()
{ return Block<DataType,1,SizeAtCompileTime>(m_data,supers(),0,1,std::min(rows(),cols())); } { return Block<CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,std::min(rows(),cols())); }
/** \returns a vector expression of the main diagonal (const version) */ /** \returns a vector expression of the main diagonal (const version) */
inline const Block<DataType,1,SizeAtCompileTime> diagonal() const inline const Block<const CoefficientsType,1,SizeAtCompileTime> diagonal() const
{ return Block<DataType,1,SizeAtCompileTime>(m_data,supers(),0,1,std::min(rows(),cols())); } { return Block<const CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,std::min(rows(),cols())); }
template<int Index> struct DiagonalIntReturnType { template<int Index> struct DiagonalIntReturnType {
enum { enum {
@@ -142,38 +113,38 @@ class BandMatrix : public EigenBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Opt
? EIGEN_SIZE_MIN_PREFER_DYNAMIC(ColsAtCompileTime, RowsAtCompileTime + ActualIndex) ? EIGEN_SIZE_MIN_PREFER_DYNAMIC(ColsAtCompileTime, RowsAtCompileTime + ActualIndex)
: EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime - ActualIndex)) : EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime - ActualIndex))
}; };
typedef Block<DataType,1, DiagonalSize> BuildType; typedef Block<CoefficientsType,1, DiagonalSize> BuildType;
typedef typename ei_meta_if<Conjugate, typedef typename internal::conditional<Conjugate,
CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>,BuildType >, CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>,BuildType >,
BuildType>::ret Type; BuildType>::type Type;
}; };
/** \returns a vector expression of the \a N -th sub or super diagonal */ /** \returns a vector expression of the \a N -th sub or super diagonal */
template<int N> inline typename DiagonalIntReturnType<N>::Type diagonal() template<int N> inline typename DiagonalIntReturnType<N>::Type diagonal()
{ {
return typename DiagonalIntReturnType<N>::BuildType(m_data, supers()-N, std::max(0,N), 1, diagonalLength(N)); return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, std::max(0,N), 1, diagonalLength(N));
} }
/** \returns a vector expression of the \a N -th sub or super diagonal */ /** \returns a vector expression of the \a N -th sub or super diagonal */
template<int N> inline const typename DiagonalIntReturnType<N>::Type diagonal() const template<int N> inline const typename DiagonalIntReturnType<N>::Type diagonal() const
{ {
return typename DiagonalIntReturnType<N>::BuildType(m_data, supers()-N, std::max(0,N), 1, diagonalLength(N)); return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, std::max(0,N), 1, diagonalLength(N));
} }
/** \returns a vector expression of the \a i -th sub or super diagonal */ /** \returns a vector expression of the \a i -th sub or super diagonal */
inline Block<DataType,1,Dynamic> diagonal(Index i) inline Block<CoefficientsType,1,Dynamic> diagonal(Index i)
{ {
ei_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers())); eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers()));
return Block<DataType,1,Dynamic>(m_data, supers()-i, std::max<Index>(0,i), 1, diagonalLength(i)); return Block<CoefficientsType,1,Dynamic>(coeffs(), supers()-i, std::max<Index>(0,i), 1, diagonalLength(i));
} }
/** \returns a vector expression of the \a i -th sub or super diagonal */ /** \returns a vector expression of the \a i -th sub or super diagonal */
inline const Block<DataType,1,Dynamic> diagonal(Index i) const inline const Block<const CoefficientsType,1,Dynamic> diagonal(Index i) const
{ {
ei_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers())); eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers()));
return Block<DataType,1,Dynamic>(m_data, supers()-i, std::max<Index>(0,i), 1, diagonalLength(i)); return Block<const CoefficientsType,1,Dynamic>(coeffs(), supers()-i, std::max<Index>(0,i), 1, diagonalLength(i));
} }
template<typename Dest> inline void evalTo(Dest& dst) const template<typename Dest> inline void evalTo(Dest& dst) const
{ {
dst.resize(rows(),cols()); dst.resize(rows(),cols());
@@ -196,17 +167,153 @@ class BandMatrix : public EigenBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Opt
inline Index diagonalLength(Index i) const inline Index diagonalLength(Index i) const
{ return i<0 ? std::min(cols(),rows()+i) : std::min(rows(),cols()-i); } { return i<0 ? std::min(cols(),rows()+i) : std::min(rows(),cols()-i); }
};
DataType m_data; /**
ei_variable_if_dynamic<Index, Rows> m_rows; * \class BandMatrix
ei_variable_if_dynamic<Index, Supers> m_supers; * \ingroup Core_Module
ei_variable_if_dynamic<Index, Subs> m_subs; *
* \brief Represents a rectangular matrix with a banded storage
*
* \param _Scalar Numeric type, i.e. float, double, int
* \param Rows Number of rows, or \b Dynamic
* \param Cols Number of columns, or \b Dynamic
* \param Supers Number of super diagonal
* \param Subs Number of sub diagonal
* \param _Options A combination of either \b RowMajor or \b ColMajor, and of \b SelfAdjoint
* The former controls \ref TopicStorageOrders "storage order", and defaults to
* column-major. The latter controls whether the matrix represents a selfadjoint
* matrix in which case either Supers or Subs have to be null.
*
* \sa class TridiagonalMatrix
*/
// Compile-time traits of BandMatrix: exposes the scalar and index types, the
// compile-time dimensions, the band widths (Supers/Subs), the user Options and
// the type of the dense matrix used to store the band coefficients.
template<typename _Scalar, int _Rows, int _Cols, int _Supers, int _Subs, int _Options>
struct traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
{
typedef _Scalar Scalar;
typedef Dense StorageKind;
typedef DenseIndex Index;
enum {
CoeffReadCost = NumTraits<Scalar>::ReadCost,
RowsAtCompileTime = _Rows,
ColsAtCompileTime = _Cols,
// The maximum sizes are simply the compile-time sizes themselves.
MaxRowsAtCompileTime = _Rows,
MaxColsAtCompileTime = _Cols,
Flags = LvalueBit,
Supers = _Supers,
Subs = _Subs,
Options = _Options,
// One storage row per diagonal (main diagonal + Supers + Subs) when both
// band widths are known at compile time; Dynamic otherwise.
DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic
};
// Dense matrix holding the band, DataRowsAtCompileTime x ColsAtCompileTime.
// Only the RowMajor bit of Options is forwarded as its storage order.
typedef Matrix<Scalar,DataRowsAtCompileTime,ColsAtCompileTime,Options&RowMajor?RowMajor:ColMajor> CoefficientsType;
};
// Band matrix that owns its coefficients. The band is kept in a dense
// CoefficientsType matrix (m_coeffs) with 1+supers+subs rows and cols columns;
// the logical row count and the band widths are stored separately.
template<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options>
class BandMatrix : public BandMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
{
public:
typedef typename internal::traits<BandMatrix>::Scalar Scalar;
typedef typename internal::traits<BandMatrix>::Index Index;
typedef typename internal::traits<BandMatrix>::CoefficientsType CoefficientsType;
/** Constructs a \a rows x \a cols band matrix with \a supers super diagonals
  * and \a subs sub diagonals. The coefficient storage is sized to
  * (1+supers+subs) x cols; the constructor body does not assign any coefficient. */
inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs)
: m_coeffs(1+supers+subs,cols),
m_rows(rows), m_supers(supers), m_subs(subs)
{
}
/** \returns the number of rows */
inline Index rows() const { return m_rows.value(); }
/** \returns the number of columns (read from the coefficient storage) */
inline Index cols() const { return m_coeffs.cols(); }
/** \returns the number of super diagonals */
inline Index supers() const { return m_supers.value(); }
/** \returns the number of sub diagonals */
inline Index subs() const { return m_subs.value(); }
/** \returns a const reference to the underlying coefficient matrix */
inline const CoefficientsType& coeffs() const { return m_coeffs; }
/** \returns a reference to the underlying coefficient matrix */
inline CoefficientsType& coeffs() { return m_coeffs; }
protected:
// Compact storage of the band: one row per stored diagonal.
CoefficientsType m_coeffs;
// Logical number of rows and the band widths, held in variable_if_dynamic members.
internal::variable_if_dynamic<Index, Rows> m_rows;
internal::variable_if_dynamic<Index, Supers> m_supers;
internal::variable_if_dynamic<Index, Subs> m_subs;
};
// Forward declaration, needed so that the traits specialization below can name the class.
template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
class BandMatrixWrapper;
// Compile-time traits of BandMatrixWrapper. Scalar, StorageKind, Index and
// CoeffReadCost are taken from the wrapped coefficient type; the dimensions,
// band widths and Options come from the wrapper's own template arguments.
template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
struct traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
{
typedef typename _CoefficientsType::Scalar Scalar;
typedef typename _CoefficientsType::StorageKind StorageKind;
typedef typename _CoefficientsType::Index Index;
enum {
CoeffReadCost = internal::traits<_CoefficientsType>::CoeffReadCost,
RowsAtCompileTime = _Rows,
ColsAtCompileTime = _Cols,
MaxRowsAtCompileTime = _Rows,
MaxColsAtCompileTime = _Cols,
Flags = LvalueBit,
Supers = _Supers,
Subs = _Subs,
Options = _Options,
// One storage row per diagonal (main diagonal + Supers + Subs) when both
// band widths are known at compile time; Dynamic otherwise.
DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic
};
// The wrapped type is used as the coefficient storage type directly.
typedef _CoefficientsType CoefficientsType;
};
// Band-matrix view over an existing coefficient matrix. Unlike BandMatrix, the
// coefficients are not owned: m_coeffs is a const reference to the object
// passed to the constructor, so that object must outlive the wrapper.
template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
{
public:
typedef typename internal::traits<BandMatrixWrapper>::Scalar Scalar;
typedef typename internal::traits<BandMatrixWrapper>::CoefficientsType CoefficientsType;
typedef typename internal::traits<BandMatrixWrapper>::Index Index;
/** Wraps \a coeffs as a \a rows x coeffs.cols() band matrix with \a supers
  * super diagonals and \a subs sub diagonals.
  * \note The \a cols argument is not used by the initializer list or the body:
  * cols() reports the column count of \a coeffs, and the consistency check
  * below is commented out. */
inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs)
: m_coeffs(coeffs),
m_rows(rows), m_supers(supers), m_subs(subs)
{
//internal::assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows());
}
/** \returns the number of rows */
inline Index rows() const { return m_rows.value(); }
/** \returns the number of columns (read from the wrapped coefficient matrix) */
inline Index cols() const { return m_coeffs.cols(); }
/** \returns the number of super diagonals */
inline Index supers() const { return m_supers.value(); }
/** \returns the number of sub diagonals */
inline Index subs() const { return m_subs.value(); }
/** \returns a const reference to the wrapped coefficient matrix */
inline const CoefficientsType& coeffs() const { return m_coeffs; }
protected:
// Non-owning reference to the wrapped coefficients.
const CoefficientsType& m_coeffs;
internal::variable_if_dynamic<Index, _Rows> m_rows;
internal::variable_if_dynamic<Index, _Supers> m_supers;
internal::variable_if_dynamic<Index, _Subs> m_subs;
}; };
/** /**
* \class TridiagonalMatrix * \class TridiagonalMatrix
* \ingroup Core_Module
* *
* \brief Represents a tridiagonal matrix * \brief Represents a tridiagonal matrix with a compact banded storage
* *
* \param _Scalar Numeric type, i.e. float, double, int * \param _Scalar Numeric type, i.e. float, double, int
* \param Size Number of rows and cols, or \b Dynamic * \param Size Number of rows and cols, or \b Dynamic
@@ -217,10 +324,10 @@ class BandMatrix : public EigenBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Opt
template<typename Scalar, int Size, int Options> template<typename Scalar, int Size, int Options>
class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor> class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor>
{ {
typedef BandMatrix<Scalar,Size,Size,1,Options&SelfAdjoint?0:1,Options|RowMajor> Base; typedef BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor> Base;
typedef typename Base::Index Index; typedef typename Base::Index Index;
public: public:
TridiagonalMatrix(Index size = Size) : Base(size,size,1,1) {} TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {}
inline typename Base::template DiagonalIntReturnType<1>::Type super() inline typename Base::template DiagonalIntReturnType<1>::Type super()
{ return Base::template diagonal<1>(); } { return Base::template diagonal<1>(); }
@@ -233,4 +340,6 @@ class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint
protected: protected:
}; };
} // end namespace internal
#endif // EIGEN_BANDMATRIX_H #endif // EIGEN_BANDMATRIX_H

View File

@@ -27,6 +27,7 @@
#define EIGEN_BLOCK_H #define EIGEN_BLOCK_H
/** \class Block /** \class Block
* \ingroup Core_Module
* *
* \brief Expression of a fixed-size or dynamic-size block * \brief Expression of a fixed-size or dynamic-size block
* *
@@ -57,60 +58,68 @@
* *
* \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
*/ */
template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess>
struct ei_traits<Block<XprType, BlockRows, BlockCols, HasDirectAccess> > : ei_traits<XprType> namespace internal {
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess>
struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> > : traits<XprType>
{ {
typedef typename ei_traits<XprType>::Scalar Scalar; typedef typename traits<XprType>::Scalar Scalar;
typedef typename ei_traits<XprType>::StorageKind StorageKind; typedef typename traits<XprType>::StorageKind StorageKind;
typedef typename ei_traits<XprType>::XprKind XprKind; typedef typename traits<XprType>::XprKind XprKind;
typedef typename ei_nested<XprType>::type XprTypeNested; typedef typename nested<XprType>::type XprTypeNested;
typedef typename ei_unref<XprTypeNested>::type _XprTypeNested; typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
enum{ enum{
MatrixRows = ei_traits<XprType>::RowsAtCompileTime, MatrixRows = traits<XprType>::RowsAtCompileTime,
MatrixCols = ei_traits<XprType>::ColsAtCompileTime, MatrixCols = traits<XprType>::ColsAtCompileTime,
RowsAtCompileTime = MatrixRows == 0 ? 0 : BlockRows, RowsAtCompileTime = MatrixRows == 0 ? 0 : BlockRows,
ColsAtCompileTime = MatrixCols == 0 ? 0 : BlockCols, ColsAtCompileTime = MatrixCols == 0 ? 0 : BlockCols,
MaxRowsAtCompileTime = BlockRows==0 ? 0 MaxRowsAtCompileTime = BlockRows==0 ? 0
: RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime) : RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime)
: int(ei_traits<XprType>::MaxRowsAtCompileTime), : int(traits<XprType>::MaxRowsAtCompileTime),
MaxColsAtCompileTime = BlockCols==0 ? 0 MaxColsAtCompileTime = BlockCols==0 ? 0
: ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime) : ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
: int(ei_traits<XprType>::MaxColsAtCompileTime), : int(traits<XprType>::MaxColsAtCompileTime),
XprTypeIsRowMajor = (int(ei_traits<XprType>::Flags)&RowMajorBit) != 0, XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
: XprTypeIsRowMajor, : XprTypeIsRowMajor,
HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor), HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),
InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime), InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
InnerStrideAtCompileTime = HasSameStorageOrderAsXprType InnerStrideAtCompileTime = HasSameStorageOrderAsXprType
? int(ei_inner_stride_at_compile_time<XprType>::ret) ? int(inner_stride_at_compile_time<XprType>::ret)
: int(ei_outer_stride_at_compile_time<XprType>::ret), : int(outer_stride_at_compile_time<XprType>::ret),
OuterStrideAtCompileTime = HasSameStorageOrderAsXprType OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
? int(ei_outer_stride_at_compile_time<XprType>::ret) ? int(outer_stride_at_compile_time<XprType>::ret)
: int(ei_inner_stride_at_compile_time<XprType>::ret), : int(inner_stride_at_compile_time<XprType>::ret),
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % ei_packet_traits<Scalar>::size) == 0) MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
&& (InnerStrideAtCompileTime == 1) && (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0, ? PacketAccessBit : 0,
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && ((OuterStrideAtCompileTime % packet_traits<Scalar>::size) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
Flags0 = ei_traits<XprType>::Flags & (HereditaryBits | MaskPacketAccessBit | DirectAccessBit), FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
Flags1 = Flags0 | FlagsLinearAccessBit, FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
Flags = (Flags1 & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0) Flags0 = traits<XprType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
DirectAccessBit |
MaskPacketAccessBit |
MaskAlignedBit),
Flags = Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit
}; };
}; };
}
template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess> class Block template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> class Block
: public ei_dense_xpr_base<Block<XprType, BlockRows, BlockCols, HasDirectAccess> >::type : public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> >::type
{ {
public: public:
typedef typename ei_dense_xpr_base<Block>::type Base; typedef typename internal::dense_xpr_base<Block>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Block) EIGEN_DENSE_PUBLIC_INTERFACE(Block)
class InnerIterator; class InnerIterator;
/** Column or Row constructor /** Column or Row constructor
*/ */
inline Block(const XprType& xpr, Index i) inline Block(XprType& xpr, Index i)
: m_xpr(xpr), : m_xpr(xpr),
// It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime, // It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime,
// and it is a column if and only if BlockRows==XprType::RowsAtCompileTime and BlockCols==1, // and it is a column if and only if BlockRows==XprType::RowsAtCompileTime and BlockCols==1,
@@ -121,33 +130,33 @@ template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess> c
m_blockRows(BlockRows==1 ? 1 : xpr.rows()), m_blockRows(BlockRows==1 ? 1 : xpr.rows()),
m_blockCols(BlockCols==1 ? 1 : xpr.cols()) m_blockCols(BlockCols==1 ? 1 : xpr.cols())
{ {
ei_assert( (i>=0) && ( eigen_assert( (i>=0) && (
((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i<xpr.rows()) ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i<xpr.rows())
||((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && i<xpr.cols()))); ||((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && i<xpr.cols())));
} }
/** Fixed-size constructor /** Fixed-size constructor
*/ */
inline Block(const XprType& xpr, Index startRow, Index startCol) inline Block(XprType& xpr, Index startRow, Index startCol)
: m_xpr(xpr), m_startRow(startRow), m_startCol(startCol), : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
m_blockRows(BlockRows), m_blockCols(BlockCols) m_blockRows(BlockRows), m_blockCols(BlockCols)
{ {
EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE) EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
ei_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows() eigen_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows()
&& startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols()); && startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols());
} }
/** Dynamic-size constructor /** Dynamic-size constructor
*/ */
inline Block(const XprType& xpr, inline Block(XprType& xpr,
Index startRow, Index startCol, Index startRow, Index startCol,
Index blockRows, Index blockCols) Index blockRows, Index blockCols)
: m_xpr(xpr), m_startRow(startRow), m_startCol(startCol), : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
m_blockRows(blockRows), m_blockCols(blockCols) m_blockRows(blockRows), m_blockCols(blockCols)
{ {
ei_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows) eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
&& (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols)); && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
ei_assert(startRow >= 0 && blockRows >= 0 && startRow + blockRows <= xpr.rows() eigen_assert(startRow >= 0 && blockRows >= 0 && startRow + blockRows <= xpr.rows()
&& startCol >= 0 && blockCols >= 0 && startCol + blockCols <= xpr.cols()); && startCol >= 0 && blockCols >= 0 && startCol + blockCols <= xpr.cols());
} }
@@ -158,16 +167,31 @@ template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess> c
inline Scalar& coeffRef(Index row, Index col) inline Scalar& coeffRef(Index row, Index col)
{ {
EIGEN_STATIC_ASSERT_LVALUE(XprType)
return m_xpr.const_cast_derived() return m_xpr.const_cast_derived()
.coeffRef(row + m_startRow.value(), col + m_startCol.value()); .coeffRef(row + m_startRow.value(), col + m_startCol.value());
} }
inline const Scalar& coeffRef(Index row, Index col) const
{
return m_xpr.derived()
.coeffRef(row + m_startRow.value(), col + m_startCol.value());
}
EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
{ {
return m_xpr.coeff(row + m_startRow.value(), col + m_startCol.value()); return m_xpr.coeff(row + m_startRow.value(), col + m_startCol.value());
} }
inline Scalar& coeffRef(Index index) inline Scalar& coeffRef(Index index)
{
EIGEN_STATIC_ASSERT_LVALUE(XprType)
return m_xpr.const_cast_derived()
.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
}
inline const Scalar& coeffRef(Index index) const
{ {
return m_xpr.const_cast_derived() return m_xpr.const_cast_derived()
.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
@@ -221,16 +245,16 @@ template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess> c
protected: protected:
const typename XprType::Nested m_xpr; const typename XprType::Nested m_xpr;
const ei_variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow; const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
const ei_variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol; const internal::variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
const ei_variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows; const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;
const ei_variable_if_dynamic<Index, ColsAtCompileTime> m_blockCols; const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_blockCols;
}; };
/** \internal */ /** \internal */
template<typename XprType, int BlockRows, int BlockCols> template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
class Block<XprType,BlockRows,BlockCols,true> class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
: public MapBase<Block<XprType, BlockRows, BlockCols,true> > : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel, true> >
{ {
public: public:
@@ -241,15 +265,15 @@ class Block<XprType,BlockRows,BlockCols,true>
/** Column or Row constructor /** Column or Row constructor
*/ */
inline Block(const XprType& xpr, Index i) inline Block(XprType& xpr, Index i)
: Base(&xpr.const_cast_derived().coeffRef( : Base(internal::const_cast_ptr(&xpr.coeffRef(
(BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0, (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0,
(BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0), (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)),
BlockRows==1 ? 1 : xpr.rows(), BlockRows==1 ? 1 : xpr.rows(),
BlockCols==1 ? 1 : xpr.cols()), BlockCols==1 ? 1 : xpr.cols()),
m_xpr(xpr) m_xpr(xpr)
{ {
ei_assert( (i>=0) && ( eigen_assert( (i>=0) && (
((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i<xpr.rows()) ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i<xpr.rows())
||((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && i<xpr.cols()))); ||((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && i<xpr.cols())));
init(); init();
@@ -257,25 +281,25 @@ class Block<XprType,BlockRows,BlockCols,true>
/** Fixed-size constructor /** Fixed-size constructor
*/ */
inline Block(const XprType& xpr, Index startRow, Index startCol) inline Block(XprType& xpr, Index startRow, Index startCol)
: Base(&xpr.const_cast_derived().coeffRef(startRow,startCol)), m_xpr(xpr) : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol))), m_xpr(xpr)
{ {
ei_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows() eigen_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows()
&& startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols()); && startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols());
init(); init();
} }
/** Dynamic-size constructor /** Dynamic-size constructor
*/ */
inline Block(const XprType& xpr, inline Block(XprType& xpr,
Index startRow, Index startCol, Index startRow, Index startCol,
Index blockRows, Index blockCols) Index blockRows, Index blockCols)
: Base(&xpr.const_cast_derived().coeffRef(startRow,startCol), blockRows, blockCols), : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol)), blockRows, blockCols),
m_xpr(xpr) m_xpr(xpr)
{ {
ei_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows) eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
&& (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols)); && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
ei_assert(startRow >= 0 && blockRows >= 0 && startRow + blockRows <= xpr.rows() eigen_assert(startRow >= 0 && blockRows >= 0 && startRow + blockRows <= xpr.rows()
&& startCol >= 0 && blockCols >= 0 && startCol + blockCols <= xpr.cols()); && startCol >= 0 && blockCols >= 0 && startCol + blockCols <= xpr.cols());
init(); init();
} }
@@ -283,7 +307,7 @@ class Block<XprType,BlockRows,BlockCols,true>
/** \sa MapBase::innerStride() */ /** \sa MapBase::innerStride() */
inline Index innerStride() const inline Index innerStride() const
{ {
return ei_traits<Block>::HasSameStorageOrderAsXprType return internal::traits<Block>::HasSameStorageOrderAsXprType
? m_xpr.innerStride() ? m_xpr.innerStride()
: m_xpr.outerStride(); : m_xpr.outerStride();
} }
@@ -302,7 +326,7 @@ class Block<XprType,BlockRows,BlockCols,true>
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal used by allowAligned() */ /** \internal used by allowAligned() */
inline Block(const XprType& xpr, const Scalar* data, Index blockRows, Index blockCols) inline Block(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
: Base(data, blockRows, blockCols), m_xpr(xpr) : Base(data, blockRows, blockCols), m_xpr(xpr)
{ {
init(); init();
@@ -312,7 +336,7 @@ class Block<XprType,BlockRows,BlockCols,true>
protected: protected:
void init() void init()
{ {
m_outerStride = ei_traits<Block>::HasSameStorageOrderAsXprType m_outerStride = internal::traits<Block>::HasSameStorageOrderAsXprType
? m_xpr.outerStride() ? m_xpr.outerStride()
: m_xpr.innerStride(); : m_xpr.innerStride();
} }
@@ -321,546 +345,5 @@ class Block<XprType,BlockRows,BlockCols,true>
int m_outerStride; int m_outerStride;
}; };
/** \returns a dynamic-size expression of a block in *this.
*
* \param startRow the first row in the block
* \param startCol the first column in the block
* \param blockRows the number of rows in the block
* \param blockCols the number of columns in the block
*
* Example: \include MatrixBase_block_int_int_int_int.cpp
* Output: \verbinclude MatrixBase_block_int_int_int_int.out
*
* \note Even though the returned expression has dynamic size, in the case
* when it is applied to a fixed-size matrix, it inherits a fixed maximal size,
* which means that evaluating it does not cause a dynamic memory allocation.
*
* \sa class Block, block(Index,Index)
*/
template<typename Derived>
inline Block<Derived> DenseBase<Derived>
::block(Index startRow, Index startCol, Index blockRows, Index blockCols)
{
return Block<Derived>(derived(), startRow, startCol, blockRows, blockCols);
}
/** This is the const version of block(Index,Index,Index,Index). */
template<typename Derived>
inline const Block<Derived> DenseBase<Derived>
::block(Index startRow, Index startCol, Index blockRows, Index blockCols) const
{
return Block<Derived>(derived(), startRow, startCol, blockRows, blockCols);
}
/** \returns a dynamic-size expression of a top-right corner of *this.
*
* \param cRows the number of rows in the corner
* \param cCols the number of columns in the corner
*
* Example: \include MatrixBase_topRightCorner_int_int.cpp
* Output: \verbinclude MatrixBase_topRightCorner_int_int.out
*
* \sa class Block, block(Index,Index,Index,Index)
*/
template<typename Derived>
inline Block<Derived> DenseBase<Derived>
::topRightCorner(Index cRows, Index cCols)
{
return Block<Derived>(derived(), 0, cols() - cCols, cRows, cCols);
}
/** This is the const version of topRightCorner(Index, Index).*/
template<typename Derived>
inline const Block<Derived>
DenseBase<Derived>::topRightCorner(Index cRows, Index cCols) const
{
return Block<Derived>(derived(), 0, cols() - cCols, cRows, cCols);
}
/** \returns an expression of a fixed-size top-right corner of *this.
*
* The template parameters CRows and CCols are the number of rows and columns in the corner.
*
* Example: \include MatrixBase_template_int_int_topRightCorner.cpp
* Output: \verbinclude MatrixBase_template_int_int_topRightCorner.out
*
* \sa class Block, block(Index,Index,Index,Index)
*/
template<typename Derived>
template<int CRows, int CCols>
inline Block<Derived, CRows, CCols>
DenseBase<Derived>::topRightCorner()
{
return Block<Derived, CRows, CCols>(derived(), 0, cols() - CCols);
}
/** This is the const version of topRightCorner<int, int>().*/
template<typename Derived>
template<int CRows, int CCols>
inline const Block<Derived, CRows, CCols>
DenseBase<Derived>::topRightCorner() const
{
return Block<Derived, CRows, CCols>(derived(), 0, cols() - CCols);
}
/** \returns a dynamic-size expression of a top-left corner of *this.
*
* \param cRows the number of rows in the corner
* \param cCols the number of columns in the corner
*
* Example: \include MatrixBase_topLeftCorner_int_int.cpp
* Output: \verbinclude MatrixBase_topLeftCorner_int_int.out
*
* \sa class Block, block(Index,Index,Index,Index)
*/
template<typename Derived>
inline Block<Derived> DenseBase<Derived>
::topLeftCorner(Index cRows, Index cCols)
{
return Block<Derived>(derived(), 0, 0, cRows, cCols);
}
/** This is the const version of topLeftCorner(Index, Index).*/
template<typename Derived>
inline const Block<Derived>
DenseBase<Derived>::topLeftCorner(Index cRows, Index cCols) const
{
return Block<Derived>(derived(), 0, 0, cRows, cCols);
}
/** \returns an expression of a fixed-size top-left corner of *this.
*
* The template parameters CRows and CCols are the number of rows and columns in the corner.
*
* Example: \include MatrixBase_template_int_int_topLeftCorner.cpp
* Output: \verbinclude MatrixBase_template_int_int_topLeftCorner.out
*
* \sa class Block, block(Index,Index,Index,Index)
*/
template<typename Derived>
template<int CRows, int CCols>
inline Block<Derived, CRows, CCols>
DenseBase<Derived>::topLeftCorner()
{
return Block<Derived, CRows, CCols>(derived(), 0, 0);
}
/** This is the const version of topLeftCorner<int, int>().*/
template<typename Derived>
template<int CRows, int CCols>
inline const Block<Derived, CRows, CCols>
DenseBase<Derived>::topLeftCorner() const
{
return Block<Derived, CRows, CCols>(derived(), 0, 0);
}
/** \returns a dynamic-size expression of a bottom-right corner of *this.
*
* \param cRows the number of rows in the corner
* \param cCols the number of columns in the corner
*
* Example: \include MatrixBase_bottomRightCorner_int_int.cpp
* Output: \verbinclude MatrixBase_bottomRightCorner_int_int.out
*
* \sa class Block, block(Index,Index,Index,Index)
*/
template<typename Derived>
inline Block<Derived> DenseBase<Derived>
::bottomRightCorner(Index cRows, Index cCols)
{
return Block<Derived>(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
}
/** This is the const version of bottomRightCorner(Index, Index).*/
template<typename Derived>
inline const Block<Derived>
DenseBase<Derived>::bottomRightCorner(Index cRows, Index cCols) const
{
return Block<Derived>(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
}
/** \returns an expression of a fixed-size bottom-right corner of *this.
*
* The template parameters CRows and CCols are the number of rows and columns in the corner.
*
* Example: \include MatrixBase_template_int_int_bottomRightCorner.cpp
* Output: \verbinclude MatrixBase_template_int_int_bottomRightCorner.out
*
* \sa class Block, block(Index,Index,Index,Index)
*/
template<typename Derived>
template<int CRows, int CCols>
inline Block<Derived, CRows, CCols>
DenseBase<Derived>::bottomRightCorner()
{
return Block<Derived, CRows, CCols>(derived(), rows() - CRows, cols() - CCols);
}
/** This is the const version of bottomRightCorner<int, int>().*/
template<typename Derived>
template<int CRows, int CCols>
inline const Block<Derived, CRows, CCols>
DenseBase<Derived>::bottomRightCorner() const
{
return Block<Derived, CRows, CCols>(derived(), rows() - CRows, cols() - CCols);
}
/** \returns a dynamic-size expression of a bottom-left corner of *this.
*
* \param cRows the number of rows in the corner
* \param cCols the number of columns in the corner
*
* Example: \include MatrixBase_bottomLeftCorner_int_int.cpp
* Output: \verbinclude MatrixBase_bottomLeftCorner_int_int.out
*
* \sa class Block, block(Index,Index,Index,Index)
*/
template<typename Derived>
inline Block<Derived> DenseBase<Derived>
::bottomLeftCorner(Index cRows, Index cCols)
{
return Block<Derived>(derived(), rows() - cRows, 0, cRows, cCols);
}
/** This is the const version of bottomLeftCorner(Index, Index).*/
template<typename Derived>
inline const Block<Derived>
DenseBase<Derived>::bottomLeftCorner(Index cRows, Index cCols) const
{
return Block<Derived>(derived(), rows() - cRows, 0, cRows, cCols);
}
/** \returns an expression of a fixed-size bottom-left corner of *this.
*
* The template parameters CRows and CCols are the number of rows and columns in the corner.
*
* Example: \include MatrixBase_template_int_int_bottomLeftCorner.cpp
* Output: \verbinclude MatrixBase_template_int_int_bottomLeftCorner.out
*
* \sa class Block, block(Index,Index,Index,Index)
*/
template<typename Derived>
template<int CRows, int CCols>
inline Block<Derived, CRows, CCols>
DenseBase<Derived>::bottomLeftCorner()
{
return Block<Derived, CRows, CCols>(derived(), rows() - CRows, 0);
}
/** This is the const version of bottomLeftCorner<int, int>().*/
template<typename Derived>
template<int CRows, int CCols>
inline const Block<Derived, CRows, CCols>
DenseBase<Derived>::bottomLeftCorner() const
{
return Block<Derived, CRows, CCols>(derived(), rows() - CRows, 0);
}
/** \returns a block consisting of the top rows of *this.
*
* \param n the number of rows in the block
*
* Example: \include MatrixBase_topRows_int.cpp
* Output: \verbinclude MatrixBase_topRows_int.out
*
* \sa class Block, block(Index,Index,Index,Index)
*/
template<typename Derived>
inline typename DenseBase<Derived>::RowsBlockXpr DenseBase<Derived>
::topRows(Index n)
{
return RowsBlockXpr(derived(), 0, 0, n, cols());
}
/** This is the const version of topRows(Index).*/
template<typename Derived>
inline const typename DenseBase<Derived>::RowsBlockXpr
DenseBase<Derived>::topRows(Index n) const
{
return RowsBlockXpr(derived(), 0, 0, n, cols());
}
/** \returns a block consisting of the top rows of *this.
*
* \tparam N the number of rows in the block
*
* Example: \include MatrixBase_template_int_topRows.cpp
* Output: \verbinclude MatrixBase_template_int_topRows.out
*
* \sa class Block, block(Index,Index,Index,Index)
*/
template<typename Derived>
template<int N>
inline typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type
DenseBase<Derived>::topRows()
{
return typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type(derived(), 0, 0, N, cols());
}
/** This is the const version of topRows<int>().*/
template<typename Derived>
template<int N>
inline const typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type
DenseBase<Derived>::topRows() const
{
return typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type(derived(), 0, 0, N, cols());
}
/** \returns a block consisting of the bottom rows of *this.
*
* \param n the number of rows in the block
*
* Example: \include MatrixBase_bottomRows_int.cpp
* Output: \verbinclude MatrixBase_bottomRows_int.out
*
* \sa class Block, block(Index,Index,Index,Index)
*/
template<typename Derived>
inline typename DenseBase<Derived>::RowsBlockXpr DenseBase<Derived>
::bottomRows(Index n)
{
return RowsBlockXpr(derived(), rows() - n, 0, n, cols());
}
/** This is the const version of bottomRows(Index).*/
template<typename Derived>
inline const typename DenseBase<Derived>::RowsBlockXpr
DenseBase<Derived>::bottomRows(Index n) const
{
return RowsBlockXpr(derived(), rows() - n, 0, n, cols());
}
/** \returns a block consisting of the bottom rows of *this.
*
* \tparam N the number of rows in the block
*
* Example: \include MatrixBase_template_int_bottomRows.cpp
* Output: \verbinclude MatrixBase_template_int_bottomRows.out
*
* \sa class Block, block(Index,Index,Index,Index)
*/
template<typename Derived>
template<int N>
inline typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type
DenseBase<Derived>::bottomRows()
{
return typename NRowsBlockXpr<N>::Type(derived(), rows() - N, 0, N, cols());
}
/** This is the const version of bottomRows<int>().*/
template<typename Derived>
template<int N>
inline const typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type
DenseBase<Derived>::bottomRows() const
{
return typename NRowsBlockXpr<N>::Type(derived(), rows() - N, 0, N, cols());
}
/** \returns a block consisting of the left columns of *this.
*
* \param n the number of columns in the block
*
* Example: \include MatrixBase_leftCols_int.cpp
* Output: \verbinclude MatrixBase_leftCols_int.out
*
* \sa class Block, block(Index,Index,Index,Index)
*/
template<typename Derived>
inline typename DenseBase<Derived>::ColsBlockXpr DenseBase<Derived>
::leftCols(Index n)
{
return ColsBlockXpr(derived(), 0, 0, rows(), n);
}
/** This is the const version of leftCols(Index).*/
template<typename Derived>
inline const typename DenseBase<Derived>::ColsBlockXpr
DenseBase<Derived>::leftCols(Index n) const
{
return ColsBlockXpr(derived(), 0, 0, rows(), n);
}
/** \returns a block consisting of the left columns of *this.
*
* \tparam N the number of columns in the block
*
* Example: \include MatrixBase_template_int_leftCols.cpp
* Output: \verbinclude MatrixBase_template_int_leftCols.out
*
* \sa class Block, block(Index,Index,Index,Index)
*/
template<typename Derived>
template<int N>
inline typename DenseBase<Derived>::template NColsBlockXpr<N>::Type
DenseBase<Derived>::leftCols()
{
return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), N);
}
/** This is the const version of leftCols<int>().*/
template<typename Derived>
template<int N>
inline const typename DenseBase<Derived>::template NColsBlockXpr<N>::Type
DenseBase<Derived>::leftCols() const
{
return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), N);
}
/** \returns a block consisting of the right columns of *this.
*
* \param n the number of columns in the block
*
* Example: \include MatrixBase_rightCols_int.cpp
* Output: \verbinclude MatrixBase_rightCols_int.out
*
* \sa class Block, block(Index,Index,Index,Index)
*/
template<typename Derived>
inline typename DenseBase<Derived>::ColsBlockXpr DenseBase<Derived>
::rightCols(Index n)
{
return ColsBlockXpr(derived(), 0, cols() - n, rows(), n);
}
/** This is the const version of rightCols(Index).*/
template<typename Derived>
inline const typename DenseBase<Derived>::ColsBlockXpr
DenseBase<Derived>::rightCols(Index n) const
{
return ColsBlockXpr(derived(), 0, cols() - n, rows(), n);
}
/** \returns a block consisting of the right columns of *this.
*
* \tparam N the number of columns in the block
*
* Example: \include MatrixBase_template_int_rightCols.cpp
* Output: \verbinclude MatrixBase_template_int_rightCols.out
*
* \sa class Block, block(Index,Index,Index,Index)
*/
template<typename Derived>
template<int N>
inline typename DenseBase<Derived>::template NColsBlockXpr<N>::Type
DenseBase<Derived>::rightCols()
{
return typename DenseBase<Derived>::template NColsBlockXpr<N>::Type(derived(), 0, cols() - N, rows(), N);
}
/** This is the const version of rightCols<int>().*/
template<typename Derived>
template<int N>
inline const typename DenseBase<Derived>::template NColsBlockXpr<N>::Type
DenseBase<Derived>::rightCols() const
{
return typename DenseBase<Derived>::template NColsBlockXpr<N>::Type(derived(), 0, cols() - N, rows(), N);
}
/** \returns a fixed-size expression of a block in *this.
*
* The template parameters \a BlockRows and \a BlockCols are the number of
* rows and columns in the block.
*
* \param startRow the first row in the block
* \param startCol the first column in the block
*
* Example: \include MatrixBase_block_int_int.cpp
* Output: \verbinclude MatrixBase_block_int_int.out
*
* \note since block is a templated member, the keyword template has to be used
* if the matrix type is also a template parameter: \code m.template block<3,3>(1,1); \endcode
*
* \sa class Block, block(Index,Index,Index,Index)
*/
template<typename Derived>
template<int BlockRows, int BlockCols>
inline Block<Derived, BlockRows, BlockCols>
DenseBase<Derived>::block(Index startRow, Index startCol)
{
return Block<Derived, BlockRows, BlockCols>(derived(), startRow, startCol);
}
/** This is the const version of block<>(Index, Index). */
template<typename Derived>
template<int BlockRows, int BlockCols>
inline const Block<Derived, BlockRows, BlockCols>
DenseBase<Derived>::block(Index startRow, Index startCol) const
{
return Block<Derived, BlockRows, BlockCols>(derived(), startRow, startCol);
}
/** \returns an expression of the \a i-th column of *this. Note that the numbering starts at 0.
*
* Example: \include MatrixBase_col.cpp
* Output: \verbinclude MatrixBase_col.out
*
* \sa row(), class Block */
template<typename Derived>
inline typename DenseBase<Derived>::ColXpr
DenseBase<Derived>::col(Index i)
{
return ColXpr(derived(), i);
}
/** This is the const version of col(). */
template<typename Derived>
inline const typename DenseBase<Derived>::ColXpr
DenseBase<Derived>::col(Index i) const
{
return ColXpr(derived(), i);
}
/** \returns an expression of the \a i-th row of *this. Note that the numbering starts at 0.
*
* Example: \include MatrixBase_row.cpp
* Output: \verbinclude MatrixBase_row.out
*
* \sa col(), class Block */
template<typename Derived>
inline typename DenseBase<Derived>::RowXpr
DenseBase<Derived>::row(Index i)
{
return RowXpr(derived(), i);
}
/** This is the const version of row(). */
template<typename Derived>
inline const typename DenseBase<Derived>::RowXpr
DenseBase<Derived>::row(Index i) const
{
return RowXpr(derived(), i);
}
#endif // EIGEN_BLOCK_H #endif // EIGEN_BLOCK_H

View File

@@ -25,8 +25,10 @@
#ifndef EIGEN_ALLANDANY_H #ifndef EIGEN_ALLANDANY_H
#define EIGEN_ALLANDANY_H #define EIGEN_ALLANDANY_H
namespace internal {
template<typename Derived, int UnrollCount> template<typename Derived, int UnrollCount>
struct ei_all_unroller struct all_unroller
{ {
enum { enum {
col = (UnrollCount-1) / Derived::RowsAtCompileTime, col = (UnrollCount-1) / Derived::RowsAtCompileTime,
@@ -35,24 +37,24 @@ struct ei_all_unroller
inline static bool run(const Derived &mat) inline static bool run(const Derived &mat)
{ {
return ei_all_unroller<Derived, UnrollCount-1>::run(mat) && mat.coeff(row, col); return all_unroller<Derived, UnrollCount-1>::run(mat) && mat.coeff(row, col);
} }
}; };
template<typename Derived> template<typename Derived>
struct ei_all_unroller<Derived, 1> struct all_unroller<Derived, 1>
{ {
inline static bool run(const Derived &mat) { return mat.coeff(0, 0); } inline static bool run(const Derived &mat) { return mat.coeff(0, 0); }
}; };
template<typename Derived> template<typename Derived>
struct ei_all_unroller<Derived, Dynamic> struct all_unroller<Derived, Dynamic>
{ {
inline static bool run(const Derived &) { return false; } inline static bool run(const Derived &) { return false; }
}; };
template<typename Derived, int UnrollCount> template<typename Derived, int UnrollCount>
struct ei_any_unroller struct any_unroller
{ {
enum { enum {
col = (UnrollCount-1) / Derived::RowsAtCompileTime, col = (UnrollCount-1) / Derived::RowsAtCompileTime,
@@ -61,22 +63,24 @@ struct ei_any_unroller
inline static bool run(const Derived &mat) inline static bool run(const Derived &mat)
{ {
return ei_any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col); return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col);
} }
}; };
template<typename Derived> template<typename Derived>
struct ei_any_unroller<Derived, 1> struct any_unroller<Derived, 1>
{ {
inline static bool run(const Derived &mat) { return mat.coeff(0, 0); } inline static bool run(const Derived &mat) { return mat.coeff(0, 0); }
}; };
template<typename Derived> template<typename Derived>
struct ei_any_unroller<Derived, Dynamic> struct any_unroller<Derived, Dynamic>
{ {
inline static bool run(const Derived &) { return false; } inline static bool run(const Derived &) { return false; }
}; };
} // end namespace internal
/** \returns true if all coefficients are true /** \returns true if all coefficients are true
* *
* Example: \include MatrixBase_all.cpp * Example: \include MatrixBase_all.cpp
@@ -94,7 +98,7 @@ inline bool DenseBase<Derived>::all() const
&& SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT && SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
}; };
if(unroll) if(unroll)
return ei_all_unroller<Derived, return internal::all_unroller<Derived,
unroll ? int(SizeAtCompileTime) : Dynamic unroll ? int(SizeAtCompileTime) : Dynamic
>::run(derived()); >::run(derived());
else else
@@ -120,7 +124,7 @@ inline bool DenseBase<Derived>::any() const
&& SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT && SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
}; };
if(unroll) if(unroll)
return ei_any_unroller<Derived, return internal::any_unroller<Derived,
unroll ? int(SizeAtCompileTime) : Dynamic unroll ? int(SizeAtCompileTime) : Dynamic
>::run(derived()); >::run(derived());
else else

View File

@@ -27,6 +27,7 @@
#define EIGEN_COMMAINITIALIZER_H #define EIGEN_COMMAINITIALIZER_H
/** \class CommaInitializer /** \class CommaInitializer
* \ingroup Core_Module
* *
* \brief Helper class used by the comma initializer operator * \brief Helper class used by the comma initializer operator
* *
@@ -63,12 +64,12 @@ struct CommaInitializer
m_row+=m_currentBlockRows; m_row+=m_currentBlockRows;
m_col = 0; m_col = 0;
m_currentBlockRows = 1; m_currentBlockRows = 1;
ei_assert(m_row<m_xpr.rows() eigen_assert(m_row<m_xpr.rows()
&& "Too many rows passed to comma initializer (operator<<)"); && "Too many rows passed to comma initializer (operator<<)");
} }
ei_assert(m_col<m_xpr.cols() eigen_assert(m_col<m_xpr.cols()
&& "Too many coefficients passed to comma initializer (operator<<)"); && "Too many coefficients passed to comma initializer (operator<<)");
ei_assert(m_currentBlockRows==1); eigen_assert(m_currentBlockRows==1);
m_xpr.coeffRef(m_row, m_col++) = s; m_xpr.coeffRef(m_row, m_col++) = s;
return *this; return *this;
} }
@@ -82,12 +83,12 @@ struct CommaInitializer
m_row+=m_currentBlockRows; m_row+=m_currentBlockRows;
m_col = 0; m_col = 0;
m_currentBlockRows = other.rows(); m_currentBlockRows = other.rows();
ei_assert(m_row+m_currentBlockRows<=m_xpr.rows() eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows()
&& "Too many rows passed to comma initializer (operator<<)"); && "Too many rows passed to comma initializer (operator<<)");
} }
ei_assert(m_col<m_xpr.cols() eigen_assert(m_col<m_xpr.cols()
&& "Too many coefficients passed to comma initializer (operator<<)"); && "Too many coefficients passed to comma initializer (operator<<)");
ei_assert(m_currentBlockRows==other.rows()); eigen_assert(m_currentBlockRows==other.rows());
if (OtherDerived::SizeAtCompileTime != Dynamic) if (OtherDerived::SizeAtCompileTime != Dynamic)
m_xpr.template block<OtherDerived::RowsAtCompileTime != Dynamic ? OtherDerived::RowsAtCompileTime : 1, m_xpr.template block<OtherDerived::RowsAtCompileTime != Dynamic ? OtherDerived::RowsAtCompileTime : 1,
OtherDerived::ColsAtCompileTime != Dynamic ? OtherDerived::ColsAtCompileTime : 1> OtherDerived::ColsAtCompileTime != Dynamic ? OtherDerived::ColsAtCompileTime : 1>
@@ -100,7 +101,7 @@ struct CommaInitializer
inline ~CommaInitializer() inline ~CommaInitializer()
{ {
ei_assert((m_row+m_currentBlockRows) == m_xpr.rows() eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows()
&& m_col == m_xpr.cols() && m_col == m_xpr.cols()
&& "Too few coefficients passed to comma initializer (operator<<)"); && "Too few coefficients passed to comma initializer (operator<<)");
} }

View File

@@ -27,6 +27,7 @@
#define EIGEN_CWISE_BINARY_OP_H #define EIGEN_CWISE_BINARY_OP_H
/** \class CwiseBinaryOp /** \class CwiseBinaryOp
* \ingroup Core_Module
* *
* \brief Generic expression where a coefficient-wise binary operator is applied to two expressions * \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
* *
@@ -44,108 +45,115 @@
* *
* \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp * \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
*/ */
namespace internal {
template<typename BinaryOp, typename Lhs, typename Rhs> template<typename BinaryOp, typename Lhs, typename Rhs>
struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
{ {
// we must not inherit from ei_traits<Lhs> since it has // we must not inherit from traits<Lhs> since it has
// the potential to cause problems with MSVC // the potential to cause problems with MSVC
typedef typename ei_cleantype<Lhs>::type Ancestor; typedef typename remove_all<Lhs>::type Ancestor;
typedef typename ei_traits<Ancestor>::XprKind XprKind; typedef typename traits<Ancestor>::XprKind XprKind;
enum { enum {
RowsAtCompileTime = ei_traits<Ancestor>::RowsAtCompileTime, RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,
ColsAtCompileTime = ei_traits<Ancestor>::ColsAtCompileTime, ColsAtCompileTime = traits<Ancestor>::ColsAtCompileTime,
MaxRowsAtCompileTime = ei_traits<Ancestor>::MaxRowsAtCompileTime, MaxRowsAtCompileTime = traits<Ancestor>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = ei_traits<Ancestor>::MaxColsAtCompileTime MaxColsAtCompileTime = traits<Ancestor>::MaxColsAtCompileTime
}; };
// even though we require Lhs and Rhs to have the same scalar type (see CwiseBinaryOp constructor), // even though we require Lhs and Rhs to have the same scalar type (see CwiseBinaryOp constructor),
// we still want to handle the case when the result type is different. // we still want to handle the case when the result type is different.
typedef typename ei_result_of< typedef typename result_of<
BinaryOp( BinaryOp(
typename Lhs::Scalar, typename Lhs::Scalar,
typename Rhs::Scalar typename Rhs::Scalar
) )
>::type Scalar; >::type Scalar;
typedef typename ei_promote_storage_type<typename ei_traits<Lhs>::StorageKind, typedef typename promote_storage_type<typename traits<Lhs>::StorageKind,
typename ei_traits<Rhs>::StorageKind>::ret StorageKind; typename traits<Rhs>::StorageKind>::ret StorageKind;
typedef typename ei_promote_index_type<typename ei_traits<Lhs>::Index, typedef typename promote_index_type<typename traits<Lhs>::Index,
typename ei_traits<Rhs>::Index>::type Index; typename traits<Rhs>::Index>::type Index;
typedef typename Lhs::Nested LhsNested; typedef typename Lhs::Nested LhsNested;
typedef typename Rhs::Nested RhsNested; typedef typename Rhs::Nested RhsNested;
typedef typename ei_unref<LhsNested>::type _LhsNested; typedef typename remove_reference<LhsNested>::type _LhsNested;
typedef typename ei_unref<RhsNested>::type _RhsNested; typedef typename remove_reference<RhsNested>::type _RhsNested;
enum { enum {
LhsCoeffReadCost = _LhsNested::CoeffReadCost, LhsCoeffReadCost = _LhsNested::CoeffReadCost,
RhsCoeffReadCost = _RhsNested::CoeffReadCost, RhsCoeffReadCost = _RhsNested::CoeffReadCost,
LhsFlags = _LhsNested::Flags, LhsFlags = _LhsNested::Flags,
RhsFlags = _RhsNested::Flags, RhsFlags = _RhsNested::Flags,
SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit), StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit),
Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
HereditaryBits HereditaryBits
| (int(LhsFlags) & int(RhsFlags) & | (int(LhsFlags) & int(RhsFlags) &
( AlignedBit ( AlignedBit
| (StorageOrdersAgree ? LinearAccessBit : 0) | (StorageOrdersAgree ? LinearAccessBit : 0)
| (ei_functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree ? PacketAccessBit : 0) | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
) )
) )
), ),
Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits<BinaryOp>::Cost CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits<BinaryOp>::Cost
}; };
}; };
} // end namespace internal
// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor
// that would take two operands of different types. If there were such an example, then this check should be
// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as
// currently they take only one typename Scalar template parameter.
// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
// add together a float matrix and a double matrix.
#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
EIGEN_STATIC_ASSERT((internal::functor_allows_mixing_real_and_complex<BINOP>::ret \
? int(internal::is_same<typename NumTraits<LHS>::Real, typename NumTraits<RHS>::Real>::value) \
: int(internal::is_same<LHS, RHS>::value)), \
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind> template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
class CwiseBinaryOpImpl; class CwiseBinaryOpImpl;
template<typename BinaryOp, typename Lhs, typename Rhs> template<typename BinaryOp, typename Lhs, typename Rhs>
class CwiseBinaryOp : ei_no_assignment_operator, class CwiseBinaryOp : internal::no_assignment_operator,
public CwiseBinaryOpImpl< public CwiseBinaryOpImpl<
BinaryOp, Lhs, Rhs, BinaryOp, Lhs, Rhs,
typename ei_promote_storage_type<typename ei_traits<Lhs>::StorageKind, typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
typename ei_traits<Rhs>::StorageKind>::ret> typename internal::traits<Rhs>::StorageKind>::ret>
{ {
public: public:
typedef typename CwiseBinaryOpImpl< typedef typename CwiseBinaryOpImpl<
BinaryOp, Lhs, Rhs, BinaryOp, Lhs, Rhs,
typename ei_promote_storage_type<typename ei_traits<Lhs>::StorageKind, typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
typename ei_traits<Rhs>::StorageKind>::ret>::Base Base; typename internal::traits<Rhs>::StorageKind>::ret>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp) EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp)
typedef typename ei_nested<Lhs>::type LhsNested; typedef typename internal::nested<Lhs>::type LhsNested;
typedef typename ei_nested<Rhs>::type RhsNested; typedef typename internal::nested<Rhs>::type RhsNested;
typedef typename ei_unref<LhsNested>::type _LhsNested; typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
typedef typename ei_unref<RhsNested>::type _RhsNested; typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& lhs, const Rhs& rhs, const BinaryOp& func = BinaryOp()) EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& lhs, const Rhs& rhs, const BinaryOp& func = BinaryOp())
: m_lhs(lhs), m_rhs(rhs), m_functor(func) : m_lhs(lhs), m_rhs(rhs), m_functor(func)
{ {
// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar);
// that would take two operands of different types. If there were such an example, then this check should be
// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as
// currently they take only one typename Scalar template parameter.
// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
// add together a float matrix and a double matrix.
EIGEN_STATIC_ASSERT((ei_functor_allows_mixing_real_and_complex<BinaryOp>::ret
? int(ei_is_same_type<typename Lhs::RealScalar, typename Rhs::RealScalar>::ret)
: int(ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret)),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
// require the sizes to match // require the sizes to match
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs) EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs)
ei_assert(lhs.rows() == rhs.rows() && lhs.cols() == rhs.cols()); eigen_assert(lhs.rows() == rhs.rows() && lhs.cols() == rhs.cols());
} }
EIGEN_STRONG_INLINE Index rows() const { EIGEN_STRONG_INLINE Index rows() const {
// return the fixed size type if available to enable compile time optimizations // return the fixed size type if available to enable compile time optimizations
if (ei_traits<typename ei_cleantype<LhsNested>::type>::RowsAtCompileTime==Dynamic) if (internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic)
return m_rhs.rows(); return m_rhs.rows();
else else
return m_lhs.rows(); return m_lhs.rows();
} }
EIGEN_STRONG_INLINE Index cols() const { EIGEN_STRONG_INLINE Index cols() const {
// return the fixed size type if available to enable compile time optimizations // return the fixed size type if available to enable compile time optimizations
if (ei_traits<typename ei_cleantype<LhsNested>::type>::ColsAtCompileTime==Dynamic) if (internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic)
return m_rhs.cols(); return m_rhs.cols();
else else
return m_lhs.cols(); return m_lhs.cols();
@@ -166,12 +174,12 @@ class CwiseBinaryOp : ei_no_assignment_operator,
template<typename BinaryOp, typename Lhs, typename Rhs> template<typename BinaryOp, typename Lhs, typename Rhs>
class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Dense> class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Dense>
: public ei_dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type : public internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
{ {
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> Derived; typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> Derived;
public: public:
typedef typename ei_dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base; typedef typename internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE( Derived ) EIGEN_DENSE_PUBLIC_INTERFACE( Derived )
EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
@@ -210,8 +218,8 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived & EIGEN_STRONG_INLINE Derived &
MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other) MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
{ {
SelfCwiseBinaryOp<ei_scalar_difference_op<Scalar>, Derived> tmp(derived()); SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other; tmp = other.derived();
return derived(); return derived();
} }
@@ -224,7 +232,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived & EIGEN_STRONG_INLINE Derived &
MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other) MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
{ {
SelfCwiseBinaryOp<ei_scalar_sum_op<Scalar>, Derived> tmp(derived()); SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
tmp = other.derived(); tmp = other.derived();
return derived(); return derived();
} }

View File

@@ -26,6 +26,7 @@
#define EIGEN_CWISE_NULLARY_OP_H #define EIGEN_CWISE_NULLARY_OP_H
/** \class CwiseNullaryOp /** \class CwiseNullaryOp
* \ingroup Core_Module
* *
* \brief Generic expression of a matrix where all coefficients are defined by a functor * \brief Generic expression of a matrix where all coefficients are defined by a functor
* *
@@ -41,32 +42,35 @@
* *
* \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr() * \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr()
*/ */
namespace internal {
template<typename NullaryOp, typename PlainObjectType> template<typename NullaryOp, typename PlainObjectType>
struct ei_traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : ei_traits<PlainObjectType> struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
{ {
enum { enum {
Flags = (ei_traits<PlainObjectType>::Flags Flags = (traits<PlainObjectType>::Flags
& ( HereditaryBits & ( HereditaryBits
| (ei_functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0) | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
| (ei_functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0))) | (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
| (ei_functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit), | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit),
CoeffReadCost = ei_functor_traits<NullaryOp>::Cost CoeffReadCost = functor_traits<NullaryOp>::Cost
}; };
}; };
}
template<typename NullaryOp, typename PlainObjectType> template<typename NullaryOp, typename PlainObjectType>
class CwiseNullaryOp : ei_no_assignment_operator, class CwiseNullaryOp : internal::no_assignment_operator,
public ei_dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type
{ {
public: public:
typedef typename ei_dense_xpr_base<CwiseNullaryOp>::type Base; typedef typename internal::dense_xpr_base<CwiseNullaryOp>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp) EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)
CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp()) CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp())
: m_rows(rows), m_cols(cols), m_functor(func) : m_rows(rows), m_cols(cols), m_functor(func)
{ {
ei_assert(rows >= 0 eigen_assert(rows >= 0
&& (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols >= 0 && cols >= 0
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)); && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
@@ -98,8 +102,8 @@ class CwiseNullaryOp : ei_no_assignment_operator,
} }
protected: protected:
const ei_variable_if_dynamic<Index, RowsAtCompileTime> m_rows; const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
const ei_variable_if_dynamic<Index, ColsAtCompileTime> m_cols; const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
const NullaryOp m_functor; const NullaryOp m_functor;
}; };
@@ -184,7 +188,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value) DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
{ {
return DenseBase<Derived>::NullaryExpr(rows, cols, ei_scalar_constant_op<Scalar>(value)); return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_constant_op<Scalar>(value));
} }
/** \returns an expression of a constant matrix of value \a value /** \returns an expression of a constant matrix of value \a value
@@ -206,7 +210,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Constant(Index size, const Scalar& value) DenseBase<Derived>::Constant(Index size, const Scalar& value)
{ {
return DenseBase<Derived>::NullaryExpr(size, ei_scalar_constant_op<Scalar>(value)); return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value));
} }
/** \returns an expression of a constant matrix of value \a value /** \returns an expression of a constant matrix of value \a value
@@ -223,7 +227,7 @@ EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Constant(const Scalar& value) DenseBase<Derived>::Constant(const Scalar& value)
{ {
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
return DenseBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, ei_scalar_constant_op<Scalar>(value)); return DenseBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op<Scalar>(value));
} }
/** /**
@@ -239,14 +243,27 @@ DenseBase<Derived>::Constant(const Scalar& value)
* Example: \include DenseBase_LinSpaced_seq.cpp * Example: \include DenseBase_LinSpaced_seq.cpp
* Output: \verbinclude DenseBase_LinSpaced_seq.out * Output: \verbinclude DenseBase_LinSpaced_seq.out
* *
* \sa setLinSpaced(const Scalar&,const Scalar&,Index), LinSpaced(Scalar,Scalar,Index), CwiseNullaryOp * \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Index,Scalar,Scalar), CwiseNullaryOp
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high, Index size) DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return DenseBase<Derived>::NullaryExpr(size, ei_linspaced_op<Scalar,false>(low,high,size)); return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,false>(low,high,size));
}
/**
* \copydoc DenseBase::LinSpaced(Sequential_t, Index, const Scalar&, const Scalar&)
* Special version for fixed size types which does not require the size parameter.
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,false>(low,high,Derived::SizeAtCompileTime));
} }
/** /**
@@ -259,14 +276,27 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
* Example: \include DenseBase_LinSpaced.cpp * Example: \include DenseBase_LinSpaced.cpp
* Output: \verbinclude DenseBase_LinSpaced.out * Output: \verbinclude DenseBase_LinSpaced.out
* *
* \sa setLinSpaced(const Scalar&,const Scalar&,Index), LinSpaced(Sequential_t,const Scalar&,const Scalar&,Index), CwiseNullaryOp * \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Sequential_t,Index,const Scalar&,const Scalar&), CwiseNullaryOp
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high, Index size) DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return DenseBase<Derived>::NullaryExpr(size, ei_linspaced_op<Scalar,true>(low,high,size)); return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,true>(low,high,size));
}
/**
* \copydoc DenseBase::LinSpaced(Index, const Scalar&, const Scalar&)
* Special version for fixed size types which does not require the size parameter.
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,true>(low,high,Derived::SizeAtCompileTime));
} }
/** \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */ /** \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
@@ -276,7 +306,7 @@ bool DenseBase<Derived>::isApproxToConstant
{ {
for(Index j = 0; j < cols(); ++j) for(Index j = 0; j < cols(); ++j)
for(Index i = 0; i < rows(); ++i) for(Index i = 0; i < rows(); ++i)
if(!ei_isApprox(this->coeff(i, j), value, prec)) if(!internal::isApprox(this->coeff(i, j), value, prec))
return false; return false;
return true; return true;
} }
@@ -322,7 +352,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& value
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& EIGEN_STRONG_INLINE Derived&
DenseStorageBase<Derived>::setConstant(Index size, const Scalar& value) PlainObjectBase<Derived>::setConstant(Index size, const Scalar& value)
{ {
resize(size); resize(size);
return setConstant(value); return setConstant(value);
@@ -332,6 +362,7 @@ DenseStorageBase<Derived>::setConstant(Index size, const Scalar& value)
* *
* \param rows the new number of rows * \param rows the new number of rows
* \param cols the new number of columns * \param cols the new number of columns
* \param value the value to which all coefficients are set
* *
* Example: \include Matrix_setConstant_int_int.cpp * Example: \include Matrix_setConstant_int_int.cpp
* Output: \verbinclude Matrix_setConstant_int_int.out * Output: \verbinclude Matrix_setConstant_int_int.out
@@ -340,7 +371,7 @@ DenseStorageBase<Derived>::setConstant(Index size, const Scalar& value)
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& EIGEN_STRONG_INLINE Derived&
DenseStorageBase<Derived>::setConstant(Index rows, Index cols, const Scalar& value) PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& value)
{ {
resize(rows, cols); resize(rows, cols);
return setConstant(value); return setConstant(value);
@@ -359,10 +390,10 @@ DenseStorageBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val
* \sa CwiseNullaryOp * \sa CwiseNullaryOp
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high, Index size) EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index size, const Scalar& low, const Scalar& high)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return derived() = Derived::NullaryExpr(size, ei_linspaced_op<Scalar,false>(low,high,size)); return derived() = Derived::NullaryExpr(size, internal::linspaced_op<Scalar,false>(low,high,size));
} }
// zero: // zero:
@@ -441,7 +472,7 @@ bool DenseBase<Derived>::isZero(RealScalar prec) const
{ {
for(Index j = 0; j < cols(); ++j) for(Index j = 0; j < cols(); ++j)
for(Index i = 0; i < rows(); ++i) for(Index i = 0; i < rows(); ++i)
if(!ei_isMuchSmallerThan(this->coeff(i, j), static_cast<Scalar>(1), prec)) if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<Scalar>(1), prec))
return false; return false;
return true; return true;
} }
@@ -470,7 +501,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& EIGEN_STRONG_INLINE Derived&
DenseStorageBase<Derived>::setZero(Index size) PlainObjectBase<Derived>::setZero(Index size)
{ {
resize(size); resize(size);
return setConstant(Scalar(0)); return setConstant(Scalar(0));
@@ -488,7 +519,7 @@ DenseStorageBase<Derived>::setZero(Index size)
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& EIGEN_STRONG_INLINE Derived&
DenseStorageBase<Derived>::setZero(Index rows, Index cols) PlainObjectBase<Derived>::setZero(Index rows, Index cols)
{ {
resize(rows, cols); resize(rows, cols);
return setConstant(Scalar(0)); return setConstant(Scalar(0));
@@ -596,7 +627,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& EIGEN_STRONG_INLINE Derived&
DenseStorageBase<Derived>::setOnes(Index size) PlainObjectBase<Derived>::setOnes(Index size)
{ {
resize(size); resize(size);
return setConstant(Scalar(1)); return setConstant(Scalar(1));
@@ -614,7 +645,7 @@ DenseStorageBase<Derived>::setOnes(Index size)
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& EIGEN_STRONG_INLINE Derived&
DenseStorageBase<Derived>::setOnes(Index rows, Index cols) PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
{ {
resize(rows, cols); resize(rows, cols);
return setConstant(Scalar(1)); return setConstant(Scalar(1));
@@ -640,7 +671,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
MatrixBase<Derived>::Identity(Index rows, Index cols) MatrixBase<Derived>::Identity(Index rows, Index cols)
{ {
return DenseBase<Derived>::NullaryExpr(rows, cols, ei_scalar_identity_op<Scalar>()); return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());
} }
/** \returns an expression of the identity matrix (not necessarily square). /** \returns an expression of the identity matrix (not necessarily square).
@@ -658,7 +689,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
MatrixBase<Derived>::Identity() MatrixBase<Derived>::Identity()
{ {
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
return MatrixBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, ei_scalar_identity_op<Scalar>()); return MatrixBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_identity_op<Scalar>());
} }
/** \returns true if *this is approximately equal to the identity matrix /** \returns true if *this is approximately equal to the identity matrix
@@ -680,12 +711,12 @@ bool MatrixBase<Derived>::isIdentity
{ {
if(i == j) if(i == j)
{ {
if(!ei_isApprox(this->coeff(i, j), static_cast<Scalar>(1), prec)) if(!internal::isApprox(this->coeff(i, j), static_cast<Scalar>(1), prec))
return false; return false;
} }
else else
{ {
if(!ei_isMuchSmallerThan(this->coeff(i, j), static_cast<RealScalar>(1), prec)) if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<RealScalar>(1), prec))
return false; return false;
} }
} }
@@ -693,8 +724,10 @@ bool MatrixBase<Derived>::isIdentity
return true; return true;
} }
namespace internal {
template<typename Derived, bool Big = (Derived::SizeAtCompileTime>=16)> template<typename Derived, bool Big = (Derived::SizeAtCompileTime>=16)>
struct ei_setIdentity_impl struct setIdentity_impl
{ {
static EIGEN_STRONG_INLINE Derived& run(Derived& m) static EIGEN_STRONG_INLINE Derived& run(Derived& m)
{ {
@@ -703,7 +736,7 @@ struct ei_setIdentity_impl
}; };
template<typename Derived> template<typename Derived>
struct ei_setIdentity_impl<Derived, true> struct setIdentity_impl<Derived, true>
{ {
typedef typename Derived::Index Index; typedef typename Derived::Index Index;
static EIGEN_STRONG_INLINE Derived& run(Derived& m) static EIGEN_STRONG_INLINE Derived& run(Derived& m)
@@ -715,6 +748,8 @@ struct ei_setIdentity_impl<Derived, true>
} }
}; };
} // end namespace internal
/** Writes the identity expression (not necessarily square) into *this. /** Writes the identity expression (not necessarily square) into *this.
* *
* Example: \include MatrixBase_setIdentity.cpp * Example: \include MatrixBase_setIdentity.cpp
@@ -725,7 +760,7 @@ struct ei_setIdentity_impl<Derived, true>
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity() EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
{ {
return ei_setIdentity_impl<Derived>::run(derived()); return internal::setIdentity_impl<Derived>::run(derived());
} }
/** \brief Resizes to the given size, and writes the identity expression (not necessarily square) into *this. /** \brief Resizes to the given size, and writes the identity expression (not necessarily square) into *this.

View File

@@ -27,6 +27,7 @@
#define EIGEN_CWISE_UNARY_OP_H #define EIGEN_CWISE_UNARY_OP_H
/** \class CwiseUnaryOp /** \class CwiseUnaryOp
* \ingroup Core_Module
* *
* \brief Generic expression where a coefficient-wise unary operator is applied to an expression * \brief Generic expression where a coefficient-wise unary operator is applied to an expression
* *
@@ -44,33 +45,36 @@
* *
* \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
*/ */
namespace internal {
template<typename UnaryOp, typename XprType> template<typename UnaryOp, typename XprType>
struct ei_traits<CwiseUnaryOp<UnaryOp, XprType> > struct traits<CwiseUnaryOp<UnaryOp, XprType> >
: ei_traits<XprType> : traits<XprType>
{ {
typedef typename ei_result_of< typedef typename result_of<
UnaryOp(typename XprType::Scalar) UnaryOp(typename XprType::Scalar)
>::type Scalar; >::type Scalar;
typedef typename XprType::Nested XprTypeNested; typedef typename XprType::Nested XprTypeNested;
typedef typename ei_unref<XprTypeNested>::type _XprTypeNested; typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
enum { enum {
Flags = _XprTypeNested::Flags & ( Flags = _XprTypeNested::Flags & (
HereditaryBits | LinearAccessBit | AlignedBit HereditaryBits | LinearAccessBit | AlignedBit
| (ei_functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)), | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
CoeffReadCost = _XprTypeNested::CoeffReadCost + ei_functor_traits<UnaryOp>::Cost CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits<UnaryOp>::Cost
}; };
}; };
}
template<typename UnaryOp, typename XprType, typename StorageKind> template<typename UnaryOp, typename XprType, typename StorageKind>
class CwiseUnaryOpImpl; class CwiseUnaryOpImpl;
template<typename UnaryOp, typename XprType> template<typename UnaryOp, typename XprType>
class CwiseUnaryOp : ei_no_assignment_operator, class CwiseUnaryOp : internal::no_assignment_operator,
public CwiseUnaryOpImpl<UnaryOp, XprType, typename ei_traits<XprType>::StorageKind> public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>
{ {
public: public:
typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename ei_traits<XprType>::StorageKind>::Base Base; typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp) EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
@@ -83,11 +87,11 @@ class CwiseUnaryOp : ei_no_assignment_operator,
const UnaryOp& functor() const { return m_functor; } const UnaryOp& functor() const { return m_functor; }
/** \returns the nested expression */ /** \returns the nested expression */
const typename ei_cleantype<typename XprType::Nested>::type& const typename internal::remove_all<typename XprType::Nested>::type&
nestedExpression() const { return m_xpr; } nestedExpression() const { return m_xpr; }
/** \returns the nested expression */ /** \returns the nested expression */
typename ei_cleantype<typename XprType::Nested>::type& typename internal::remove_all<typename XprType::Nested>::type&
nestedExpression() { return m_xpr.const_cast_derived(); } nestedExpression() { return m_xpr.const_cast_derived(); }
protected: protected:
@@ -99,12 +103,12 @@ class CwiseUnaryOp : ei_no_assignment_operator,
// It can be used for any expression types implementing the dense concept. // It can be used for any expression types implementing the dense concept.
template<typename UnaryOp, typename XprType> template<typename UnaryOp, typename XprType>
class CwiseUnaryOpImpl<UnaryOp,XprType,Dense> class CwiseUnaryOpImpl<UnaryOp,XprType,Dense>
: public ei_dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type : public internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
{ {
public: public:
typedef CwiseUnaryOp<UnaryOp, XprType> Derived; typedef CwiseUnaryOp<UnaryOp, XprType> Derived;
typedef typename ei_dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base; typedef typename internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Derived) EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const

View File

@@ -26,6 +26,7 @@
#define EIGEN_CWISE_UNARY_VIEW_H #define EIGEN_CWISE_UNARY_VIEW_H
/** \class CwiseUnaryView /** \class CwiseUnaryView
* \ingroup Core_Module
* *
* \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
* *
@@ -37,39 +38,42 @@
* *
* \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
*/ */
namespace internal {
template<typename ViewOp, typename MatrixType> template<typename ViewOp, typename MatrixType>
struct ei_traits<CwiseUnaryView<ViewOp, MatrixType> > struct traits<CwiseUnaryView<ViewOp, MatrixType> >
: ei_traits<MatrixType> : traits<MatrixType>
{ {
typedef typename ei_result_of< typedef typename result_of<
ViewOp(typename ei_traits<MatrixType>::Scalar) ViewOp(typename traits<MatrixType>::Scalar)
>::type Scalar; >::type Scalar;
typedef typename MatrixType::Nested MatrixTypeNested; typedef typename MatrixType::Nested MatrixTypeNested;
typedef typename ei_cleantype<MatrixTypeNested>::type _MatrixTypeNested; typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
enum { enum {
Flags = (ei_traits<_MatrixTypeNested>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)), Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)),
CoeffReadCost = ei_traits<_MatrixTypeNested>::CoeffReadCost + ei_functor_traits<ViewOp>::Cost, CoeffReadCost = traits<_MatrixTypeNested>::CoeffReadCost + functor_traits<ViewOp>::Cost,
MatrixTypeInnerStride = ei_inner_stride_at_compile_time<MatrixType>::ret, MatrixTypeInnerStride = inner_stride_at_compile_time<MatrixType>::ret,
// need to cast the sizeof's from size_t to int explicitly, otherwise: // need to cast the sizeof's from size_t to int explicitly, otherwise:
// "error: no integral type can represent all of the enumerator values // "error: no integral type can represent all of the enumerator values
InnerStrideAtCompileTime = MatrixTypeInnerStride == Dynamic InnerStrideAtCompileTime = MatrixTypeInnerStride == Dynamic
? int(Dynamic) ? int(Dynamic)
: int(MatrixTypeInnerStride) : int(MatrixTypeInnerStride)
* int(sizeof(typename ei_traits<MatrixType>::Scalar) / sizeof(Scalar)), * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)),
OuterStrideAtCompileTime = ei_outer_stride_at_compile_time<MatrixType>::ret OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
}; };
}; };
}
template<typename ViewOp, typename MatrixType, typename StorageKind> template<typename ViewOp, typename MatrixType, typename StorageKind>
class CwiseUnaryViewImpl; class CwiseUnaryViewImpl;
template<typename ViewOp, typename MatrixType> template<typename ViewOp, typename MatrixType>
class CwiseUnaryView : ei_no_assignment_operator, class CwiseUnaryView : internal::no_assignment_operator,
public CwiseUnaryViewImpl<ViewOp, MatrixType, typename ei_traits<MatrixType>::StorageKind> public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind>
{ {
public: public:
typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename ei_traits<MatrixType>::StorageKind>::Base Base; typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView) EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp()) inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp())
@@ -84,33 +88,33 @@ class CwiseUnaryView : ei_no_assignment_operator,
const ViewOp& functor() const { return m_functor; } const ViewOp& functor() const { return m_functor; }
/** \returns the nested expression */ /** \returns the nested expression */
const typename ei_cleantype<typename MatrixType::Nested>::type& const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const { return m_matrix; } nestedExpression() const { return m_matrix; }
/** \returns the nested expression */ /** \returns the nested expression */
typename ei_cleantype<typename MatrixType::Nested>::type& typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() { return m_matrix.const_cast_derived(); } nestedExpression() { return m_matrix.const_cast_derived(); }
protected: protected:
// FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC // FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC
const typename ei_nested<MatrixType>::type m_matrix; const typename internal::nested<MatrixType>::type m_matrix;
ViewOp m_functor; ViewOp m_functor;
}; };
template<typename ViewOp, typename MatrixType> template<typename ViewOp, typename MatrixType>
class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense> class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
: public ei_dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type : public internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type
{ {
public: public:
typedef CwiseUnaryView<ViewOp, MatrixType> Derived; typedef CwiseUnaryView<ViewOp, MatrixType> Derived;
typedef typename ei_dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type Base; typedef typename internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Derived) EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
inline Index innerStride() const inline Index innerStride() const
{ {
return derived().nestedExpression().innerStride() * sizeof(typename ei_traits<MatrixType>::Scalar) / sizeof(Scalar); return derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
} }
inline Index outerStride() const inline Index outerStride() const

View File

@@ -27,33 +27,44 @@
#define EIGEN_DENSEBASE_H #define EIGEN_DENSEBASE_H
/** \class DenseBase /** \class DenseBase
* \ingroup Core_Module
* *
* \brief Base class for all dense matrices, vectors, and arrays * \brief Base class for all dense matrices, vectors, and arrays
* *
* This class is the base that is inherited by all dense objects (matrix, vector, arrays, * This class is the base that is inherited by all dense objects (matrix, vector, arrays,
* and related expression types). The common Eigen API for dense objects is contained in this class. * and related expression types). The common Eigen API for dense objects is contained in this class.
* *
* \param Derived is the derived type, e.g., a matrix type or an expression. * \tparam Derived is the derived type, e.g., a matrix type or an expression.
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN.
*
* \sa \ref TopicClassHierarchy
*/ */
template<typename Derived> class DenseBase template<typename Derived> class DenseBase
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
: public ei_special_scalar_op_base<Derived,typename ei_traits<Derived>::Scalar, : public internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
typename NumTraits<typename ei_traits<Derived>::Scalar>::Real> typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>
#else #else
: public DenseCoeffsBase<Derived> : public DenseCoeffsBase<Derived>
#endif // not EIGEN_PARSED_BY_DOXYGEN #endif // not EIGEN_PARSED_BY_DOXYGEN
{ {
public: public:
#ifndef EIGEN_PARSED_BY_DOXYGEN using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
using ei_special_scalar_op_base<Derived,typename ei_traits<Derived>::Scalar, typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
typename NumTraits<typename ei_traits<Derived>::Scalar>::Real>::operator*;
class InnerIterator; class InnerIterator;
typedef typename ei_traits<Derived>::StorageKind StorageKind; typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index;
typedef typename ei_traits<Derived>::Scalar Scalar; /** \brief The type of indices
typedef typename ei_packet_traits<Scalar>::type PacketScalar; * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE.
* \sa \ref TopicPreprocessorDirectives.
*/
typedef typename internal::traits<Derived>::Index Index;
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
typedef DenseCoeffsBase<Derived> Base; typedef DenseCoeffsBase<Derived> Base;
@@ -87,32 +98,30 @@ template<typename Derived> class DenseBase
using Base::outerStride; using Base::outerStride;
using Base::rowStride; using Base::rowStride;
using Base::colStride; using Base::colStride;
using typename Base::CoeffReturnType; typedef typename Base::CoeffReturnType CoeffReturnType;
#endif // not EIGEN_PARSED_BY_DOXYGEN
enum { enum {
RowsAtCompileTime = ei_traits<Derived>::RowsAtCompileTime, RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
/**< The number of rows at compile-time. This is just a copy of the value provided /**< The number of rows at compile-time. This is just a copy of the value provided
* by the \a Derived type. If a value is not known at compile-time, * by the \a Derived type. If a value is not known at compile-time,
* it is set to the \a Dynamic constant. * it is set to the \a Dynamic constant.
* \sa MatrixBase::rows(), MatrixBase::cols(), ColsAtCompileTime, SizeAtCompileTime */ * \sa MatrixBase::rows(), MatrixBase::cols(), ColsAtCompileTime, SizeAtCompileTime */
ColsAtCompileTime = ei_traits<Derived>::ColsAtCompileTime, ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
/**< The number of columns at compile-time. This is just a copy of the value provided /**< The number of columns at compile-time. This is just a copy of the value provided
* by the \a Derived type. If a value is not known at compile-time, * by the \a Derived type. If a value is not known at compile-time,
* it is set to the \a Dynamic constant. * it is set to the \a Dynamic constant.
* \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */ * \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */
SizeAtCompileTime = (ei_size_at_compile_time<ei_traits<Derived>::RowsAtCompileTime, SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
ei_traits<Derived>::ColsAtCompileTime>::ret), internal::traits<Derived>::ColsAtCompileTime>::ret),
/**< This is equal to the number of coefficients, i.e. the number of /**< This is equal to the number of coefficients, i.e. the number of
* rows times the number of columns, or to \a Dynamic if this is not * rows times the number of columns, or to \a Dynamic if this is not
* known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */ * known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
MaxRowsAtCompileTime = ei_traits<Derived>::MaxRowsAtCompileTime, MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
/**< This value is equal to the maximum possible number of rows that this expression /**< This value is equal to the maximum possible number of rows that this expression
* might have. If this expression might have an arbitrarily high number of rows, * might have. If this expression might have an arbitrarily high number of rows,
* this value is set to \a Dynamic. * this value is set to \a Dynamic.
@@ -123,7 +132,7 @@ template<typename Derived> class DenseBase
* \sa RowsAtCompileTime, MaxColsAtCompileTime, MaxSizeAtCompileTime * \sa RowsAtCompileTime, MaxColsAtCompileTime, MaxSizeAtCompileTime
*/ */
MaxColsAtCompileTime = ei_traits<Derived>::MaxColsAtCompileTime, MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
/**< This value is equal to the maximum possible number of columns that this expression /**< This value is equal to the maximum possible number of columns that this expression
* might have. If this expression might have an arbitrarily high number of columns, * might have. If this expression might have an arbitrarily high number of columns,
* this value is set to \a Dynamic. * this value is set to \a Dynamic.
@@ -134,8 +143,8 @@ template<typename Derived> class DenseBase
* \sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime * \sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime
*/ */
MaxSizeAtCompileTime = (ei_size_at_compile_time<ei_traits<Derived>::MaxRowsAtCompileTime, MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
ei_traits<Derived>::MaxColsAtCompileTime>::ret), internal::traits<Derived>::MaxColsAtCompileTime>::ret),
/**< This value is equal to the maximum possible number of coefficients that this expression /**< This value is equal to the maximum possible number of coefficients that this expression
* might have. If this expression might have an arbitrarily high number of coefficients, * might have. If this expression might have an arbitrarily high number of coefficients,
* this value is set to \a Dynamic. * this value is set to \a Dynamic.
@@ -146,14 +155,14 @@ template<typename Derived> class DenseBase
* \sa SizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime * \sa SizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime
*/ */
IsVectorAtCompileTime = ei_traits<Derived>::MaxRowsAtCompileTime == 1 IsVectorAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime == 1
|| ei_traits<Derived>::MaxColsAtCompileTime == 1, || internal::traits<Derived>::MaxColsAtCompileTime == 1,
/**< This is set to true if either the number of rows or the number of /**< This is set to true if either the number of rows or the number of
* columns is known at compile-time to be equal to 1. Indeed, in that case, * columns is known at compile-time to be equal to 1. Indeed, in that case,
* we are dealing with a column-vector (if there is only one column) or with * we are dealing with a column-vector (if there is only one column) or with
* a row-vector (if there is only one row). */ * a row-vector (if there is only one row). */
Flags = ei_traits<Derived>::Flags, Flags = internal::traits<Derived>::Flags,
/**< This stores expression \ref flags flags which may or may not be inherited by new expressions /**< This stores expression \ref flags flags which may or may not be inherited by new expressions
* constructed from this one. See the \ref flags "list of flags". * constructed from this one. See the \ref flags "list of flags".
*/ */
@@ -163,15 +172,17 @@ template<typename Derived> class DenseBase
InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? SizeAtCompileTime InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? SizeAtCompileTime
: int(IsRowMajor) ? ColsAtCompileTime : RowsAtCompileTime, : int(IsRowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
CoeffReadCost = ei_traits<Derived>::CoeffReadCost, CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
/**< This is a rough measure of how expensive it is to read one coefficient from /**< This is a rough measure of how expensive it is to read one coefficient from
* this expression. * this expression.
*/ */
InnerStrideAtCompileTime = ei_inner_stride_at_compile_time<Derived>::ret, InnerStrideAtCompileTime = internal::inner_stride_at_compile_time<Derived>::ret,
OuterStrideAtCompileTime = ei_outer_stride_at_compile_time<Derived>::ret OuterStrideAtCompileTime = internal::outer_stride_at_compile_time<Derived>::ret
}; };
enum { ThisConstantIsPrivateInPlainObjectBase };
/** \returns the number of nonzero coefficients which is in practice the number /** \returns the number of nonzero coefficients which is in practice the number
* of stored coefficients. */ * of stored coefficients. */
inline Index nonZeros() const { return size(); } inline Index nonZeros() const { return size(); }
@@ -183,8 +194,8 @@ template<typename Derived> class DenseBase
/** \returns the outer size. /** \returns the outer size.
* *
* \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension * \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension
* with respect to the storage order, i.e., the number of columns for a column-major matrix, * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a
* and the number of rows for a row-major matrix. */ * column-major matrix, and the number of rows for a row-major matrix. */
Index outerSize() const Index outerSize() const
{ {
return IsVectorAtCompileTime ? 1 return IsVectorAtCompileTime ? 1
@@ -194,8 +205,8 @@ template<typename Derived> class DenseBase
/** \returns the inner size. /** \returns the inner size.
* *
* \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension * \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension
* with respect to the storage order, i.e., the number of rows for a column-major matrix, * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a
* and the number of columns for a row-major matrix. */ * column-major matrix, and the number of columns for a row-major matrix. */
Index innerSize() const Index innerSize() const
{ {
return IsVectorAtCompileTime ? this->size() return IsVectorAtCompileTime ? this->size()
@@ -209,7 +220,7 @@ template<typename Derived> class DenseBase
void resize(Index size) void resize(Index size)
{ {
EIGEN_ONLY_USED_FOR_DEBUG(size); EIGEN_ONLY_USED_FOR_DEBUG(size);
ei_assert(size == this->size() eigen_assert(size == this->size()
&& "DenseBase::resize() does not actually allow to resize."); && "DenseBase::resize() does not actually allow to resize.");
} }
/** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are
@@ -220,33 +231,20 @@ template<typename Derived> class DenseBase
{ {
EIGEN_ONLY_USED_FOR_DEBUG(rows); EIGEN_ONLY_USED_FOR_DEBUG(rows);
EIGEN_ONLY_USED_FOR_DEBUG(cols); EIGEN_ONLY_USED_FOR_DEBUG(cols);
ei_assert(rows == this->rows() && cols == this->cols() eigen_assert(rows == this->rows() && cols == this->cols()
&& "DenseBase::resize() does not actually allow to resize."); && "DenseBase::resize() does not actually allow to resize.");
} }
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal Represents a matrix with all coefficients equal to one another*/ /** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ConstantReturnType; typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */ /** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */
typedef CwiseNullaryOp<ei_linspaced_op<Scalar,false>,Derived> SequentialLinSpacedReturnType; typedef CwiseNullaryOp<internal::linspaced_op<Scalar,false>,Derived> SequentialLinSpacedReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows random access. */ /** \internal Represents a vector with linearly spaced coefficients that allows random access. */
typedef CwiseNullaryOp<ei_linspaced_op<Scalar,true>,Derived> RandomAccessLinSpacedReturnType; typedef CwiseNullaryOp<internal::linspaced_op<Scalar,true>,Derived> RandomAccessLinSpacedReturnType;
/** \internal the return type of MatrixBase::eigenvalues() */ /** \internal the return type of MatrixBase::eigenvalues() */
typedef Matrix<typename NumTraits<typename ei_traits<Derived>::Scalar>::Real, ei_traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType; typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
/** \internal expression type of a column */
typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, 1> ColXpr;
/** \internal expression type of a row */
typedef Block<Derived, 1, ei_traits<Derived>::ColsAtCompileTime> RowXpr;
/** \internal expression type of a block of whole columns */
typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, Dynamic> ColsBlockXpr;
/** \internal expression type of a block of whole rows */
typedef Block<Derived, Dynamic, ei_traits<Derived>::ColsAtCompileTime> RowsBlockXpr;
/** \internal expression type of a block of whole columns */
template<int N> struct NColsBlockXpr { typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, N> Type; };
/** \internal expression type of a block of whole rows */
template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, ei_traits<Derived>::ColsAtCompileTime> Type; };
#endif // not EIGEN_PARSED_BY_DOXYGEN #endif // not EIGEN_PARSED_BY_DOXYGEN
@@ -286,7 +284,8 @@ template<typename Derived> class DenseBase
CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other); CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);
Eigen::Transpose<Derived> transpose(); Eigen::Transpose<Derived> transpose();
const Eigen::Transpose<Derived> transpose() const; typedef const Transpose<const Derived> ConstTransposeReturnType;
ConstTransposeReturnType transpose() const;
void transposeInPlace(); void transposeInPlace();
#ifndef EIGEN_NO_DEBUG #ifndef EIGEN_NO_DEBUG
protected: protected:
@@ -295,91 +294,29 @@ template<typename Derived> class DenseBase
public: public:
#endif #endif
RowXpr row(Index i); typedef VectorBlock<Derived> SegmentReturnType;
const RowXpr row(Index i) const; typedef const VectorBlock<const Derived> ConstSegmentReturnType;
template<int Size> struct FixedSegmentReturnType { typedef VectorBlock<Derived, Size> Type; };
template<int Size> struct ConstFixedSegmentReturnType { typedef const VectorBlock<const Derived, Size> Type; };
// Note: The "DenseBase::" prefixes are added to help MSVC9 to match these declarations with the later implementations.
SegmentReturnType segment(Index start, Index size);
typename DenseBase::ConstSegmentReturnType segment(Index start, Index size) const;
ColXpr col(Index i); SegmentReturnType head(Index size);
const ColXpr col(Index i) const; typename DenseBase::ConstSegmentReturnType head(Index size) const;
Block<Derived> block(Index startRow, Index startCol, Index blockRows, Index blockCols); SegmentReturnType tail(Index size);
const Block<Derived> block(Index startRow, Index startCol, Index blockRows, Index blockCols) const; typename DenseBase::ConstSegmentReturnType tail(Index size) const;
VectorBlock<Derived> segment(Index start, Index size); template<int Size> typename FixedSegmentReturnType<Size>::Type head();
const VectorBlock<Derived> segment(Index start, Index size) const; template<int Size> typename ConstFixedSegmentReturnType<Size>::Type head() const;
VectorBlock<Derived> head(Index size); template<int Size> typename FixedSegmentReturnType<Size>::Type tail();
const VectorBlock<Derived> head(Index size) const; template<int Size> typename ConstFixedSegmentReturnType<Size>::Type tail() const;
VectorBlock<Derived> tail(Index size); template<int Size> typename FixedSegmentReturnType<Size>::Type segment(Index start);
const VectorBlock<Derived> tail(Index size) const; template<int Size> typename ConstFixedSegmentReturnType<Size>::Type segment(Index start) const;
Block<Derived> topLeftCorner(Index cRows, Index cCols);
const Block<Derived> topLeftCorner(Index cRows, Index cCols) const;
Block<Derived> topRightCorner(Index cRows, Index cCols);
const Block<Derived> topRightCorner(Index cRows, Index cCols) const;
Block<Derived> bottomLeftCorner(Index cRows, Index cCols);
const Block<Derived> bottomLeftCorner(Index cRows, Index cCols) const;
Block<Derived> bottomRightCorner(Index cRows, Index cCols);
const Block<Derived> bottomRightCorner(Index cRows, Index cCols) const;
RowsBlockXpr topRows(Index n);
const RowsBlockXpr topRows(Index n) const;
RowsBlockXpr bottomRows(Index n);
const RowsBlockXpr bottomRows(Index n) const;
ColsBlockXpr leftCols(Index n);
const ColsBlockXpr leftCols(Index n) const;
ColsBlockXpr rightCols(Index n);
const ColsBlockXpr rightCols(Index n) const;
template<int CRows, int CCols> Block<Derived, CRows, CCols> topLeftCorner();
template<int CRows, int CCols> const Block<Derived, CRows, CCols> topLeftCorner() const;
template<int CRows, int CCols> Block<Derived, CRows, CCols> topRightCorner();
template<int CRows, int CCols> const Block<Derived, CRows, CCols> topRightCorner() const;
template<int CRows, int CCols> Block<Derived, CRows, CCols> bottomLeftCorner();
template<int CRows, int CCols> const Block<Derived, CRows, CCols> bottomLeftCorner() const;
template<int CRows, int CCols> Block<Derived, CRows, CCols> bottomRightCorner();
template<int CRows, int CCols> const Block<Derived, CRows, CCols> bottomRightCorner() const;
template<int NRows> typename NRowsBlockXpr<NRows>::Type topRows();
template<int NRows> const typename NRowsBlockXpr<NRows>::Type topRows() const;
template<int NRows> typename NRowsBlockXpr<NRows>::Type bottomRows();
template<int NRows> const typename NRowsBlockXpr<NRows>::Type bottomRows() const;
template<int NCols> typename NColsBlockXpr<NCols>::Type leftCols();
template<int NCols> const typename NColsBlockXpr<NCols>::Type leftCols() const;
template<int NCols> typename NColsBlockXpr<NCols>::Type rightCols();
template<int NCols> const typename NColsBlockXpr<NCols>::Type rightCols() const;
template<int BlockRows, int BlockCols>
Block<Derived, BlockRows, BlockCols> block(Index startRow, Index startCol);
template<int BlockRows, int BlockCols>
const Block<Derived, BlockRows, BlockCols> block(Index startRow, Index startCol) const;
template<int Size> VectorBlock<Derived,Size> head(void);
template<int Size> const VectorBlock<Derived,Size> head() const;
template<int Size> VectorBlock<Derived,Size> tail();
template<int Size> const VectorBlock<Derived,Size> tail() const;
template<int Size> VectorBlock<Derived,Size> segment(Index start);
template<int Size> const VectorBlock<Derived,Size> segment(Index start) const;
Diagonal<Derived,0> diagonal();
const Diagonal<Derived,0> diagonal() const;
template<int Index> Diagonal<Derived,Index> diagonal();
template<int Index> const Diagonal<Derived,Index> diagonal() const;
Diagonal<Derived, Dynamic> diagonal(Index index);
const Diagonal<Derived, Dynamic> diagonal(Index index) const;
template<unsigned int Mode> TriangularView<Derived, Mode> part();
template<unsigned int Mode> const TriangularView<Derived, Mode> part() const;
template<unsigned int Mode> TriangularView<Derived, Mode> triangularView();
template<unsigned int Mode> const TriangularView<Derived, Mode> triangularView() const;
template<unsigned int UpLo> SelfAdjointView<Derived, UpLo> selfadjointView();
template<unsigned int UpLo> const SelfAdjointView<Derived, UpLo> selfadjointView() const;
static const ConstantReturnType static const ConstantReturnType
Constant(Index rows, Index cols, const Scalar& value); Constant(Index rows, Index cols, const Scalar& value);
@@ -389,9 +326,13 @@ template<typename Derived> class DenseBase
Constant(const Scalar& value); Constant(const Scalar& value);
static const SequentialLinSpacedReturnType static const SequentialLinSpacedReturnType
LinSpaced(Sequential_t, const Scalar& low, const Scalar& high, Index size); LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high);
static const RandomAccessLinSpacedReturnType static const RandomAccessLinSpacedReturnType
LinSpaced(const Scalar& low, const Scalar& high, Index size); LinSpaced(Index size, const Scalar& low, const Scalar& high);
static const SequentialLinSpacedReturnType
LinSpaced(Sequential_t, const Scalar& low, const Scalar& high);
static const RandomAccessLinSpacedReturnType
LinSpaced(const Scalar& low, const Scalar& high);
template<typename CustomNullaryOp> template<typename CustomNullaryOp>
static const CwiseNullaryOp<CustomNullaryOp, Derived> static const CwiseNullaryOp<CustomNullaryOp, Derived>
@@ -412,7 +353,8 @@ template<typename Derived> class DenseBase
void fill(const Scalar& value); void fill(const Scalar& value);
Derived& setConstant(const Scalar& value); Derived& setConstant(const Scalar& value);
Derived& setLinSpaced(const Scalar& low, const Scalar& high, Index size); Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
Derived& setLinSpaced(const Scalar& low, const Scalar& high);
Derived& setZero(); Derived& setZero();
Derived& setOnes(); Derived& setOnes();
Derived& setRandom(); Derived& setRandom();
@@ -439,22 +381,39 @@ template<typename Derived> class DenseBase
* Notice that in the case of a plain matrix or vector (not an expression) this function just returns * Notice that in the case of a plain matrix or vector (not an expression) this function just returns
* a const reference, in order to avoid a useless copy. * a const reference, in order to avoid a useless copy.
*/ */
EIGEN_STRONG_INLINE const typename ei_eval<Derived>::type eval() const EIGEN_STRONG_INLINE const typename internal::eval<Derived>::type eval() const
{ {
// Even though MSVC does not honor strong inlining when the return type // Even though MSVC does not honor strong inlining when the return type
// is a dynamic matrix, we desperately need strong inlining for fixed // is a dynamic matrix, we desperately need strong inlining for fixed
// size types on MSVC. // size types on MSVC.
return typename ei_eval<Derived>::type(derived()); return typename internal::eval<Derived>::type(derived());
} }
/** swaps *this with the expression \a other.
*
*/
template<typename OtherDerived> template<typename OtherDerived>
void swap(DenseBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other); void swap(const DenseBase<OtherDerived>& other,
int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase)
{
SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
}
/** swaps *this with the matrix or array \a other.
 *
 * \param other the plain matrix or array (a PlainObjectBase) to exchange with.
 *
 * The exchange is performed by wrapping derived() in a SwapWrapper and
 * lazy-assigning other.derived() to that wrapper.
 */
template<typename OtherDerived>
void swap(PlainObjectBase<OtherDerived>& other)
{
// All the work happens inside SwapWrapper's lazyAssign().
SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
}
inline const NestByValue<Derived> nestByValue() const; inline const NestByValue<Derived> nestByValue() const;
inline const ForceAlignedAccess<Derived> forceAlignedAccess() const; inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
inline ForceAlignedAccess<Derived> forceAlignedAccess(); inline ForceAlignedAccess<Derived> forceAlignedAccess();
template<bool Enable> inline const typename ei_meta_if<Enable,ForceAlignedAccess<Derived>,Derived&>::ret forceAlignedAccessIf() const; template<bool Enable> inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
template<bool Enable> inline typename ei_meta_if<Enable,ForceAlignedAccess<Derived>,Derived&>::ret forceAlignedAccessIf(); template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
Scalar sum() const; Scalar sum() const;
Scalar mean() const; Scalar mean() const;
@@ -462,17 +421,20 @@ template<typename Derived> class DenseBase
Scalar prod() const; Scalar prod() const;
typename ei_traits<Derived>::Scalar minCoeff() const; typename internal::traits<Derived>::Scalar minCoeff() const;
typename ei_traits<Derived>::Scalar maxCoeff() const; typename internal::traits<Derived>::Scalar maxCoeff() const;
typename ei_traits<Derived>::Scalar minCoeff(Index* row, Index* col) const; template<typename IndexType>
typename ei_traits<Derived>::Scalar maxCoeff(Index* row, Index* col) const; typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
template<typename IndexType>
typename ei_traits<Derived>::Scalar minCoeff(Index* index) const; typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;
typename ei_traits<Derived>::Scalar maxCoeff(Index* index) const; template<typename IndexType>
typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;
template<typename IndexType>
typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;
template<typename BinaryOp> template<typename BinaryOp>
typename ei_result_of<BinaryOp(typename ei_traits<Derived>::Scalar)>::type typename internal::result_of<BinaryOp(typename internal::traits<Derived>::Scalar)>::type
redux(const BinaryOp& func) const; redux(const BinaryOp& func) const;
template<typename Visitor> template<typename Visitor>
@@ -480,20 +442,33 @@ template<typename Derived> class DenseBase
inline const WithFormat<Derived> format(const IOFormat& fmt) const; inline const WithFormat<Derived> format(const IOFormat& fmt) const;
/** \returns the unique coefficient of a 1x1 expression
 *
 * The size is checked twice: through EIGEN_STATIC_ASSERT_SIZE_1x1 on \c Derived,
 * and through an eigen_assert that rows() and cols() are both equal to 1.
 * The coefficient itself is read with derived().coeff(0,0).
 */
CoeffReturnType value() const
{
EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
// Assertion on the actual dimensions of this object.
eigen_assert(this->rows() == 1 && this->cols() == 1);
return derived().coeff(0,0);
}
/////////// Array module /////////// /////////// Array module ///////////
bool all(void) const; bool all(void) const;
bool any(void) const; bool any(void) const;
Index count() const; Index count() const;
const VectorwiseOp<Derived,Horizontal> rowwise() const; typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
VectorwiseOp<Derived,Horizontal> rowwise(); typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType;
const VectorwiseOp<Derived,Vertical> colwise() const; typedef VectorwiseOp<Derived, Vertical> ColwiseReturnType;
VectorwiseOp<Derived,Vertical> colwise(); typedef const VectorwiseOp<const Derived, Vertical> ConstColwiseReturnType;
static const CwiseNullaryOp<ei_scalar_random_op<Scalar>,Derived> Random(Index rows, Index cols); ConstRowwiseReturnType rowwise() const;
static const CwiseNullaryOp<ei_scalar_random_op<Scalar>,Derived> Random(Index size); RowwiseReturnType rowwise();
static const CwiseNullaryOp<ei_scalar_random_op<Scalar>,Derived> Random(); ConstColwiseReturnType colwise() const;
ColwiseReturnType colwise();
static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index rows, Index cols);
static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index size);
static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random();
template<typename ThenDerived,typename ElseDerived> template<typename ThenDerived,typename ElseDerived>
const Select<Derived,ThenDerived,ElseDerived> const Select<Derived,ThenDerived,ElseDerived>
@@ -514,10 +489,19 @@ template<typename Derived> class DenseBase
const Replicate<Derived,RowFactor,ColFactor> replicate() const; const Replicate<Derived,RowFactor,ColFactor> replicate() const;
const Replicate<Derived,Dynamic,Dynamic> replicate(Index rowFacor,Index colFactor) const; const Replicate<Derived,Dynamic,Dynamic> replicate(Index rowFacor,Index colFactor) const;
Eigen::Reverse<Derived, BothDirections> reverse(); typedef Reverse<Derived, BothDirections> ReverseReturnType;
const Eigen::Reverse<Derived, BothDirections> reverse() const; typedef const Reverse<const Derived, BothDirections> ConstReverseReturnType;
ReverseReturnType reverse();
ConstReverseReturnType reverse() const;
void reverseInPlace(); void reverseInPlace();
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
# include "../plugins/BlockMethods.h"
# ifdef EIGEN_DENSEBASE_PLUGIN
# include EIGEN_DENSEBASE_PLUGIN
# endif
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
#ifdef EIGEN2_SUPPORT #ifdef EIGEN2_SUPPORT
Block<Derived> corner(CornerType type, Index cRows, Index cCols); Block<Derived> corner(CornerType type, Index cRows, Index cCols);
@@ -529,14 +513,11 @@ template<typename Derived> class DenseBase
#endif // EIGEN2_SUPPORT #endif // EIGEN2_SUPPORT
#ifdef EIGEN_DENSEBASE_PLUGIN
#include EIGEN_DENSEBASE_PLUGIN
#endif
// disable the use of evalTo for dense objects with a nice compilation error // disable the use of evalTo for dense objects with a nice compilation error
template<typename Dest> inline void evalTo(Dest& ) const template<typename Dest> inline void evalTo(Dest& ) const
{ {
EIGEN_STATIC_ASSERT((ei_is_same_type<Dest,void>::ret),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS); EIGEN_STATIC_ASSERT((internal::is_same<Dest,void>::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
} }
protected: protected:
@@ -547,8 +528,6 @@ template<typename Derived> class DenseBase
* Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down * Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down
*/ */
#ifdef EIGEN_INTERNAL_DEBUGGING #ifdef EIGEN_INTERNAL_DEBUGGING
EIGEN_STATIC_ASSERT(ei_are_flags_consistent<Flags>::ret,
INVALID_MATRIXBASE_TEMPLATE_PARAMETERS)
EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor)) EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor))
&& EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, int(!IsRowMajor))), && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, int(!IsRowMajor))),
INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION) INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION)

View File

@@ -25,19 +25,48 @@
#ifndef EIGEN_DENSECOEFFSBASE_H #ifndef EIGEN_DENSECOEFFSBASE_H
#define EIGEN_DENSECOEFFSBASE_H #define EIGEN_DENSECOEFFSBASE_H
template<typename Derived, bool EnableDirectAccessAPI> namespace internal {
class DenseCoeffsBase : public EigenBase<Derived> template<typename T> struct add_const_on_value_type_if_arithmetic
{
typedef typename conditional<is_arithmetic<T>::value, T, typename add_const_on_value_type<T>::type>::type type;
};
}
/** \brief Base class providing read-only coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam ReadOnlyAccessors Constant indicating read-only access
*
* This class defines the \c operator() \c const function and friends, which can be used to read specific
* entries of a matrix or array.
*
* \sa DenseCoeffsBase<Derived, WriteAccessors>, DenseCoeffsBase<Derived, DirectAccessors>,
* \ref TopicClassHierarchy
*/
template<typename Derived>
class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
{ {
public: public:
typedef typename ei_traits<Derived>::StorageKind StorageKind; typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index; typedef typename internal::traits<Derived>::Index Index;
typedef typename ei_traits<Derived>::Scalar Scalar; typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar; typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename ei_meta_if<ei_has_direct_access<Derived>::ret,
const Scalar&, // Explanation for this CoeffReturnType typedef.
typename ei_meta_if<ei_is_arithmetic<Scalar>::ret, Scalar, const Scalar>::ret // - This is the return type of the coeff() method.
>::ret CoeffReturnType; // - The LvalueBit means exactly that we can offer a coeffRef() method, which means exactly that we can get references
typedef typename ei_makeconst_return_type<typename ei_packet_traits<Scalar>::type>::type PacketReturnType; // to coeffs, which means exactly that we can have coeff() return a const reference (as opposed to returning a value).
// - The is_arithmetic check is required since "const int", "const double", etc. will cause warnings on some systems
// while the declaration of "const T", where T is a non arithmetic type does not. Always returning "const Scalar&" is
// not possible, since the underlying expressions might not offer a valid address the reference could be referring to.
typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit),
const Scalar&,
typename internal::conditional<internal::is_arithmetic<Scalar>::value, Scalar, const Scalar>::type
>::type CoeffReturnType;
typedef typename internal::add_const_on_value_type_if_arithmetic<
typename internal::packet_traits<Scalar>::type
>::type PacketReturnType;
typedef EigenBase<Derived> Base; typedef EigenBase<Derived> Base;
using Base::rows; using Base::rows;
@@ -77,7 +106,7 @@ class DenseCoeffsBase : public EigenBase<Derived>
*/ */
EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
{ {
ei_internal_assert(row >= 0 && row < rows() eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols()); && col >= 0 && col < cols());
return derived().coeff(row, col); return derived().coeff(row, col);
} }
@@ -94,7 +123,7 @@ class DenseCoeffsBase : public EigenBase<Derived>
*/ */
EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const
{ {
ei_assert(row >= 0 && row < rows() eigen_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols()); && col >= 0 && col < cols());
return derived().coeff(row, col); return derived().coeff(row, col);
} }
@@ -117,7 +146,7 @@ class DenseCoeffsBase : public EigenBase<Derived>
EIGEN_STRONG_INLINE CoeffReturnType EIGEN_STRONG_INLINE CoeffReturnType
coeff(Index index) const coeff(Index index) const
{ {
ei_internal_assert(index >= 0 && index < size()); eigen_internal_assert(index >= 0 && index < size());
return derived().coeff(index); return derived().coeff(index);
} }
@@ -133,9 +162,11 @@ class DenseCoeffsBase : public EigenBase<Derived>
EIGEN_STRONG_INLINE CoeffReturnType EIGEN_STRONG_INLINE CoeffReturnType
operator[](Index index) const operator[](Index index) const
{ {
#ifndef EIGEN2_SUPPORT
EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
ei_assert(index >= 0 && index < size()); #endif
eigen_assert(index >= 0 && index < size());
return derived().coeff(index); return derived().coeff(index);
} }
@@ -152,7 +183,7 @@ class DenseCoeffsBase : public EigenBase<Derived>
EIGEN_STRONG_INLINE CoeffReturnType EIGEN_STRONG_INLINE CoeffReturnType
operator()(Index index) const operator()(Index index) const
{ {
ei_assert(index >= 0 && index < size()); eigen_assert(index >= 0 && index < size());
return derived().coeff(index); return derived().coeff(index);
} }
@@ -176,7 +207,8 @@ class DenseCoeffsBase : public EigenBase<Derived>
EIGEN_STRONG_INLINE CoeffReturnType EIGEN_STRONG_INLINE CoeffReturnType
w() const { return (*this)[3]; } w() const { return (*this)[3]; }
/** \returns the packet of coefficients starting at the given row and column. It is your responsibility /** \internal
* \returns the packet of coefficients starting at the given row and column. It is your responsibility
* to ensure that a packet really starts there. This method is only available on expressions having the * to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit. * PacketAccessBit.
* *
@@ -188,12 +220,13 @@ class DenseCoeffsBase : public EigenBase<Derived>
template<int LoadMode> template<int LoadMode>
EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const
{ {
ei_internal_assert(row >= 0 && row < rows() eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols()); && col >= 0 && col < cols());
return derived().template packet<LoadMode>(row,col); return derived().template packet<LoadMode>(row,col);
} }
/** \internal */
template<int LoadMode> template<int LoadMode>
EIGEN_STRONG_INLINE PacketReturnType packetByOuterInner(Index outer, Index inner) const EIGEN_STRONG_INLINE PacketReturnType packetByOuterInner(Index outer, Index inner) const
{ {
@@ -201,7 +234,8 @@ class DenseCoeffsBase : public EigenBase<Derived>
colIndexByOuterInner(outer, inner)); colIndexByOuterInner(outer, inner));
} }
/** \returns the packet of coefficients starting at the given index. It is your responsibility /** \internal
* \returns the packet of coefficients starting at the given index. It is your responsibility
* to ensure that a packet really starts there. This method is only available on expressions having the * to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit and the LinearAccessBit. * PacketAccessBit and the LinearAccessBit.
* *
@@ -213,13 +247,13 @@ class DenseCoeffsBase : public EigenBase<Derived>
template<int LoadMode> template<int LoadMode>
EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{ {
ei_internal_assert(index >= 0 && index < size()); eigen_internal_assert(index >= 0 && index < size());
return derived().template packet<LoadMode>(index); return derived().template packet<LoadMode>(index);
} }
protected: protected:
// explanation: DenseBase is doing "using ..." on the methods from DenseCoeffsBase. // explanation: DenseBase is doing "using ..." on the methods from DenseCoeffsBase.
// But some methods are only available in the EnableDirectAccessAPI case. // But some methods are only available in the DirectAccess case.
// So we add dummy methods here with these names, so that "using... " doesn't fail. // So we add dummy methods here with these names, so that "using... " doesn't fail.
// It's not private so that the child class DenseBase can access them, and it's not public // It's not private so that the child class DenseBase can access them, and it's not public
// either since it's an implementation detail, so has to be protected. // either since it's an implementation detail, so has to be protected.
@@ -238,17 +272,28 @@ class DenseCoeffsBase : public EigenBase<Derived>
void colStride(); void colStride();
}; };
/** \brief Base class providing read/write coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam WriteAccessors Constant indicating read/write access
*
* This class defines the non-const \c operator() function and friends, which can be used to write specific
* entries of a matrix or array. This class inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which
* defines the const variant for reading specific entries.
*
* \sa DenseCoeffsBase<Derived, DirectAccessors>, \ref TopicClassHierarchy
*/
template<typename Derived> template<typename Derived>
class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false> class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
{ {
public: public:
typedef DenseCoeffsBase<Derived, false> Base; typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
typedef typename ei_traits<Derived>::StorageKind StorageKind; typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index; typedef typename internal::traits<Derived>::Index Index;
typedef typename ei_traits<Derived>::Scalar Scalar; typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar; typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
using Base::coeff; using Base::coeff;
@@ -281,7 +326,7 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
*/ */
EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
{ {
ei_internal_assert(row >= 0 && row < rows() eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols()); && col >= 0 && col < cols());
return derived().coeffRef(row, col); return derived().coeffRef(row, col);
} }
@@ -301,7 +346,7 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
EIGEN_STRONG_INLINE Scalar& EIGEN_STRONG_INLINE Scalar&
operator()(Index row, Index col) operator()(Index row, Index col)
{ {
ei_assert(row >= 0 && row < rows() eigen_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols()); && col >= 0 && col < cols());
return derived().coeffRef(row, col); return derived().coeffRef(row, col);
} }
@@ -325,7 +370,7 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
EIGEN_STRONG_INLINE Scalar& EIGEN_STRONG_INLINE Scalar&
coeffRef(Index index) coeffRef(Index index)
{ {
ei_internal_assert(index >= 0 && index < size()); eigen_internal_assert(index >= 0 && index < size());
return derived().coeffRef(index); return derived().coeffRef(index);
} }
@@ -339,9 +384,11 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
EIGEN_STRONG_INLINE Scalar& EIGEN_STRONG_INLINE Scalar&
operator[](Index index) operator[](Index index)
{ {
#ifndef EIGEN2_SUPPORT
EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
ei_assert(index >= 0 && index < size()); #endif
eigen_assert(index >= 0 && index < size());
return derived().coeffRef(index); return derived().coeffRef(index);
} }
@@ -357,7 +404,7 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
EIGEN_STRONG_INLINE Scalar& EIGEN_STRONG_INLINE Scalar&
operator()(Index index) operator()(Index index)
{ {
ei_assert(index >= 0 && index < size()); eigen_assert(index >= 0 && index < size());
return derived().coeffRef(index); return derived().coeffRef(index);
} }
@@ -381,7 +428,8 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
EIGEN_STRONG_INLINE Scalar& EIGEN_STRONG_INLINE Scalar&
w() { return (*this)[3]; } w() { return (*this)[3]; }
/** Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility /** \internal
* Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility
* to ensure that a packet really starts there. This method is only available on expressions having the * to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit. * PacketAccessBit.
* *
@@ -392,24 +440,26 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
template<int StoreMode> template<int StoreMode>
EIGEN_STRONG_INLINE void writePacket EIGEN_STRONG_INLINE void writePacket
(Index row, Index col, const typename ei_packet_traits<Scalar>::type& x) (Index row, Index col, const typename internal::packet_traits<Scalar>::type& x)
{ {
ei_internal_assert(row >= 0 && row < rows() eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols()); && col >= 0 && col < cols());
derived().template writePacket<StoreMode>(row,col,x); derived().template writePacket<StoreMode>(row,col,x);
} }
/** \internal */
template<int StoreMode> template<int StoreMode>
EIGEN_STRONG_INLINE void writePacketByOuterInner EIGEN_STRONG_INLINE void writePacketByOuterInner
(Index outer, Index inner, const typename ei_packet_traits<Scalar>::type& x) (Index outer, Index inner, const typename internal::packet_traits<Scalar>::type& x)
{ {
writePacket<StoreMode>(rowIndexByOuterInner(outer, inner), writePacket<StoreMode>(rowIndexByOuterInner(outer, inner),
colIndexByOuterInner(outer, inner), colIndexByOuterInner(outer, inner),
x); x);
} }
/** Stores the given packet of coefficients, at the given index in this expression. It is your responsibility /** \internal
* Stores the given packet of coefficients, at the given index in this expression. It is your responsibility
* to ensure that a packet really starts there. This method is only available on expressions having the * to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit and the LinearAccessBit. * PacketAccessBit and the LinearAccessBit.
* *
@@ -417,12 +467,11 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
* the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
* starting at an address which is a multiple of the packet size. * starting at an address which is a multiple of the packet size.
*/ */
template<int StoreMode> template<int StoreMode>
EIGEN_STRONG_INLINE void writePacket EIGEN_STRONG_INLINE void writePacket
(Index index, const typename ei_packet_traits<Scalar>::type& x) (Index index, const typename internal::packet_traits<Scalar>::type& x)
{ {
ei_internal_assert(index >= 0 && index < size()); eigen_internal_assert(index >= 0 && index < size());
derived().template writePacket<StoreMode>(index,x); derived().template writePacket<StoreMode>(index,x);
} }
@@ -439,7 +488,7 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other) EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
{ {
ei_internal_assert(row >= 0 && row < rows() eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols()); && col >= 0 && col < cols());
derived().coeffRef(row, col) = other.derived().coeff(row, col); derived().coeffRef(row, col) = other.derived().coeff(row, col);
} }
@@ -455,7 +504,7 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase<OtherDerived>& other) EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
{ {
ei_internal_assert(index >= 0 && index < size()); eigen_internal_assert(index >= 0 && index < size());
derived().coeffRef(index) = other.derived().coeff(index); derived().coeffRef(index) = other.derived().coeff(index);
} }
@@ -480,7 +529,7 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
template<typename OtherDerived, int StoreMode, int LoadMode> template<typename OtherDerived, int StoreMode, int LoadMode>
EIGEN_STRONG_INLINE void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other) EIGEN_STRONG_INLINE void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
{ {
ei_internal_assert(row >= 0 && row < rows() eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols()); && col >= 0 && col < cols());
derived().template writePacket<StoreMode>(row, col, derived().template writePacket<StoreMode>(row, col,
other.derived().template packet<LoadMode>(row, col)); other.derived().template packet<LoadMode>(row, col));
@@ -497,11 +546,12 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
template<typename OtherDerived, int StoreMode, int LoadMode> template<typename OtherDerived, int StoreMode, int LoadMode>
EIGEN_STRONG_INLINE void copyPacket(Index index, const DenseBase<OtherDerived>& other) EIGEN_STRONG_INLINE void copyPacket(Index index, const DenseBase<OtherDerived>& other)
{ {
ei_internal_assert(index >= 0 && index < size()); eigen_internal_assert(index >= 0 && index < size());
derived().template writePacket<StoreMode>(index, derived().template writePacket<StoreMode>(index,
other.derived().template packet<LoadMode>(index)); other.derived().template packet<LoadMode>(index));
} }
/** \internal */
template<typename OtherDerived, int StoreMode, int LoadMode> template<typename OtherDerived, int StoreMode, int LoadMode>
EIGEN_STRONG_INLINE void copyPacketByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other) EIGEN_STRONG_INLINE void copyPacketByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
{ {
@@ -512,6 +562,34 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
} }
#endif #endif
};
/** \brief Base class providing direct read-only coefficient access to matrices and arrays.
* \ingroup Core_Module
* \tparam Derived Type of the derived class
* \tparam DirectAccessors Constant indicating direct access
*
* This class defines functions to work with strides which can be used to access entries directly. This class
* inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which defines functions to access entries read-only using
* \c operator() .
*
* \sa \ref TopicClassHierarchy
*/
template<typename Derived>
class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
{
public:
typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
typedef typename internal::traits<Derived>::Index Index;
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
using Base::rows;
using Base::cols;
using Base::size;
using Base::derived;
/** \returns the pointer increment between two consecutive elements within a slice in the inner direction. /** \returns the pointer increment between two consecutive elements within a slice in the inner direction.
* *
* \sa outerStride(), rowStride(), colStride() * \sa outerStride(), rowStride(), colStride()
@@ -531,6 +609,7 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
return derived().outerStride(); return derived().outerStride();
} }
// FIXME shall we remove it ?
inline Index stride() const inline Index stride() const
{ {
return Derived::IsVectorAtCompileTime ? innerStride() : outerStride(); return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
@@ -555,57 +634,132 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
} }
}; };
/** \brief Base class providing direct read/write coefficient access to matrices and arrays.
  * \ingroup Core_Module
  * \tparam Derived Type of the derived class
  * \tparam DirectWriteAccessors Constant indicating direct access
  *
  * This specialization adds the stride queries (inner, outer, row and column strides) that are needed to
  * address entries directly. It derives from DenseCoeffsBase<Derived, WriteAccessors>, which supplies the
  * read/write coefficient accessors such as \c operator().
  *
  * Every stride query is forwarded to the derived expression.
  *
  * \sa \ref TopicClassHierarchy
  */
template<typename Derived>
class DenseCoeffsBase<Derived, DirectWriteAccessors>
  : public DenseCoeffsBase<Derived, WriteAccessors>
{
  public:

    typedef DenseCoeffsBase<Derived, WriteAccessors> Base;
    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;

    using Base::rows;
    using Base::cols;
    using Base::size;
    using Base::derived;

    /** \returns the pointer increment between two consecutive elements within a slice in the inner direction.
      *
      * \sa outerStride(), rowStride(), colStride()
      */
    inline Index innerStride() const
    {
      const Derived& self = derived();
      return self.innerStride();
    }

    /** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive
      *          columns in a column-major matrix).
      *
      * \sa innerStride(), rowStride(), colStride()
      */
    inline Index outerStride() const
    {
      const Derived& self = derived();
      return self.outerStride();
    }

    // FIXME shall we remove it ?
    /** \returns innerStride() for compile-time vectors and outerStride() otherwise. */
    inline Index stride() const
    {
      if (Derived::IsVectorAtCompileTime)
        return innerStride();
      return outerStride();
    }

    /** \returns the pointer increment between two consecutive rows.
      *
      * \sa innerStride(), outerStride(), colStride()
      */
    inline Index rowStride() const
    {
      if (Derived::IsRowMajor)
        return outerStride();
      return innerStride();
    }

    /** \returns the pointer increment between two consecutive columns.
      *
      * \sa innerStride(), outerStride(), rowStride()
      */
    inline Index colStride() const
    {
      if (Derived::IsRowMajor)
        return innerStride();
      return outerStride();
    }
};
namespace internal {
template<typename Derived, bool JustReturnZero> template<typename Derived, bool JustReturnZero>
struct ei_first_aligned_impl struct first_aligned_impl
{ {
inline static typename Derived::Index run(const Derived&) inline static typename Derived::Index run(const Derived&)
{ return 0; } { return 0; }
}; };
template<typename Derived> template<typename Derived>
struct ei_first_aligned_impl<Derived, false> struct first_aligned_impl<Derived, false>
{ {
inline static typename Derived::Index run(const Derived& m) inline static typename Derived::Index run(const Derived& m)
{ {
return ei_first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size()); return first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
} }
}; };
/** \internal \returns the index of the first element of the array that is well aligned for vectorization. /** \internal \returns the index of the first element of the array that is well aligned for vectorization.
* *
* There is also the variant ei_first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more * There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more
* documentation. * documentation.
*/ */
template<typename Derived> template<typename Derived>
inline static typename Derived::Index ei_first_aligned(const Derived& m) inline static typename Derived::Index first_aligned(const Derived& m)
{ {
return ei_first_aligned_impl return first_aligned_impl
<Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)> <Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
::run(m); ::run(m);
} }
template<typename Derived, bool HasDirectAccess = ei_has_direct_access<Derived>::ret> template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
struct ei_inner_stride_at_compile_time struct inner_stride_at_compile_time
{ {
enum { ret = ei_traits<Derived>::InnerStrideAtCompileTime }; enum { ret = traits<Derived>::InnerStrideAtCompileTime };
}; };
template<typename Derived> template<typename Derived>
struct ei_inner_stride_at_compile_time<Derived, false> struct inner_stride_at_compile_time<Derived, false>
{ {
enum { ret = 0 }; enum { ret = 0 };
}; };
template<typename Derived, bool HasDirectAccess = ei_has_direct_access<Derived>::ret> template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
struct ei_outer_stride_at_compile_time struct outer_stride_at_compile_time
{ {
enum { ret = ei_traits<Derived>::OuterStrideAtCompileTime }; enum { ret = traits<Derived>::OuterStrideAtCompileTime };
}; };
template<typename Derived> template<typename Derived>
struct ei_outer_stride_at_compile_time<Derived, false> struct outer_stride_at_compile_time<Derived, false>
{ {
enum { ret = 0 }; enum { ret = 0 };
}; };
} // end namespace internal
#endif // EIGEN_DENSECOEFFSBASE_H #endif // EIGEN_DENSECOEFFSBASE_H

View File

@@ -0,0 +1,304 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_MATRIXSTORAGE_H
#define EIGEN_MATRIXSTORAGE_H
// Optional user hook. When EIGEN_DENSE_STORAGE_CTOR_PLUGIN is defined, the internal macro below expands
// to that user-provided code followed by a semicolon; otherwise it expands to nothing.
// In this file it is expanded in the allocating constructors and in the reallocating branch of resize()
// of the heap-backed DenseStorage specializations.
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN EIGEN_DENSE_STORAGE_CTOR_PLUGIN;
#else
#define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
#endif
namespace internal {
// Empty tag type: passing an instance selects the constructor overloads that do not run the
// unaligned-array assertion.
struct constructor_without_unaligned_array_assert {};
/** \internal
  * Static (fixed-size) array wrapper used as in-object storage.
  *
  * The \a Alignment parameter defaults to 16 when \a MatrixOrArrayOptions does not contain DontAlign and the
  * total size in bytes, Size*sizeof(T), is a multiple of 16; it defaults to 0 in every other case.
  *
  * This primary template declares the array without any alignment attribute and performs no check in its
  * constructors.
  */
template <typename T, int Size, int MatrixOrArrayOptions,
          int Alignment = ( (MatrixOrArrayOptions&DontAlign)==0
                         && ((Size*sizeof(T))%16)==0 ) ? 16 : 0>
struct plain_array
{
  T array[Size];

  plain_array()
  {}

  plain_array(constructor_without_unaligned_array_assert)
  {}
};
// EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)
//   Expands to a statement asserting that the address of the member named `array` (which must be in
//   scope at the expansion site) has none of the bits of `sizemask` set, e.g. 0xf for a 16-byte boundary.
//   Expands to nothing when EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT is defined.
//   The macro argument is parenthesized so that compound mask expressions (e.g. `a | b`) are applied
//   as a whole instead of being regrouped by operator precedence.
//   The expansion already ends with a semicolon.
#ifdef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
  #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)
#else
  #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
    eigen_assert((reinterpret_cast<size_t>(array) & (sizemask)) == 0 \
              && "this assertion is explained here: " \
              "http://eigen.tuxfamily.org/dox/UnalignedArrayAssert.html" \
              " **** READ THIS WEB PAGE !!! ****");
#endif
/** \internal
  * Specialization selected when the \a Alignment parameter is 16: the array is declared with
  * EIGEN_USER_ALIGN16, and the default constructor runs the unaligned-array assertion with mask 0xf.
  * The tag constructor skips that assertion.
  */
template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, 16>
{
  EIGEN_USER_ALIGN16 T array[Size];

  plain_array()
  {
    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf)
  }

  plain_array(constructor_without_unaligned_array_assert)
  {}
};
/** \internal
  * Specialization for Size == 0, whatever the alignment: a one-element array is declared instead
  * (presumably because a zero-length array member is not valid C++ -- TODO confirm intent).
  * No alignment assertion is performed by either constructor.
  */
template <typename T, int MatrixOrArrayOptions, int Alignment>
struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
{
  EIGEN_USER_ALIGN16 T array[1];

  plain_array()
  {}

  plain_array(constructor_without_unaligned_array_assert)
  {}
};
} // end namespace internal
/** \internal
*
* \class DenseStorage
* \ingroup Core_Module
*
* \brief Stores the data of a matrix
*
* This class stores the data of fixed-size, dynamic-size or mixed matrices
* in a way as compact as possible.
*
* \sa Matrix
*/
template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage;
// purely fixed-size matrix
// Primary template: the coefficients live in an in-object plain_array, and both dimensions are the
// compile-time constants _Rows and _Cols. All (size, rows, cols) arguments are therefore ignored.
template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage
{
    internal::plain_array<T,Size,_Options> m_data;

  public:
    inline explicit DenseStorage()
    {}

    // Forwards the tag so that plain_array skips its unaligned-array assertion.
    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert())
    {}

    inline DenseStorage(DenseIndex, DenseIndex, DenseIndex)
    {}

    // Exchanges the stored arrays of the two objects.
    inline void swap(DenseStorage& other)
    {
      std::swap(m_data, other.m_data);
    }

    inline static DenseIndex rows() { return _Rows; }
    inline static DenseIndex cols() { return _Cols; }

    // Nothing to do: neither the capacity nor the dimensions can change.
    inline void conservativeResize(DenseIndex, DenseIndex, DenseIndex)
    {}

    inline void resize(DenseIndex, DenseIndex, DenseIndex)
    {}

    inline const T *data() const
    {
      return m_data.array;
    }

    inline T *data()
    {
      return m_data.array;
    }
};
// null matrix
// Specialization for Size == 0: no data member is stored at all. data() yields a null pointer and
// rows()/cols() report the compile-time constants _Rows and _Cols.
template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0, _Rows, _Cols, _Options>
{
  public:
    inline explicit DenseStorage()
    {}

    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
    {}

    inline DenseStorage(DenseIndex, DenseIndex, DenseIndex)
    {}

    // Nothing to exchange.
    inline void swap(DenseStorage&)
    {}

    inline static DenseIndex rows() { return _Rows; }
    inline static DenseIndex cols() { return _Cols; }

    inline void conservativeResize(DenseIndex, DenseIndex, DenseIndex)
    {}

    inline void resize(DenseIndex, DenseIndex, DenseIndex)
    {}

    inline const T *data() const
    {
      return static_cast<const T*>(0);
    }

    inline T *data()
    {
      return static_cast<T*>(0);
    }
};
// dynamic-size matrix with fixed-size storage
// Both dimensions are run-time values, but the coefficients live in an in-object plain_array of
// Size elements. Resizing therefore only updates the recorded dimensions; the leading `size`
// argument of the constructor and of the resize functions is not used.
template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
{
    internal::plain_array<T,Size,_Options> m_data;
    DenseIndex m_rows;
    DenseIndex m_cols;

  public:
    inline explicit DenseStorage()
      : m_rows(0), m_cols(0)
    {}

    // Forwards the tag so that plain_array skips its unaligned-array assertion.
    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()),
        m_rows(0),
        m_cols(0)
    {}

    inline DenseStorage(DenseIndex, DenseIndex rows, DenseIndex cols)
      : m_rows(rows), m_cols(cols)
    {}

    // Exchanges the dimensions and the stored arrays of the two objects.
    inline void swap(DenseStorage& other)
    {
      std::swap(m_rows, other.m_rows);
      std::swap(m_cols, other.m_cols);
      std::swap(m_data, other.m_data);
    }

    inline DenseIndex rows() const { return m_rows; }
    inline DenseIndex cols() const { return m_cols; }

    // Same effect as resize(): the coefficients stay where they are in the fixed-size array.
    inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex cols)
    {
      resize(size, rows, cols);
    }

    inline void resize(DenseIndex, DenseIndex rows, DenseIndex cols)
    {
      m_rows = rows;
      m_cols = cols;
    }

    inline const T *data() const
    {
      return m_data.array;
    }

    inline T *data()
    {
      return m_data.array;
    }
};
// dynamic-size matrix with fixed-size storage and fixed width
// The number of columns is the compile-time constant _Cols; only the number of rows is a run-time
// value. The coefficients live in an in-object plain_array of Size elements, so resizing only
// updates the recorded row count; the `size` and `cols` arguments are not used.
template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Size, Dynamic, _Cols, _Options>
{
    internal::plain_array<T,Size,_Options> m_data;
    DenseIndex m_rows;

  public:
    inline explicit DenseStorage()
      : m_rows(0)
    {}

    // Forwards the tag so that plain_array skips its unaligned-array assertion.
    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()),
        m_rows(0)
    {}

    inline DenseStorage(DenseIndex, DenseIndex rows, DenseIndex)
      : m_rows(rows)
    {}

    // Exchanges the row count and the stored arrays of the two objects.
    inline void swap(DenseStorage& other)
    {
      std::swap(m_rows, other.m_rows);
      std::swap(m_data, other.m_data);
    }

    inline DenseIndex rows() const { return m_rows; }
    inline DenseIndex cols() const { return _Cols; }

    // Same effect as resize(): the coefficients stay where they are in the fixed-size array.
    inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex cols)
    {
      resize(size, rows, cols);
    }

    inline void resize(DenseIndex, DenseIndex rows, DenseIndex)
    {
      m_rows = rows;
    }

    inline const T *data() const
    {
      return m_data.array;
    }

    inline T *data()
    {
      return m_data.array;
    }
};
// dynamic-size matrix with fixed-size storage and fixed height
// The number of rows is the compile-time constant _Rows; only the number of columns is a run-time
// value. The coefficients live in an in-object plain_array of Size elements, so resizing only
// updates the recorded column count; the `size` and `rows` arguments are not used.
template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Size, _Rows, Dynamic, _Options>
{
    internal::plain_array<T,Size,_Options> m_data;
    DenseIndex m_cols;

  public:
    inline explicit DenseStorage()
      : m_cols(0)
    {}

    // Forwards the tag so that plain_array skips its unaligned-array assertion.
    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()),
        m_cols(0)
    {}

    inline DenseStorage(DenseIndex, DenseIndex, DenseIndex cols)
      : m_cols(cols)
    {}

    // Exchanges the column count and the stored arrays of the two objects.
    inline void swap(DenseStorage& other)
    {
      std::swap(m_cols, other.m_cols);
      std::swap(m_data, other.m_data);
    }

    inline DenseIndex rows() const { return _Rows; }
    inline DenseIndex cols() const { return m_cols; }

    // Same effect as resize(): the coefficients stay where they are in the fixed-size array.
    inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex cols)
    {
      resize(size, rows, cols);
    }

    inline void resize(DenseIndex, DenseIndex, DenseIndex cols)
    {
      m_cols = cols;
    }

    inline const T *data() const
    {
      return m_data.array;
    }

    inline T *data()
    {
      return m_data.array;
    }
};
// purely dynamic matrix.
// Both dimensions are run-time values and the coefficients live in a buffer obtained from the
// internal conditional_aligned_* helpers. The boolean template argument handed to those helpers is
// true unless _Options contains DontAlign.
//
// The destructor and the resize functions release/reallocate m_rows*m_cols elements, so the `size`
// argument of the constructor and of the resize functions is presumably always rows*cols -- callers
// must guarantee this.
//
// NOTE(review): no copy constructor / copy assignment is declared here, so an implicit copy would
// share m_data with its source and both destructors would release it. Confirm that owners never
// copy a storage object directly.
template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynamic, _Options>
{
    T *m_data;
    DenseIndex m_rows;
    DenseIndex m_cols;
  public:
    // Empty storage: null buffer, 0x0 dimensions.
    inline explicit DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(0), m_rows(0), m_cols(0) {}
    // Allocates `size` elements and records the given dimensions.
    inline DenseStorage(DenseIndex size, DenseIndex rows, DenseIndex cols)
      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
    { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
    inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
    // Exchanges buffer pointers and dimensions; no coefficient is copied.
    inline void swap(DenseStorage& other)
    { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
    inline DenseIndex rows(void) const {return m_rows;}
    inline DenseIndex cols(void) const {return m_cols;}
    // Reallocates the buffer to `size` elements through the realloc helper (which receives the old
    // element count), then records the new dimensions.
    inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex cols)
    {
      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
      m_rows = rows;
      m_cols = cols;
    }
    // Resizes without preserving coefficients. The buffer is only released and re-allocated when
    // the total element count changes; a `size` of 0 leaves a null buffer.
    void resize(DenseIndex size, DenseIndex rows, DenseIndex cols)
    {
      if(size != m_rows*m_cols)
      {
        internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols);
        // The old buffer is gone: put the object back into the empty state before allocating, so
        // that if the allocation helper throws, the destructor does not release the stale pointer
        // a second time.
        m_data = 0;
        m_rows = 0;
        m_cols = 0;
        if (size)
          m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
      }
      m_rows = rows;
      m_cols = cols;
    }
    inline const T *data() const { return m_data; }
    inline T *data() { return m_data; }
};
// matrix with dynamic width and fixed height (so that matrix has dynamic size).
// The number of rows is the compile-time constant _Rows; the number of columns is a run-time value.
// The coefficients live in a buffer obtained from the internal conditional_aligned_* helpers, whose
// boolean template argument is true unless _Options contains DontAlign.
//
// The destructor and the resize functions release/reallocate _Rows*m_cols elements, so the `size`
// argument is presumably always _Rows*cols -- callers must guarantee this.
//
// NOTE(review): no copy constructor / copy assignment is declared here, so an implicit copy would
// share m_data with its source. Confirm that owners never copy a storage object directly.
template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Rows, Dynamic, _Options>
{
    T *m_data;
    DenseIndex m_cols;
  public:
    // Empty storage: null buffer, zero columns.
    inline explicit DenseStorage() : m_data(0), m_cols(0) {}
    inline DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
    // Allocates `size` elements and records the column count; the `rows` argument is ignored.
    inline DenseStorage(DenseIndex size, DenseIndex, DenseIndex cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
    { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
    inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
    // Exchanges buffer pointers and column counts; no coefficient is copied.
    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
    inline static DenseIndex rows(void) {return _Rows;}
    inline DenseIndex cols(void) const {return m_cols;}
    // Reallocates the buffer to `size` elements through the realloc helper (which receives the old
    // element count), then records the new column count.
    inline void conservativeResize(DenseIndex size, DenseIndex, DenseIndex cols)
    {
      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
      m_cols = cols;
    }
    // Resizes without preserving coefficients. The buffer is only released and re-allocated when
    // the total element count changes; a `size` of 0 leaves a null buffer.
    EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex, DenseIndex cols)
    {
      if(size != _Rows*m_cols)
      {
        internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols);
        // The old buffer is gone: put the object back into the empty state before allocating, so
        // that if the allocation helper throws, the destructor does not release the stale pointer
        // a second time.
        m_data = 0;
        m_cols = 0;
        if (size)
          m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
      }
      m_cols = cols;
    }
    inline const T *data() const { return m_data; }
    inline T *data() { return m_data; }
};
// matrix with dynamic height and fixed width (so that matrix has dynamic size).
// The number of columns is the compile-time constant _Cols; the number of rows is a run-time value.
// The coefficients live in a buffer obtained from the internal conditional_aligned_* helpers, whose
// boolean template argument is true unless _Options contains DontAlign.
//
// The destructor and the resize functions release/reallocate m_rows*_Cols elements, so the `size`
// argument is presumably always rows*_Cols -- callers must guarantee this.
//
// NOTE(review): no copy constructor / copy assignment is declared here, so an implicit copy would
// share m_data with its source. Confirm that owners never copy a storage object directly.
template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dynamic, _Cols, _Options>
{
    T *m_data;
    DenseIndex m_rows;
  public:
    // Empty storage: null buffer, zero rows.
    inline explicit DenseStorage() : m_data(0), m_rows(0) {}
    inline DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
    // Allocates `size` elements and records the row count; the `cols` argument is ignored.
    inline DenseStorage(DenseIndex size, DenseIndex rows, DenseIndex) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
    { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
    inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
    // Exchanges buffer pointers and row counts; no coefficient is copied.
    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
    inline DenseIndex rows(void) const {return m_rows;}
    inline static DenseIndex cols(void) {return _Cols;}
    // Reallocates the buffer to `size` elements through the realloc helper (which receives the old
    // element count), then records the new row count.
    inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex)
    {
      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
      m_rows = rows;
    }
    // Resizes without preserving coefficients. The buffer is only released and re-allocated when
    // the total element count changes; a `size` of 0 leaves a null buffer.
    EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex rows, DenseIndex)
    {
      if(size != m_rows*_Cols)
      {
        internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows);
        // The old buffer is gone: put the object back into the empty state before allocating, so
        // that if the allocation helper throws, the destructor does not release the stale pointer
        // a second time.
        m_data = 0;
        m_rows = 0;
        if (size)
          m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
      }
      m_rows = rows;
    }
    inline const T *data() const { return m_data; }
    inline T *data() { return m_data; }
};
#endif // EIGEN_MATRIXSTORAGE_H

View File

@@ -26,6 +26,7 @@
#define EIGEN_DIAGONAL_H #define EIGEN_DIAGONAL_H
/** \class Diagonal /** \class Diagonal
* \ingroup Core_Module
* *
* \brief Expression of a diagonal/subdiagonal/superdiagonal in a matrix * \brief Expression of a diagonal/subdiagonal/superdiagonal in a matrix
* *
@@ -42,12 +43,14 @@
* *
* \sa MatrixBase::diagonal(), MatrixBase::diagonal(Index) * \sa MatrixBase::diagonal(), MatrixBase::diagonal(Index)
*/ */
namespace internal {
template<typename MatrixType, int DiagIndex> template<typename MatrixType, int DiagIndex>
struct ei_traits<Diagonal<MatrixType,DiagIndex> > struct traits<Diagonal<MatrixType,DiagIndex> >
: ei_traits<MatrixType> : traits<MatrixType>
{ {
typedef typename ei_nested<MatrixType>::type MatrixTypeNested; typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested; typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
typedef typename MatrixType::StorageKind StorageKind; typedef typename MatrixType::StorageKind StorageKind;
enum { enum {
AbsDiagIndex = DiagIndex<0 ? -DiagIndex : DiagIndex, // only used if DiagIndex != Dynamic AbsDiagIndex = DiagIndex<0 ? -DiagIndex : DiagIndex, // only used if DiagIndex != Dynamic
@@ -61,23 +64,25 @@ struct ei_traits<Diagonal<MatrixType,DiagIndex> >
MatrixType::MaxColsAtCompileTime) MatrixType::MaxColsAtCompileTime)
: (EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::MaxRowsAtCompileTime, MatrixType::MaxColsAtCompileTime) - AbsDiagIndex), : (EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::MaxRowsAtCompileTime, MatrixType::MaxColsAtCompileTime) - AbsDiagIndex),
MaxColsAtCompileTime = 1, MaxColsAtCompileTime = 1,
Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit, MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit,
CoeffReadCost = _MatrixTypeNested::CoeffReadCost, CoeffReadCost = _MatrixTypeNested::CoeffReadCost,
MatrixTypeOuterStride = ei_outer_stride_at_compile_time<MatrixType>::ret, MatrixTypeOuterStride = outer_stride_at_compile_time<MatrixType>::ret,
InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1, InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1,
OuterStrideAtCompileTime = 0 OuterStrideAtCompileTime = 0
}; };
}; };
}
template<typename MatrixType, int DiagIndex> class Diagonal template<typename MatrixType, int DiagIndex> class Diagonal
: public ei_dense_xpr_base< Diagonal<MatrixType,DiagIndex> >::type : public internal::dense_xpr_base< Diagonal<MatrixType,DiagIndex> >::type
{ {
public: public:
typedef typename ei_dense_xpr_base<Diagonal>::type Base; typedef typename internal::dense_xpr_base<Diagonal>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal) EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)
inline Diagonal(const MatrixType& matrix, Index index = DiagIndex) : m_matrix(matrix), m_index(index) {} inline Diagonal(MatrixType& matrix, Index index = DiagIndex) : m_matrix(matrix), m_index(index) {}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
@@ -97,6 +102,12 @@ template<typename MatrixType, int DiagIndex> class Diagonal
} }
inline Scalar& coeffRef(Index row, Index) inline Scalar& coeffRef(Index row, Index)
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
}
inline const Scalar& coeffRef(Index row, Index) const
{ {
return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset()); return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
} }
@@ -107,6 +118,12 @@ template<typename MatrixType, int DiagIndex> class Diagonal
} }
inline Scalar& coeffRef(Index index) inline Scalar& coeffRef(Index index)
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return m_matrix.const_cast_derived().coeffRef(index+rowOffset(), index+colOffset());
}
inline const Scalar& coeffRef(Index index) const
{ {
return m_matrix.const_cast_derived().coeffRef(index+rowOffset(), index+colOffset()); return m_matrix.const_cast_derived().coeffRef(index+rowOffset(), index+colOffset());
} }
@@ -118,13 +135,16 @@ template<typename MatrixType, int DiagIndex> class Diagonal
protected: protected:
const typename MatrixType::Nested m_matrix; const typename MatrixType::Nested m_matrix;
const ei_variable_if_dynamic<Index, DiagIndex> m_index; const internal::variable_if_dynamic<Index, DiagIndex> m_index;
private: private:
// some compilers may fail to optimize std::max etc in case of compile-time constants... // some compilers may fail to optimize std::max etc in case of compile-time constants...
EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); } EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); }
EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); } EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; } EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
// trigger a compile time error if someone tries to call packet
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index,Index) const;
}; };
/** \returns an expression of the main diagonal of the matrix \c *this /** \returns an expression of the main diagonal of the matrix \c *this
@@ -136,18 +156,18 @@ template<typename MatrixType, int DiagIndex> class Diagonal
* *
* \sa class Diagonal */ * \sa class Diagonal */
template<typename Derived> template<typename Derived>
inline Diagonal<Derived, 0> inline typename MatrixBase<Derived>::DiagonalReturnType
MatrixBase<Derived>::diagonal() MatrixBase<Derived>::diagonal()
{ {
return Diagonal<Derived, 0>(derived()); return derived();
} }
/** This is the const version of diagonal(). */ /** This is the const version of diagonal(). */
template<typename Derived> template<typename Derived>
inline const Diagonal<Derived, 0> inline const typename MatrixBase<Derived>::ConstDiagonalReturnType
MatrixBase<Derived>::diagonal() const MatrixBase<Derived>::diagonal() const
{ {
return Diagonal<Derived, 0>(derived()); return ConstDiagonalReturnType(derived());
} }
/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this /** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
@@ -162,18 +182,18 @@ MatrixBase<Derived>::diagonal() const
* *
* \sa MatrixBase::diagonal(), class Diagonal */ * \sa MatrixBase::diagonal(), class Diagonal */
template<typename Derived> template<typename Derived>
inline Diagonal<Derived, Dynamic> inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Dynamic>::Type
MatrixBase<Derived>::diagonal(Index index) MatrixBase<Derived>::diagonal(Index index)
{ {
return Diagonal<Derived, Dynamic>(derived(), index); return typename DiagonalIndexReturnType<Dynamic>::Type(derived(), index);
} }
/** This is the const version of diagonal(Index). */ /** This is the const version of diagonal(Index). */
template<typename Derived> template<typename Derived>
inline const Diagonal<Derived, Dynamic> inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Dynamic>::Type
MatrixBase<Derived>::diagonal(Index index) const MatrixBase<Derived>::diagonal(Index index) const
{ {
return Diagonal<Derived, Dynamic>(derived(), index); return typename ConstDiagonalIndexReturnType<Dynamic>::Type(derived(), index);
} }
/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this /** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
@@ -188,20 +208,20 @@ MatrixBase<Derived>::diagonal(Index index) const
* *
* \sa MatrixBase::diagonal(), class Diagonal */ * \sa MatrixBase::diagonal(), class Diagonal */
template<typename Derived> template<typename Derived>
template<int DiagIndex> template<int Index>
inline Diagonal<Derived,DiagIndex> inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index>::Type
MatrixBase<Derived>::diagonal() MatrixBase<Derived>::diagonal()
{ {
return Diagonal<Derived,DiagIndex>(derived()); return derived();
} }
/** This is the const version of diagonal<int>(). */ /** This is the const version of diagonal<int>(). */
template<typename Derived> template<typename Derived>
template<int DiagIndex> template<int Index>
inline const Diagonal<Derived,DiagIndex> inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index>::Type
MatrixBase<Derived>::diagonal() const MatrixBase<Derived>::diagonal() const
{ {
return Diagonal<Derived,DiagIndex>(derived()); return derived();
} }
#endif // EIGEN_DIAGONAL_H #endif // EIGEN_DIAGONAL_H

View File

@@ -31,10 +31,10 @@ template<typename Derived>
class DiagonalBase : public EigenBase<Derived> class DiagonalBase : public EigenBase<Derived>
{ {
public: public:
typedef typename ei_traits<Derived>::DiagonalVectorType DiagonalVectorType; typedef typename internal::traits<Derived>::DiagonalVectorType DiagonalVectorType;
typedef typename DiagonalVectorType::Scalar Scalar; typedef typename DiagonalVectorType::Scalar Scalar;
typedef typename ei_traits<Derived>::StorageKind StorageKind; typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index; typedef typename internal::traits<Derived>::Index Index;
enum { enum {
RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
@@ -46,6 +46,8 @@ class DiagonalBase : public EigenBase<Derived>
}; };
typedef Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, 0, MaxRowsAtCompileTime, MaxColsAtCompileTime> DenseMatrixType; typedef Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, 0, MaxRowsAtCompileTime, MaxColsAtCompileTime> DenseMatrixType;
typedef DenseMatrixType DenseType;
typedef DiagonalMatrix<Scalar,DiagonalVectorType::SizeAtCompileTime,DiagonalVectorType::MaxSizeAtCompileTime> PlainObject;
inline const Derived& derived() const { return *static_cast<const Derived*>(this); } inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
inline Derived& derived() { return *static_cast<Derived*>(this); } inline Derived& derived() { return *static_cast<Derived*>(this); }
@@ -70,11 +72,24 @@ class DiagonalBase : public EigenBase<Derived>
const DiagonalProduct<MatrixDerived, Derived, OnTheLeft> const DiagonalProduct<MatrixDerived, Derived, OnTheLeft>
operator*(const MatrixBase<MatrixDerived> &matrix) const; operator*(const MatrixBase<MatrixDerived> &matrix) const;
inline const DiagonalWrapper<CwiseUnaryOp<ei_scalar_inverse_op<Scalar>, DiagonalVectorType> > inline const DiagonalWrapper<CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
inverse() const inverse() const
{ {
return diagonal().cwiseInverse(); return diagonal().cwiseInverse();
} }
#ifdef EIGEN2_SUPPORT
template<typename OtherDerived>
bool isApprox(const DiagonalBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
{
return diagonal().isApprox(other.diagonal(), precision);
}
template<typename OtherDerived>
bool isApprox(const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
{
return toDenseMatrix().isApprox(other, precision);
}
#endif
}; };
template<typename Derived> template<typename Derived>
@@ -87,6 +102,7 @@ void DiagonalBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
#endif #endif
/** \class DiagonalMatrix /** \class DiagonalMatrix
* \ingroup Core_Module
* *
* \brief Represents a diagonal matrix with its storage * \brief Represents a diagonal matrix with its storage
* *
@@ -97,26 +113,31 @@ void DiagonalBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
* *
* \sa class DiagonalWrapper * \sa class DiagonalWrapper
*/ */
namespace internal {
template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime> template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime>
struct ei_traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> > struct traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
: ei_traits<Matrix<_Scalar,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> > : traits<Matrix<_Scalar,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
{ {
typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType; typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType;
typedef Dense StorageKind; typedef Dense StorageKind;
typedef DenseIndex Index; typedef DenseIndex Index;
enum {
Flags = LvalueBit
};
}; };
}
template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime> template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime>
class DiagonalMatrix class DiagonalMatrix
: public DiagonalBase<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> > : public DiagonalBase<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
{ {
public: public:
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
typedef typename ei_traits<DiagonalMatrix>::DiagonalVectorType DiagonalVectorType; typedef typename internal::traits<DiagonalMatrix>::DiagonalVectorType DiagonalVectorType;
typedef const DiagonalMatrix& Nested; typedef const DiagonalMatrix& Nested;
typedef _Scalar Scalar; typedef _Scalar Scalar;
typedef typename ei_traits<DiagonalMatrix>::StorageKind StorageKind; typedef typename internal::traits<DiagonalMatrix>::StorageKind StorageKind;
typedef typename ei_traits<DiagonalMatrix>::Index Index; typedef typename internal::traits<DiagonalMatrix>::Index Index;
#endif #endif
protected: protected:
@@ -170,7 +191,7 @@ class DiagonalMatrix
*/ */
DiagonalMatrix& operator=(const DiagonalMatrix& other) DiagonalMatrix& operator=(const DiagonalMatrix& other)
{ {
m_diagonal = other.m_diagonal(); m_diagonal = other.diagonal();
return *this; return *this;
} }
#endif #endif
@@ -188,6 +209,7 @@ class DiagonalMatrix
}; };
/** \class DiagonalWrapper /** \class DiagonalWrapper
* \ingroup Core_Module
* *
* \brief Expression of a diagonal matrix * \brief Expression of a diagonal matrix
* *
@@ -199,8 +221,10 @@ class DiagonalMatrix
* *
* \sa class DiagonalMatrix, class DiagonalBase, MatrixBase::asDiagonal() * \sa class DiagonalMatrix, class DiagonalBase, MatrixBase::asDiagonal()
*/ */
namespace internal {
template<typename _DiagonalVectorType> template<typename _DiagonalVectorType>
struct ei_traits<DiagonalWrapper<_DiagonalVectorType> > struct traits<DiagonalWrapper<_DiagonalVectorType> >
{ {
typedef _DiagonalVectorType DiagonalVectorType; typedef _DiagonalVectorType DiagonalVectorType;
typedef typename DiagonalVectorType::Scalar Scalar; typedef typename DiagonalVectorType::Scalar Scalar;
@@ -211,13 +235,14 @@ struct ei_traits<DiagonalWrapper<_DiagonalVectorType> >
ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
MaxRowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, MaxRowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
MaxColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, MaxColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
Flags = 0 Flags = traits<DiagonalVectorType>::Flags & LvalueBit
}; };
}; };
}
template<typename _DiagonalVectorType> template<typename _DiagonalVectorType>
class DiagonalWrapper class DiagonalWrapper
: public DiagonalBase<DiagonalWrapper<_DiagonalVectorType> >, ei_no_assignment_operator : public DiagonalBase<DiagonalWrapper<_DiagonalVectorType> >, internal::no_assignment_operator
{ {
public: public:
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -245,7 +270,7 @@ class DiagonalWrapper
* \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal() * \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal()
**/ **/
template<typename Derived> template<typename Derived>
inline const DiagonalWrapper<Derived> inline const DiagonalWrapper<const Derived>
MatrixBase<Derived>::asDiagonal() const MatrixBase<Derived>::asDiagonal() const
{ {
return derived(); return derived();
@@ -260,21 +285,20 @@ MatrixBase<Derived>::asDiagonal() const
* \sa asDiagonal() * \sa asDiagonal()
*/ */
template<typename Derived> template<typename Derived>
bool MatrixBase<Derived>::isDiagonal bool MatrixBase<Derived>::isDiagonal(RealScalar prec) const
(RealScalar prec) const
{ {
if(cols() != rows()) return false; if(cols() != rows()) return false;
RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1); RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1);
for(Index j = 0; j < cols(); ++j) for(Index j = 0; j < cols(); ++j)
{ {
RealScalar absOnDiagonal = ei_abs(coeff(j,j)); RealScalar absOnDiagonal = internal::abs(coeff(j,j));
if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal; if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal;
} }
for(Index j = 0; j < cols(); ++j) for(Index j = 0; j < cols(); ++j)
for(Index i = 0; i < j; ++i) for(Index i = 0; i < j; ++i)
{ {
if(!ei_isMuchSmallerThan(coeff(i, j), maxAbsOnDiagonal, prec)) return false; if(!internal::isMuchSmallerThan(coeff(i, j), maxAbsOnDiagonal, prec)) return false;
if(!ei_isMuchSmallerThan(coeff(j, i), maxAbsOnDiagonal, prec)) return false; if(!internal::isMuchSmallerThan(coeff(j, i), maxAbsOnDiagonal, prec)) return false;
} }
return true; return true;
} }

View File

@@ -26,24 +26,34 @@
#ifndef EIGEN_DIAGONALPRODUCT_H #ifndef EIGEN_DIAGONALPRODUCT_H
#define EIGEN_DIAGONALPRODUCT_H #define EIGEN_DIAGONALPRODUCT_H
namespace internal {
template<typename MatrixType, typename DiagonalType, int ProductOrder> template<typename MatrixType, typename DiagonalType, int ProductOrder>
struct ei_traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> > struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
: ei_traits<MatrixType> : traits<MatrixType>
{ {
typedef typename ei_scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar; typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
enum { enum {
RowsAtCompileTime = MatrixType::RowsAtCompileTime, RowsAtCompileTime = MatrixType::RowsAtCompileTime,
ColsAtCompileTime = MatrixType::ColsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
Flags = (HereditaryBits & (unsigned int)(MatrixType::Flags))
| (PacketAccessBit & (unsigned int)(MatrixType::Flags) & (unsigned int)(DiagonalType::DiagonalVectorType::Flags)), _StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor,
_PacketOnDiag = !((int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft)
||(int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)),
_SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
// FIXME currently we need same types, but in the future the next rule should be the one
//_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::Flags)&PacketAccessBit))),
_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && ((!_PacketOnDiag) || (bool(int(DiagonalType::Flags)&PacketAccessBit))),
Flags = (HereditaryBits & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0),
CoeffReadCost = NumTraits<Scalar>::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost CoeffReadCost = NumTraits<Scalar>::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost
}; };
}; };
}
template<typename MatrixType, typename DiagonalType, int ProductOrder> template<typename MatrixType, typename DiagonalType, int ProductOrder>
class DiagonalProduct : ei_no_assignment_operator, class DiagonalProduct : internal::no_assignment_operator,
public MatrixBase<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> > public MatrixBase<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
{ {
public: public:
@@ -54,7 +64,7 @@ class DiagonalProduct : ei_no_assignment_operator,
inline DiagonalProduct(const MatrixType& matrix, const DiagonalType& diagonal) inline DiagonalProduct(const MatrixType& matrix, const DiagonalType& diagonal)
: m_matrix(matrix), m_diagonal(diagonal) : m_matrix(matrix), m_diagonal(diagonal)
{ {
ei_assert(diagonal.diagonal().size() == (ProductOrder == OnTheLeft ? matrix.rows() : matrix.cols())); eigen_assert(diagonal.diagonal().size() == (ProductOrder == OnTheLeft ? matrix.rows() : matrix.cols()));
} }
inline Index rows() const { return m_matrix.rows(); } inline Index rows() const { return m_matrix.rows(); }
@@ -69,26 +79,34 @@ class DiagonalProduct : ei_no_assignment_operator,
EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
{ {
enum { enum {
StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor, StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
DiagonalVectorPacketLoadMode = (LoadMode == Aligned && ((InnerSize%16) == 0)) ? Aligned : Unaligned
}; };
const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? row : col; const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? row : col;
if((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft) return packet_impl<LoadMode>(row,col,indexInDiagonalVector,typename internal::conditional<
||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)) ((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft)
{ ||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), internal::true_type, internal::false_type>::type());
return ei_pmul(m_matrix.template packet<LoadMode>(row, col),
ei_pset1(m_diagonal.diagonal().coeff(indexInDiagonalVector)));
}
else
{
return ei_pmul(m_matrix.template packet<LoadMode>(row, col),
m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(indexInDiagonalVector));
}
} }
protected: protected:
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const
{
return internal::pmul(m_matrix.template packet<LoadMode>(row, col),
internal::pset1<PacketScalar>(m_diagonal.diagonal().coeff(id)));
}
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const
{
enum {
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
DiagonalVectorPacketLoadMode = (LoadMode == Aligned && ((InnerSize%16) == 0)) ? Aligned : Unaligned
};
return internal::pmul(m_matrix.template packet<LoadMode>(row, col),
m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(id));
}
const typename MatrixType::Nested m_matrix; const typename MatrixType::Nested m_matrix;
const typename DiagonalType::Nested m_diagonal; const typename DiagonalType::Nested m_diagonal;
}; };

View File

@@ -25,6 +25,8 @@
#ifndef EIGEN_DOT_H #ifndef EIGEN_DOT_H
#define EIGEN_DOT_H #define EIGEN_DOT_H
namespace internal {
// helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot // helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot
// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE // with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE
// looking at the static assertions. Thus this is a trick to get better compile errors. // looking at the static assertions. Thus this is a trick to get better compile errors.
@@ -37,23 +39,27 @@ template<typename T, typename U,
// revert to || as soon as not needed anymore. // revert to || as soon as not needed anymore.
(int(T::ColsAtCompileTime) == 1 && int(U::RowsAtCompileTime) == 1)) (int(T::ColsAtCompileTime) == 1 && int(U::RowsAtCompileTime) == 1))
> >
struct ei_dot_nocheck struct dot_nocheck
{ {
static inline typename ei_traits<T>::Scalar run(const MatrixBase<T>& a, const MatrixBase<U>& b) typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{ {
return a.conjugate().cwiseProduct(b).sum(); return a.template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
} }
}; };
template<typename T, typename U> template<typename T, typename U>
struct ei_dot_nocheck<T, U, true> struct dot_nocheck<T, U, true>
{ {
static inline typename ei_traits<T>::Scalar run(const MatrixBase<T>& a, const MatrixBase<U>& b) typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{ {
return a.adjoint().cwiseProduct(b).sum(); return a.transpose().template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
} }
}; };
} // end namespace internal
/** \returns the dot product of *this with other. /** \returns the dot product of *this with other.
* *
* \only_for_vectors * \only_for_vectors
@@ -66,19 +72,47 @@ struct ei_dot_nocheck<T, U, true>
*/ */
template<typename Derived> template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
typename ei_traits<Derived>::Scalar typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived) EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
EIGEN_STATIC_ASSERT((ei_is_same_type<Scalar, typename OtherDerived::Scalar>::ret), typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
eigen_assert(size() == other.size());
return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);
}
#ifdef EIGEN2_SUPPORT
/** \returns the dot product of *this with other, with the Eigen2 convention that the dot product is linear in the first variable
* (conjugating the second variable). Of course this only makes a difference in the complex case.
*
* This method is only available in EIGEN2_SUPPORT mode.
*
* \only_for_vectors
*
* \sa dot()
*/
template<typename Derived>
template<typename OtherDerived>
typename internal::traits<Derived>::Scalar
MatrixBase<Derived>::eigen2_dot(const MatrixBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
ei_assert(size() == other.size()); eigen_assert(size() == other.size());
return ei_dot_nocheck<Derived,OtherDerived>::run(*this, other); return internal::dot_nocheck<OtherDerived,Derived>::run(other,*this);
} }
#endif
//---------- implementation of L2 norm and related functions ---------- //---------- implementation of L2 norm and related functions ----------
@@ -87,9 +121,9 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
* \sa dot(), norm() * \sa dot(), norm()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
{ {
return ei_real((*this).cwiseAbs2().sum()); return internal::real((*this).cwiseAbs2().sum());
} }
/** \returns the \em l2 norm of *this, i.e., for vectors, the square root of the dot product of *this with itself. /** \returns the \em l2 norm of *this, i.e., for vectors, the square root of the dot product of *this with itself.
@@ -97,9 +131,9 @@ EIGEN_STRONG_INLINE typename NumTraits<typename ei_traits<Derived>::Scalar>::Rea
* \sa dot(), squaredNorm() * \sa dot(), squaredNorm()
*/ */
template<typename Derived> template<typename Derived>
inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
{ {
return ei_sqrt(squaredNorm()); return internal::sqrt(squaredNorm());
} }
/** \returns an expression of the quotient of *this by its own norm. /** \returns an expression of the quotient of *this by its own norm.
@@ -112,8 +146,8 @@ template<typename Derived>
inline const typename MatrixBase<Derived>::PlainObject inline const typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::normalized() const MatrixBase<Derived>::normalized() const
{ {
typedef typename ei_nested<Derived>::type Nested; typedef typename internal::nested<Derived>::type Nested;
typedef typename ei_unref<Nested>::type _Nested; typedef typename internal::remove_reference<Nested>::type _Nested;
_Nested n(derived()); _Nested n(derived());
return n / n.norm(); return n / n.norm();
} }
@@ -132,55 +166,59 @@ inline void MatrixBase<Derived>::normalize()
//---------- implementation of other norms ---------- //---------- implementation of other norms ----------
namespace internal {
template<typename Derived, int p> template<typename Derived, int p>
struct ei_lpNorm_selector struct lpNorm_selector
{ {
typedef typename NumTraits<typename ei_traits<Derived>::Scalar>::Real RealScalar; typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
inline static RealScalar run(const MatrixBase<Derived>& m) inline static RealScalar run(const MatrixBase<Derived>& m)
{ {
return ei_pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p); return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
} }
}; };
template<typename Derived> template<typename Derived>
struct ei_lpNorm_selector<Derived, 1> struct lpNorm_selector<Derived, 1>
{ {
inline static typename NumTraits<typename ei_traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m) inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{ {
return m.cwiseAbs().sum(); return m.cwiseAbs().sum();
} }
}; };
template<typename Derived> template<typename Derived>
struct ei_lpNorm_selector<Derived, 2> struct lpNorm_selector<Derived, 2>
{ {
inline static typename NumTraits<typename ei_traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m) inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{ {
return m.norm(); return m.norm();
} }
}; };
template<typename Derived> template<typename Derived>
struct ei_lpNorm_selector<Derived, Infinity> struct lpNorm_selector<Derived, Infinity>
{ {
inline static typename NumTraits<typename ei_traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m) inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{ {
return m.cwiseAbs().maxCoeff(); return m.cwiseAbs().maxCoeff();
} }
}; };
} // end namespace internal
/** \returns the \f$ \ell^p \f$ norm of *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values /** \returns the \f$ \ell^p \f$ norm of *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values
* of the coefficients of *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^p\infty \f$ * of the coefficients of *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$
* norm, that is the maximum of the absolute values of the coefficients of *this. * norm, that is the maximum of the absolute values of the coefficients of *this.
* *
* \sa norm() * \sa norm()
*/ */
template<typename Derived> template<typename Derived>
template<int p> template<int p>
inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::lpNorm() const MatrixBase<Derived>::lpNorm() const
{ {
return ei_lpNorm_selector<Derived, p>::run(*this); return internal::lpNorm_selector<Derived, p>::run(*this);
} }
//---------- implementation of isOrthogonal / isUnitary ---------- //---------- implementation of isOrthogonal / isUnitary ----------
@@ -196,9 +234,9 @@ template<typename OtherDerived>
bool MatrixBase<Derived>::isOrthogonal bool MatrixBase<Derived>::isOrthogonal
(const MatrixBase<OtherDerived>& other, RealScalar prec) const (const MatrixBase<OtherDerived>& other, RealScalar prec) const
{ {
typename ei_nested<Derived,2>::type nested(derived()); typename internal::nested<Derived,2>::type nested(derived());
typename ei_nested<OtherDerived,2>::type otherNested(other.derived()); typename internal::nested<OtherDerived,2>::type otherNested(other.derived());
return ei_abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm(); return internal::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm();
} }
/** \returns true if *this is approximately an unitary matrix, /** \returns true if *this is approximately an unitary matrix,
@@ -218,10 +256,10 @@ bool MatrixBase<Derived>::isUnitary(RealScalar prec) const
typename Derived::Nested nested(derived()); typename Derived::Nested nested(derived());
for(Index i = 0; i < cols(); ++i) for(Index i = 0; i < cols(); ++i)
{ {
if(!ei_isApprox(nested.col(i).squaredNorm(), static_cast<RealScalar>(1), prec)) if(!internal::isApprox(nested.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
return false; return false;
for(Index j = 0; j < i; ++j) for(Index j = 0; j < i; ++j)
if(!ei_isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast<Scalar>(1), prec)) if(!internal::isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast<Scalar>(1), prec))
return false; return false;
} }
return true; return true;

View File

@@ -34,13 +34,15 @@
* Besides MatrixBase-derived classes, this also includes special matrix classes such as diagonal matrices, etc. * Besides MatrixBase-derived classes, this also includes special matrix classes such as diagonal matrices, etc.
* *
* Notice that this class is trivial, it is only used to disambiguate overloaded functions. * Notice that this class is trivial, it is only used to disambiguate overloaded functions.
*
* \sa \ref TopicClassHierarchy
*/ */
template<typename Derived> struct EigenBase template<typename Derived> struct EigenBase
{ {
// typedef typename ei_plain_matrix_type<Derived>::type PlainObject; // typedef typename internal::plain_matrix_type<Derived>::type PlainObject;
typedef typename ei_traits<Derived>::StorageKind StorageKind; typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index; typedef typename internal::traits<Derived>::Index Index;
/** \returns a reference to the derived object */ /** \returns a reference to the derived object */
Derived& derived() { return *static_cast<Derived*>(this); } Derived& derived() { return *static_cast<Derived*>(this); }
@@ -49,6 +51,8 @@ template<typename Derived> struct EigenBase
inline Derived& const_cast_derived() const inline Derived& const_cast_derived() const
{ return *static_cast<Derived*>(const_cast<EigenBase*>(this)); } { return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
inline const Derived& const_derived() const
{ return *static_cast<const Derived*>(this); }
/** \returns the number of rows. \sa cols(), RowsAtCompileTime */ /** \returns the number of rows. \sa cols(), RowsAtCompileTime */
inline Index rows() const { return derived().rows(); } inline Index rows() const { return derived().rows(); }

View File

@@ -26,6 +26,7 @@
#define EIGEN_FLAGGED_H #define EIGEN_FLAGGED_H
/** \class Flagged /** \class Flagged
* \ingroup Core_Module
* *
* \brief Expression with modified flags * \brief Expression with modified flags
* *
@@ -39,11 +40,14 @@
* *
* \sa MatrixBase::flagged() * \sa MatrixBase::flagged()
*/ */
namespace internal {
template<typename ExpressionType, unsigned int Added, unsigned int Removed> template<typename ExpressionType, unsigned int Added, unsigned int Removed>
struct ei_traits<Flagged<ExpressionType, Added, Removed> > : ei_traits<ExpressionType> struct traits<Flagged<ExpressionType, Added, Removed> > : traits<ExpressionType>
{ {
enum { Flags = (ExpressionType::Flags | Added) & ~Removed }; enum { Flags = (ExpressionType::Flags | Added) & ~Removed };
}; };
}
template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged
: public MatrixBase<Flagged<ExpressionType, Added, Removed> > : public MatrixBase<Flagged<ExpressionType, Added, Removed> >
@@ -51,9 +55,10 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
public: public:
typedef MatrixBase<Flagged> Base; typedef MatrixBase<Flagged> Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Flagged) EIGEN_DENSE_PUBLIC_INTERFACE(Flagged)
typedef typename ei_meta_if<ei_must_nest_by_value<ExpressionType>::ret, typedef typename internal::conditional<internal::must_nest_by_value<ExpressionType>::ret,
ExpressionType, const ExpressionType&>::ret ExpressionTypeNested; ExpressionType, const ExpressionType&>::type ExpressionTypeNested;
typedef typename ExpressionType::InnerIterator InnerIterator; typedef typename ExpressionType::InnerIterator InnerIterator;
inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {} inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {}
@@ -63,21 +68,31 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
inline Index outerStride() const { return m_matrix.outerStride(); } inline Index outerStride() const { return m_matrix.outerStride(); }
inline Index innerStride() const { return m_matrix.innerStride(); } inline Index innerStride() const { return m_matrix.innerStride(); }
inline const Scalar coeff(Index row, Index col) const inline CoeffReturnType coeff(Index row, Index col) const
{ {
return m_matrix.coeff(row, col); return m_matrix.coeff(row, col);
} }
inline CoeffReturnType coeff(Index index) const
{
return m_matrix.coeff(index);
}
inline const Scalar& coeffRef(Index row, Index col) const
{
return m_matrix.const_cast_derived().coeffRef(row, col);
}
inline const Scalar& coeffRef(Index index) const
{
return m_matrix.const_cast_derived().coeffRef(index);
}
inline Scalar& coeffRef(Index row, Index col) inline Scalar& coeffRef(Index row, Index col)
{ {
return m_matrix.const_cast_derived().coeffRef(row, col); return m_matrix.const_cast_derived().coeffRef(row, col);
} }
inline const Scalar coeff(Index index) const
{
return m_matrix.coeff(index);
}
inline Scalar& coeffRef(Index index) inline Scalar& coeffRef(Index index)
{ {
return m_matrix.const_cast_derived().coeffRef(index); return m_matrix.const_cast_derived().coeffRef(index);

View File

@@ -26,6 +26,7 @@
#define EIGEN_FORCEALIGNEDACCESS_H #define EIGEN_FORCEALIGNEDACCESS_H
/** \class ForceAlignedAccess /** \class ForceAlignedAccess
* \ingroup Core_Module
* *
* \brief Enforce aligned packet loads and stores regardless of what is requested * \brief Enforce aligned packet loads and stores regardless of what is requested
* *
@@ -36,16 +37,19 @@
* *
* \sa MatrixBase::forceAlignedAccess() * \sa MatrixBase::forceAlignedAccess()
*/ */
namespace internal {
template<typename ExpressionType> template<typename ExpressionType>
struct ei_traits<ForceAlignedAccess<ExpressionType> > : public ei_traits<ExpressionType> struct traits<ForceAlignedAccess<ExpressionType> > : public traits<ExpressionType>
{}; {};
}
template<typename ExpressionType> class ForceAlignedAccess template<typename ExpressionType> class ForceAlignedAccess
: public ei_dense_xpr_base< ForceAlignedAccess<ExpressionType> >::type : public internal::dense_xpr_base< ForceAlignedAccess<ExpressionType> >::type
{ {
public: public:
typedef typename ei_dense_xpr_base<ForceAlignedAccess>::type Base; typedef typename internal::dense_xpr_base<ForceAlignedAccess>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess) EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess)
inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}
@@ -133,7 +137,7 @@ MatrixBase<Derived>::forceAlignedAccess()
*/ */
template<typename Derived> template<typename Derived>
template<bool Enable> template<bool Enable>
inline typename ei_makeconst<typename ei_meta_if<Enable,ForceAlignedAccess<Derived>,Derived&>::ret>::type inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type
MatrixBase<Derived>::forceAlignedAccessIf() const MatrixBase<Derived>::forceAlignedAccessIf() const
{ {
return derived(); return derived();
@@ -144,7 +148,7 @@ MatrixBase<Derived>::forceAlignedAccessIf() const
*/ */
template<typename Derived> template<typename Derived>
template<bool Enable> template<bool Enable>
inline typename ei_meta_if<Enable,ForceAlignedAccess<Derived>,Derived&>::ret inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type
MatrixBase<Derived>::forceAlignedAccessIf() MatrixBase<Derived>::forceAlignedAccessIf()
{ {
return derived(); return derived();

File diff suppressed because it is too large Load Diff

View File

@@ -26,9 +26,67 @@
#ifndef EIGEN_FUZZY_H #ifndef EIGEN_FUZZY_H
#define EIGEN_FUZZY_H #define EIGEN_FUZZY_H
// TODO support small integer types properly i.e. do exact compare on coeffs --- taking a HS norm is guaranteed to cause integer overflow. namespace internal
{
template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
struct isApprox_selector
{
static bool run(const Derived& x, const OtherDerived& y, typename Derived::RealScalar prec)
{
const typename internal::nested<Derived,2>::type nested(x);
const typename internal::nested<OtherDerived,2>::type otherNested(y);
return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * std::min(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
}
};
template<typename Derived, typename OtherDerived>
struct isApprox_selector<Derived, OtherDerived, true>
{
static bool run(const Derived& x, const OtherDerived& y, typename Derived::RealScalar)
{
return x.matrix() == y.matrix();
}
};
template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
struct isMuchSmallerThan_object_selector
{
static bool run(const Derived& x, const OtherDerived& y, typename Derived::RealScalar prec)
{
return x.cwiseAbs2().sum() <= abs2(prec) * y.cwiseAbs2().sum();
}
};
template<typename Derived, typename OtherDerived>
struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
{
static bool run(const Derived& x, const OtherDerived&, typename Derived::RealScalar)
{
return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
}
};
template<typename Derived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
struct isMuchSmallerThan_scalar_selector
{
static bool run(const Derived& x, const typename Derived::RealScalar& y, typename Derived::RealScalar prec)
{
return x.cwiseAbs2().sum() <= abs2(prec * y);
}
};
/** \internal
  * Integer-scalar specialization of isMuchSmallerThan_scalar_selector: \a x only qualifies when it
  * compares equal to the zero object of the same dimensions. The reference scalar and the
  * precision are both ignored.
  */
template<typename Derived>
struct isMuchSmallerThan_scalar_selector<Derived, true>
{
  static bool run(const Derived& x, const typename Derived::RealScalar& /*y*/, typename Derived::RealScalar /*prec*/)
  {
    const bool isExactlyZero = (x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix());
    return isExactlyZero;
  }
};
} // end namespace internal
#ifndef EIGEN_LEGACY_COMPARES
/** \returns \c true if \c *this is approximately equal to \a other, within the precision /** \returns \c true if \c *this is approximately equal to \a other, within the precision
* determined by \a prec. * determined by \a prec.
@@ -42,10 +100,10 @@
* \note Because of the multiplicativeness of this comparison, one can't use this function * \note Because of the multiplicativeness of this comparison, one can't use this function
* to check whether \c *this is approximately equal to the zero matrix or vector. * to check whether \c *this is approximately equal to the zero matrix or vector.
* Indeed, \c isApprox(zero) returns false unless \c *this itself is exactly the zero matrix * Indeed, \c isApprox(zero) returns false unless \c *this itself is exactly the zero matrix
* or vector. If you want to test whether \c *this is zero, use ei_isMuchSmallerThan(const * or vector. If you want to test whether \c *this is zero, use internal::isMuchSmallerThan(const
* RealScalar&, RealScalar) instead. * RealScalar&, RealScalar) instead.
* *
* \sa ei_isMuchSmallerThan(const RealScalar&, RealScalar) const * \sa internal::isMuchSmallerThan(const RealScalar&, RealScalar) const
*/ */
template<typename Derived> template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
@@ -54,12 +112,7 @@ bool DenseBase<Derived>::isApprox(
RealScalar prec RealScalar prec
) const ) const
{ {
const typename ei_nested<Derived,2>::type nested(derived()); return internal::isApprox_selector<Derived, OtherDerived>::run(derived(), other.derived(), prec);
const typename ei_nested<OtherDerived,2>::type otherNested(other.derived());
// std::cerr << typeid(Derived).name() << " => " << typeid(typename ei_nested<Derived,2>::type).name() << "\n";
// std::cerr << typeid(OtherDerived).name() << " => " << typeid(typename ei_nested<OtherDerived,2>::type).name() << "\n";
// return false;
return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * std::min(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
} }
/** \returns \c true if the norm of \c *this is much smaller than \a other, /** \returns \c true if the norm of \c *this is much smaller than \a other,
@@ -81,7 +134,7 @@ bool DenseBase<Derived>::isMuchSmallerThan(
RealScalar prec RealScalar prec
) const ) const
{ {
return derived().cwiseAbs2().sum() <= prec * prec * other * other; return internal::isMuchSmallerThan_scalar_selector<Derived>::run(derived(), other, prec);
} }
/** \returns \c true if the norm of \c *this is much smaller than the norm of \a other, /** \returns \c true if the norm of \c *this is much smaller than the norm of \a other,
@@ -101,140 +154,7 @@ bool DenseBase<Derived>::isMuchSmallerThan(
RealScalar prec RealScalar prec
) const ) const
{ {
return derived().cwiseAbs2().sum() <= prec * prec * other.derived().cwiseAbs2().sum(); return internal::isMuchSmallerThan_object_selector<Derived, OtherDerived>::run(derived(), other.derived(), prec);
} }
#else
// Forward declaration of the helper used by the legacy (EIGEN_LEGACY_COMPARES) fuzzy comparisons.
// It is declared here so the DenseBase member functions below can call it before its two partial
// specializations (IsVector == true / false) are defined further down.
// OtherDerived defaults to Derived for the scalar overload of isMuchSmallerThan, and IsVector
// defaults to Derived::IsVectorAtCompileTime.
template<typename Derived, typename OtherDerived=Derived, bool IsVector=Derived::IsVectorAtCompileTime>
struct ei_fuzzy_selector;
/** \returns \c true if \c *this and \a other are approximately equal, up to the precision \a prec.
  *
  * \note Fuzzy comparisons are multiplicative. Two vectors \f$ v \f$ and \f$ w \f$ count as
  * approximately equal within precision \f$ p \f$ when
  * \f[ \Vert v - w \Vert \leqslant p\,\min(\Vert v\Vert, \Vert w\Vert). \f]
  * For matrices, this test is applied to every column.
  *
  * \note Being multiplicative, this comparison cannot be used to test whether \c *this is
  * approximately the zero matrix or vector: \c isApprox(zero) only returns true when \c *this is
  * exactly zero. To test for "approximately zero", use
  * isMuchSmallerThan(const RealScalar&, RealScalar) instead.
  *
  * \sa isMuchSmallerThan(const RealScalar&, RealScalar) const
  */
template<typename Derived>
template<typename OtherDerived>
bool DenseBase<Derived>::isApprox(
  const DenseBase<OtherDerived>& other,
  RealScalar prec
) const
{
  // The selector picks the vector or the column-by-column implementation at compile time.
  typedef ei_fuzzy_selector<Derived,OtherDerived> Selector;
  return Selector::isApprox(derived(), other.derived(), prec);
}
/** \returns \c true if the norm of \c *this is much smaller than the scalar \a other,
  * up to the precision \a prec.
  *
  * \note Fuzzy comparisons are multiplicative. A vector \f$ v \f$ counts as much smaller than
  * \f$ x \f$ within precision \f$ p \f$ when
  * \f[ \Vert v \Vert \leqslant p\,\vert x\vert. \f]
  * For matrices, this test is applied to every column.
  *
  * \sa isApprox(), isMuchSmallerThan(const DenseBase<OtherDerived>&, RealScalar) const
  */
template<typename Derived>
bool DenseBase<Derived>::isMuchSmallerThan(
  const typename NumTraits<Scalar>::Real& other,
  RealScalar prec
) const
{
  // The selector picks the vector or the column-by-column implementation at compile time.
  typedef ei_fuzzy_selector<Derived> Selector;
  return Selector::isMuchSmallerThan(derived(), other, prec);
}
/** \returns \c true if the norm of \c *this is much smaller than the norm of \a other,
  * up to the precision \a prec.
  *
  * \note Fuzzy comparisons are multiplicative. A vector \f$ v \f$ counts as much smaller than a
  * vector \f$ w \f$ within precision \f$ p \f$ when
  * \f[ \Vert v \Vert \leqslant p\,\Vert w\Vert. \f]
  * For matrices, this test is applied to every column.
  *
  * \sa isApprox(), isMuchSmallerThan(const RealScalar&, RealScalar) const
  */
template<typename Derived>
template<typename OtherDerived>
bool DenseBase<Derived>::isMuchSmallerThan(
  const DenseBase<OtherDerived>& other,
  RealScalar prec
) const
{
  // The selector picks the vector or the column-by-column implementation at compile time.
  typedef ei_fuzzy_selector<Derived,OtherDerived> Selector;
  return Selector::isMuchSmallerThan(derived(), other.derived(), prec);
}
/** \internal
  * Legacy fuzzy comparisons, vector case (IsVector == true): every test is carried out once on
  * the squared norm of the whole object.
  */
template<typename Derived, typename OtherDerived>
struct ei_fuzzy_selector<Derived,OtherDerived,true>
{
  typedef typename Derived::RealScalar RealScalar;

  /** \returns true when |self - other|^2 <= min(|self|^2, |other|^2) * prec^2. */
  static bool isApprox(const Derived& self, const OtherDerived& other, RealScalar prec)
  {
    EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
    ei_assert(self.size() == other.size());
    return (self - other).squaredNorm()
        <= std::min(self.squaredNorm(), other.squaredNorm()) * prec * prec;
  }

  /** \returns true when |self|^2 <= abs2(other * prec), \a other being a real scalar. */
  static bool isMuchSmallerThan(const Derived& self, const RealScalar& other, RealScalar prec)
  {
    return self.squaredNorm() <= ei_abs2(other * prec);
  }

  /** \returns true when |self|^2 <= |other|^2 * prec^2. */
  static bool isMuchSmallerThan(const Derived& self, const OtherDerived& other, RealScalar prec)
  {
    EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
    ei_assert(self.size() == other.size());
    return self.squaredNorm()
        <= other.squaredNorm() * prec * prec;
  }
};
/** \internal
  * Legacy fuzzy comparisons, matrix case (IsVector == false): each test is applied column by
  * column and fails as soon as one column violates it.
  */
template<typename Derived, typename OtherDerived>
struct ei_fuzzy_selector<Derived,OtherDerived,false>
{
  typedef typename Derived::RealScalar RealScalar;
  typedef typename Derived::Index Index;

  /** \returns true when, for every column j,
    * |self.col(j) - other.col(j)|^2 <= min(|self.col(j)|^2, |other.col(j)|^2) * prec^2. */
  static bool isApprox(const Derived& self, const OtherDerived& other, RealScalar prec)
  {
    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
    ei_assert(self.rows() == other.rows() && self.cols() == other.cols());
    // NOTE(review): the Nested types presumably avoid re-evaluating costly expressions for each
    // column access -- confirm against the definition of Derived::Nested.
    typename Derived::Nested lhs(self);
    typename OtherDerived::Nested rhs(other);
    const Index columnCount = self.cols();
    for(Index col = 0; col < columnCount; ++col)
    {
      if((lhs.col(col) - rhs.col(col)).squaredNorm()
         > std::min(lhs.col(col).squaredNorm(), rhs.col(col).squaredNorm()) * prec * prec)
      {
        return false;
      }
    }
    return true;
  }

  /** \returns true when every column j satisfies |self.col(j)|^2 <= abs2(other * prec). */
  static bool isMuchSmallerThan(const Derived& self, const RealScalar& other, RealScalar prec)
  {
    typename Derived::Nested lhs(self);
    const Index columnCount = self.cols();
    for(Index col = 0; col < columnCount; ++col)
    {
      if(lhs.col(col).squaredNorm() > ei_abs2(other * prec))
      {
        return false;
      }
    }
    return true;
  }

  /** \returns true when every column j satisfies |self.col(j)|^2 <= |other.col(j)|^2 * prec^2. */
  static bool isMuchSmallerThan(const Derived& self, const OtherDerived& other, RealScalar prec)
  {
    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
    ei_assert(self.rows() == other.rows() && self.cols() == other.cols());
    typename Derived::Nested lhs(self);
    typename OtherDerived::Nested rhs(other);
    const Index columnCount = self.cols();
    for(Index col = 0; col < columnCount; ++col)
    {
      if(lhs.col(col).squaredNorm() > rhs.col(col).squaredNorm() * prec * prec)
      {
        return false;
      }
    }
    return true;
  }
};
#endif
#endif // EIGEN_FUZZY_H #endif // EIGEN_FUZZY_H

View File

@@ -26,6 +26,8 @@
#ifndef EIGEN_GENERIC_PACKET_MATH_H #ifndef EIGEN_GENERIC_PACKET_MATH_H
#define EIGEN_GENERIC_PACKET_MATH_H #define EIGEN_GENERIC_PACKET_MATH_H
namespace internal {
/** \internal /** \internal
* \file GenericPacketMath.h * \file GenericPacketMath.h
* *
@@ -50,7 +52,7 @@
#define EIGEN_DEBUG_UNALIGNED_STORE #define EIGEN_DEBUG_UNALIGNED_STORE
#endif #endif
struct ei_default_packet_traits struct default_packet_traits
{ {
enum { enum {
HasAdd = 1, HasAdd = 1,
@@ -58,8 +60,11 @@ struct ei_default_packet_traits
HasMul = 1, HasMul = 1,
HasNegate = 1, HasNegate = 1,
HasAbs = 1, HasAbs = 1,
HasAbs2 = 1,
HasMin = 1, HasMin = 1,
HasMax = 1, HasMax = 1,
HasConj = 1,
HasSetLinear = 1,
HasDiv = 0, HasDiv = 0,
HasSqrt = 0, HasSqrt = 0,
@@ -76,101 +81,116 @@ struct ei_default_packet_traits
}; };
}; };
template<typename T> struct ei_packet_traits : ei_default_packet_traits template<typename T> struct packet_traits : default_packet_traits
{ {
typedef T type; typedef T type;
enum {size=1}; enum {
Vectorizable = 0,
size = 1,
AlignedOnScalar = 0
};
enum { enum {
HasAdd = 0, HasAdd = 0,
HasSub = 0, HasSub = 0,
HasMul = 0, HasMul = 0,
HasNegate = 0, HasNegate = 0,
HasAbs = 0, HasAbs = 0,
HasAbs2 = 0,
HasMin = 0, HasMin = 0,
HasMax = 0 HasMax = 0,
HasConj = 0,
HasSetLinear = 0
}; };
}; };
/** \internal \returns a + b (coeff-wise) */ /** \internal \returns a + b (coeff-wise) */
template<typename Packet> inline Packet template<typename Packet> inline Packet
ei_padd(const Packet& a, padd(const Packet& a,
const Packet& b) { return a+b; } const Packet& b) { return a+b; }
/** \internal \returns a - b (coeff-wise) */ /** \internal \returns a - b (coeff-wise) */
template<typename Packet> inline Packet template<typename Packet> inline Packet
ei_psub(const Packet& a, psub(const Packet& a,
const Packet& b) { return a-b; } const Packet& b) { return a-b; }
/** \internal \returns -a (coeff-wise) */ /** \internal \returns -a (coeff-wise) */
template<typename Packet> inline Packet template<typename Packet> inline Packet
ei_pnegate(const Packet& a) { return -a; } pnegate(const Packet& a) { return -a; }
/** \internal
  * Generic (non-vectorized) fallback.
  * \returns the coefficient-wise conjugate of \a a, obtained from an unqualified call to conj().
  */
template<typename Packet>
inline Packet pconj(const Packet& a)
{
  return conj(a);
}
/** \internal \returns a * b (coeff-wise) */ /** \internal \returns a * b (coeff-wise) */
template<typename Packet> inline Packet template<typename Packet> inline Packet
ei_pmul(const Packet& a, pmul(const Packet& a,
const Packet& b) { return a*b; } const Packet& b) { return a*b; }
/** \internal \returns a / b (coeff-wise) */ /** \internal \returns a / b (coeff-wise) */
template<typename Packet> inline Packet template<typename Packet> inline Packet
ei_pdiv(const Packet& a, pdiv(const Packet& a,
const Packet& b) { return a/b; } const Packet& b) { return a/b; }
/** \internal \returns the min of \a a and \a b (coeff-wise) */ /** \internal \returns the min of \a a and \a b (coeff-wise) */
template<typename Packet> inline Packet template<typename Packet> inline Packet
ei_pmin(const Packet& a, pmin(const Packet& a,
const Packet& b) { return std::min(a, b); } const Packet& b) { return std::min(a, b); }
/** \internal \returns the max of \a a and \a b (coeff-wise) */ /** \internal \returns the max of \a a and \a b (coeff-wise) */
template<typename Packet> inline Packet template<typename Packet> inline Packet
ei_pmax(const Packet& a, pmax(const Packet& a,
const Packet& b) { return std::max(a, b); } const Packet& b) { return std::max(a, b); }
/** \internal \returns the absolute value of \a a */ /** \internal \returns the absolute value of \a a */
template<typename Packet> inline Packet template<typename Packet> inline Packet
ei_pabs(const Packet& a) { return ei_abs(a); } pabs(const Packet& a) { return abs(a); }
/** \internal \returns the bitwise and of \a a and \a b */ /** \internal \returns the bitwise and of \a a and \a b */
template<typename Packet> inline Packet template<typename Packet> inline Packet
ei_pand(const Packet& a, const Packet& b) { return a & b; } pand(const Packet& a, const Packet& b) { return a & b; }
/** \internal \returns the bitwise or of \a a and \a b */ /** \internal \returns the bitwise or of \a a and \a b */
template<typename Packet> inline Packet template<typename Packet> inline Packet
ei_por(const Packet& a, const Packet& b) { return a | b; } por(const Packet& a, const Packet& b) { return a | b; }
/** \internal \returns the bitwise xor of \a a and \a b */ /** \internal \returns the bitwise xor of \a a and \a b */
template<typename Packet> inline Packet template<typename Packet> inline Packet
ei_pxor(const Packet& a, const Packet& b) { return a ^ b; } pxor(const Packet& a, const Packet& b) { return a ^ b; }
/** \internal
  * Generic (non-vectorized) fallback.
  * \returns the bitwise andnot of \a a and \a b, i.e. the bits of \a a that are not set in \a b.
  *
  * The complement must be the bitwise one (~): a logical NOT (!) would collapse \a b to 0 or 1
  * and the result would no longer be a bit mask.
  */
template<typename Packet> inline Packet
pandnot(const Packet& a, const Packet& b) { return a & (~b); }
/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */ /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type template<typename Packet> inline Packet
ei_pload(const Scalar* from) { return *from; } pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
/** \internal \returns a packet version of \a *from, (un-aligned load) */ /** \internal \returns a packet version of \a *from, (un-aligned load) */
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type template<typename Packet> inline Packet
ei_ploadu(const Scalar* from) { return *from; } ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
/** \internal
  * \returns a packet in which each element read from \a from appears twice,
  * e.g.: (from[0],from[0],from[1],from[1]).
  *
  * This generic fallback simply returns \a *from.
  */
template<typename Packet>
inline Packet ploaddup(const typename unpacket_traits<Packet>::type* from)
{
  return *from;
}
/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */ /** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type template<typename Packet> inline Packet
ei_pset1(const Scalar& a) { return a; } pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */ /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type template<typename Scalar> inline typename packet_traits<Scalar>::type
ei_plset(const Scalar& a) { return a; } plset(const Scalar& a) { return a; }
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */ /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
template<typename Scalar, typename Packet> inline void ei_pstore(Scalar* to, const Packet& from) template<typename Scalar, typename Packet> inline void pstore(Scalar* to, const Packet& from)
{ (*to) = from; } { (*to) = from; }
/** \internal copy the packet \a from to \a *to, (un-aligned store) */ /** \internal copy the packet \a from to \a *to, (un-aligned store) */
template<typename Scalar, typename Packet> inline void ei_pstoreu(Scalar* to, const Packet& from) template<typename Scalar, typename Packet> inline void pstoreu(Scalar* to, const Packet& from)
{ (*to) = from; } { (*to) = from; }
/** \internal tries to do cache prefetching of \a addr */ /** \internal tries to do cache prefetching of \a addr */
template<typename Scalar> inline void ei_prefetch(const Scalar* addr) template<typename Scalar> inline void prefetch(const Scalar* addr)
{ {
#if !defined(_MSC_VER) #if !defined(_MSC_VER)
__builtin_prefetch(addr); __builtin_prefetch(addr);
@@ -178,93 +198,118 @@ __builtin_prefetch(addr);
} }
/** \internal \returns the first element of a packet */ /** \internal \returns the first element of a packet */
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_pfirst(const Packet& a) template<typename Packet> inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
{ return a; } { return a; }
/** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */ /** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
template<typename Packet> inline Packet template<typename Packet> inline Packet
ei_preduxp(const Packet* vecs) { return vecs[0]; } preduxp(const Packet* vecs) { return vecs[0]; }
/** \internal \returns the sum of the elements of \a a*/ /** \internal \returns the sum of the elements of \a a*/
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux(const Packet& a) template<typename Packet> inline typename unpacket_traits<Packet>::type predux(const Packet& a)
{ return a; } { return a; }
/** \internal \returns the product of the elements of \a a*/ /** \internal \returns the product of the elements of \a a*/
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_mul(const Packet& a) template<typename Packet> inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
{ return a; } { return a; }
/** \internal \returns the min of the elements of \a a*/ /** \internal \returns the min of the elements of \a a*/
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_min(const Packet& a) template<typename Packet> inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
{ return a; } { return a; }
/** \internal \returns the max of the elements of \a a*/ /** \internal \returns the max of the elements of \a a*/
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_max(const Packet& a) template<typename Packet> inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
{ return a; } { return a; }
/** \internal \returns the reversed elements of \a a*/ /** \internal \returns the reversed elements of \a a*/
template<typename Packet> inline Packet ei_preverse(const Packet& a) template<typename Packet> inline Packet preverse(const Packet& a)
{ return a; } { return a; }
/** \internal
  * \returns a copy of \a a whose real and imaginary parts are swapped (complex types only).
  *
  * The result is built as Packet(imag(a), real(a)).
  */
template<typename Packet>
inline Packet pcplxflip(const Packet& a)
{
  return Packet(imag(a), real(a));
}
/************************** /**************************
* Special math functions * Special math functions
***************************/ ***************************/
/** \internal \returns the sin of \a a (coeff-wise) */ /** \internal \returns the sine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ei_psin(const Packet& a) { return ei_sin(a); } Packet psin(const Packet& a) { return sin(a); }
/** \internal \returns the cos of \a a (coeff-wise) */ /** \internal \returns the cosine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ei_pcos(const Packet& a) { return ei_cos(a); } Packet pcos(const Packet& a) { return cos(a); }
/** \internal
  * Generic fallback.
  * \returns the tangent of \a a (coeff-wise), obtained from an unqualified call to tan().
  */
template<typename Packet>
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ptan(const Packet& a)
{
  return tan(a);
}
/** \internal
  * Generic fallback.
  * \returns the arc sine of \a a (coeff-wise), obtained from an unqualified call to asin().
  */
template<typename Packet>
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pasin(const Packet& a)
{
  return asin(a);
}
/** \internal
  * Generic fallback.
  * \returns the arc cosine of \a a (coeff-wise), obtained from an unqualified call to acos().
  */
template<typename Packet>
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pacos(const Packet& a)
{
  return acos(a);
}
/** \internal \returns the exp of \a a (coeff-wise) */ /** \internal \returns the exp of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ei_pexp(const Packet& a) { return ei_exp(a); } Packet pexp(const Packet& a) { return exp(a); }
/** \internal \returns the log of \a a (coeff-wise) */ /** \internal \returns the log of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ei_plog(const Packet& a) { return ei_log(a); } Packet plog(const Packet& a) { return log(a); }
/** \internal \returns the square-root of \a a (coeff-wise) */ /** \internal \returns the square-root of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ei_psqrt(const Packet& a) { return ei_sqrt(a); } Packet psqrt(const Packet& a) { return sqrt(a); }
/*************************************************************************** /***************************************************************************
* The following functions might not have to be overwritten for vectorized types * The following functions might not have to be overwritten for vectorized types
***************************************************************************/ ***************************************************************************/
/** \internal
  * Stores a packet whose coefficients all equal \a a (e.g., [a,a,a,a]) to \a *to.
  * \a to must be 16 bytes aligned.
  */
// NOTE: the packet type has to be an explicit template parameter, because one scalar type may be
// served by several different packet types.
template<typename Packet>
inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
{
  // Broadcast the scalar into a packet, then store that packet.
  const Packet broadcast = pset1<Packet>(a);
  pstore(to, broadcast);
}
/** \internal \returns a * b + c (coeff-wise) */ /** \internal \returns a * b + c (coeff-wise) */
template<typename Packet> inline Packet template<typename Packet> inline Packet
ei_pmadd(const Packet& a, pmadd(const Packet& a,
const Packet& b, const Packet& b,
const Packet& c) const Packet& c)
{ return ei_padd(ei_pmul(a, b),c); } { return padd(pmul(a, b),c); }
/** \internal \returns a packet version of \a *from. /** \internal \returns a packet version of \a *from.
* \If LoadMode equals Aligned, \a from must be 16 bytes aligned */ * \If LoadMode equals Aligned, \a from must be 16 bytes aligned */
template<typename Scalar, int LoadMode> template<typename Packet, int LoadMode>
inline typename ei_packet_traits<Scalar>::type ei_ploadt(const Scalar* from) inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
{ {
if(LoadMode == Aligned) if(LoadMode == Aligned)
return ei_pload(from); return pload<Packet>(from);
else else
return ei_ploadu(from); return ploadu<Packet>(from);
} }
/** \internal copy the packet \a from to \a *to. /** \internal copy the packet \a from to \a *to.
* If StoreMode equals Aligned, \a to must be 16 bytes aligned */ * If StoreMode equals Aligned, \a to must be 16 bytes aligned */
template<typename Scalar, typename Packet, int LoadMode> template<typename Scalar, typename Packet, int LoadMode>
inline void ei_pstoret(Scalar* to, const Packet& from) inline void pstoret(Scalar* to, const Packet& from)
{ {
if(LoadMode == Aligned) if(LoadMode == Aligned)
ei_pstore(to, from); pstore(to, from);
else else
ei_pstoreu(to, from); pstoreu(to, from);
} }
/** \internal default implementation of ei_palign() allowing partial specialization */ /** \internal default implementation of palign() allowing partial specialization */
template<int Offset,typename PacketType> template<int Offset,typename PacketType>
struct ei_palign_impl struct palign_impl
{ {
// by default data are aligned, so there is nothing to be done :) // by default data are aligned, so there is nothing to be done :)
inline static void run(PacketType&, const PacketType&) {} inline static void run(PacketType&, const PacketType&) {}
@@ -273,20 +318,22 @@ struct ei_palign_impl
/** \internal update \a first using the concatenation of the \a Offset last elements /** \internal update \a first using the concatenation of the \a Offset last elements
* of \a first and packet_size minus \a Offset first elements of \a second */ * of \a first and packet_size minus \a Offset first elements of \a second */
template<int Offset,typename PacketType> template<int Offset,typename PacketType>
inline void ei_palign(PacketType& first, const PacketType& second) inline void palign(PacketType& first, const PacketType& second)
{ {
ei_palign_impl<Offset,PacketType>::run(first,second); palign_impl<Offset,PacketType>::run(first,second);
} }
/*************************************************************************** /***************************************************************************
* Fast complex products (GCC generates a function call which is very slow) * Fast complex products (GCC generates a function call which is very slow)
***************************************************************************/ ***************************************************************************/
template<> inline std::complex<float> ei_pmul(const std::complex<float>& a, const std::complex<float>& b) template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
{ return std::complex<float>(ei_real(a)*ei_real(b) - ei_imag(a)*ei_imag(b), ei_imag(a)*ei_real(b) + ei_real(a)*ei_imag(b)); } { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
template<> inline std::complex<double> ei_pmul(const std::complex<double>& a, const std::complex<double>& b) template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
{ return std::complex<double>(ei_real(a)*ei_real(b) - ei_imag(a)*ei_imag(b), ei_imag(a)*ei_real(b) + ei_real(a)*ei_imag(b)); } { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
} // end namespace internal
#endif // EIGEN_GENERIC_PACKET_MATH_H #endif // EIGEN_GENERIC_PACKET_MATH_H

View File

@@ -28,7 +28,7 @@
#define EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(NAME,FUNCTOR) \ #define EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(NAME,FUNCTOR) \
template<typename Derived> \ template<typename Derived> \
inline const Eigen::CwiseUnaryOp<Eigen::FUNCTOR<typename Derived::Scalar>, Derived> \ inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
NAME(const Eigen::ArrayBase<Derived>& x) { \ NAME(const Eigen::ArrayBase<Derived>& x) { \
return x.derived(); \ return x.derived(); \
} }
@@ -38,7 +38,7 @@
template<typename Derived> \ template<typename Derived> \
struct NAME##_retval<ArrayBase<Derived> > \ struct NAME##_retval<ArrayBase<Derived> > \
{ \ { \
typedef const Eigen::CwiseUnaryOp<Eigen::FUNCTOR<typename Derived::Scalar>, Derived> type; \ typedef const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> type; \
}; \ }; \
template<typename Derived> \ template<typename Derived> \
struct NAME##_impl<ArrayBase<Derived> > \ struct NAME##_impl<ArrayBase<Derived> > \
@@ -52,17 +52,20 @@
namespace std namespace std
{ {
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(real,ei_scalar_real_op) EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(real,scalar_real_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(imag,ei_scalar_imag_op) EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(imag,scalar_imag_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(sin,ei_scalar_sin_op) EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(sin,scalar_sin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(cos,ei_scalar_cos_op) EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(cos,scalar_cos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(exp,ei_scalar_exp_op) EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(asin,scalar_asin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(log,ei_scalar_log_op) EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(acos,scalar_acos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(abs,ei_scalar_abs_op) EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(tan,scalar_tan_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(sqrt,ei_scalar_sqrt_op) EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(exp,scalar_exp_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(log,scalar_log_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(abs,scalar_abs_op)
EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(sqrt,scalar_sqrt_op)
template<typename Derived> template<typename Derived>
inline const Eigen::CwiseUnaryOp<Eigen::ei_scalar_pow_op<typename Derived::Scalar>, Derived> inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar>, const Derived>
pow(const Eigen::ArrayBase<Derived>& x, const typename Derived::Scalar& exponent) { \ pow(const Eigen::ArrayBase<Derived>& x, const typename Derived::Scalar& exponent) { \
return x.derived().pow(exponent); \ return x.derived().pow(exponent); \
} }
@@ -70,17 +73,23 @@ namespace std
namespace Eigen namespace Eigen
{ {
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_real,ei_scalar_real_op) namespace internal
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_imag,ei_scalar_imag_op) {
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_sin,ei_scalar_sin_op) EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_cos,ei_scalar_cos_op) EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(imag,scalar_imag_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_exp,ei_scalar_exp_op) EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(sin,scalar_sin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_log,ei_scalar_log_op) EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(cos,scalar_cos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_abs,ei_scalar_abs_op) EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(asin,scalar_asin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_abs2,ei_scalar_abs2_op) EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(acos,scalar_acos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(ei_sqrt,ei_scalar_sqrt_op) EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(tan,scalar_tan_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(exp,scalar_exp_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(log,scalar_log_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(abs,scalar_abs_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(abs2,scalar_abs2_op)
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(sqrt,scalar_sqrt_op)
}
} }
// TODO: cleanly disable those functions that are not supported on Array (ei_real_ref, ei_random, ei_isApprox...) // TODO: cleanly disable those functions that are not supported on Array (internal::real_ref, internal::random, internal::isApprox...)
#endif // EIGEN_GLOBAL_FUNCTIONS_H #endif // EIGEN_GLOBAL_FUNCTIONS_H

View File

@@ -30,7 +30,13 @@ enum { DontAlignCols = 1 };
enum { StreamPrecision = -1, enum { StreamPrecision = -1,
FullPrecision = -2 }; FullPrecision = -2 };
namespace internal {
// Forward declaration: WithFormat's operator<< (below) calls internal::print_matrix() before the
// function template is defined later in this file.
// Prints the matrix _m to the output stream s using the output format fmt.
template<typename Derived>
std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt);
}
/** \class IOFormat /** \class IOFormat
* \ingroup Core_Module
* *
* \brief Stores a set of parameters controlling the way matrices are printed * \brief Stores a set of parameters controlling the way matrices are printed
* *
@@ -80,6 +86,7 @@ struct IOFormat
}; };
/** \class WithFormat /** \class WithFormat
* \ingroup Core_Module
* *
* \brief Pseudo expression providing matrix output with given format * \brief Pseudo expression providing matrix output with given format
* *
@@ -104,7 +111,7 @@ class WithFormat
friend std::ostream & operator << (std::ostream & s, const WithFormat& wf) friend std::ostream & operator << (std::ostream & s, const WithFormat& wf)
{ {
return ei_print_matrix(s, wf.m_matrix.eval(), wf.m_format); return internal::print_matrix(s, wf.m_matrix.eval(), wf.m_format);
} }
protected: protected:
@@ -126,18 +133,20 @@ DenseBase<Derived>::format(const IOFormat& fmt) const
return WithFormat<Derived>(derived(), fmt); return WithFormat<Derived>(derived(), fmt);
} }
namespace internal {
template<typename Scalar, bool IsInteger> template<typename Scalar, bool IsInteger>
struct ei_significant_decimals_default_impl struct significant_decimals_default_impl
{ {
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
static inline int run() static inline int run()
{ {
return ei_cast<RealScalar,int>(std::ceil(-ei_log(NumTraits<RealScalar>::epsilon())/ei_log(RealScalar(10)))); return cast<RealScalar,int>(std::ceil(-log(NumTraits<RealScalar>::epsilon())/log(RealScalar(10))));
} }
}; };
template<typename Scalar> template<typename Scalar>
struct ei_significant_decimals_default_impl<Scalar, true> struct significant_decimals_default_impl<Scalar, true>
{ {
static inline int run() static inline int run()
{ {
@@ -146,14 +155,14 @@ struct ei_significant_decimals_default_impl<Scalar, true>
}; };
template<typename Scalar> template<typename Scalar>
struct ei_significant_decimals_impl struct significant_decimals_impl
: ei_significant_decimals_default_impl<Scalar, NumTraits<Scalar>::IsInteger> : significant_decimals_default_impl<Scalar, NumTraits<Scalar>::IsInteger>
{}; {};
/** \internal /** \internal
* print the matrix \a _m to the output stream \a s using the output format \a fmt */ * print the matrix \a _m to the output stream \a s using the output format \a fmt */
template<typename Derived> template<typename Derived>
std::ostream & ei_print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt) std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt)
{ {
if(_m.size() == 0) if(_m.size() == 0)
{ {
@@ -180,7 +189,7 @@ std::ostream & ei_print_matrix(std::ostream & s, const Derived& _m, const IOForm
} }
else else
{ {
explicit_precision = ei_significant_decimals_impl<Scalar>::run(); explicit_precision = significant_decimals_impl<Scalar>::run();
} }
} }
else else
@@ -226,6 +235,8 @@ std::ostream & ei_print_matrix(std::ostream & s, const Derived& _m, const IOForm
return s; return s;
} }
} // end namespace internal
/** \relates DenseBase /** \relates DenseBase
* *
* Outputs the matrix, to the given stream. * Outputs the matrix, to the given stream.
@@ -242,7 +253,7 @@ std::ostream & operator <<
(std::ostream & s, (std::ostream & s,
const DenseBase<Derived> & m) const DenseBase<Derived> & m)
{ {
return ei_print_matrix(s, m.eval(), EIGEN_DEFAULT_IO_FORMAT); return internal::print_matrix(s, m.eval(), EIGEN_DEFAULT_IO_FORMAT);
} }
#endif // EIGEN_IO_H #endif // EIGEN_IO_H

View File

@@ -27,6 +27,7 @@
#define EIGEN_MAP_H #define EIGEN_MAP_H
/** \class Map /** \class Map
* \ingroup Core_Module
* *
* \brief A matrix or vector expression mapping an existing array of data. * \brief A matrix or vector expression mapping an existing array of data.
* *
@@ -43,7 +44,7 @@
* data is laid out contiguously in memory. You can however override this by explicitly specifying * data is laid out contiguously in memory. You can however override this by explicitly specifying
* inner and outer strides. * inner and outer strides.
* *
* Here's an example of simply mapping a contiguous array as a column-major matrix: * Here's an example of simply mapping a contiguous array as a \ref TopicStorageOrders "column-major" matrix:
* \include Map_simple.cpp * \include Map_simple.cpp
* Output: \verbinclude Map_simple.out * Output: \verbinclude Map_simple.out
* *
@@ -73,12 +74,15 @@
* *
* This class is the return type of Matrix::Map() but can also be used directly. * This class is the return type of Matrix::Map() but can also be used directly.
* *
* \sa Matrix::Map() * \sa Matrix::Map(), \ref TopicStorageOrders
*/ */
namespace internal {
template<typename PlainObjectType, int MapOptions, typename StrideType> template<typename PlainObjectType, int MapOptions, typename StrideType>
struct ei_traits<Map<PlainObjectType, MapOptions, StrideType> > struct traits<Map<PlainObjectType, MapOptions, StrideType> >
: public ei_traits<PlainObjectType> : public traits<PlainObjectType>
{ {
typedef traits<PlainObjectType> TraitsBase;
typedef typename PlainObjectType::Index Index; typedef typename PlainObjectType::Index Index;
typedef typename PlainObjectType::Scalar Scalar; typedef typename PlainObjectType::Scalar Scalar;
enum { enum {
@@ -91,21 +95,24 @@ struct ei_traits<Map<PlainObjectType, MapOptions, StrideType> >
HasNoInnerStride = InnerStrideAtCompileTime == 1, HasNoInnerStride = InnerStrideAtCompileTime == 1,
HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
HasNoStride = HasNoInnerStride && HasNoOuterStride, HasNoStride = HasNoInnerStride && HasNoOuterStride,
IsAligned = int(int(MapOptions)&Aligned)==Aligned, IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
KeepsPacketAccess = bool(HasNoInnerStride) KeepsPacketAccess = bool(HasNoInnerStride)
&& ( bool(IsDynamicSize) && ( bool(IsDynamicSize)
|| HasNoOuterStride || HasNoOuterStride
|| ( OuterStrideAtCompileTime!=Dynamic || ( OuterStrideAtCompileTime!=Dynamic
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ), && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ),
Flags0 = ei_traits<PlainObjectType>::Flags, Flags0 = TraitsBase::Flags,
Flags1 = IsAligned ? int(Flags0) | AlignedBit : int(Flags0) & ~AlignedBit, Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
Flags2 = HasNoStride ? int(Flags1) : int(Flags1 & ~LinearAccessBit), Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit) ? int(Flags1) : int(Flags1 & ~LinearAccessBit),
Flags3 = is_lvalue<PlainObjectType>::value ? int(Flags2) : (int(Flags2) & ~LvalueBit),
Flags = KeepsPacketAccess ? int(Flags3) : (int(Flags3) & ~PacketAccessBit)
}; };
private: private:
enum { Options }; // Expressions don't support Options enum { Options }; // Expressions don't have Options
}; };
}
template<typename PlainObjectType, int MapOptions, typename StrideType> class Map template<typename PlainObjectType, int MapOptions, typename StrideType> class Map
: public MapBase<Map<PlainObjectType, MapOptions, StrideType> > : public MapBase<Map<PlainObjectType, MapOptions, StrideType> >
@@ -116,6 +123,15 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
EIGEN_DENSE_PUBLIC_INTERFACE(Map) EIGEN_DENSE_PUBLIC_INTERFACE(Map)
typedef typename Base::PointerType PointerType;
#if EIGEN2_SUPPORT_STAGE <= STAGE30_FULL_EIGEN3_API
typedef const Scalar* PointerArgType;
inline PointerType cast_to_pointer_type(PointerArgType ptr) { return const_cast<PointerType>(ptr); }
#else
typedef PointerType PointerArgType;
inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
#endif
inline Index innerStride() const inline Index innerStride() const
{ {
return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
@@ -134,8 +150,8 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
* \param data pointer to the array to map * \param data pointer to the array to map
* \param stride optional Stride object, passing the strides. * \param stride optional Stride object, passing the strides.
*/ */
inline Map(const Scalar* data, const StrideType& stride = StrideType()) inline Map(PointerArgType data, const StrideType& stride = StrideType())
: Base(data), m_stride(stride) : Base(cast_to_pointer_type(data)), m_stride(stride)
{ {
PlainObjectType::Base::_check_template_params(); PlainObjectType::Base::_check_template_params();
} }
@@ -146,8 +162,8 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
* \param size the size of the vector expression * \param size the size of the vector expression
* \param stride optional Stride object, passing the strides. * \param stride optional Stride object, passing the strides.
*/ */
inline Map(const Scalar* data, Index size, const StrideType& stride = StrideType()) inline Map(PointerArgType data, Index size, const StrideType& stride = StrideType())
: Base(data, size), m_stride(stride) : Base(cast_to_pointer_type(data), size), m_stride(stride)
{ {
PlainObjectType::Base::_check_template_params(); PlainObjectType::Base::_check_template_params();
} }
@@ -159,8 +175,8 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
* \param cols the number of columns of the matrix expression * \param cols the number of columns of the matrix expression
* \param stride optional Stride object, passing the strides. * \param stride optional Stride object, passing the strides.
*/ */
inline Map(const Scalar* data, Index rows, Index cols, const StrideType& stride = StrideType()) inline Map(PointerArgType data, Index rows, Index cols, const StrideType& stride = StrideType())
: Base(data, rows, cols), m_stride(stride) : Base(cast_to_pointer_type(data), rows, cols), m_stride(stride)
{ {
PlainObjectType::Base::_check_template_params(); PlainObjectType::Base::_check_template_params();
} }
@@ -172,11 +188,18 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
StrideType m_stride; StrideType m_stride;
}; };
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
inline Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>
::Array(const Scalar *data)
{
this->_set_noalias(Eigen::Map<const Array>(data));
}
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols> template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
inline Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> inline Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>
::Matrix(const Scalar *data) ::Matrix(const Scalar *data)
{ {
_set_noalias(Eigen::Map<Matrix>(data)); this->_set_noalias(Eigen::Map<const Matrix>(data));
} }
#endif // EIGEN_MAP_H #endif // EIGEN_MAP_H

View File

@@ -26,30 +26,40 @@
#ifndef EIGEN_MAPBASE_H #ifndef EIGEN_MAPBASE_H
#define EIGEN_MAPBASE_H #define EIGEN_MAPBASE_H
#define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \
EIGEN_STATIC_ASSERT((int(internal::traits<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT)
/** \class MapBase /** \class MapBase
* \ingroup Core_Module
* *
* \brief Base class for Map and Block expression with direct access * \brief Base class for Map and Block expression with direct access
* *
* \sa class Map, class Block * \sa class Map, class Block
*/ */
template<typename Derived> class MapBase template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
: public ei_dense_xpr_base<Derived>::type : public internal::dense_xpr_base<Derived>::type
{ {
public: public:
typedef typename ei_dense_xpr_base<Derived>::type Base; typedef typename internal::dense_xpr_base<Derived>::type Base;
enum { enum {
RowsAtCompileTime = ei_traits<Derived>::RowsAtCompileTime, RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
ColsAtCompileTime = ei_traits<Derived>::ColsAtCompileTime, ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
SizeAtCompileTime = Base::SizeAtCompileTime SizeAtCompileTime = Base::SizeAtCompileTime
}; };
typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::StorageKind StorageKind; typedef typename internal::traits<Derived>::Index Index;
typedef typename ei_traits<Derived>::Index Index; typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename ei_traits<Derived>::Scalar Scalar; typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
typedef typename internal::conditional<
bool(internal::is_lvalue<Derived>::value),
Scalar *,
const Scalar *>::type
PointerType;
using Base::derived; using Base::derived;
// using Base::RowsAtCompileTime; // using Base::RowsAtCompileTime;
@@ -62,10 +72,6 @@ template<typename Derived> class MapBase
using Base::Flags; using Base::Flags;
using Base::IsRowMajor; using Base::IsRowMajor;
using Base::CoeffReadCost;
// using Base::derived;
using Base::const_cast_derived;
using Base::rows; using Base::rows;
using Base::cols; using Base::cols;
using Base::size; using Base::size;
@@ -73,17 +79,14 @@ template<typename Derived> class MapBase
using Base::coeffRef; using Base::coeffRef;
using Base::lazyAssign; using Base::lazyAssign;
using Base::eval; using Base::eval;
// using Base::operator=;
using Base::operator+=;
using Base::operator-=;
using Base::operator*=;
using Base::operator/=;
using Base::innerStride; using Base::innerStride;
using Base::outerStride; using Base::outerStride;
using Base::rowStride; using Base::rowStride;
using Base::colStride; using Base::colStride;
// bug 217 - compile error on ICC 11.1
using Base::operator=;
typedef typename Base::CoeffReturnType CoeffReturnType; typedef typename Base::CoeffReturnType CoeffReturnType;
@@ -103,98 +106,150 @@ template<typename Derived> class MapBase
return m_data[col * colStride() + row * rowStride()]; return m_data[col * colStride() + row * rowStride()];
} }
inline Scalar& coeffRef(Index row, Index col)
{
return const_cast<Scalar*>(m_data)[col * colStride() + row * rowStride()];
}
inline const Scalar& coeff(Index index) const inline const Scalar& coeff(Index index) const
{ {
ei_assert(Derived::IsVectorAtCompileTime || (ei_traits<Derived>::Flags & LinearAccessBit)); EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
return m_data[index * innerStride()]; return m_data[index * innerStride()];
} }
inline Scalar& coeffRef(Index index) inline const Scalar& coeffRef(Index row, Index col) const
{ {
ei_assert(Derived::IsVectorAtCompileTime || (ei_traits<Derived>::Flags & LinearAccessBit)); return this->m_data[col * colStride() + row * rowStride()];
return const_cast<Scalar*>(m_data)[index * innerStride()]; }
inline const Scalar& coeffRef(Index index) const
{
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
return this->m_data[index * innerStride()];
} }
template<int LoadMode> template<int LoadMode>
inline PacketScalar packet(Index row, Index col) const inline PacketScalar packet(Index row, Index col) const
{ {
return ei_ploadt<Scalar, LoadMode> return internal::ploadt<PacketScalar, LoadMode>
(m_data + (col * colStride() + row * rowStride())); (m_data + (col * colStride() + row * rowStride()));
} }
template<int LoadMode> template<int LoadMode>
inline PacketScalar packet(Index index) const inline PacketScalar packet(Index index) const
{ {
return ei_ploadt<Scalar, LoadMode>(m_data + index * innerStride()); EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
} }
template<int StoreMode> inline MapBase(PointerType data) : m_data(data), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
inline void writePacket(Index row, Index col, const PacketScalar& x)
{
ei_pstoret<Scalar, PacketScalar, StoreMode>
(const_cast<Scalar*>(m_data) + (col * colStride() + row * rowStride()), x);
}
template<int StoreMode>
inline void writePacket(Index index, const PacketScalar& x)
{
ei_pstoret<Scalar, PacketScalar, StoreMode>
(const_cast<Scalar*>(m_data) + index * innerStride(), x);
}
inline MapBase(const Scalar* data) : m_data(data), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
{ {
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
checkSanity(); checkSanity();
} }
inline MapBase(const Scalar* data, Index size) inline MapBase(PointerType data, Index size)
: m_data(data), : m_data(data),
m_rows(RowsAtCompileTime == Dynamic ? size : Index(RowsAtCompileTime)), m_rows(RowsAtCompileTime == Dynamic ? size : Index(RowsAtCompileTime)),
m_cols(ColsAtCompileTime == Dynamic ? size : Index(ColsAtCompileTime)) m_cols(ColsAtCompileTime == Dynamic ? size : Index(ColsAtCompileTime))
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
ei_assert(size >= 0); eigen_assert(size >= 0);
ei_assert(data == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == size); eigen_assert(data == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == size);
checkSanity(); checkSanity();
} }
inline MapBase(const Scalar* data, Index rows, Index cols) inline MapBase(PointerType data, Index rows, Index cols)
: m_data(data), m_rows(rows), m_cols(cols) : m_data(data), m_rows(rows), m_cols(cols)
{ {
ei_assert( (data == 0) eigen_assert( (data == 0)
|| ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) || ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols))); && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
checkSanity(); checkSanity();
} }
Derived& operator=(const MapBase& other)
{
Base::operator=(other);
return derived();
}
using Base::operator=;
protected: protected:
void checkSanity() const void checkSanity() const
{ {
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(ei_traits<Derived>::Flags&PacketAccessBit, EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
ei_inner_stride_at_compile_time<Derived>::ret==1), internal::inner_stride_at_compile_time<Derived>::ret==1),
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
ei_assert(EIGEN_IMPLIES(ei_traits<Derived>::Flags&AlignedBit, (size_t(m_data)&0xf)==0) eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % (sizeof(Scalar)*internal::packet_traits<Scalar>::size)) == 0)
&& "data is not aligned"); && "data is not aligned");
} }
const Scalar* EIGEN_RESTRICT m_data; PointerType m_data;
const ei_variable_if_dynamic<Index, RowsAtCompileTime> m_rows; const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
const ei_variable_if_dynamic<Index, ColsAtCompileTime> m_cols; const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
}; };
template<typename Derived> class MapBase<Derived, WriteAccessors>
: public MapBase<Derived, ReadOnlyAccessors>
{
public:
typedef MapBase<Derived, ReadOnlyAccessors> Base;
typedef typename Base::Scalar Scalar;
typedef typename Base::PacketScalar PacketScalar;
typedef typename Base::Index Index;
typedef typename Base::PointerType PointerType;
using Base::derived;
using Base::rows;
using Base::cols;
using Base::size;
using Base::coeff;
using Base::coeffRef;
using Base::innerStride;
using Base::outerStride;
using Base::rowStride;
using Base::colStride;
typedef typename internal::conditional<
internal::is_lvalue<Derived>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
inline const Scalar* data() const { return this->m_data; }
inline ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error
inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)
{
return this->m_data[col * colStride() + row * rowStride()];
}
inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
{
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
return this->m_data[index * innerStride()];
}
template<int StoreMode>
inline void writePacket(Index row, Index col, const PacketScalar& x)
{
internal::pstoret<Scalar, PacketScalar, StoreMode>
(this->m_data + (col * colStride() + row * rowStride()), x);
}
template<int StoreMode>
inline void writePacket(Index index, const PacketScalar& x)
{
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
internal::pstoret<Scalar, PacketScalar, StoreMode>
(this->m_data + index * innerStride(), x);
}
inline MapBase(PointerType data) : Base(data) {}
inline MapBase(PointerType data, Index size) : Base(data, size) {}
inline MapBase(PointerType data, Index rows, Index cols) : Base(data, rows, cols) {}
Derived& operator=(const MapBase& other)
{
Base::Base::operator=(other);
return derived();
}
using Base::Base::operator=;
};
#endif // EIGEN_MAPBASE_H #endif // EIGEN_MAPBASE_H

View File

@@ -25,20 +25,22 @@
#ifndef EIGEN_MATHFUNCTIONS_H #ifndef EIGEN_MATHFUNCTIONS_H
#define EIGEN_MATHFUNCTIONS_H #define EIGEN_MATHFUNCTIONS_H
/** \internal \struct ei_global_math_functions_filtering_base namespace internal {
/** \internal \struct global_math_functions_filtering_base
* *
* What it does: * What it does:
* Defines a typedef 'type' as follows: * Defines a typedef 'type' as follows:
* - if type T has a member typedef Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl, then * - if type T has a member typedef Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl, then
* ei_global_math_functions_filtering_base<T>::type is a typedef for it. * global_math_functions_filtering_base<T>::type is a typedef for it.
* - otherwise, ei_global_math_functions_filtering_base<T>::type is a typedef for T. * - otherwise, global_math_functions_filtering_base<T>::type is a typedef for T.
* *
* How it's used: * How it's used:
* To allow to defined the global math functions (like ei_sin...) in certain cases, like the Array expressions. * To allow to defined the global math functions (like sin...) in certain cases, like the Array expressions.
* When you do ei_sin(array1+array2), the object array1+array2 has a complicated expression type, all what you want to know * When you do sin(array1+array2), the object array1+array2 has a complicated expression type, all what you want to know
* is that it inherits ArrayBase. So we implement a partial specialization of ei_sin_impl for ArrayBase<Derived>. * is that it inherits ArrayBase. So we implement a partial specialization of sin_impl for ArrayBase<Derived>.
* So we must make sure to use ei_sin_impl<ArrayBase<Derived> > and not ei_sin_impl<Derived>, otherwise our partial specialization * So we must make sure to use sin_impl<ArrayBase<Derived> > and not sin_impl<Derived>, otherwise our partial specialization
* won't be used. How does ei_sin know that? That's exactly what ei_global_math_functions_filtering_base tells it. * won't be used. How does sin know that? That's exactly what global_math_functions_filtering_base tells it.
* *
* How it's implemented: * How it's implemented:
* SFINAE in the style of enable_if. Highly susceptible of breaking compilers. With GCC, it sure does work, but if you replace * SFINAE in the style of enable_if. Highly susceptible of breaking compilers. With GCC, it sure does work, but if you replace
@@ -46,32 +48,32 @@
*/ */
template<typename T, typename dummy = void> template<typename T, typename dummy = void>
struct ei_global_math_functions_filtering_base struct global_math_functions_filtering_base
{ {
typedef T type; typedef T type;
}; };
template<typename T> struct ei_always_void { typedef void type; }; template<typename T> struct always_void { typedef void type; };
template<typename T> template<typename T>
struct ei_global_math_functions_filtering_base struct global_math_functions_filtering_base
<T, <T,
typename ei_always_void<typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl>::type typename always_void<typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl>::type
> >
{ {
typedef typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl type; typedef typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl type;
}; };
#define EIGEN_MATHFUNC_IMPL(func, scalar) ei_##func##_impl<typename ei_global_math_functions_filtering_base<scalar>::type> #define EIGEN_MATHFUNC_IMPL(func, scalar) func##_impl<typename global_math_functions_filtering_base<scalar>::type>
#define EIGEN_MATHFUNC_RETVAL(func, scalar) typename ei_##func##_retval<typename ei_global_math_functions_filtering_base<scalar>::type>::type #define EIGEN_MATHFUNC_RETVAL(func, scalar) typename func##_retval<typename global_math_functions_filtering_base<scalar>::type>::type
/**************************************************************************** /****************************************************************************
* Implementation of ei_real * * Implementation of real *
****************************************************************************/ ****************************************************************************/
template<typename Scalar> template<typename Scalar>
struct ei_real_impl struct real_impl
{ {
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
static inline RealScalar run(const Scalar& x) static inline RealScalar run(const Scalar& x)
@@ -81,7 +83,7 @@ struct ei_real_impl
}; };
template<typename RealScalar> template<typename RealScalar>
struct ei_real_impl<std::complex<RealScalar> > struct real_impl<std::complex<RealScalar> >
{ {
static inline RealScalar run(const std::complex<RealScalar>& x) static inline RealScalar run(const std::complex<RealScalar>& x)
{ {
@@ -90,23 +92,23 @@ struct ei_real_impl<std::complex<RealScalar> >
}; };
template<typename Scalar> template<typename Scalar>
struct ei_real_retval struct real_retval
{ {
typedef typename NumTraits<Scalar>::Real type; typedef typename NumTraits<Scalar>::Real type;
}; };
template<typename Scalar> template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(real, Scalar) ei_real(const Scalar& x) inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x)
{ {
return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x); return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
} }
/**************************************************************************** /****************************************************************************
* Implementation of ei_imag * * Implementation of imag *
****************************************************************************/ ****************************************************************************/
template<typename Scalar> template<typename Scalar>
struct ei_imag_impl struct imag_impl
{ {
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
static inline RealScalar run(const Scalar&) static inline RealScalar run(const Scalar&)
@@ -116,7 +118,7 @@ struct ei_imag_impl
}; };
template<typename RealScalar> template<typename RealScalar>
struct ei_imag_impl<std::complex<RealScalar> > struct imag_impl<std::complex<RealScalar> >
{ {
static inline RealScalar run(const std::complex<RealScalar>& x) static inline RealScalar run(const std::complex<RealScalar>& x)
{ {
@@ -125,23 +127,23 @@ struct ei_imag_impl<std::complex<RealScalar> >
}; };
template<typename Scalar> template<typename Scalar>
struct ei_imag_retval struct imag_retval
{ {
typedef typename NumTraits<Scalar>::Real type; typedef typename NumTraits<Scalar>::Real type;
}; };
template<typename Scalar> template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) ei_imag(const Scalar& x) inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x)
{ {
return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x); return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);
} }
/**************************************************************************** /****************************************************************************
* Implementation of ei_real_ref * * Implementation of real_ref *
****************************************************************************/ ****************************************************************************/
template<typename Scalar> template<typename Scalar>
struct ei_real_ref_impl struct real_ref_impl
{ {
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
static inline RealScalar& run(Scalar& x) static inline RealScalar& run(Scalar& x)
@@ -155,29 +157,29 @@ struct ei_real_ref_impl
}; };
template<typename Scalar> template<typename Scalar>
struct ei_real_ref_retval struct real_ref_retval
{ {
typedef typename NumTraits<Scalar>::Real & type; typedef typename NumTraits<Scalar>::Real & type;
}; };
template<typename Scalar> template<typename Scalar>
inline typename ei_makeconst< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type ei_real_ref(const Scalar& x) inline typename add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x)
{ {
return ei_real_ref_impl<Scalar>::run(x); return real_ref_impl<Scalar>::run(x);
} }
template<typename Scalar> template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) ei_real_ref(Scalar& x) inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x)
{ {
return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x); return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x);
} }
/**************************************************************************** /****************************************************************************
* Implementation of ei_imag_ref * * Implementation of imag_ref *
****************************************************************************/ ****************************************************************************/
template<typename Scalar, bool IsComplex> template<typename Scalar, bool IsComplex>
struct ei_imag_ref_default_impl struct imag_ref_default_impl
{ {
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
static inline RealScalar& run(Scalar& x) static inline RealScalar& run(Scalar& x)
@@ -191,7 +193,7 @@ struct ei_imag_ref_default_impl
}; };
template<typename Scalar> template<typename Scalar>
struct ei_imag_ref_default_impl<Scalar, false> struct imag_ref_default_impl<Scalar, false>
{ {
static inline Scalar run(Scalar&) static inline Scalar run(Scalar&)
{ {
@@ -204,32 +206,32 @@ struct ei_imag_ref_default_impl<Scalar, false>
}; };
template<typename Scalar> template<typename Scalar>
struct ei_imag_ref_impl : ei_imag_ref_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {}; struct imag_ref_impl : imag_ref_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {};
template<typename Scalar> template<typename Scalar>
struct ei_imag_ref_retval struct imag_ref_retval
{ {
typedef typename NumTraits<Scalar>::Real & type; typedef typename NumTraits<Scalar>::Real & type;
}; };
template<typename Scalar> template<typename Scalar>
inline typename ei_makeconst< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type ei_imag_ref(const Scalar& x) inline typename add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x)
{ {
return ei_imag_ref_impl<Scalar>::run(x); return imag_ref_impl<Scalar>::run(x);
} }
template<typename Scalar> template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) ei_imag_ref(Scalar& x) inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x)
{ {
return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x); return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x);
} }
/**************************************************************************** /****************************************************************************
* Implementation of ei_conj * * Implementation of conj *
****************************************************************************/ ****************************************************************************/
template<typename Scalar> template<typename Scalar>
struct ei_conj_impl struct conj_impl
{ {
static inline Scalar run(const Scalar& x) static inline Scalar run(const Scalar& x)
{ {
@@ -238,7 +240,7 @@ struct ei_conj_impl
}; };
template<typename RealScalar> template<typename RealScalar>
struct ei_conj_impl<std::complex<RealScalar> > struct conj_impl<std::complex<RealScalar> >
{ {
static inline std::complex<RealScalar> run(const std::complex<RealScalar>& x) static inline std::complex<RealScalar> run(const std::complex<RealScalar>& x)
{ {
@@ -247,23 +249,23 @@ struct ei_conj_impl<std::complex<RealScalar> >
}; };
template<typename Scalar> template<typename Scalar>
struct ei_conj_retval struct conj_retval
{ {
typedef Scalar type; typedef Scalar type;
}; };
template<typename Scalar> template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) ei_conj(const Scalar& x) inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x)
{ {
return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x); return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x);
} }
/**************************************************************************** /****************************************************************************
* Implementation of ei_abs * * Implementation of abs *
****************************************************************************/ ****************************************************************************/
template<typename Scalar> template<typename Scalar>
struct ei_abs_impl struct abs_impl
{ {
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
static inline RealScalar run(const Scalar& x) static inline RealScalar run(const Scalar& x)
@@ -273,23 +275,23 @@ struct ei_abs_impl
}; };
template<typename Scalar> template<typename Scalar>
struct ei_abs_retval struct abs_retval
{ {
typedef typename NumTraits<Scalar>::Real type; typedef typename NumTraits<Scalar>::Real type;
}; };
template<typename Scalar> template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(abs, Scalar) ei_abs(const Scalar& x) inline EIGEN_MATHFUNC_RETVAL(abs, Scalar) abs(const Scalar& x)
{ {
return EIGEN_MATHFUNC_IMPL(abs, Scalar)::run(x); return EIGEN_MATHFUNC_IMPL(abs, Scalar)::run(x);
} }
/**************************************************************************** /****************************************************************************
* Implementation of ei_abs2 * * Implementation of abs2 *
****************************************************************************/ ****************************************************************************/
template<typename Scalar> template<typename Scalar>
struct ei_abs2_impl struct abs2_impl
{ {
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
static inline RealScalar run(const Scalar& x) static inline RealScalar run(const Scalar& x)
@@ -299,7 +301,7 @@ struct ei_abs2_impl
}; };
template<typename RealScalar> template<typename RealScalar>
struct ei_abs2_impl<std::complex<RealScalar> > struct abs2_impl<std::complex<RealScalar> >
{ {
static inline RealScalar run(const std::complex<RealScalar>& x) static inline RealScalar run(const std::complex<RealScalar>& x)
{ {
@@ -308,92 +310,92 @@ struct ei_abs2_impl<std::complex<RealScalar> >
}; };
template<typename Scalar> template<typename Scalar>
struct ei_abs2_retval struct abs2_retval
{ {
typedef typename NumTraits<Scalar>::Real type; typedef typename NumTraits<Scalar>::Real type;
}; };
template<typename Scalar> template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) ei_abs2(const Scalar& x) inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x)
{ {
return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x); return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x);
} }
/**************************************************************************** /****************************************************************************
* Implementation of ei_norm1 * * Implementation of norm1 *
****************************************************************************/ ****************************************************************************/
template<typename Scalar, bool IsComplex> template<typename Scalar, bool IsComplex>
struct ei_norm1_default_impl struct norm1_default_impl
{ {
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
static inline RealScalar run(const Scalar& x) static inline RealScalar run(const Scalar& x)
{ {
return ei_abs(ei_real(x)) + ei_abs(ei_imag(x)); return abs(real(x)) + abs(imag(x));
} }
}; };
template<typename Scalar> template<typename Scalar>
struct ei_norm1_default_impl<Scalar, false> struct norm1_default_impl<Scalar, false>
{ {
static inline Scalar run(const Scalar& x) static inline Scalar run(const Scalar& x)
{ {
return ei_abs(x); return abs(x);
} }
}; };
template<typename Scalar> template<typename Scalar>
struct ei_norm1_impl : ei_norm1_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {}; struct norm1_impl : norm1_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {};
template<typename Scalar> template<typename Scalar>
struct ei_norm1_retval struct norm1_retval
{ {
typedef typename NumTraits<Scalar>::Real type; typedef typename NumTraits<Scalar>::Real type;
}; };
template<typename Scalar> template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) ei_norm1(const Scalar& x) inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x)
{ {
return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x); return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x);
} }
/**************************************************************************** /****************************************************************************
* Implementation of ei_hypot * * Implementation of hypot *
****************************************************************************/ ****************************************************************************/
template<typename Scalar> template<typename Scalar>
struct ei_hypot_impl struct hypot_impl
{ {
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
static inline RealScalar run(const Scalar& x, const Scalar& y) static inline RealScalar run(const Scalar& x, const Scalar& y)
{ {
RealScalar _x = ei_abs(x); RealScalar _x = abs(x);
RealScalar _y = ei_abs(y); RealScalar _y = abs(y);
RealScalar p = std::max(_x, _y); RealScalar p = std::max(_x, _y);
RealScalar q = std::min(_x, _y); RealScalar q = std::min(_x, _y);
RealScalar qp = q/p; RealScalar qp = q/p;
return p * ei_sqrt(RealScalar(1) + qp*qp); return p * sqrt(RealScalar(1) + qp*qp);
} }
}; };
template<typename Scalar> template<typename Scalar>
struct ei_hypot_retval struct hypot_retval
{ {
typedef typename NumTraits<Scalar>::Real type; typedef typename NumTraits<Scalar>::Real type;
}; };
template<typename Scalar> template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) ei_hypot(const Scalar& x, const Scalar& y) inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y)
{ {
return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y); return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y);
} }
/**************************************************************************** /****************************************************************************
* Implementation of ei_cast * * Implementation of cast *
****************************************************************************/ ****************************************************************************/
template<typename OldType, typename NewType> template<typename OldType, typename NewType>
struct ei_cast_impl struct cast_impl
{ {
static inline NewType run(const OldType& x) static inline NewType run(const OldType& x)
{ {
@@ -401,20 +403,20 @@ struct ei_cast_impl
} }
}; };
// here, for once, we're plainly returning NewType: we don't want ei_cast to do weird things. // here, for once, we're plainly returning NewType: we don't want cast to do weird things.
template<typename OldType, typename NewType> template<typename OldType, typename NewType>
inline NewType ei_cast(const OldType& x) inline NewType cast(const OldType& x)
{ {
return ei_cast_impl<OldType, NewType>::run(x); return cast_impl<OldType, NewType>::run(x);
} }
/**************************************************************************** /****************************************************************************
* Implementation of ei_sqrt * * Implementation of sqrt *
****************************************************************************/ ****************************************************************************/
template<typename Scalar, bool IsInteger> template<typename Scalar, bool IsInteger>
struct ei_sqrt_default_impl struct sqrt_default_impl
{ {
static inline Scalar run(const Scalar& x) static inline Scalar run(const Scalar& x)
{ {
@@ -423,188 +425,72 @@ struct ei_sqrt_default_impl
}; };
template<typename Scalar> template<typename Scalar>
struct ei_sqrt_default_impl<Scalar, true> struct sqrt_default_impl<Scalar, true>
{ {
static inline Scalar run(const Scalar&) static inline Scalar run(const Scalar&)
{ {
#ifdef EIGEN2_SUPPORT
eigen_assert(!NumTraits<Scalar>::IsInteger);
#else
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
#endif
return Scalar(0); return Scalar(0);
} }
}; };
template<typename Scalar> template<typename Scalar>
struct ei_sqrt_impl : ei_sqrt_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {}; struct sqrt_impl : sqrt_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
template<typename Scalar> template<typename Scalar>
struct ei_sqrt_retval struct sqrt_retval
{ {
typedef Scalar type; typedef Scalar type;
}; };
template<typename Scalar> template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(sqrt, Scalar) ei_sqrt(const Scalar& x) inline EIGEN_MATHFUNC_RETVAL(sqrt, Scalar) sqrt(const Scalar& x)
{ {
return EIGEN_MATHFUNC_IMPL(sqrt, Scalar)::run(x); return EIGEN_MATHFUNC_IMPL(sqrt, Scalar)::run(x);
} }
/**************************************************************************** /****************************************************************************
* Implementation of ei_exp * * Implementation of standard unary real functions (exp, log, sin, cos, ...) *
****************************************************************************/ ****************************************************************************/
template<typename Scalar, bool IsInteger> // This macro instantiates all the necessary template machinery that is common to all unary real functions.
struct ei_exp_default_impl #define EIGEN_MATHFUNC_STANDARD_REAL_UNARY(NAME) \
{ template<typename Scalar, bool IsInteger> struct NAME##_default_impl { \
static inline Scalar run(const Scalar& x) static inline Scalar run(const Scalar& x) { return std::NAME(x); } \
{ }; \
return std::exp(x); template<typename Scalar> struct NAME##_default_impl<Scalar, true> { \
static inline Scalar run(const Scalar&) { \
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) \
return Scalar(0); \
} \
}; \
template<typename Scalar> struct NAME##_impl \
: NAME##_default_impl<Scalar, NumTraits<Scalar>::IsInteger> \
{}; \
template<typename Scalar> struct NAME##_retval { typedef Scalar type; }; \
template<typename Scalar> \
inline EIGEN_MATHFUNC_RETVAL(NAME, Scalar) NAME(const Scalar& x) { \
return EIGEN_MATHFUNC_IMPL(NAME, Scalar)::run(x); \
} }
};
template<typename Scalar> EIGEN_MATHFUNC_STANDARD_REAL_UNARY(exp)
struct ei_exp_default_impl<Scalar, true> EIGEN_MATHFUNC_STANDARD_REAL_UNARY(log)
{ EIGEN_MATHFUNC_STANDARD_REAL_UNARY(sin)
static inline Scalar run(const Scalar&) EIGEN_MATHFUNC_STANDARD_REAL_UNARY(cos)
{ EIGEN_MATHFUNC_STANDARD_REAL_UNARY(tan)
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) EIGEN_MATHFUNC_STANDARD_REAL_UNARY(asin)
return Scalar(0); EIGEN_MATHFUNC_STANDARD_REAL_UNARY(acos)
}
};
template<typename Scalar>
struct ei_exp_impl : ei_exp_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
template<typename Scalar>
struct ei_exp_retval
{
typedef Scalar type;
};
template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(exp, Scalar) ei_exp(const Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(exp, Scalar)::run(x);
}
/**************************************************************************** /****************************************************************************
* Implementation of ei_cos * * Implementation of atan2 *
****************************************************************************/ ****************************************************************************/
template<typename Scalar, bool IsInteger> template<typename Scalar, bool IsInteger>
struct ei_cos_default_impl struct atan2_default_impl
{
static inline Scalar run(const Scalar& x)
{
return std::cos(x);
}
};
template<typename Scalar>
struct ei_cos_default_impl<Scalar, true>
{
static inline Scalar run(const Scalar&)
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
return Scalar(0);
}
};
template<typename Scalar>
struct ei_cos_impl : ei_cos_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
template<typename Scalar>
struct ei_cos_retval
{
typedef Scalar type;
};
template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(cos, Scalar) ei_cos(const Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(cos, Scalar)::run(x);
}
/****************************************************************************
* Implementation of ei_sin *
****************************************************************************/
template<typename Scalar, bool IsInteger>
struct ei_sin_default_impl
{
static inline Scalar run(const Scalar& x)
{
return std::sin(x);
}
};
template<typename Scalar>
struct ei_sin_default_impl<Scalar, true>
{
static inline Scalar run(const Scalar&)
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
return Scalar(0);
}
};
template<typename Scalar>
struct ei_sin_impl : ei_sin_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
template<typename Scalar>
struct ei_sin_retval
{
typedef Scalar type;
};
template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(sin, Scalar) ei_sin(const Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(sin, Scalar)::run(x);
}
/****************************************************************************
* Implementation of ei_log *
****************************************************************************/
template<typename Scalar, bool IsInteger>
struct ei_log_default_impl
{
static inline Scalar run(const Scalar& x)
{
return std::log(x);
}
};
template<typename Scalar>
struct ei_log_default_impl<Scalar, true>
{
static inline Scalar run(const Scalar&)
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
return Scalar(0);
}
};
template<typename Scalar>
struct ei_log_impl : ei_log_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
template<typename Scalar>
struct ei_log_retval
{
typedef Scalar type;
};
template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(log, Scalar) ei_log(const Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(log, Scalar)::run(x);
}
/****************************************************************************
* Implementation of ei_atan2 *
****************************************************************************/
template<typename Scalar, bool IsInteger>
struct ei_atan2_default_impl
{ {
typedef Scalar retval; typedef Scalar retval;
static inline Scalar run(const Scalar& x, const Scalar& y) static inline Scalar run(const Scalar& x, const Scalar& y)
@@ -614,7 +500,7 @@ struct ei_atan2_default_impl
}; };
template<typename Scalar> template<typename Scalar>
struct ei_atan2_default_impl<Scalar, true> struct atan2_default_impl<Scalar, true>
{ {
static inline Scalar run(const Scalar&, const Scalar&) static inline Scalar run(const Scalar&, const Scalar&)
{ {
@@ -624,26 +510,26 @@ struct ei_atan2_default_impl<Scalar, true>
}; };
template<typename Scalar> template<typename Scalar>
struct ei_atan2_impl : ei_atan2_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {}; struct atan2_impl : atan2_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
template<typename Scalar> template<typename Scalar>
struct ei_atan2_retval struct atan2_retval
{ {
typedef Scalar type; typedef Scalar type;
}; };
template<typename Scalar> template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(atan2, Scalar) ei_atan2(const Scalar& x, const Scalar& y) inline EIGEN_MATHFUNC_RETVAL(atan2, Scalar) atan2(const Scalar& x, const Scalar& y)
{ {
return EIGEN_MATHFUNC_IMPL(atan2, Scalar)::run(x, y); return EIGEN_MATHFUNC_IMPL(atan2, Scalar)::run(x, y);
} }
/**************************************************************************** /****************************************************************************
* Implementation of ei_pow * * Implementation of pow *
****************************************************************************/ ****************************************************************************/
template<typename Scalar, bool IsInteger> template<typename Scalar, bool IsInteger>
struct ei_pow_default_impl struct pow_default_impl
{ {
typedef Scalar retval; typedef Scalar retval;
static inline Scalar run(const Scalar& x, const Scalar& y) static inline Scalar run(const Scalar& x, const Scalar& y)
@@ -653,12 +539,12 @@ struct ei_pow_default_impl
}; };
template<typename Scalar> template<typename Scalar>
struct ei_pow_default_impl<Scalar, true> struct pow_default_impl<Scalar, true>
{ {
static inline Scalar run(Scalar x, Scalar y) static inline Scalar run(Scalar x, Scalar y)
{ {
Scalar res = 1; Scalar res = 1;
ei_assert(!NumTraits<Scalar>::IsSigned || y >= 0); eigen_assert(!NumTraits<Scalar>::IsSigned || y >= 0);
if(y & 1) res *= x; if(y & 1) res *= x;
y >>= 1; y >>= 1;
while(y) while(y)
@@ -672,47 +558,47 @@ struct ei_pow_default_impl<Scalar, true>
}; };
template<typename Scalar> template<typename Scalar>
struct ei_pow_impl : ei_pow_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {}; struct pow_impl : pow_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
template<typename Scalar> template<typename Scalar>
struct ei_pow_retval struct pow_retval
{ {
typedef Scalar type; typedef Scalar type;
}; };
template<typename Scalar> template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) ei_pow(const Scalar& x, const Scalar& y) inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y)
{ {
return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y); return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y);
} }
/**************************************************************************** /****************************************************************************
* Implementation of ei_random * * Implementation of random *
****************************************************************************/ ****************************************************************************/
template<typename Scalar, template<typename Scalar,
bool IsComplex, bool IsComplex,
bool IsInteger> bool IsInteger>
struct ei_random_default_impl {}; struct random_default_impl {};
template<typename Scalar> template<typename Scalar>
struct ei_random_impl : ei_random_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {}; struct random_impl : random_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
template<typename Scalar> template<typename Scalar>
struct ei_random_retval struct random_retval
{ {
typedef Scalar type; typedef Scalar type;
}; };
template<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) ei_random(const Scalar& x, const Scalar& y); template<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y);
template<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) ei_random(); template<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random();
template<typename Scalar> template<typename Scalar>
struct ei_random_default_impl<Scalar, false, false> struct random_default_impl<Scalar, false, false>
{ {
static inline Scalar run(const Scalar& x, const Scalar& y) static inline Scalar run(const Scalar& x, const Scalar& y)
{ {
return x + (y-x) * Scalar(std::rand()) / float(RAND_MAX); return x + (y-x) * Scalar(std::rand()) / Scalar(RAND_MAX);
} }
static inline Scalar run() static inline Scalar run()
{ {
@@ -720,42 +606,102 @@ struct ei_random_default_impl<Scalar, false, false>
} }
}; };
template<typename Scalar> enum {
struct ei_random_default_impl<Scalar, false, true> floor_log2_terminate,
floor_log2_move_up,
floor_log2_move_down,
floor_log2_bogus
};
template<unsigned int n, int lower, int upper> struct floor_log2_selector
{ {
enum { middle = (lower + upper) / 2,
value = (upper <= lower + 1) ? int(floor_log2_terminate)
: (n < (1 << middle)) ? int(floor_log2_move_down)
: (n==0) ? int(floor_log2_bogus)
: int(floor_log2_move_up)
};
};
template<unsigned int n,
int lower = 0,
int upper = sizeof(unsigned int) * CHAR_BIT - 1,
int selector = floor_log2_selector<n, lower, upper>::value>
struct floor_log2 {};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_move_down>
{
enum { value = floor_log2<n, lower, floor_log2_selector<n, lower, upper>::middle>::value };
};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_move_up>
{
enum { value = floor_log2<n, floor_log2_selector<n, lower, upper>::middle, upper>::value };
};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_terminate>
{
enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower };
};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_bogus>
{
// no value, error at compile time
};
template<typename Scalar>
struct random_default_impl<Scalar, false, true>
{
typedef typename NumTraits<Scalar>::NonInteger NonInteger;
static inline Scalar run(const Scalar& x, const Scalar& y) static inline Scalar run(const Scalar& x, const Scalar& y)
{ {
return x + Scalar((y-x+1) * (std::rand() / (RAND_MAX + typename NumTraits<Scalar>::NonInteger(1)))); return x + Scalar((NonInteger(y)-x+1) * std::rand() / (RAND_MAX + NonInteger(1)));
} }
static inline Scalar run() static inline Scalar run()
{ {
#ifdef EIGEN_MAKING_DOCS
return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10)); return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
#else
enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value,
scalar_bits = sizeof(Scalar) * CHAR_BIT,
shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits))
};
Scalar x = Scalar(std::rand() >> shift);
Scalar offset = NumTraits<Scalar>::IsSigned ? Scalar(1 << (rand_bits-1)) : Scalar(0);
return x - offset;
#endif
} }
}; };
template<typename Scalar> template<typename Scalar>
struct ei_random_default_impl<Scalar, true, false> struct random_default_impl<Scalar, true, false>
{ {
static inline Scalar run(const Scalar& x, const Scalar& y) static inline Scalar run(const Scalar& x, const Scalar& y)
{ {
return Scalar(ei_random(ei_real(x), ei_real(y)), return Scalar(random(real(x), real(y)),
ei_random(ei_imag(x), ei_imag(y))); random(imag(x), imag(y)));
} }
static inline Scalar run() static inline Scalar run()
{ {
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
return Scalar(ei_random<RealScalar>(), ei_random<RealScalar>()); return Scalar(random<RealScalar>(), random<RealScalar>());
} }
}; };
template<typename Scalar> template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(random, Scalar) ei_random(const Scalar& x, const Scalar& y) inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y)
{ {
return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y); return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y);
} }
template<typename Scalar> template<typename Scalar>
inline EIGEN_MATHFUNC_RETVAL(random, Scalar) ei_random() inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()
{ {
return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(); return EIGEN_MATHFUNC_IMPL(random, Scalar)::run();
} }
@@ -767,20 +713,20 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) ei_random()
template<typename Scalar, template<typename Scalar,
bool IsComplex, bool IsComplex,
bool IsInteger> bool IsInteger>
struct ei_scalar_fuzzy_default_impl {}; struct scalar_fuzzy_default_impl {};
template<typename Scalar> template<typename Scalar>
struct ei_scalar_fuzzy_default_impl<Scalar, false, false> struct scalar_fuzzy_default_impl<Scalar, false, false>
{ {
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
template<typename OtherScalar> template<typename OtherScalar>
static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec) static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
{ {
return ei_abs(x) <= ei_abs(y) * prec; return abs(x) <= abs(y) * prec;
} }
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
{ {
return ei_abs(x - y) <= std::min(ei_abs(x), ei_abs(y)) * prec; return abs(x - y) <= std::min(abs(x), abs(y)) * prec;
} }
static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec) static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
{ {
@@ -789,7 +735,7 @@ struct ei_scalar_fuzzy_default_impl<Scalar, false, false>
}; };
template<typename Scalar> template<typename Scalar>
struct ei_scalar_fuzzy_default_impl<Scalar, false, true> struct scalar_fuzzy_default_impl<Scalar, false, true>
{ {
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
template<typename OtherScalar> template<typename OtherScalar>
@@ -808,62 +754,78 @@ struct ei_scalar_fuzzy_default_impl<Scalar, false, true>
}; };
template<typename Scalar> template<typename Scalar>
struct ei_scalar_fuzzy_default_impl<Scalar, true, false> struct scalar_fuzzy_default_impl<Scalar, true, false>
{ {
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
template<typename OtherScalar> template<typename OtherScalar>
static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec) static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
{ {
return ei_abs2(x) <= ei_abs2(y) * prec * prec; return abs2(x) <= abs2(y) * prec * prec;
} }
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
{ {
return ei_abs2(x - y) <= std::min(ei_abs2(x), ei_abs2(y)) * prec * prec; return abs2(x - y) <= std::min(abs2(x), abs2(y)) * prec * prec;
} }
}; };
template<typename Scalar> template<typename Scalar>
struct ei_scalar_fuzzy_impl : ei_scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {}; struct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
template<typename Scalar, typename OtherScalar> template<typename Scalar, typename OtherScalar>
inline bool ei_isMuchSmallerThan(const Scalar& x, const OtherScalar& y, inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
{ {
return ei_scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision); return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision);
} }
template<typename Scalar> template<typename Scalar>
inline bool ei_isApprox(const Scalar& x, const Scalar& y, inline bool isApprox(const Scalar& x, const Scalar& y,
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
{ {
return ei_scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision); return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision);
} }
template<typename Scalar> template<typename Scalar>
inline bool ei_isApproxOrLessThan(const Scalar& x, const Scalar& y, inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y,
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
{ {
return ei_scalar_fuzzy_impl<Scalar>::isApproxOrLessThan(x, y, precision); return scalar_fuzzy_impl<Scalar>::isApproxOrLessThan(x, y, precision);
} }
/****************************************** /******************************************
*** The special case of the bool type *** *** The special case of the bool type ***
******************************************/ ******************************************/
template<> struct ei_random_impl<bool> template<> struct random_impl<bool>
{ {
static inline bool run() static inline bool run()
{ {
return ei_random<int>(0,1)==0 ? false : true; return random<int>(0,1)==0 ? false : true;
} }
}; };
template<> struct ei_scalar_fuzzy_impl<bool> template<> struct scalar_fuzzy_impl<bool>
{ {
typedef bool RealScalar;
template<typename OtherScalar>
static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&)
{
return !x;
}
static inline bool isApprox(bool x, bool y, bool) static inline bool isApprox(bool x, bool y, bool)
{ {
return x == y; return x == y;
} }
static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&)
{
return (!x) || y;
}
}; };
} // end namespace internal
#endif // EIGEN_MATHFUNCTIONS_H #endif // EIGEN_MATHFUNCTIONS_H

View File

@@ -27,6 +27,7 @@
#define EIGEN_MATRIX_H #define EIGEN_MATRIX_H
/** \class Matrix /** \class Matrix
* \ingroup Core_Module
* *
* \brief The matrix class, also used for vectors and row-vectors * \brief The matrix class, also used for vectors and row-vectors
* *
@@ -44,7 +45,7 @@
* The remaining template parameters are optional -- in most cases you don't have to worry about them. * The remaining template parameters are optional -- in most cases you don't have to worry about them.
* \tparam _Options \anchor matrix_tparam_options A combination of either \b RowMajor or \b ColMajor, and of either * \tparam _Options \anchor matrix_tparam_options A combination of either \b RowMajor or \b ColMajor, and of either
* \b AutoAlign or \b DontAlign. * \b AutoAlign or \b DontAlign.
* The former controls storage order, and defaults to column-major. The latter controls alignment, which is required * The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required
* for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size. * for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size.
* \tparam _MaxRows Maximum number of rows. Defaults to \a _Rows (\ref maxrows "note"). * \tparam _MaxRows Maximum number of rows. Defaults to \a _Rows (\ref maxrows "note").
* \tparam _MaxCols Maximum number of columns. Defaults to \a _Cols (\ref maxrows "note"). * \tparam _MaxCols Maximum number of columns. Defaults to \a _Cols (\ref maxrows "note").
@@ -78,6 +79,9 @@
* m(0, 3) = 3; * m(0, 3) = 3;
* \endcode * \endcode
* *
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN.
*
* <i><b>Some notes:</b></i> * <i><b>Some notes:</b></i>
* *
* <dl> * <dl>
@@ -106,10 +110,13 @@
* are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.</dd> * are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.</dd>
* </dl> * </dl>
* *
* \see MatrixBase for the majority of the API methods for matrices * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
* \ref TopicStorageOrders
*/ */
namespace internal {
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols> template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{ {
typedef _Scalar Scalar; typedef _Scalar Scalar;
typedef Dense StorageKind; typedef Dense StorageKind;
@@ -120,24 +127,25 @@ struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
ColsAtCompileTime = _Cols, ColsAtCompileTime = _Cols,
MaxRowsAtCompileTime = _MaxRows, MaxRowsAtCompileTime = _MaxRows,
MaxColsAtCompileTime = _MaxCols, MaxColsAtCompileTime = _MaxCols,
Flags = ei_compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
CoeffReadCost = NumTraits<Scalar>::ReadCost, CoeffReadCost = NumTraits<Scalar>::ReadCost,
Options = _Options, Options = _Options,
InnerStrideAtCompileTime = 1, InnerStrideAtCompileTime = 1,
OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime
}; };
}; };
}
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols> template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
class Matrix class Matrix
: public DenseStorageBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{ {
public: public:
/** \brief Base class typedef. /** \brief Base class typedef.
* \sa DenseStorageBase * \sa PlainObjectBase
*/ */
typedef DenseStorageBase<Matrix> Base; typedef PlainObjectBase<Matrix> Base;
enum { Options = _Options }; enum { Options = _Options };
@@ -216,8 +224,8 @@ class Matrix
} }
// FIXME is it still needed // FIXME is it still needed
Matrix(ei_constructor_without_unaligned_array_assert) Matrix(internal::constructor_without_unaligned_array_assert)
: Base(ei_constructor_without_unaligned_array_assert()) : Base(internal::constructor_without_unaligned_array_assert())
{ Base::_check_template_params(); EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED } { Base::_check_template_params(); EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED }
/** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
@@ -231,8 +239,8 @@ class Matrix
{ {
Base::_check_template_params(); Base::_check_template_params();
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix)
ei_assert(dim > 0); eigen_assert(dim >= 0);
ei_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim); eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
} }
@@ -281,6 +289,11 @@ class Matrix
EIGEN_STRONG_INLINE Matrix(const MatrixBase<OtherDerived>& other) EIGEN_STRONG_INLINE Matrix(const MatrixBase<OtherDerived>& other)
: Base(other.rows() * other.cols(), other.rows(), other.cols()) : Base(other.rows() * other.cols(), other.rows(), other.cols())
{ {
// This test resides here, to bring the error messages closer to the user. Normally, these checks
// are performed deeply within the library, thus causing long and scary error traces.
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
Base::_check_template_params(); Base::_check_template_params();
Base::_set_noalias(other); Base::_set_noalias(other);
} }
@@ -319,7 +332,7 @@ class Matrix
* of same type it is enough to swap the data pointers. * of same type it is enough to swap the data pointers.
*/ */
template<typename OtherDerived> template<typename OtherDerived>
void swap(MatrixBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other) void swap(MatrixBase<OtherDerived> const & other)
{ this->_swap(other.derived()); } { this->_swap(other.derived()); }
inline Index innerStride() const { return 1; } inline Index innerStride() const { return 1; }
@@ -332,6 +345,13 @@ class Matrix
template<typename OtherDerived> template<typename OtherDerived>
Matrix& operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r); Matrix& operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
#ifdef EIGEN2_SUPPORT
template<typename OtherDerived>
explicit Matrix(const eigen2_RotationBase<OtherDerived,ColsAtCompileTime>& r);
template<typename OtherDerived>
Matrix& operator=(const eigen2_RotationBase<OtherDerived,ColsAtCompileTime>& r);
#endif
// allow to extend Matrix outside Eigen // allow to extend Matrix outside Eigen
#ifdef EIGEN_MATRIX_PLUGIN #ifdef EIGEN_MATRIX_PLUGIN
#include EIGEN_MATRIX_PLUGIN #include EIGEN_MATRIX_PLUGIN
@@ -339,7 +359,7 @@ class Matrix
protected: protected:
template <typename Derived, typename OtherDerived, bool IsVector> template <typename Derived, typename OtherDerived, bool IsVector>
friend struct ei_conservative_resize_like_impl; friend struct internal::conservative_resize_like_impl;
using Base::m_storage; using Base::m_storage;
}; };

View File

@@ -27,6 +27,7 @@
#define EIGEN_MATRIXBASE_H #define EIGEN_MATRIXBASE_H
/** \class MatrixBase /** \class MatrixBase
* \ingroup Core_Module
* *
* \brief Base class for all dense matrices, vectors, and expressions * \brief Base class for all dense matrices, vectors, and expressions
* *
@@ -37,7 +38,7 @@
* Note that some methods are defined in other modules such as the \ref LU_Module LU module * Note that some methods are defined in other modules such as the \ref LU_Module LU module
* for all functions related to matrix inversions. * for all functions related to matrix inversions.
* *
* \param Derived is the derived type, e.g. a matrix type, or an expression, etc. * \tparam Derived is the derived type, e.g. a matrix type, or an expression, etc.
* *
* When writing a function taking Eigen objects as argument, if you want your function * When writing a function taking Eigen objects as argument, if you want your function
* to take as argument any matrix, vector, or expression, just let it take a * to take as argument any matrix, vector, or expression, just let it take a
@@ -51,6 +52,11 @@
cout << x.row(0) << endl; cout << x.row(0) << endl;
} }
* \endcode * \endcode
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN.
*
* \sa \ref TopicClassHierarchy
*/ */
template<typename Derived> class MatrixBase template<typename Derived> class MatrixBase
: public DenseBase<Derived> : public DenseBase<Derived>
@@ -58,10 +64,10 @@ template<typename Derived> class MatrixBase
public: public:
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
typedef MatrixBase StorageBaseType; typedef MatrixBase StorageBaseType;
typedef typename ei_traits<Derived>::StorageKind StorageKind; typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index; typedef typename internal::traits<Derived>::Index Index;
typedef typename ei_traits<Derived>::Scalar Scalar; typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar; typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
typedef DenseBase<Derived> Base; typedef DenseBase<Derived> Base;
@@ -90,6 +96,7 @@ template<typename Derived> class MatrixBase
using Base::operator/=; using Base::operator/=;
typedef typename Base::CoeffReturnType CoeffReturnType; typedef typename Base::CoeffReturnType CoeffReturnType;
typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
typedef typename Base::RowXpr RowXpr; typedef typename Base::RowXpr RowXpr;
typedef typename Base::ColXpr ColXpr; typedef typename Base::ColXpr ColXpr;
#endif // not EIGEN_PARSED_BY_DOXYGEN #endif // not EIGEN_PARSED_BY_DOXYGEN
@@ -112,30 +119,30 @@ template<typename Derived> class MatrixBase
* the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
* that the return type of eval() is either PlainObject or const PlainObject&. * that the return type of eval() is either PlainObject or const PlainObject&.
*/ */
typedef Matrix<typename ei_traits<Derived>::Scalar, typedef Matrix<typename internal::traits<Derived>::Scalar,
ei_traits<Derived>::RowsAtCompileTime, internal::traits<Derived>::RowsAtCompileTime,
ei_traits<Derived>::ColsAtCompileTime, internal::traits<Derived>::ColsAtCompileTime,
AutoAlign | (ei_traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor), AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
ei_traits<Derived>::MaxRowsAtCompileTime, internal::traits<Derived>::MaxRowsAtCompileTime,
ei_traits<Derived>::MaxColsAtCompileTime internal::traits<Derived>::MaxColsAtCompileTime
> PlainObject; > PlainObject;
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal Represents a matrix with all coefficients equal to one another*/ /** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ConstantReturnType; typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
/** \internal the return type of MatrixBase::adjoint() */ /** \internal the return type of MatrixBase::adjoint() */
typedef typename ei_meta_if<NumTraits<Scalar>::IsComplex, typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>, Eigen::Transpose<Derived> >, CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
Transpose<Derived> ConstTransposeReturnType
>::ret AdjointReturnType; >::type AdjointReturnType;
/** \internal Return type of eigenvalues() */ /** \internal Return type of eigenvalues() */
typedef Matrix<std::complex<RealScalar>, ei_traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType; typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType;
/** \internal the return type of identity */ /** \internal the return type of identity */
typedef CwiseNullaryOp<ei_scalar_identity_op<Scalar>,Derived> IdentityReturnType; typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,Derived> IdentityReturnType;
/** \internal the return type of unit vectors */ /** \internal the return type of unit vectors */
typedef Block<CwiseNullaryOp<ei_scalar_identity_op<Scalar>, SquareMatrixType>, typedef Block<const CwiseNullaryOp<internal::scalar_identity_op<Scalar>, SquareMatrixType>,
ei_traits<Derived>::RowsAtCompileTime, internal::traits<Derived>::RowsAtCompileTime,
ei_traits<Derived>::ColsAtCompileTime> BasisReturnType; internal::traits<Derived>::ColsAtCompileTime> BasisReturnType;
#endif // not EIGEN_PARSED_BY_DOXYGEN #endif // not EIGEN_PARSED_BY_DOXYGEN
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase
@@ -180,7 +187,7 @@ template<typename Derived> class MatrixBase
operator*(const MatrixBase<OtherDerived> &other) const; operator*(const MatrixBase<OtherDerived> &other) const;
template<typename OtherDerived> template<typename OtherDerived>
const typename ProductReturnType<Derived,OtherDerived,LazyCoeffBasedProductMode>::Type const typename LazyProductReturnType<Derived,OtherDerived>::Type
lazyProduct(const MatrixBase<OtherDerived> &other) const; lazyProduct(const MatrixBase<OtherDerived> &other) const;
template<typename OtherDerived> template<typename OtherDerived>
@@ -197,7 +204,14 @@ template<typename Derived> class MatrixBase
operator*(const DiagonalBase<DiagonalDerived> &diagonal) const; operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;
template<typename OtherDerived> template<typename OtherDerived>
Scalar dot(const MatrixBase<OtherDerived>& other) const; typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
dot(const MatrixBase<OtherDerived>& other) const;
#ifdef EIGEN2_SUPPORT
template<typename OtherDerived>
Scalar eigen2_dot(const MatrixBase<OtherDerived>& other) const;
#endif
RealScalar squaredNorm() const; RealScalar squaredNorm() const;
RealScalar norm() const; RealScalar norm() const;
RealScalar stableNorm() const; RealScalar stableNorm() const;
@@ -209,23 +223,49 @@ template<typename Derived> class MatrixBase
const AdjointReturnType adjoint() const; const AdjointReturnType adjoint() const;
void adjointInPlace(); void adjointInPlace();
Diagonal<Derived,0> diagonal(); typedef Diagonal<Derived> DiagonalReturnType;
const Diagonal<Derived,0> diagonal() const; DiagonalReturnType diagonal();
typedef const Diagonal<const Derived> ConstDiagonalReturnType;
const ConstDiagonalReturnType diagonal() const;
template<int Index> Diagonal<Derived,Index> diagonal(); template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
template<int Index> const Diagonal<Derived,Index> diagonal() const; template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
Diagonal<Derived, Dynamic> diagonal(Index index); template<int Index> typename DiagonalIndexReturnType<Index>::Type diagonal();
const Diagonal<Derived, Dynamic> diagonal(Index index) const; template<int Index> typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
template<unsigned int Mode> TriangularView<Derived, Mode> part(); // Note: The "MatrixBase::" prefixes are added to help MSVC9 to match these declarations with the later implementations.
template<unsigned int Mode> const TriangularView<Derived, Mode> part() const; // On the other hand they confuse MSVC8...
#if (defined _MSC_VER) && (_MSC_VER >= 1500) // 2008 or later
typename MatrixBase::template DiagonalIndexReturnType<Dynamic>::Type diagonal(Index index);
typename MatrixBase::template ConstDiagonalIndexReturnType<Dynamic>::Type diagonal(Index index) const;
#else
typename DiagonalIndexReturnType<Dynamic>::Type diagonal(Index index);
typename ConstDiagonalIndexReturnType<Dynamic>::Type diagonal(Index index) const;
#endif
template<unsigned int Mode> TriangularView<Derived, Mode> triangularView(); #ifdef EIGEN2_SUPPORT
template<unsigned int Mode> const TriangularView<Derived, Mode> triangularView() const; template<unsigned int Mode> typename internal::eigen2_part_return_type<Derived, Mode>::type part();
template<unsigned int Mode> const typename internal::eigen2_part_return_type<Derived, Mode>::type part() const;
// huuuge hack. make Eigen2's matrix.part<Diagonal>() work in eigen3. Problem: Diagonal is now a class template instead
// of an integer constant. Solution: overload the part() method template wrt template parameters list.
template<template<typename T, int n> class U>
const DiagonalWrapper<ConstDiagonalReturnType> part() const
{ return diagonal().asDiagonal(); }
#endif // EIGEN2_SUPPORT
template<unsigned int UpLo> SelfAdjointView<Derived, UpLo> selfadjointView(); template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
template<unsigned int UpLo> const SelfAdjointView<Derived, UpLo> selfadjointView() const; template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
template<unsigned int Mode> typename TriangularViewReturnType<Mode>::Type triangularView();
template<unsigned int Mode> typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };
template<unsigned int UpLo> typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
template<unsigned int UpLo> typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
const SparseView<Derived> sparseView(const Scalar& m_reference = Scalar(0), const SparseView<Derived> sparseView(const Scalar& m_reference = Scalar(0),
typename NumTraits<Scalar>::Real m_epsilon = NumTraits<Scalar>::dummy_precision()) const; typename NumTraits<Scalar>::Real m_epsilon = NumTraits<Scalar>::dummy_precision()) const;
@@ -238,7 +278,8 @@ template<typename Derived> class MatrixBase
static const BasisReturnType UnitZ(); static const BasisReturnType UnitZ();
static const BasisReturnType UnitW(); static const BasisReturnType UnitW();
const DiagonalWrapper<Derived> asDiagonal() const; const DiagonalWrapper<const Derived> asDiagonal() const;
const PermutationWrapper<const Derived> asPermutation() const;
Derived& setIdentity(); Derived& setIdentity();
Derived& setIdentity(Index rows, Index cols); Derived& setIdentity(Index rows, Index cols);
@@ -274,8 +315,8 @@ template<typename Derived> class MatrixBase
inline const ForceAlignedAccess<Derived> forceAlignedAccess() const; inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
inline ForceAlignedAccess<Derived> forceAlignedAccess(); inline ForceAlignedAccess<Derived> forceAlignedAccess();
template<bool Enable> inline typename ei_makeconst<typename ei_meta_if<Enable,ForceAlignedAccess<Derived>,Derived&>::ret>::type forceAlignedAccessIf() const; template<bool Enable> inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type forceAlignedAccessIf() const;
template<bool Enable> inline typename ei_meta_if<Enable,ForceAlignedAccess<Derived>,Derived&>::ret forceAlignedAccessIf(); template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
Scalar trace() const; Scalar trace() const;
@@ -295,8 +336,27 @@ template<typename Derived> class MatrixBase
const FullPivLU<PlainObject> fullPivLu() const; const FullPivLU<PlainObject> fullPivLu() const;
const PartialPivLU<PlainObject> partialPivLu() const; const PartialPivLU<PlainObject> partialPivLu() const;
#if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS
const LU<PlainObject> lu() const;
#endif
#ifdef EIGEN2_SUPPORT
const LU<PlainObject> eigen2_lu() const;
#endif
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
const PartialPivLU<PlainObject> lu() const; const PartialPivLU<PlainObject> lu() const;
const ei_inverse_impl<Derived> inverse() const; #endif
#ifdef EIGEN2_SUPPORT
template<typename ResultType>
void computeInverse(MatrixBase<ResultType> *result) const {
*result = this->inverse();
}
#endif
const internal::inverse_impl<Derived> inverse() const;
template<typename ResultType> template<typename ResultType>
void computeInverseAndDetWithCheck( void computeInverseAndDetWithCheck(
ResultType& inverse, ResultType& inverse,
@@ -322,35 +382,57 @@ template<typename Derived> class MatrixBase
const HouseholderQR<PlainObject> householderQr() const; const HouseholderQR<PlainObject> householderQr() const;
const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const; const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const; const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
#ifdef EIGEN2_SUPPORT
const QR<PlainObject> qr() const;
#endif
EigenvaluesReturnType eigenvalues() const; EigenvaluesReturnType eigenvalues() const;
RealScalar operatorNorm() const; RealScalar operatorNorm() const;
/////////// SVD module /////////// /////////// SVD module ///////////
JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
#ifdef EIGEN2_SUPPORT
SVD<PlainObject> svd() const; SVD<PlainObject> svd() const;
#endif
/////////// Geometry module /////////// /////////// Geometry module ///////////
#ifndef EIGEN_PARSED_BY_DOXYGEN
/// \internal helper struct to form the return type of the cross product
template<typename OtherDerived> struct cross_product_return_type {
typedef typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
typedef Matrix<Scalar,MatrixBase::RowsAtCompileTime,MatrixBase::ColsAtCompileTime> type;
};
#endif // EIGEN_PARSED_BY_DOXYGEN
template<typename OtherDerived> template<typename OtherDerived>
PlainObject cross(const MatrixBase<OtherDerived>& other) const; typename cross_product_return_type<OtherDerived>::type
cross(const MatrixBase<OtherDerived>& other) const;
template<typename OtherDerived> template<typename OtherDerived>
PlainObject cross3(const MatrixBase<OtherDerived>& other) const; PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
PlainObject unitOrthogonal(void) const; PlainObject unitOrthogonal(void) const;
Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const; Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
const ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
// put this as separate enum value to work around possible GCC 4.3 bug (?)
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1?Vertical:Horizontal };
typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
HomogeneousReturnType homogeneous() const;
#endif
enum { enum {
SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1 SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
}; };
typedef Block<Derived, typedef Block<const Derived,
ei_traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1, internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
ei_traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> StartMinusOne; internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;
typedef CwiseUnaryOp<ei_scalar_quotient1_op<typename ei_traits<Derived>::Scalar>, typedef CwiseUnaryOp<internal::scalar_quotient1_op<typename internal::traits<Derived>::Scalar>,
StartMinusOne > HNormalizedReturnType; const ConstStartMinusOne > HNormalizedReturnType;
const HNormalizedReturnType hnormalized() const; const HNormalizedReturnType hnormalized() const;
typedef Homogeneous<Derived,MatrixBase<Derived>::ColsAtCompileTime==1?Vertical:Horizontal> HomogeneousReturnType;
const HomogeneousReturnType homogeneous() const;
////////// Householder module /////////// ////////// Householder module ///////////
@@ -370,13 +452,13 @@ template<typename Derived> class MatrixBase
///////// Jacobi module ///////// ///////// Jacobi module /////////
template<typename OtherScalar> template<typename OtherScalar>
void applyOnTheLeft(Index p, Index q, const PlanarRotation<OtherScalar>& j); void applyOnTheLeft(Index p, Index q, const JacobiRotation<OtherScalar>& j);
template<typename OtherScalar> template<typename OtherScalar>
void applyOnTheRight(Index p, Index q, const PlanarRotation<OtherScalar>& j); void applyOnTheRight(Index p, Index q, const JacobiRotation<OtherScalar>& j);
///////// MatrixFunctions module ///////// ///////// MatrixFunctions module /////////
typedef typename ei_stem_function<Scalar>::type StemFunction; typedef typename internal::stem_function<Scalar>::type StemFunction;
const MatrixExponentialReturnValue<Derived> exp() const; const MatrixExponentialReturnValue<Derived> exp() const;
const MatrixFunctionReturnValue<Derived> matrixFunction(StemFunction f) const; const MatrixFunctionReturnValue<Derived> matrixFunction(StemFunction f) const;
const MatrixFunctionReturnValue<Derived> cosh() const; const MatrixFunctionReturnValue<Derived> cosh() const;
@@ -407,13 +489,13 @@ template<typename Derived> class MatrixBase
inline Cwise<Derived> cwise(); inline Cwise<Derived> cwise();
VectorBlock<Derived> start(Index size); VectorBlock<Derived> start(Index size);
const VectorBlock<Derived> start(Index size) const; const VectorBlock<const Derived> start(Index size) const;
VectorBlock<Derived> end(Index size); VectorBlock<Derived> end(Index size);
const VectorBlock<Derived> end(Index size) const; const VectorBlock<const Derived> end(Index size) const;
template<int Size> VectorBlock<Derived,Size> start(); template<int Size> VectorBlock<Derived,Size> start();
template<int Size> const VectorBlock<Derived,Size> start() const; template<int Size> const VectorBlock<const Derived,Size> start() const;
template<int Size> VectorBlock<Derived,Size> end(); template<int Size> VectorBlock<Derived,Size> end();
template<int Size> const VectorBlock<Derived,Size> end() const; template<int Size> const VectorBlock<const Derived,Size> end() const;
Minor<Derived> minor(Index row, Index col); Minor<Derived> minor(Index row, Index col);
const Minor<Derived> minor(Index row, Index col) const; const Minor<Derived> minor(Index row, Index col) const;
@@ -426,6 +508,13 @@ template<typename Derived> class MatrixBase
explicit MatrixBase(int); explicit MatrixBase(int);
MatrixBase(int,int); MatrixBase(int,int);
template<typename OtherDerived> explicit MatrixBase(const MatrixBase<OtherDerived>&); template<typename OtherDerived> explicit MatrixBase(const MatrixBase<OtherDerived>&);
protected:
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator-=(const ArrayBase<OtherDerived>& )
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
}; };
#endif // EIGEN_MATRIXBASE_H #endif // EIGEN_MATRIXBASE_H

View File

@@ -1,299 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_MATRIXSTORAGE_H
#define EIGEN_MATRIXSTORAGE_H
// EIGEN_INT_DEBUG_MATRIX_CTOR expands to the user-provided EIGEN_DEBUG_MATRIX_CTOR
// statement, with a terminating semicolon appended, when that symbol is defined,
// and expands to nothing otherwise.
#ifdef EIGEN_DEBUG_MATRIX_CTOR
#define EIGEN_INT_DEBUG_MATRIX_CTOR EIGEN_DEBUG_MATRIX_CTOR;
#else
#define EIGEN_INT_DEBUG_MATRIX_CTOR
#endif
/** \internal
* Empty tag type. Passing it to a constructor of ei_matrix_array or ei_matrix_storage
* selects the overload that does not run EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT.
*/
struct ei_constructor_without_unaligned_array_assert {};
/** \internal
  * Fixed-size array wrapper used as static coefficient storage.
  *
  * The \a Alignment parameter is normally left to its default, which is computed from the
  * other parameters: it is 16 when \a MatrixOptions does not contain DontAlign and the total
  * size in bytes (Size*sizeof(T)) is a multiple of 16, and 0 otherwise.
  *
  * This primary template declares the array without any alignment attribute.
  */
template <typename T, int Size, int MatrixOptions,
          int Alignment = (((MatrixOptions & DontAlign) == 0) && (((Size * sizeof(T)) % 16) == 0)) ? 16 : 0>
struct ei_matrix_array
{
  /** The stored coefficients. */
  T array[Size];

  /** Default constructor: the elements are default-initialized. */
  ei_matrix_array() {}

  /** Tag overload; for this unaligned variant it does the same as the default constructor. */
  ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}
};
// EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) checks, through ei_assert, that the address of
// the identifier `array` visible at the point of expansion has none of the bits of `sizemask`
// set. The assertion message points to the web page explaining the failure.
// Defining EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT makes the macro expand to nothing.
#ifdef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)
#else
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
ei_assert((reinterpret_cast<size_t>(array) & sizemask) == 0 \
&& "this assertion is explained here: " \
"http://eigen.tuxfamily.org/dox/UnalignedArrayAssert.html" \
" **** READ THIS WEB PAGE !!! ****");
#endif
/** \internal
  * Specialization selected when the Alignment parameter is 16.
  * The array is declared with EIGEN_ALIGN16, and the default constructor verifies
  * that the low four address bits of the array are zero.
  */
template <typename T, int Size, int MatrixOptions>
struct ei_matrix_array<T, Size, MatrixOptions, 16>
{
  EIGEN_ALIGN16 T array[Size];

  /** Default constructor: runs the unaligned-array assertion with mask 0xf. */
  ei_matrix_array()
  {
    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf)
  }

  /** Tag overload: constructs the array without running the alignment assertion. */
  ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}
};
/** \internal
  * Specialization for Size == 0, whatever the alignment parameter.
  * A one-element array is declared instead of a zero-length one, and neither
  * constructor performs an alignment check.
  */
template <typename T, int MatrixOptions, int Alignment>
struct ei_matrix_array<T, 0, MatrixOptions, Alignment>
{
  EIGEN_ALIGN16 T array[1];

  /** Default constructor: no assertion is run. */
  ei_matrix_array() {}

  /** Tag overload: identical to the default constructor for this specialization. */
  ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}
};
/** \internal
  *
  * \class ei_matrix_storage
  *
  * \brief Holds the coefficients and the run-time dimensions of a matrix
  *
  * The primary template and its partial specializations cover fixed-size,
  * dynamic-size and mixed matrices. Each variant keeps only the members it
  * needs: a dimension known at compile time is returned from the template
  * parameter instead of being stored.
  *
  * \sa Matrix
  */
template<typename T, int Size, int _Rows, int _Cols, int _Options> class ei_matrix_storage;

// Primary template: both dimensions are fixed at compile time, so the only
// data member is the coefficient array.
template<typename T, int Size, int _Rows, int _Cols, int _Options>
class ei_matrix_storage
{
    ei_matrix_array<T,Size,_Options> m_data;

  public:
    explicit ei_matrix_storage() {}

    // Forwards the tag so that the array is built without the alignment assertion.
    ei_matrix_storage(ei_constructor_without_unaligned_array_assert)
      : m_data(ei_constructor_without_unaligned_array_assert())
    {}

    // The three arguments are ignored: there is nothing to size at run time.
    ei_matrix_storage(DenseIndex, DenseIndex, DenseIndex) {}

    // Exchanges the coefficient arrays of the two objects.
    void swap(ei_matrix_storage& rhs)
    {
      std::swap(m_data, rhs.m_data);
    }

    // Dimensions come straight from the template parameters.
    static DenseIndex rows() { return _Rows; }
    static DenseIndex cols() { return _Cols; }

    // Resizing is a no-op for fixed dimensions.
    void conservativeResize(DenseIndex, DenseIndex, DenseIndex) {}
    void resize(DenseIndex, DenseIndex, DenseIndex) {}

    // Pointer to the first stored coefficient.
    const T* data() const { return m_data.array; }
    T* data() { return m_data.array; }
};
// Specialization for Size == 0 (null matrix): no data member is stored at all.
template<typename T, int _Rows, int _Cols, int _Options>
class ei_matrix_storage<T, 0, _Rows, _Cols, _Options>
{
  public:
    explicit ei_matrix_storage() {}
    ei_matrix_storage(ei_constructor_without_unaligned_array_assert) {}

    // The three arguments are ignored.
    ei_matrix_storage(DenseIndex, DenseIndex, DenseIndex) {}

    // Nothing to exchange.
    void swap(ei_matrix_storage&) {}

    // Dimensions come straight from the template parameters.
    static DenseIndex rows() { return _Rows; }
    static DenseIndex cols() { return _Cols; }

    // Resizing is a no-op.
    void conservativeResize(DenseIndex, DenseIndex, DenseIndex) {}
    void resize(DenseIndex, DenseIndex, DenseIndex) {}

    // There is no storage, hence a null pointer is returned.
    const T* data() const { return 0; }
    T* data() { return 0; }
};
// Specialization for matrices whose row and column counts are both set at run time
// while the coefficients live in a fixed-capacity array of Size elements.
template<typename T, int Size, int _Options>
class ei_matrix_storage<T, Size, Dynamic, Dynamic, _Options>
{
    ei_matrix_array<T,Size,_Options> m_data;
    DenseIndex m_rows;
    DenseIndex m_cols;

  public:
    // Starts out as a 0x0 matrix.
    explicit ei_matrix_storage() : m_rows(0), m_cols(0) {}

    // Same as the default constructor, but the array skips its alignment assertion.
    ei_matrix_storage(ei_constructor_without_unaligned_array_assert)
      : m_data(ei_constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0)
    {}

    // The first argument is ignored; only the two dimensions are recorded.
    ei_matrix_storage(DenseIndex, DenseIndex nbRows, DenseIndex nbCols)
      : m_rows(nbRows), m_cols(nbCols)
    {}

    // Exchanges the coefficient arrays and both dimensions.
    void swap(ei_matrix_storage& rhs)
    {
      std::swap(m_data, rhs.m_data);
      std::swap(m_rows, rhs.m_rows);
      std::swap(m_cols, rhs.m_cols);
    }

    DenseIndex rows() const { return m_rows; }
    DenseIndex cols() const { return m_cols; }

    // Both resize flavours only update the recorded dimensions; the array is untouched.
    void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols)
    {
      m_rows = nbRows;
      m_cols = nbCols;
    }
    void resize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols)
    {
      m_rows = nbRows;
      m_cols = nbCols;
    }

    // Pointer to the first stored coefficient.
    const T* data() const { return m_data.array; }
    T* data() { return m_data.array; }
};
// Dynamic-size matrix with fixed-size storage and fixed width: the column count is the
// compile-time constant _Cols, only the row count is kept at run time.
template<typename T, int Size, int _Cols, int _Options>
class ei_matrix_storage<T, Size, Dynamic, _Cols, _Options>
{
    ei_matrix_array<T,Size,_Options> m_data;
    DenseIndex m_rows;

  public:
    /** Creates a storage with 0 rows. */
    explicit ei_matrix_storage()
      : m_rows(0)
    {}

    /** Same as the default constructor, but forwards the tag to the array member. */
    ei_matrix_storage(ei_constructor_without_unaligned_array_assert)
      : m_data(ei_constructor_without_unaligned_array_assert()), m_rows(0)
    {}

    /** Records the row count; the size and column arguments are ignored. */
    ei_matrix_storage(DenseIndex, DenseIndex nbRows, DenseIndex)
      : m_rows(nbRows)
    {}

    /** Exchanges the array and the row count with \a other. */
    void swap(ei_matrix_storage& other)
    {
      std::swap(m_data, other.m_data);
      std::swap(m_rows, other.m_rows);
    }

    /** \returns the current number of rows */
    DenseIndex rows() const { return m_rows; }

    /** \returns the compile-time number of columns */
    DenseIndex cols() const { return _Cols; }

    /** Only the recorded row count changes; the array member is left untouched. */
    void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex)
    {
      m_rows = nbRows;
    }

    /** Only the recorded row count changes; the array member is left untouched. */
    void resize(DenseIndex, DenseIndex nbRows, DenseIndex)
    {
      m_rows = nbRows;
    }

    /** \returns a pointer to the first stored coefficient */
    const T* data() const { return m_data.array; }

    /** \returns a pointer to the first stored coefficient */
    T* data() { return m_data.array; }
};
// Dynamic-size matrix with fixed-size storage and fixed height: the row count is the
// compile-time constant _Rows, only the column count is kept at run time.
template<typename T, int Size, int _Rows, int _Options>
class ei_matrix_storage<T, Size, _Rows, Dynamic, _Options>
{
    ei_matrix_array<T,Size,_Options> m_data;
    DenseIndex m_cols;

  public:
    /** Creates a storage with 0 columns. */
    explicit ei_matrix_storage()
      : m_cols(0)
    {}

    /** Same as the default constructor, but forwards the tag to the array member. */
    ei_matrix_storage(ei_constructor_without_unaligned_array_assert)
      : m_data(ei_constructor_without_unaligned_array_assert()), m_cols(0)
    {}

    /** Records the column count; the size and row arguments are ignored. */
    ei_matrix_storage(DenseIndex, DenseIndex, DenseIndex nbCols)
      : m_cols(nbCols)
    {}

    /** Exchanges the array and the column count with \a other. */
    void swap(ei_matrix_storage& other)
    {
      std::swap(m_data, other.m_data);
      std::swap(m_cols, other.m_cols);
    }

    /** \returns the compile-time number of rows */
    DenseIndex rows() const { return _Rows; }

    /** \returns the current number of columns */
    DenseIndex cols() const { return m_cols; }

    /** Only the recorded column count changes; the array member is left untouched. */
    void conservativeResize(DenseIndex, DenseIndex, DenseIndex nbCols)
    {
      m_cols = nbCols;
    }

    /** Only the recorded column count changes; the array member is left untouched. */
    void resize(DenseIndex, DenseIndex, DenseIndex nbCols)
    {
      m_cols = nbCols;
    }

    /** \returns a pointer to the first stored coefficient */
    const T* data() const { return m_data.array; }

    /** \returns a pointer to the first stored coefficient */
    T* data() { return m_data.array; }
};
// Purely dynamic matrix: both dimensions are run-time values and the coefficients live in
// a buffer obtained from ei_conditional_aligned_new, released in the destructor.
// NOTE(review): no copy constructor or copy assignment is declared here, so the
// compiler-generated ones would leave two objects releasing the same m_data -- presumably
// storage objects are never copied directly; confirm against callers.
template<typename T, int _Options> class ei_matrix_storage<T, Dynamic, Dynamic, Dynamic, _Options>
{
    T *m_data;
    DenseIndex m_rows;
    DenseIndex m_cols;
  public:
    /** Creates an empty 0x0 storage owning no buffer. */
    inline explicit ei_matrix_storage() : m_data(0), m_rows(0), m_cols(0) {}
    /** Tag constructor; behaves like the default constructor. */
    inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert)
       : m_data(0), m_rows(0), m_cols(0) {}
    /** Allocates \a size coefficients and records the dimensions.
      * \a size is expected to equal rows*cols, since the destructor and resize() derive
      * the buffer length from m_rows*m_cols.
      */
    inline ei_matrix_storage(DenseIndex size, DenseIndex rows, DenseIndex cols)
      : m_data(ei_conditional_aligned_new<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
    { EIGEN_INT_DEBUG_MATRIX_CTOR }
    /** Releases the buffer; its length is taken to be m_rows*m_cols. */
    inline ~ei_matrix_storage() { ei_conditional_aligned_delete<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
    /** Exchanges buffer pointer and dimensions with \a other; no coefficient is copied. */
    inline void swap(ei_matrix_storage& other)
    { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
    /** \returns the current number of rows */
    inline DenseIndex rows(void) const {return m_rows;}
    /** \returns the current number of columns */
    inline DenseIndex cols(void) const {return m_cols;}
    /** Reallocates the buffer to \a size coefficients through
      * ei_conditional_aligned_realloc_new (given the old length m_rows*m_cols), then
      * records the new dimensions.
      */
    inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex cols)
    {
      m_data = ei_conditional_aligned_realloc_new<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
      m_rows = rows;
      m_cols = cols;
    }
    /** Resizes to \a rows x \a cols. The buffer is released and a new one of \a size
      * coefficients is allocated only when \a size differs from the current m_rows*m_cols;
      * old coefficients are not carried over. A \a size of 0 leaves m_data null.
      */
    void resize(DenseIndex size, DenseIndex rows, DenseIndex cols)
    {
      if(size != m_rows*m_cols)
      {
        ei_conditional_aligned_delete<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols);
        // Forget the released buffer before allocating: if the allocation below throws,
        // the object is left as an empty 0x0 storage instead of holding a dangling
        // pointer that the destructor would release a second time.
        m_data = 0;
        m_rows = 0;
        m_cols = 0;
        if (size)
          m_data = ei_conditional_aligned_new<T,(_Options&DontAlign)==0>(size);
        EIGEN_INT_DEBUG_MATRIX_CTOR
      }
      m_rows = rows;
      m_cols = cols;
    }
    /** \returns the buffer pointer (null when nothing is allocated) */
    inline const T *data() const { return m_data; }
    /** \returns the buffer pointer (null when nothing is allocated) */
    inline T *data() { return m_data; }
};
// matrix with dynamic width and fixed height (so that matrix has dynamic size).
// The row count is the compile-time constant _Rows; the column count and the buffer
// (obtained from ei_conditional_aligned_new) are managed at run time.
// NOTE(review): no copy constructor or copy assignment is declared here, so the
// compiler-generated ones would leave two objects releasing the same m_data -- presumably
// storage objects are never copied directly; confirm against callers.
template<typename T, int _Rows, int _Options> class ei_matrix_storage<T, Dynamic, _Rows, Dynamic, _Options>
{
    T *m_data;
    DenseIndex m_cols;
  public:
    /** Creates an empty storage with 0 columns and no buffer. */
    inline explicit ei_matrix_storage() : m_data(0), m_cols(0) {}
    /** Tag constructor; behaves like the default constructor. */
    inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
    /** Allocates \a size coefficients and records the column count; the row argument is
      * ignored. \a size is expected to equal _Rows*cols, since the destructor and resize()
      * derive the buffer length from _Rows*m_cols.
      */
    inline ei_matrix_storage(DenseIndex size, DenseIndex, DenseIndex cols) : m_data(ei_conditional_aligned_new<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
    { EIGEN_INT_DEBUG_MATRIX_CTOR }
    /** Releases the buffer; its length is taken to be _Rows*m_cols. */
    inline ~ei_matrix_storage() { ei_conditional_aligned_delete<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
    /** Exchanges buffer pointer and column count with \a other; no coefficient is copied. */
    inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
    /** \returns the compile-time number of rows */
    inline static DenseIndex rows(void) {return _Rows;}
    /** \returns the current number of columns */
    inline DenseIndex cols(void) const {return m_cols;}
    /** Reallocates the buffer to \a size coefficients through
      * ei_conditional_aligned_realloc_new (given the old length _Rows*m_cols), then
      * records the new column count.
      */
    inline void conservativeResize(DenseIndex size, DenseIndex, DenseIndex cols)
    {
      m_data = ei_conditional_aligned_realloc_new<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
      m_cols = cols;
    }
    /** Resizes to \a cols columns. The buffer is released and a new one of \a size
      * coefficients is allocated only when \a size differs from the current _Rows*m_cols;
      * old coefficients are not carried over. A \a size of 0 leaves m_data null.
      */
    EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex, DenseIndex cols)
    {
      if(size != _Rows*m_cols)
      {
        ei_conditional_aligned_delete<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols);
        // Forget the released buffer before allocating: if the allocation below throws,
        // the object is left empty instead of holding a dangling pointer that the
        // destructor would release a second time.
        m_data = 0;
        m_cols = 0;
        if (size)
          m_data = ei_conditional_aligned_new<T,(_Options&DontAlign)==0>(size);
        EIGEN_INT_DEBUG_MATRIX_CTOR
      }
      m_cols = cols;
    }
    /** \returns the buffer pointer (null when nothing is allocated) */
    inline const T *data() const { return m_data; }
    /** \returns the buffer pointer (null when nothing is allocated) */
    inline T *data() { return m_data; }
};
// matrix with dynamic height and fixed width (so that matrix has dynamic size).
// The column count is the compile-time constant _Cols; the row count and the buffer
// (obtained from ei_conditional_aligned_new) are managed at run time.
// NOTE(review): no copy constructor or copy assignment is declared here, so the
// compiler-generated ones would leave two objects releasing the same m_data -- presumably
// storage objects are never copied directly; confirm against callers.
template<typename T, int _Cols, int _Options> class ei_matrix_storage<T, Dynamic, Dynamic, _Cols, _Options>
{
    T *m_data;
    DenseIndex m_rows;
  public:
    /** Creates an empty storage with 0 rows and no buffer. */
    inline explicit ei_matrix_storage() : m_data(0), m_rows(0) {}
    /** Tag constructor; behaves like the default constructor. */
    inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
    /** Allocates \a size coefficients and records the row count; the column argument is
      * ignored. \a size is expected to equal rows*_Cols, since the destructor and resize()
      * derive the buffer length from m_rows*_Cols.
      */
    inline ei_matrix_storage(DenseIndex size, DenseIndex rows, DenseIndex) : m_data(ei_conditional_aligned_new<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
    { EIGEN_INT_DEBUG_MATRIX_CTOR }
    /** Releases the buffer; its length is taken to be _Cols*m_rows. */
    inline ~ei_matrix_storage() { ei_conditional_aligned_delete<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
    /** Exchanges buffer pointer and row count with \a other; no coefficient is copied. */
    inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
    /** \returns the current number of rows */
    inline DenseIndex rows(void) const {return m_rows;}
    /** \returns the compile-time number of columns */
    inline static DenseIndex cols(void) {return _Cols;}
    /** Reallocates the buffer to \a size coefficients through
      * ei_conditional_aligned_realloc_new (given the old length m_rows*_Cols), then
      * records the new row count.
      */
    inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex)
    {
      m_data = ei_conditional_aligned_realloc_new<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
      m_rows = rows;
    }
    /** Resizes to \a rows rows. The buffer is released and a new one of \a size
      * coefficients is allocated only when \a size differs from the current m_rows*_Cols;
      * old coefficients are not carried over. A \a size of 0 leaves m_data null.
      */
    EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex rows, DenseIndex)
    {
      if(size != m_rows*_Cols)
      {
        ei_conditional_aligned_delete<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows);
        // Forget the released buffer before allocating: if the allocation below throws,
        // the object is left empty instead of holding a dangling pointer that the
        // destructor would release a second time.
        m_data = 0;
        m_rows = 0;
        if (size)
          m_data = ei_conditional_aligned_new<T,(_Options&DontAlign)==0>(size);
        EIGEN_INT_DEBUG_MATRIX_CTOR
      }
      m_rows = rows;
    }
    /** \returns the buffer pointer (null when nothing is allocated) */
    inline const T *data() const { return m_data; }
    /** \returns the buffer pointer (null when nothing is allocated) */
    inline T *data() { return m_data; }
};
#endif // EIGEN_MATRIX_H

View File

@@ -27,6 +27,7 @@
#define EIGEN_NESTBYVALUE_H #define EIGEN_NESTBYVALUE_H
/** \class NestByValue /** \class NestByValue
* \ingroup Core_Module
* *
* \brief Expression which must be nested by value * \brief Expression which must be nested by value
* *
@@ -37,16 +38,19 @@
* *
* \sa MatrixBase::nestByValue() * \sa MatrixBase::nestByValue()
*/ */
namespace internal {
template<typename ExpressionType> template<typename ExpressionType>
struct ei_traits<NestByValue<ExpressionType> > : public ei_traits<ExpressionType> struct traits<NestByValue<ExpressionType> > : public traits<ExpressionType>
{}; {};
}
template<typename ExpressionType> class NestByValue template<typename ExpressionType> class NestByValue
: public ei_dense_xpr_base< NestByValue<ExpressionType> >::type : public internal::dense_xpr_base< NestByValue<ExpressionType> >::type
{ {
public: public:
typedef typename ei_dense_xpr_base<NestByValue>::type Base; typedef typename internal::dense_xpr_base<NestByValue>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue) EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue)
inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}

View File

@@ -26,6 +26,7 @@
#define EIGEN_NOALIAS_H #define EIGEN_NOALIAS_H
/** \class NoAlias /** \class NoAlias
* \ingroup Core_Module
* *
* \brief Pseudo expression providing an operator = assuming no aliasing * \brief Pseudo expression providing an operator = assuming no aliasing
* *
@@ -42,6 +43,7 @@
template<typename ExpressionType, template <typename> class StorageBase> template<typename ExpressionType, template <typename> class StorageBase>
class NoAlias class NoAlias
{ {
typedef typename ExpressionType::Scalar Scalar;
public: public:
NoAlias(ExpressionType& expression) : m_expression(expression) {} NoAlias(ExpressionType& expression) : m_expression(expression) {}
@@ -49,17 +51,31 @@ class NoAlias
* \sa MatrixBase::lazyAssign() */ * \sa MatrixBase::lazyAssign() */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other) EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
{ return m_expression.lazyAssign(other.derived()); } { return internal::assign_selector<ExpressionType,OtherDerived,false>::run(m_expression,other.derived()); }
/** \sa MatrixBase::operator+= */ /** \sa MatrixBase::operator+= */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other) EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
{ return m_expression.lazyAssign(m_expression + other.derived()); } {
typedef SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
SelfAdder tmp(m_expression);
typedef typename internal::nested<OtherDerived>::type OtherDerivedNested;
typedef typename internal::remove_all<OtherDerivedNested>::type _OtherDerivedNested;
internal::assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
return m_expression;
}
/** \sa MatrixBase::operator-= */ /** \sa MatrixBase::operator-= */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other) EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
{ return m_expression.lazyAssign(m_expression - other.derived()); } {
typedef SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
SelfAdder tmp(m_expression);
typedef typename internal::nested<OtherDerived>::type OtherDerivedNested;
typedef typename internal::remove_all<OtherDerivedNested>::type _OtherDerivedNested;
internal::assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
return m_expression;
}
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename ProductDerived, typename Lhs, typename Rhs> template<typename ProductDerived, typename Lhs, typename Rhs>

View File

@@ -26,6 +26,7 @@
#define EIGEN_NUMTRAITS_H #define EIGEN_NUMTRAITS_H
/** \class NumTraits /** \class NumTraits
* \ingroup Core_Module
* *
* \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen. * \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
* *
@@ -39,7 +40,7 @@
* is a typedef to \a U. * is a typedef to \a U.
* \li A typedef \a NonInteger, giving the type that should be used for operations producing non-integral values, * \li A typedef \a NonInteger, giving the type that should be used for operations producing non-integral values,
* such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives * such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives
* \a T again. Note however that many Eigen functions such as ei_sqrt simply refuse to * \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to
* take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is * take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is
* only intended as a helper for code that needs to explicitly promote types. * only intended as a helper for code that needs to explicitly promote types.
* \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what * \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what
@@ -52,6 +53,8 @@
* to by move / add / mul instructions respectively, assuming the data is already stored in CPU registers. * to by move / add / mul instructions respectively, assuming the data is already stored in CPU registers.
* Stay vague here. No need to do architecture-specific stuff. * Stay vague here. No need to do architecture-specific stuff.
* \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned. * \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned.
* \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must
* be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise.
* \li An epsilon() function which, unlike std::numeric_limits::epsilon(), returns a \a Real instead of a \a T. * \li An epsilon() function which, unlike std::numeric_limits::epsilon(), returns a \a Real instead of a \a T.
* \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default * \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default
* value by the fuzzy comparison operators. * value by the fuzzy comparison operators.
@@ -64,17 +67,18 @@ template<typename T> struct GenericNumTraits
IsInteger = std::numeric_limits<T>::is_integer, IsInteger = std::numeric_limits<T>::is_integer,
IsSigned = std::numeric_limits<T>::is_signed, IsSigned = std::numeric_limits<T>::is_signed,
IsComplex = 0, IsComplex = 0,
RequireInitialization = internal::is_arithmetic<T>::value ? 0 : 1,
ReadCost = 1, ReadCost = 1,
AddCost = 1, AddCost = 1,
MulCost = 1 MulCost = 1
}; };
typedef T Real; typedef T Real;
typedef typename ei_meta_if< typedef typename internal::conditional<
IsInteger, IsInteger,
typename ei_meta_if<sizeof(T)<=2, float, double>::ret, typename internal::conditional<sizeof(T)<=2, float, double>::type,
T T
>::ret NonInteger; >::type NonInteger;
typedef T Nested; typedef T Nested;
inline static Real epsilon() { return std::numeric_limits<T>::epsilon(); } inline static Real epsilon() { return std::numeric_limits<T>::epsilon(); }
@@ -85,6 +89,13 @@ template<typename T> struct GenericNumTraits
} }
inline static T highest() { return std::numeric_limits<T>::max(); } inline static T highest() { return std::numeric_limits<T>::max(); }
inline static T lowest() { return IsInteger ? std::numeric_limits<T>::min() : (-std::numeric_limits<T>::max()); } inline static T lowest() { return IsInteger ? std::numeric_limits<T>::min() : (-std::numeric_limits<T>::max()); }
#ifdef EIGEN2_SUPPORT
enum {
HasFloatingPoint = !IsInteger
};
typedef NonInteger FloatingPoint;
#endif
}; };
template<typename T> struct NumTraits : GenericNumTraits<T> template<typename T> struct NumTraits : GenericNumTraits<T>
@@ -113,6 +124,7 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
typedef _Real Real; typedef _Real Real;
enum { enum {
IsComplex = 1, IsComplex = 1,
RequireInitialization = NumTraits<_Real>::RequireInitialization,
ReadCost = 2 * NumTraits<_Real>::ReadCost, ReadCost = 2 * NumTraits<_Real>::ReadCost,
AddCost = 2 * NumTraits<Real>::AddCost, AddCost = 2 * NumTraits<Real>::AddCost,
MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
@@ -136,6 +148,7 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
IsComplex = NumTraits<Scalar>::IsComplex, IsComplex = NumTraits<Scalar>::IsComplex,
IsInteger = NumTraits<Scalar>::IsInteger, IsInteger = NumTraits<Scalar>::IsInteger,
IsSigned = NumTraits<Scalar>::IsSigned, IsSigned = NumTraits<Scalar>::IsSigned,
RequireInitialization = 1,
ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::ReadCost, ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::ReadCost,
AddCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost, AddCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
MulCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost MulCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost

View File

@@ -2,7 +2,7 @@
// for linear algebra. // for linear algebra.
// //
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com> // Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2009-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
// //
// Eigen is free software; you can redistribute it and/or // Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public // modify it under the terms of the GNU Lesser General Public
@@ -26,14 +26,17 @@
#ifndef EIGEN_PERMUTATIONMATRIX_H #ifndef EIGEN_PERMUTATIONMATRIX_H
#define EIGEN_PERMUTATIONMATRIX_H #define EIGEN_PERMUTATIONMATRIX_H
/** \class PermutationMatrix template<int RowCol,typename IndicesType,typename MatrixType, typename StorageKind> class PermutedImpl;
/** \class PermutationBase
* \ingroup Core_Module
* *
* \brief Permutation matrix * \brief Base class for permutations
* *
* \param SizeAtCompileTime the number of rows/cols, or Dynamic * \param Derived the derived class
* \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
* *
* This class represents a permutation matrix, internally stored as a vector of integers. * This class is the base class for all expressions representing a permutation matrix,
* internally stored as a vector of integers.
* The convention followed here is that if \f$ \sigma \f$ is a permutation, the corresponding permutation matrix * The convention followed here is that if \f$ \sigma \f$ is a permutation, the corresponding permutation matrix
* \f$ P_\sigma \f$ is such that if \f$ (e_1,\ldots,e_p) \f$ is the canonical basis, we have: * \f$ P_\sigma \f$ is such that if \f$ (e_1,\ldots,e_p) \f$ is the canonical basis, we have:
* \f[ P_\sigma(e_i) = e_{\sigma(i)}. \f] * \f[ P_\sigma(e_i) = e_{\sigma(i)}. \f]
@@ -43,26 +46,29 @@
* Permutation matrices are square and invertible. * Permutation matrices are square and invertible.
* *
* Notice that in addition to the member functions and operators listed here, there also are non-member * Notice that in addition to the member functions and operators listed here, there also are non-member
* operator* to multiply a PermutationMatrix with any kind of matrix expression (MatrixBase) on either side. * operator* to multiply any kind of permutation object with any kind of matrix expression (MatrixBase)
* on either side.
* *
* \sa class DiagonalMatrix * \sa class PermutationMatrix, class PermutationWrapper
*/ */
template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false> struct ei_permut_matrix_product_retval;
template<int SizeAtCompileTime, int MaxSizeAtCompileTime> namespace internal {
struct ei_traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
: ei_traits<Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
{};
template<int SizeAtCompileTime, int MaxSizeAtCompileTime> template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false>
class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > struct permut_matrix_product_retval;
enum PermPermProduct_t {PermPermProduct};
} // end namespace internal
template<typename Derived>
class PermutationBase : public EigenBase<Derived>
{ {
typedef internal::traits<Derived> Traits;
typedef EigenBase<Derived> Base;
public: public:
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
typedef ei_traits<PermutationMatrix> Traits; typedef typename Traits::IndicesType IndicesType;
typedef Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime>
DenseMatrixType;
enum { enum {
Flags = Traits::Flags, Flags = Traits::Flags,
CoeffReadCost = Traits::CoeffReadCost, CoeffReadCost = Traits::CoeffReadCost,
@@ -73,9 +79,227 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
}; };
typedef typename Traits::Scalar Scalar; typedef typename Traits::Scalar Scalar;
typedef typename Traits::Index Index; typedef typename Traits::Index Index;
typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime,0,MaxRowsAtCompileTime,MaxColsAtCompileTime>
DenseMatrixType;
typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,Index>
PlainPermutationType;
using Base::derived;
#endif #endif
typedef Matrix<int, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType; /** Copies the other permutation into *this */
template<typename OtherDerived>
Derived& operator=(const PermutationBase<OtherDerived>& other)
{
  // The permutation is copied by assigning the index vector of \a other to ours.
  IndicesType& target = indices();
  target = other.indices();
  return derived();
}
/** Assignment from the Transpositions \a tr
  *
  * Resets *this to the identity of size tr.size(), then applies the transpositions
  * (k, tr.coeff(k)) on the right, from the last index down to the first.
  *
  * \returns a reference to the derived object
  */
template<typename OtherDerived>
Derived& operator=(const TranspositionsBase<OtherDerived>& tr)
{
  setIdentity(tr.size());
  // Test before decrementing: a "k>=0" loop condition can never become false when
  // Index is an unsigned type, whereas this form terminates for any integer type.
  for(Index k=size(); k>0; )
  {
    --k;
    applyTranspositionOnTheRight(k,tr.coeff(k));
  }
  return derived();
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** Non-template counterpart of the templated operator=. It is declared explicitly so
  * that a default operator= does not hide the templated one.
  */
Derived& operator=(const PermutationBase& other)
{
  IndicesType& target = indices();
  target = other.indices();
  return derived();
}
#endif
/** \returns the number of rows, i.e. the number of stored indices */
Index rows() const
{
  return indices().size();
}
/** \returns the number of columns, i.e. the number of stored indices */
Index cols() const
{
  return indices().size();
}
/** \returns the number of indices, which is the side length of the corresponding square matrix */
Index size() const
{
  return indices().size();
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** Writes the dense form of this permutation into \a other: \a other is zeroed, then
  * for every column i the coefficient at row indices().coeff(i) is set to 1.
  */
template<typename DenseDerived>
void evalTo(MatrixBase<DenseDerived>& other) const
{
  other.setZero();
  // The counter has the same type as rows() so the comparison neither mixes integer
  // widths nor truncates when Index is wider than int.
  for (Index i=0; i<rows(); ++i)
    other.coeffRef(indices().coeff(i),i) = typename DenseDerived::Scalar(1);
}
#endif
/** \returns a Matrix object initialized from this permutation matrix. Notice that it
* is inefficient to return this Matrix object by value. For efficiency, favor using
* the Matrix constructor taking EigenBase objects.
*/
DenseMatrixType toDenseMatrix() const
{
// The DenseMatrixType result is built by implicit conversion from the derived permutation.
return derived();
}
/** const version of indices(); forwards to the derived class, which must provide its own indices(). */
const IndicesType& indices() const
{
  return derived().indices();
}
/** \returns a reference to the stored array representing the permutation.
  * Forwards to the derived class, which must provide its own indices().
  */
IndicesType& indices()
{
  return derived().indices();
}
/** Resizes the index vector to \a newSize entries. */
void resize(Index newSize)
{
  IndicesType& idx = indices();
  idx.resize(newSize);
}
/** Sets *this to be the identity permutation matrix, i.e. stores i at position i
  * for every index of the current size.
  */
void setIdentity()
{
  Index i = 0;
  while(i < size())
  {
    indices().coeffRef(i) = i;
    ++i;
  }
}
/** Resizes *this to \a newSize, then sets it to the identity permutation matrix. */
void setIdentity(Index newSize)
{
  resize(newSize);
  setIdentity();
}
/** Multiplies *this by the transposition \f$(ij)\f$ on the left.
  *
  * Every stored index equal to \a i becomes \a j and every stored index equal to \a j
  * becomes \a i.
  *
  * \returns a reference to *this.
  *
  * \warning This is much slower than applyTranspositionOnTheRight(Index,Index):
  * this has linear complexity and requires a lot of branching.
  *
  * \sa applyTranspositionOnTheRight(Index,Index)
  */
Derived& applyTranspositionOnTheLeft(Index i, Index j)
{
  eigen_assert(i>=0 && j>=0 && i<size() && j<size());
  IndicesType& idx = indices();
  for(Index pos = 0; pos < size(); ++pos)
  {
    if(idx.coeff(pos) == i)
      idx.coeffRef(pos) = j;
    else if(idx.coeff(pos) == j)
      idx.coeffRef(pos) = i;
  }
  return derived();
}
/** Multiplies *this by the transposition \f$(ij)\f$ on the right.
  *
  * \returns a reference to *this.
  *
  * This is a fast operation: the entries stored at positions \a i and \a j are swapped.
  *
  * \sa applyTranspositionOnTheLeft(Index,Index)
  */
Derived& applyTranspositionOnTheRight(Index i, Index j)
{
  eigen_assert(i>=0 && j>=0 && i<size() && j<size());
  IndicesType& idx = indices();
  std::swap(idx.coeffRef(i), idx.coeffRef(j));
  return derived();
}
/** \returns the inverse permutation matrix.
*
* The result is a Transpose<PermutationBase> expression built by implicit conversion
* from the derived object; no new index vector is computed here.
*
* \note \note_try_to_help_rvo
*/
inline Transpose<PermutationBase> inverse() const
{ return derived(); }
/** \returns the transpose permutation matrix.
*
* The result is a Transpose<PermutationBase> expression built by implicit conversion
* from the derived object, exactly as inverse() does.
*
* \note \note_try_to_help_rvo
*/
inline Transpose<PermutationBase> transpose() const
{ return derived(); }
/**** multiplication helpers to hopefully get RVO ****/
#ifndef EIGEN_PARSED_BY_DOXYGEN
protected:
template<typename OtherDerived>
void assignTranspose(const PermutationBase<OtherDerived>& other)
{
for (int i=0; i<rows();++i) indices().coeffRef(other.indices().coeff(i)) = i;
}
template<typename Lhs,typename Rhs>
void assignProduct(const Lhs& lhs, const Rhs& rhs)
{
eigen_assert(lhs.cols() == rhs.rows());
for (int i=0; i<rows();++i) indices().coeffRef(i) = lhs.indices().coeff(rhs.indices().coeff(i));
}
#endif
public:
/** \returns the product permutation matrix, built by the PlainPermutationType
  * constructor tagged with internal::PermPermProduct.
  *
  * \note \note_try_to_help_rvo
  */
template<typename Other>
PlainPermutationType operator*(const PermutationBase<Other>& other) const
{
  const Derived& lhs = derived();
  const Other& rhs = other.derived();
  return PlainPermutationType(internal::PermPermProduct, lhs, rhs);
}
/** \returns the product of a permutation with another inverse permutation.
*
* The transposed operand goes through eval() before being handed, together with *this
* as the left factor, to the PlainPermutationType constructor tagged with
* internal::PermPermProduct.
*
* \note \note_try_to_help_rvo
*/
template<typename Other>
inline PlainPermutationType operator*(const Transpose<PermutationBase<Other> >& other) const
{ return PlainPermutationType(internal::PermPermProduct, *this, other.eval()); }
/** \returns the product of an inverse permutation with another permutation.
*
* Friend (non-member) overload: the transposed left operand goes through eval() before
* being handed, together with \a perm as the right factor, to the PlainPermutationType
* constructor tagged with internal::PermPermProduct.
*
* \note \note_try_to_help_rvo
*/
template<typename Other> friend
inline PlainPermutationType operator*(const Transpose<PermutationBase<Other> >& other, const PermutationBase& perm)
{ return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); }
protected:
};
/** \class PermutationMatrix
* \ingroup Core_Module
*
* \brief Permutation matrix
*
* \param SizeAtCompileTime the number of rows/cols, or Dynamic
* \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
* \param IndexType the integer type of the indices
*
* This class represents a permutation matrix, internally stored as a vector of integers.
*
* \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix
*/
namespace internal {

// Traits of PermutationMatrix: everything is inherited from the traits of a square
// Matrix of IndexType, plus the two typedefs below.
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
  : traits<Matrix<IndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
{
  // Column vector type used to hold the permutation indices.
  typedef Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
  // Integer type of the indices.
  typedef IndexType Index;
};

} // end namespace internal
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
{
typedef PermutationBase<PermutationMatrix> Base;
typedef internal::traits<PermutationMatrix> Traits;
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
typedef typename Traits::IndicesType IndicesType;
#endif
inline PermutationMatrix() inline PermutationMatrix()
{} {}
@@ -86,8 +310,8 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
{} {}
/** Copy constructor. */ /** Copy constructor. */
template<int OtherSize, int OtherMaxSize> template<typename OtherDerived>
inline PermutationMatrix(const PermutationMatrix<OtherSize, OtherMaxSize>& other) inline PermutationMatrix(const PermutationBase<OtherDerived>& other)
: m_indices(other.indices()) {} : m_indices(other.indices()) {}
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -108,29 +332,26 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
{} {}
/** Convert the Transpositions \a tr to a permutation matrix */ /** Convert the Transpositions \a tr to a permutation matrix */
template<int OtherSize, int OtherMaxSize> template<typename Other>
explicit PermutationMatrix(const Transpositions<OtherSize,OtherMaxSize>& tr) explicit PermutationMatrix(const TranspositionsBase<Other>& tr)
: m_indices(tr.size()) : m_indices(tr.size())
{ {
*this = tr; *this = tr;
} }
/** Copies the other permutation into *this */ /** Copies the other permutation into *this */
template<int OtherSize, int OtherMaxSize> template<typename Other>
PermutationMatrix& operator=(const PermutationMatrix<OtherSize, OtherMaxSize>& other) PermutationMatrix& operator=(const PermutationBase<Other>& other)
{ {
m_indices = other.indices(); m_indices = other.indices();
return *this; return *this;
} }
/** Assignment from the Transpositions \a tr */ /** Assignment from the Transpositions \a tr */
template<int OtherSize, int OtherMaxSize> template<typename Other>
PermutationMatrix& operator=(const Transpositions<OtherSize,OtherMaxSize>& tr) PermutationMatrix& operator=(const TranspositionsBase<Other>& tr)
{ {
setIdentity(tr.size()); return Base::operator=(tr.derived());
for(Index k=size()-1; k>=0; --k)
applyTranspositionOnTheRight(k,tr.coeff(k));
return *this;
} }
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -144,197 +365,195 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
} }
#endif #endif
/** \returns the number of rows */
inline Index rows() const { return m_indices.size(); }
/** \returns the number of columns */
inline Index cols() const { return m_indices.size(); }
/** \returns the size of a side of the respective square matrix, i.e., the number of indices */
inline Index size() const { return m_indices.size(); }
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename DenseDerived>
void evalTo(MatrixBase<DenseDerived>& other) const
{
other.setZero();
for (int i=0; i<rows();++i)
other.coeffRef(m_indices.coeff(i),i) = typename DenseDerived::Scalar(1);
}
#endif
/** \returns a Matrix object initialized from this permutation matrix. Notice that it
* is inefficient to return this Matrix object by value. For efficiency, favor using
* the Matrix constructor taking EigenBase objects.
*/
DenseMatrixType toDenseMatrix() const
{
return *this;
}
/** const version of indices(). */ /** const version of indices(). */
const IndicesType& indices() const { return m_indices; } const IndicesType& indices() const { return m_indices; }
/** \returns a reference to the stored array representing the permutation. */ /** \returns a reference to the stored array representing the permutation. */
IndicesType& indices() { return m_indices; } IndicesType& indices() { return m_indices; }
/** Resizes to given size.
*/
inline void resize(Index size)
{
m_indices.resize(size);
}
/** Sets *this to be the identity permutation matrix */
void setIdentity()
{
for(Index i = 0; i < m_indices.size(); ++i)
m_indices.coeffRef(i) = i;
}
/** Sets *this to be the identity permutation matrix of given size.
*/
void setIdentity(Index size)
{
resize(size);
setIdentity();
}
/** Multiplies *this by the transposition \f$(ij)\f$ on the left.
*
* \returns a reference to *this.
*
* \warning This is much slower than applyTranspositionOnTheRight(int,int):
* this has linear complexity and requires a lot of branching.
*
* \sa applyTranspositionOnTheRight(int,int)
*/
PermutationMatrix& applyTranspositionOnTheLeft(Index i, Index j)
{
ei_assert(i>=0 && j>=0 && i<m_indices.size() && j<m_indices.size());
for(Index k = 0; k < m_indices.size(); ++k)
{
if(m_indices.coeff(k) == i) m_indices.coeffRef(k) = j;
else if(m_indices.coeff(k) == j) m_indices.coeffRef(k) = i;
}
return *this;
}
/** Multiplies *this by the transposition \f$(ij)\f$ on the right.
*
* \returns a reference to *this.
*
* This is a fast operation, it only consists in swapping two indices.
*
* \sa applyTranspositionOnTheLeft(int,int)
*/
PermutationMatrix& applyTranspositionOnTheRight(Index i, Index j)
{
ei_assert(i>=0 && j>=0 && i<m_indices.size() && j<m_indices.size());
std::swap(m_indices.coeffRef(i), m_indices.coeffRef(j));
return *this;
}
/** \returns the inverse permutation matrix.
*
* \note \note_try_to_help_rvo
*/
inline Transpose<PermutationMatrix> inverse() const
{ return *this; }
/** \returns the tranpose permutation matrix.
*
* \note \note_try_to_help_rvo
*/
inline Transpose<PermutationMatrix> transpose() const
{ return *this; }
/**** multiplication helpers to hopefully get RVO ****/ /**** multiplication helpers to hopefully get RVO ****/
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
template<int OtherSize, int OtherMaxSize> template<typename Other>
PermutationMatrix(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other) PermutationMatrix(const Transpose<PermutationBase<Other> >& other)
: m_indices(other.nestedPermutation().size()) : m_indices(other.nestedPermutation().size())
{ {
for (int i=0; i<rows();++i) m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i; for (int i=0; i<m_indices.size();++i) m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i;
} }
protected: template<typename Lhs,typename Rhs>
enum Product_t {Product}; PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs)
PermutationMatrix(Product_t, const PermutationMatrix& lhs, const PermutationMatrix& rhs) : m_indices(lhs.indices().size())
: m_indices(lhs.m_indices.size())
{ {
ei_assert(lhs.cols() == rhs.rows()); Base::assignProduct(lhs,rhs);
for (int i=0; i<rows();++i) m_indices.coeffRef(i) = lhs.m_indices.coeff(rhs.m_indices.coeff(i));
} }
#endif #endif
public:
/** \returns the product permutation matrix.
*
* \note \note_try_to_help_rvo
*/
template<int OtherSize, int OtherMaxSize>
inline PermutationMatrix operator*(const PermutationMatrix<OtherSize, OtherMaxSize>& other) const
{ return PermutationMatrix(Product, *this, other); }
/** \returns the product of a permutation with another inverse permutation.
*
* \note \note_try_to_help_rvo
*/
template<int OtherSize, int OtherMaxSize>
inline PermutationMatrix operator*(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other) const
{ return PermutationMatrix(Product, *this, other.eval()); }
/** \returns the product of an inverse permutation with another permutation.
*
* \note \note_try_to_help_rvo
*/
template<int OtherSize, int OtherMaxSize> friend
inline PermutationMatrix operator*(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other, const PermutationMatrix& perm)
{ return PermutationMatrix(Product, other.eval(), perm); }
protected: protected:
IndicesType m_indices; IndicesType m_indices;
}; };
namespace internal {
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess> >
: traits<Matrix<IndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
{
typedef IndexType Index;
typedef Map<const Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
};
}
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess>
: public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess> >
{
typedef PermutationBase<Map> Base;
typedef internal::traits<Map> Traits;
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
typedef typename Traits::IndicesType IndicesType;
typedef typename IndicesType::Scalar Index;
#endif
inline Map(const Index* indices)
: m_indices(indices)
{}
inline Map(const Index* indices, Index size)
: m_indices(indices,size)
{}
/** Copies the other permutation into *this */
template<typename Other>
Map& operator=(const PermutationBase<Other>& other)
{ return Base::operator=(other.derived()); }
/** Assignment from the Transpositions \a tr */
template<typename Other>
Map& operator=(const TranspositionsBase<Other>& tr)
{ return Base::operator=(tr.derived()); }
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
Map& operator=(const Map& other)
{
m_indices = other.m_indices;
return *this;
}
#endif
/** const version of indices(). */
const IndicesType& indices() const { return m_indices; }
/** \returns a reference to the stored array representing the permutation. */
IndicesType& indices() { return m_indices; }
protected:
IndicesType m_indices;
};
/** \class PermutationWrapper
* \ingroup Core_Module
*
* \brief Class to view a vector of integers as a permutation matrix
*
* \param _IndicesType the type of the vector of integer (can be any compatible expression)
*
* This class allows to view any vector expression of integers as a permutation matrix.
*
* \sa class PermutationBase, class PermutationMatrix
*/
struct PermutationStorage {};
template<typename _IndicesType> class TranspositionsWrapper;
namespace internal {
template<typename _IndicesType>
struct traits<PermutationWrapper<_IndicesType> >
{
typedef PermutationStorage StorageKind;
typedef typename _IndicesType::Scalar Scalar;
typedef typename _IndicesType::Scalar Index;
typedef _IndicesType IndicesType;
enum {
RowsAtCompileTime = _IndicesType::SizeAtCompileTime,
ColsAtCompileTime = _IndicesType::SizeAtCompileTime,
MaxRowsAtCompileTime = IndicesType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = IndicesType::MaxColsAtCompileTime,
Flags = 0,
CoeffReadCost = _IndicesType::CoeffReadCost
};
};
}
template<typename _IndicesType>
class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesType> >
{
typedef PermutationBase<PermutationWrapper> Base;
typedef internal::traits<PermutationWrapper> Traits;
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
typedef typename Traits::IndicesType IndicesType;
#endif
inline PermutationWrapper(const IndicesType& indices)
: m_indices(indices)
{}
/** const version of indices(). */
const typename internal::remove_all<typename IndicesType::Nested>::type&
indices() const { return m_indices; }
protected:
const typename IndicesType::Nested m_indices;
};
/** \returns the matrix with the permutation applied to the columns. /** \returns the matrix with the permutation applied to the columns.
*/ */
template<typename Derived, int SizeAtCompileTime, int MaxSizeAtCompileTime> template<typename Derived, typename PermutationDerived>
inline const ei_permut_matrix_product_retval<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheRight> inline const internal::permut_matrix_product_retval<PermutationDerived, Derived, OnTheRight>
operator*(const MatrixBase<Derived>& matrix, operator*(const MatrixBase<Derived>& matrix,
const PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> &permutation) const PermutationBase<PermutationDerived> &permutation)
{ {
return ei_permut_matrix_product_retval return internal::permut_matrix_product_retval
<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheRight> <PermutationDerived, Derived, OnTheRight>
(permutation, matrix.derived()); (permutation.derived(), matrix.derived());
} }
/** \returns the matrix with the permutation applied to the rows. /** \returns the matrix with the permutation applied to the rows.
*/ */
template<typename Derived, int SizeAtCompileTime, int MaxSizeAtCompileTime> template<typename Derived, typename PermutationDerived>
inline const ei_permut_matrix_product_retval inline const internal::permut_matrix_product_retval
<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheLeft> <PermutationDerived, Derived, OnTheLeft>
operator*(const PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> &permutation, operator*(const PermutationBase<PermutationDerived> &permutation,
const MatrixBase<Derived>& matrix) const MatrixBase<Derived>& matrix)
{ {
return ei_permut_matrix_product_retval return internal::permut_matrix_product_retval
<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheLeft> <PermutationDerived, Derived, OnTheLeft>
(permutation, matrix.derived()); (permutation.derived(), matrix.derived());
} }
namespace internal {
template<typename PermutationType, typename MatrixType, int Side, bool Transposed> template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
struct ei_traits<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> > struct traits<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
{ {
typedef typename MatrixType::PlainObject ReturnType; typedef typename MatrixType::PlainObject ReturnType;
}; };
template<typename PermutationType, typename MatrixType, int Side, bool Transposed> template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
struct ei_permut_matrix_product_retval struct permut_matrix_product_retval
: public ReturnByValue<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> > : public ReturnByValue<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
{ {
typedef typename ei_cleantype<typename MatrixType::Nested>::type MatrixTypeNestedCleaned; typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
ei_permut_matrix_product_retval(const PermutationType& perm, const MatrixType& matrix) permut_matrix_product_retval(const PermutationType& perm, const MatrixType& matrix)
: m_permutation(perm), m_matrix(matrix) : m_permutation(perm), m_matrix(matrix)
{} {}
@@ -345,7 +564,7 @@ struct ei_permut_matrix_product_retval
{ {
const int n = Side==OnTheLeft ? rows() : cols(); const int n = Side==OnTheLeft ? rows() : cols();
if(ei_is_same_type<MatrixTypeNestedCleaned,Dest>::ret && ei_extract_data(dst) == ei_extract_data(m_matrix)) if(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix))
{ {
// apply the permutation inplace // apply the permutation inplace
Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size()); Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
@@ -381,7 +600,7 @@ struct ei_permut_matrix_product_retval
= =
Block<MatrixTypeNestedCleaned,Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime> Block<const MatrixTypeNestedCleaned,Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime>
(m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i); (m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i);
} }
} }
@@ -394,23 +613,25 @@ struct ei_permut_matrix_product_retval
/* Template partial specialization for transposed/inverse permutations */ /* Template partial specialization for transposed/inverse permutations */
template<int SizeAtCompileTime, int MaxSizeAtCompileTime> template<typename Derived>
struct ei_traits<Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > > struct traits<Transpose<PermutationBase<Derived> > >
: ei_traits<Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> > : traits<Derived>
{}; {};
template<int SizeAtCompileTime, int MaxSizeAtCompileTime> } // end namespace internal
class Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
: public EigenBase<Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > > template<typename Derived>
class Transpose<PermutationBase<Derived> >
: public EigenBase<Transpose<PermutationBase<Derived> > >
{ {
typedef PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> PermutationType; typedef Derived PermutationType;
typedef typename PermutationType::IndicesType IndicesType; typedef typename PermutationType::IndicesType IndicesType;
typedef typename PermutationType::PlainPermutationType PlainPermutationType;
public: public:
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
typedef ei_traits<PermutationType> Traits; typedef internal::traits<PermutationType> Traits;
typedef Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> typedef typename Derived::DenseMatrixType DenseMatrixType;
DenseMatrixType;
enum { enum {
Flags = Traits::Flags, Flags = Traits::Flags,
CoeffReadCost = Traits::CoeffReadCost, CoeffReadCost = Traits::CoeffReadCost,
@@ -438,26 +659,26 @@ class Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
#endif #endif
/** \return the equivalent permutation matrix */ /** \return the equivalent permutation matrix */
PermutationType eval() const { return *this; } PlainPermutationType eval() const { return *this; }
DenseMatrixType toDenseMatrix() const { return *this; } DenseMatrixType toDenseMatrix() const { return *this; }
/** \returns the matrix with the inverse permutation applied to the columns. /** \returns the matrix with the inverse permutation applied to the columns.
*/ */
template<typename Derived> friend template<typename OtherDerived> friend
inline const ei_permut_matrix_product_retval<PermutationType, Derived, OnTheRight, true> inline const internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheRight, true>
operator*(const MatrixBase<Derived>& matrix, const Transpose& trPerm) operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trPerm)
{ {
return ei_permut_matrix_product_retval<PermutationType, Derived, OnTheRight, true>(trPerm.m_permutation, matrix.derived()); return internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheRight, true>(trPerm.m_permutation, matrix.derived());
} }
/** \returns the matrix with the inverse permutation applied to the rows. /** \returns the matrix with the inverse permutation applied to the rows.
*/ */
template<typename Derived> template<typename OtherDerived>
inline const ei_permut_matrix_product_retval<PermutationType, Derived, OnTheLeft, true> inline const internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheLeft, true>
operator*(const MatrixBase<Derived>& matrix) const operator*(const MatrixBase<OtherDerived>& matrix) const
{ {
return ei_permut_matrix_product_retval<PermutationType, Derived, OnTheLeft, true>(m_permutation, matrix.derived()); return internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheLeft, true>(m_permutation, matrix.derived());
} }
const PermutationType& nestedPermutation() const { return m_permutation; } const PermutationType& nestedPermutation() const { return m_permutation; }
@@ -466,4 +687,10 @@ class Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
const PermutationType& m_permutation; const PermutationType& m_permutation;
}; };
template<typename Derived>
const PermutationWrapper<const Derived> MatrixBase<Derived>::asPermutation() const
{
return derived();
}
#endif // EIGEN_PERMUTATIONMATRIX_H #endif // EIGEN_PERMUTATIONMATRIX_H

View File

@@ -32,24 +32,35 @@
# define EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED # define EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
#endif #endif
template <typename Derived, typename OtherDerived = Derived, bool IsVector = static_cast<bool>(Derived::IsVectorAtCompileTime)> struct ei_conservative_resize_like_impl; namespace internal {
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct ei_matrix_swap_impl;
template <typename Derived, typename OtherDerived = Derived, bool IsVector = static_cast<bool>(Derived::IsVectorAtCompileTime)> struct conservative_resize_like_impl;
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct matrix_swap_impl;
} // end namespace internal
/** /**
* \brief Dense storage base class for matrices and arrays. * \brief %Dense storage base class for matrices and arrays.
**/ *
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN.
*
* \sa \ref TopicClassHierarchy
*/
template<typename Derived> template<typename Derived>
class DenseStorageBase : public ei_dense_xpr_base<Derived>::type class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
{ {
public: public:
enum { Options = ei_traits<Derived>::Options }; enum { Options = internal::traits<Derived>::Options };
typedef typename ei_dense_xpr_base<Derived>::type Base; typedef typename internal::dense_xpr_base<Derived>::type Base;
typedef typename ei_traits<Derived>::StorageKind StorageKind; typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index; typedef typename internal::traits<Derived>::Index Index;
typedef typename ei_traits<Derived>::Scalar Scalar; typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar; typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Derived DenseType;
using Base::RowsAtCompileTime; using Base::RowsAtCompileTime;
using Base::ColsAtCompileTime; using Base::ColsAtCompileTime;
@@ -60,13 +71,23 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
using Base::IsVectorAtCompileTime; using Base::IsVectorAtCompileTime;
using Base::Flags; using Base::Flags;
template<typename PlainObjectType, int MapOptions, typename StrideType> friend class Eigen::Map;
friend class Eigen::Map<Derived, Unaligned>; friend class Eigen::Map<Derived, Unaligned>;
typedef class Eigen::Map<Derived, Unaligned> UnalignedMapType; typedef Eigen::Map<Derived, Unaligned> MapType;
friend class Eigen::Map<const Derived, Unaligned>;
typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;
friend class Eigen::Map<Derived, Aligned>; friend class Eigen::Map<Derived, Aligned>;
typedef class Eigen::Map<Derived, Aligned> AlignedMapType; typedef Eigen::Map<Derived, Aligned> AlignedMapType;
friend class Eigen::Map<const Derived, Aligned>;
typedef const Eigen::Map<const Derived, Aligned> ConstAlignedMapType;
template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };
template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, Aligned, StrideType> type; };
template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, Aligned, StrideType> type; };
protected: protected:
ei_matrix_storage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage; DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
public: public:
enum { NeedsToAlign = (!(Options&DontAlign)) enum { NeedsToAlign = (!(Options&DontAlign))
@@ -105,34 +126,51 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
return m_storage.data()[index]; return m_storage.data()[index];
} }
EIGEN_STRONG_INLINE const Scalar& coeffRef(Index row, Index col) const
{
if(Flags & RowMajorBit)
return m_storage.data()[col + row * m_storage.cols()];
else // column-major
return m_storage.data()[row + col * m_storage.rows()];
}
EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const
{
return m_storage.data()[index];
}
/** \internal */
template<int LoadMode> template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
{ {
return ei_ploadt<Scalar, LoadMode> return internal::ploadt<PacketScalar, LoadMode>
(m_storage.data() + (Flags & RowMajorBit (m_storage.data() + (Flags & RowMajorBit
? col + row * m_storage.cols() ? col + row * m_storage.cols()
: row + col * m_storage.rows())); : row + col * m_storage.rows()));
} }
/** \internal */
template<int LoadMode> template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet(Index index) const EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
{ {
return ei_ploadt<Scalar, LoadMode>(m_storage.data() + index); return internal::ploadt<PacketScalar, LoadMode>(m_storage.data() + index);
} }
/** \internal */
template<int StoreMode> template<int StoreMode>
EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketScalar& x) EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketScalar& x)
{ {
ei_pstoret<Scalar, PacketScalar, StoreMode> internal::pstoret<Scalar, PacketScalar, StoreMode>
(m_storage.data() + (Flags & RowMajorBit (m_storage.data() + (Flags & RowMajorBit
? col + row * m_storage.cols() ? col + row * m_storage.cols()
: row + col * m_storage.rows()), x); : row + col * m_storage.rows()), x);
} }
/** \internal */
template<int StoreMode> template<int StoreMode>
EIGEN_STRONG_INLINE void writePacket(Index index, const PacketScalar& x) EIGEN_STRONG_INLINE void writePacket(Index index, const PacketScalar& x)
{ {
ei_pstoret<Scalar, PacketScalar, StoreMode>(m_storage.data() + index, x); internal::pstoret<Scalar, PacketScalar, StoreMode>(m_storage.data() + index, x);
} }
/** \returns a const pointer to the data array of this matrix */ /** \returns a const pointer to the data array of this matrix */
@@ -184,8 +222,8 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
*/ */
inline void resize(Index size) inline void resize(Index size)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(DenseStorageBase) EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
ei_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == size); eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == size);
#ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO #ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
bool size_changed = size != this->size(); bool size_changed = size != this->size();
#endif #endif
@@ -238,44 +276,58 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
const Index othersize = other.rows()*other.cols(); const Index othersize = other.rows()*other.cols();
if(RowsAtCompileTime == 1) if(RowsAtCompileTime == 1)
{ {
ei_assert(other.rows() == 1 || other.cols() == 1); eigen_assert(other.rows() == 1 || other.cols() == 1);
resize(1, othersize); resize(1, othersize);
} }
else if(ColsAtCompileTime == 1) else if(ColsAtCompileTime == 1)
{ {
ei_assert(other.rows() == 1 || other.cols() == 1); eigen_assert(other.rows() == 1 || other.cols() == 1);
resize(othersize, 1); resize(othersize, 1);
} }
else resize(other.rows(), other.cols()); else resize(other.rows(), other.cols());
} }
/** Resizes \c *this to a \a rows x \a cols matrix while leaving old values of \c *this untouched. /** Resizes the matrix to \a rows x \a cols while leaving old values untouched.
* *
* This method is intended for dynamic-size matrices. If you only want to change the number * The method is intended for matrices of dynamic size. If you only want to change the number
* of rows and/or of columns, you can use conservativeResize(NoChange_t, Index), * of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or
* conservativeResize(Index, NoChange_t). * conservativeResize(Index, NoChange_t).
* *
* The top-left part of the resized matrix will be the same as the overlapping top-left corner * Matrices are resized relative to the top-left element. In case values need to be
* of \c *this. In case values need to be appended to the matrix they will be uninitialized. * appended to the matrix they will be uninitialized.
*/ */
EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols) EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols)
{ {
ei_conservative_resize_like_impl<Derived>::run(*this, rows, cols); internal::conservative_resize_like_impl<Derived>::run(*this, rows, cols);
} }
/** Resizes the matrix to \a rows x \a cols while leaving old values untouched.
*
* As opposed to conservativeResize(Index rows, Index cols), this version leaves
* the number of columns unchanged.
*
* In case the matrix is growing, new rows will be uninitialized.
*/
EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t) EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t)
{ {
// Note: see the comment in conservativeResize(Index,Index) // Note: see the comment in conservativeResize(Index,Index)
conservativeResize(rows, cols()); conservativeResize(rows, cols());
} }
/** Resizes the matrix to \a rows x \a cols while leaving old values untouched.
*
* As opposed to conservativeResize(Index rows, Index cols), this version leaves
* the number of rows unchanged.
*
* In case the matrix is growing, new columns will be uninitialized.
*/
EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols) EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols)
{ {
// Note: see the comment in conservativeResize(Index,Index) // Note: see the comment in conservativeResize(Index,Index)
conservativeResize(rows(), cols); conservativeResize(rows(), cols);
} }
/** Resizes \c *this to a vector of length \a size while retaining old values of *this. /** Resizes the vector to \a size while retaining old values.
* *
* \only_for_vectors. This method does not work for * \only_for_vectors. This method does not work for
* partially dynamic matrices when the static dimension is anything other * partially dynamic matrices when the static dimension is anything other
@@ -285,19 +337,28 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
*/ */
EIGEN_STRONG_INLINE void conservativeResize(Index size) EIGEN_STRONG_INLINE void conservativeResize(Index size)
{ {
ei_conservative_resize_like_impl<Derived>::run(*this, size); internal::conservative_resize_like_impl<Derived>::run(*this, size);
} }
/** Resizes the matrix to \a rows x \a cols of \c other, while leaving old values untouched.
*
* The method is intended for matrices of dynamic size. If you only want to change the number
* of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or
* conservativeResize(Index, NoChange_t).
*
* Matrices are resized relative to the top-left element. In case values need to be
* appended to the matrix they will copied from \c other.
*/
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase<OtherDerived>& other) EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase<OtherDerived>& other)
{ {
ei_conservative_resize_like_impl<Derived,OtherDerived>::run(*this, other); internal::conservative_resize_like_impl<Derived,OtherDerived>::run(*this, other);
} }
/** This is a special case of the templated operator=. Its purpose is to /** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=. * prevent a default operator= from hiding the templated operator=.
*/ */
EIGEN_STRONG_INLINE Derived& operator=(const DenseStorageBase& other) EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other)
{ {
return _set(other); return _set(other);
} }
@@ -317,7 +378,7 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
return Base::operator=(func); return Base::operator=(func);
} }
EIGEN_STRONG_INLINE explicit DenseStorageBase() : m_storage() EIGEN_STRONG_INLINE explicit PlainObjectBase() : m_storage()
{ {
// _check_template_params(); // _check_template_params();
// EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED // EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
@@ -326,14 +387,14 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
// FIXME is it still needed ? // FIXME is it still needed ?
/** \internal */ /** \internal */
DenseStorageBase(ei_constructor_without_unaligned_array_assert) PlainObjectBase(internal::constructor_without_unaligned_array_assert)
: m_storage(ei_constructor_without_unaligned_array_assert()) : m_storage(internal::constructor_without_unaligned_array_assert())
{ {
// _check_template_params(); EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED // _check_template_params(); EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
} }
#endif #endif
EIGEN_STRONG_INLINE DenseStorageBase(Index size, Index rows, Index cols) EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)
: m_storage(size, rows, cols) : m_storage(size, rows, cols)
{ {
// _check_template_params(); // _check_template_params();
@@ -352,7 +413,7 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */ /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_STRONG_INLINE DenseStorageBase(const EigenBase<OtherDerived> &other) EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
: m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols()) : m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
{ {
_check_template_params(); _check_template_params();
@@ -370,31 +431,69 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
* \see class Map * \see class Map
*/ */
//@{ //@{
inline static const UnalignedMapType Map(const Scalar* data) inline static ConstMapType Map(const Scalar* data)
{ return UnalignedMapType(data); } { return ConstMapType(data); }
inline static UnalignedMapType Map(Scalar* data) inline static MapType Map(Scalar* data)
{ return UnalignedMapType(data); } { return MapType(data); }
inline static const UnalignedMapType Map(const Scalar* data, Index size) inline static ConstMapType Map(const Scalar* data, Index size)
{ return UnalignedMapType(data, size); } { return ConstMapType(data, size); }
inline static UnalignedMapType Map(Scalar* data, Index size) inline static MapType Map(Scalar* data, Index size)
{ return UnalignedMapType(data, size); } { return MapType(data, size); }
inline static const UnalignedMapType Map(const Scalar* data, Index rows, Index cols) inline static ConstMapType Map(const Scalar* data, Index rows, Index cols)
{ return UnalignedMapType(data, rows, cols); } { return ConstMapType(data, rows, cols); }
inline static UnalignedMapType Map(Scalar* data, Index rows, Index cols) inline static MapType Map(Scalar* data, Index rows, Index cols)
{ return UnalignedMapType(data, rows, cols); } { return MapType(data, rows, cols); }
inline static const AlignedMapType MapAligned(const Scalar* data) inline static ConstAlignedMapType MapAligned(const Scalar* data)
{ return AlignedMapType(data); } { return ConstAlignedMapType(data); }
inline static AlignedMapType MapAligned(Scalar* data) inline static AlignedMapType MapAligned(Scalar* data)
{ return AlignedMapType(data); } { return AlignedMapType(data); }
inline static const AlignedMapType MapAligned(const Scalar* data, Index size) inline static ConstAlignedMapType MapAligned(const Scalar* data, Index size)
{ return AlignedMapType(data, size); } { return ConstAlignedMapType(data, size); }
inline static AlignedMapType MapAligned(Scalar* data, Index size) inline static AlignedMapType MapAligned(Scalar* data, Index size)
{ return AlignedMapType(data, size); } { return AlignedMapType(data, size); }
inline static const AlignedMapType MapAligned(const Scalar* data, Index rows, Index cols) inline static ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
{ return AlignedMapType(data, rows, cols); } { return ConstAlignedMapType(data, rows, cols); }
inline static AlignedMapType MapAligned(Scalar* data, Index rows, Index cols) inline static AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)
{ return AlignedMapType(data, rows, cols); } { return AlignedMapType(data, rows, cols); }
template<int Outer, int Inner>
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
template<int Outer, int Inner>
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
template<int Outer, int Inner>
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
template<int Outer, int Inner>
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
template<int Outer, int Inner>
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
template<int Outer, int Inner>
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
//@} //@}
using Base::setConstant; using Base::setConstant;
@@ -413,8 +512,8 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
Derived& setRandom(Index size); Derived& setRandom(Index size);
Derived& setRandom(Index rows, Index cols); Derived& setRandom(Index rows, Index cols);
#ifdef EIGEN_DENSESTORAGEBASE_PLUGIN #ifdef EIGEN_PLAINOBJECTBASE_PLUGIN
#include EIGEN_DENSESTORAGEBASE_PLUGIN #include EIGEN_PLAINOBJECTBASE_PLUGIN
#endif #endif
protected: protected:
@@ -429,11 +528,12 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase<OtherDerived>& other) EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase<OtherDerived>& other)
{ {
#ifdef EIGEN_NO_AUTOMATIC_RESIZING #ifdef EIGEN_NO_AUTOMATIC_RESIZING
ei_assert((this->size()==0 || (IsVectorAtCompileTime ? (this->size() == other.size()) eigen_assert((this->size()==0 || (IsVectorAtCompileTime ? (this->size() == other.size())
: (rows() == other.rows() && cols() == other.cols()))) : (rows() == other.rows() && cols() == other.cols())))
&& "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
#endif #else
resizeLike(other); resizeLike(other);
#endif
} }
/** /**
@@ -453,15 +553,15 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other) EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other)
{ {
_set_selector(other.derived(), typename ei_meta_if<static_cast<bool>(int(OtherDerived::Flags) & EvalBeforeAssigningBit), ei_meta_true, ei_meta_false>::ret()); _set_selector(other.derived(), typename internal::conditional<static_cast<bool>(int(OtherDerived::Flags) & EvalBeforeAssigningBit), internal::true_type, internal::false_type>::type());
return this->derived(); return this->derived();
} }
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const ei_meta_true&) { _set_noalias(other.eval()); } EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::true_type&) { _set_noalias(other.eval()); }
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const ei_meta_false&) { _set_noalias(other); } EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::false_type&) { _set_noalias(other); }
/** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which /** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which
* is the case when creating a new matrix) so one can enforce lazy evaluation. * is the case when creating a new matrix) so one can enforce lazy evaluation.
@@ -476,36 +576,36 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
//_resize_to_match(other); //_resize_to_match(other);
// the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because
// it wouldn't allow to copy a row-vector into a column-vector. // it wouldn't allow to copy a row-vector into a column-vector.
return ei_assign_selector<Derived,OtherDerived,false>::run(this->derived(), other.derived()); return internal::assign_selector<Derived,OtherDerived,false>::run(this->derived(), other.derived());
} }
template<typename T0, typename T1> template<typename T0, typename T1>
EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename ei_enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0) EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
{ {
ei_assert(rows > 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) eigen_assert(rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols > 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)); && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
m_storage.resize(rows*cols,rows,cols); m_storage.resize(rows*cols,rows,cols);
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
} }
template<typename T0, typename T1> template<typename T0, typename T1>
EIGEN_STRONG_INLINE void _init2(const Scalar& x, const Scalar& y, typename ei_enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0) EIGEN_STRONG_INLINE void _init2(const Scalar& x, const Scalar& y, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
{ {
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(DenseStorageBase, 2) EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
m_storage.data()[0] = x; m_storage.data()[0] = x;
m_storage.data()[1] = y; m_storage.data()[1] = y;
} }
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
friend struct ei_matrix_swap_impl; friend struct internal::matrix_swap_impl;
/** \internal generic implementation of swap for dense storage since for dynamic-sized matrices of same type it is enough to swap the /** \internal generic implementation of swap for dense storage since for dynamic-sized matrices of same type it is enough to swap the
* data pointers. * data pointers.
*/ */
template<typename OtherDerived> template<typename OtherDerived>
void _swap(DenseBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other) void _swap(DenseBase<OtherDerived> const & other)
{ {
enum { SwapPointers = ei_is_same_type<Derived, OtherDerived>::ret && Base::SizeAtCompileTime==Dynamic }; enum { SwapPointers = internal::is_same<Derived, OtherDerived>::value && Base::SizeAtCompileTime==Dynamic };
ei_matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.const_cast_derived()); internal::matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.const_cast_derived());
} }
public: public:
@@ -524,10 +624,13 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
INVALID_MATRIX_TEMPLATE_PARAMETERS) INVALID_MATRIX_TEMPLATE_PARAMETERS)
} }
#endif #endif
private:
enum { ThisConstantIsPrivateInPlainObjectBase };
}; };
template <typename Derived, typename OtherDerived, bool IsVector> template <typename Derived, typename OtherDerived, bool IsVector>
struct ei_conservative_resize_like_impl struct internal::conservative_resize_like_impl
{ {
typedef typename Derived::Index Index; typedef typename Derived::Index Index;
static void run(DenseBase<Derived>& _this, Index rows, Index cols) static void run(DenseBase<Derived>& _this, Index rows, Index cols)
@@ -586,8 +689,10 @@ struct ei_conservative_resize_like_impl
} }
}; };
namespace internal {
template <typename Derived, typename OtherDerived> template <typename Derived, typename OtherDerived>
struct ei_conservative_resize_like_impl<Derived,OtherDerived,true> struct conservative_resize_like_impl<Derived,OtherDerived,true>
{ {
typedef typename Derived::Index Index; typedef typename Derived::Index Index;
static void run(DenseBase<Derived>& _this, Index size) static void run(DenseBase<Derived>& _this, Index size)
@@ -613,7 +718,7 @@ struct ei_conservative_resize_like_impl<Derived,OtherDerived,true>
}; };
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
struct ei_matrix_swap_impl struct matrix_swap_impl
{ {
static inline void run(MatrixTypeA& a, MatrixTypeB& b) static inline void run(MatrixTypeA& a, MatrixTypeB& b)
{ {
@@ -622,7 +727,7 @@ struct ei_matrix_swap_impl
}; };
template<typename MatrixTypeA, typename MatrixTypeB> template<typename MatrixTypeA, typename MatrixTypeB>
struct ei_matrix_swap_impl<MatrixTypeA, MatrixTypeB, true> struct matrix_swap_impl<MatrixTypeA, MatrixTypeB, true>
{ {
static inline void run(MatrixTypeA& a, MatrixTypeB& b) static inline void run(MatrixTypeA& a, MatrixTypeB& b)
{ {
@@ -630,4 +735,6 @@ struct ei_matrix_swap_impl<MatrixTypeA, MatrixTypeB, true>
} }
}; };
} // end namespace internal
#endif // EIGEN_DENSESTORAGEBASE_H #endif // EIGEN_DENSESTORAGEBASE_H

View File

@@ -27,6 +27,7 @@
#define EIGEN_PRODUCT_H #define EIGEN_PRODUCT_H
/** \class GeneralProduct /** \class GeneralProduct
* \ingroup Core_Module
* *
* \brief Expression of the product of two general matrices or vectors * \brief Expression of the product of two general matrices or vectors
* *
@@ -44,39 +45,57 @@
* *
* \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&) * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
*/ */
template<typename Lhs, typename Rhs, int ProductType = ei_product_type<Lhs,Rhs>::value> template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
class GeneralProduct; class GeneralProduct;
template<int Rows, int Cols, int Depth> struct ei_product_type_selector;
enum { enum {
Large = 2, Large = 2,
Small = 3 Small = 3
}; };
template<typename Lhs, typename Rhs> struct ei_product_type namespace internal {
template<int Rows, int Cols, int Depth> struct product_type_selector;
template<int Size, int MaxSize> struct product_size_category
{ {
typedef typename ei_cleantype<Lhs>::type _Lhs; enum { is_large = MaxSize == Dynamic ||
typedef typename ei_cleantype<Rhs>::type _Rhs; Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
value = is_large ? Large
: Size == 1 ? 1
: Small
};
};
template<typename Lhs, typename Rhs> struct product_type
{
typedef typename remove_all<Lhs>::type _Lhs;
typedef typename remove_all<Rhs>::type _Rhs;
enum { enum {
Rows = _Lhs::MaxRowsAtCompileTime, MaxRows = _Lhs::MaxRowsAtCompileTime,
Cols = _Rhs::MaxColsAtCompileTime, Rows = _Lhs::RowsAtCompileTime,
Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,_Rhs::MaxRowsAtCompileTime) MaxCols = _Rhs::MaxColsAtCompileTime,
Cols = _Rhs::ColsAtCompileTime,
MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
_Rhs::MaxRowsAtCompileTime),
Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
_Rhs::RowsAtCompileTime),
LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
}; };
// the splitting into different lines of code here, introducing the _select enums and the typedef below, // the splitting into different lines of code here, introducing the _select enums and the typedef below,
// is to work around an internal compiler error with gcc 4.1 and 4.2. // is to work around an internal compiler error with gcc 4.1 and 4.2.
private: private:
enum { enum {
rows_select = Rows == Dynamic || Rows >=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Rows==1 ? 1 : Small), rows_select = product_size_category<Rows,MaxRows>::value,
cols_select = Cols == Dynamic || Cols >=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Cols==1 ? 1 : Small), cols_select = product_size_category<Cols,MaxCols>::value,
depth_select = Depth == Dynamic || Depth>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Depth==1 ? 1 : Small) depth_select = product_size_category<Depth,MaxDepth>::value
}; };
typedef ei_product_type_selector<rows_select, cols_select, depth_select> product_type_selector; typedef product_type_selector<rows_select, cols_select, depth_select> selector;
public: public:
enum { enum {
value = product_type_selector::ret value = selector::ret
}; };
#ifdef EIGEN_DEBUG_PRODUCT #ifdef EIGEN_DEBUG_PRODUCT
static void debug() static void debug()
@@ -92,40 +111,44 @@ public:
#endif #endif
}; };
/* The following allows to select the kind of product at compile time /* The following allows to select the kind of product at compile time
* based on the three dimensions of the product. * based on the three dimensions of the product.
* This is a compile time mapping from {1,Small,Large}^3 -> {product types} */ * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
// FIXME I'm not sure the current mapping is the ideal one. // FIXME I'm not sure the current mapping is the ideal one.
template<int M, int N> struct ei_product_type_selector<M,N,1> { enum { ret = OuterProduct }; }; template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
template<int Depth> struct ei_product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; }; template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
template<> struct ei_product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; }; template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
template<> struct ei_product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; }; template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; }; template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; }; template<> struct product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; }; template<> struct product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; }; template<> struct product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; }; template<> struct product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct ei_product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; }; template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; }; template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
template<> struct ei_product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; }; template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; }; template<> struct product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; }; template<> struct product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; };
template<> struct ei_product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; }; template<> struct product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; };
template<> struct ei_product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
template<> struct ei_product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
template<> struct ei_product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
template<> struct ei_product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; };
template<> struct ei_product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; };
template<> struct ei_product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
} // end namespace internal
/** \class ProductReturnType /** \class ProductReturnType
* \ingroup Core_Module
* *
* \brief Helper class to get the correct and optimized returned type of operator* * \brief Helper class to get the correct and optimized returned type of operator*
* *
* \param Lhs the type of the left-hand side * \param Lhs the type of the left-hand side
* \param Rhs the type of the right-hand side * \param Rhs the type of the right-hand side
* \param ProductMode the type of the product (determined automatically by ei_product_mode) * \param ProductMode the type of the product (determined automatically by internal::product_mode)
* *
* This class defines the typename Type representing the optimized product expression * This class defines the typename Type representing the optimized product expression
* between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type * between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
@@ -139,8 +162,8 @@ template<typename Lhs, typename Rhs, int ProductType>
struct ProductReturnType struct ProductReturnType
{ {
// TODO use the nested type to reduce instanciations ???? // TODO use the nested type to reduce instanciations ????
// typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; // typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
// typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested; // typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type; typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
}; };
@@ -148,19 +171,23 @@ struct ProductReturnType
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode> struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
{ {
typedef typename ei_nested<Lhs, Rhs::ColsAtCompileTime, typename ei_plain_matrix_type<Lhs>::type >::type LhsNested; typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename ei_nested<Rhs, Lhs::RowsAtCompileTime, typename ei_plain_matrix_type<Rhs>::type >::type RhsNested; typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type; typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
}; };
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode> struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
{ {
typedef typename ei_nested<Lhs, Rhs::ColsAtCompileTime, typename ei_plain_matrix_type<Lhs>::type >::type LhsNested; typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename ei_nested<Rhs, Lhs::RowsAtCompileTime, typename ei_plain_matrix_type<Rhs>::type >::type RhsNested; typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type; typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
}; };
// this is a workaround for sun CC
template<typename Lhs, typename Rhs>
struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
{};
/*********************************************************************** /***********************************************************************
* Implementation of Inner Vector Vector Product * Implementation of Inner Vector Vector Product
@@ -173,28 +200,30 @@ struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
// product ends up to a row-vector times col-vector product... To tackle this use // product ends up to a row-vector times col-vector product... To tackle this use
// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x); // case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
namespace internal {
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs>
struct ei_traits<GeneralProduct<Lhs,Rhs,InnerProduct> > struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
: ei_traits<Matrix<typename ei_scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> > : traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
{}; {};
}
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, InnerProduct> class GeneralProduct<Lhs, Rhs, InnerProduct>
: ei_no_assignment_operator, : internal::no_assignment_operator,
public Matrix<typename ei_scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
{ {
typedef Matrix<typename ei_scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base; typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
public: public:
GeneralProduct(const Lhs& lhs, const Rhs& rhs) GeneralProduct(const Lhs& lhs, const Rhs& rhs)
{ {
EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::RealScalar, typename Rhs::RealScalar>::ret), EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
} }
typename Base::Scalar value() const { return Base::coeff(0,0); }
/** Convertion to scalar */ /** Convertion to scalar */
operator const typename Base::Scalar() const { operator const typename Base::Scalar() const {
return Base::coeff(0,0); return Base::coeff(0,0);
@@ -204,13 +233,17 @@ class GeneralProduct<Lhs, Rhs, InnerProduct>
/*********************************************************************** /***********************************************************************
* Implementation of Outer Vector Vector Product * Implementation of Outer Vector Vector Product
***********************************************************************/ ***********************************************************************/
template<int StorageOrder> struct ei_outer_product_selector;
namespace internal {
template<int StorageOrder> struct outer_product_selector;
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs>
struct ei_traits<GeneralProduct<Lhs,Rhs,OuterProduct> > struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
: ei_traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> > : traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
{}; {};
}
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, OuterProduct> class GeneralProduct<Lhs, Rhs, OuterProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> : public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
@@ -220,17 +253,19 @@ class GeneralProduct<Lhs, Rhs, OuterProduct>
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{ {
EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::RealScalar, typename Rhs::RealScalar>::ret), EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
} }
template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
{ {
ei_outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha); internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
} }
}; };
template<> struct ei_outer_product_selector<ColMajor> { namespace internal {
template<> struct outer_product_selector<ColMajor> {
template<typename ProductType, typename Dest> template<typename ProductType, typename Dest>
static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) { static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
typedef typename Dest::Index Index; typedef typename Dest::Index Index;
@@ -242,7 +277,7 @@ template<> struct ei_outer_product_selector<ColMajor> {
} }
}; };
template<> struct ei_outer_product_selector<RowMajor> { template<> struct outer_product_selector<RowMajor> {
template<typename ProductType, typename Dest> template<typename ProductType, typename Dest>
static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) { static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
typedef typename Dest::Index Index; typedef typename Dest::Index Index;
@@ -254,6 +289,8 @@ template<> struct ei_outer_product_selector<RowMajor> {
} }
}; };
} // end namespace internal
/*********************************************************************** /***********************************************************************
* Implementation of General Matrix Vector Product * Implementation of General Matrix Vector Product
***********************************************************************/ ***********************************************************************/
@@ -265,13 +302,17 @@ template<> struct ei_outer_product_selector<RowMajor> {
* Therefore we need a lower level meta selector. * Therefore we need a lower level meta selector.
* Furthermore, if the matrix is the rhs, then the product has to be transposed. * Furthermore, if the matrix is the rhs, then the product has to be transposed.
*/ */
namespace internal {
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs>
struct ei_traits<GeneralProduct<Lhs,Rhs,GemvProduct> > struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
: ei_traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> > : traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
{}; {};
template<int Side, int StorageOrder, bool BlasCompatible> template<int Side, int StorageOrder, bool BlasCompatible>
struct ei_gemv_selector; struct gemv_selector;
} // end namespace internal
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, GemvProduct> class GeneralProduct<Lhs, Rhs, GemvProduct>
@@ -280,125 +321,208 @@ class GeneralProduct<Lhs, Rhs, GemvProduct>
public: public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{ {
EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret), // EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) // YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
} }
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
typedef typename ei_meta_if<int(Side)==OnTheRight,_LhsNested,_RhsNested>::ret MatrixType; typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{ {
ei_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols()); eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
ei_gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor, internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
bool(ei_blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha); bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
} }
}; };
namespace internal {
// The vector is on the left => transposition // The vector is on the left => transposition
template<int StorageOrder, bool BlasCompatible> template<int StorageOrder, bool BlasCompatible>
struct ei_gemv_selector<OnTheLeft,StorageOrder,BlasCompatible> struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
{ {
template<typename ProductType, typename Dest> template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{ {
Transpose<Dest> destT(dest); Transpose<Dest> destT(dest);
enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor }; enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
ei_gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible> gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
::run(GeneralProduct<Transpose<typename ProductType::_RhsNested>,Transpose<typename ProductType::_LhsNested>, GemvProduct> ::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
(prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha); (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
} }
}; };
template<> struct ei_gemv_selector<OnTheRight,ColMajor,true> template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
{
EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
};
template<typename Scalar,int Size>
struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
{
EIGEN_STRONG_INLINE Scalar* data() { return 0; }
};
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
{
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
};
template<> struct gemv_selector<OnTheRight,ColMajor,true>
{ {
template<typename ProductType, typename Dest> template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{ {
typedef typename ProductType::Scalar Scalar; typedef typename ProductType::Index Index;
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
typedef typename ProductType::Scalar ResScalar;
typedef typename ProductType::RealScalar RealScalar;
typedef typename ProductType::ActualLhsType ActualLhsType; typedef typename ProductType::ActualLhsType ActualLhsType;
typedef typename ProductType::ActualRhsType ActualRhsType; typedef typename ProductType::ActualRhsType ActualRhsType;
typedef typename ProductType::LhsBlasTraits LhsBlasTraits; typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
typedef typename ProductType::RhsBlasTraits RhsBlasTraits; typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs()); const ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs()); const ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs()); * RhsBlasTraits::extractScalarFactor(prod.rhs());
enum { enum {
// FIXME find a way to allow an inner stride on the result if ei_packet_traits<Scalar>::size==1 // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
EvalToDest = Dest::InnerStrideAtCompileTime==1 // on, the other hand it is good for the cache to pack the vector anyways...
EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
}; };
Scalar* EIGEN_RESTRICT actualDest; gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
if (EvalToDest)
actualDest = &dest.coeffRef(0); bool alphaIsCompatible = (!ComplexByReal) || (imag(actualAlpha)==RealScalar(0));
bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
ResScalar* actualDestPtr;
bool freeDestPtr = false;
if (evalToDest)
{
actualDestPtr = &dest.coeffRef(0);
}
else else
{ {
actualDest = ei_aligned_stack_new(Scalar,dest.size()); #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
Map<typename Dest::PlainObject>(actualDest, dest.size()) = dest; int size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if((actualDestPtr = static_dest.data())==0)
{
freeDestPtr = true;
actualDestPtr = ei_aligned_stack_new(ResScalar,dest.size());
}
if(!alphaIsCompatible)
{
MappedDest(actualDestPtr, dest.size()).setZero();
compatibleAlpha = RhsScalar(1);
}
else
MappedDest(actualDestPtr, dest.size()) = dest;
} }
ei_cache_friendly_product_colmajor_times_vector general_matrix_vector_product
<LhsBlasTraits::NeedToConjugate,RhsBlasTraits::NeedToConjugate>( <Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
dest.size(), actualLhs.rows(), actualLhs.cols(),
&actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(), &actualLhs.coeffRef(0,0), actualLhs.outerStride(),
actualRhs, actualDest, actualAlpha); actualRhs.data(), actualRhs.innerStride(),
actualDestPtr, 1,
compatibleAlpha);
if (!EvalToDest) if (!evalToDest)
{ {
dest = Map<typename Dest::PlainObject>(actualDest, dest.size()); if(!alphaIsCompatible)
ei_aligned_stack_delete(Scalar, actualDest, dest.size()); dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
else
dest = MappedDest(actualDestPtr, dest.size());
if(freeDestPtr) ei_aligned_stack_delete(ResScalar, actualDestPtr, dest.size());
} }
} }
}; };
template<> struct ei_gemv_selector<OnTheRight,RowMajor,true> template<> struct gemv_selector<OnTheRight,RowMajor,true>
{ {
template<typename ProductType, typename Dest> template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
{ {
typedef typename ProductType::Scalar Scalar; typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
typedef typename ProductType::Scalar ResScalar;
typedef typename ProductType::Index Index;
typedef typename ProductType::ActualLhsType ActualLhsType; typedef typename ProductType::ActualLhsType ActualLhsType;
typedef typename ProductType::ActualRhsType ActualRhsType; typedef typename ProductType::ActualRhsType ActualRhsType;
typedef typename ProductType::_ActualRhsType _ActualRhsType; typedef typename ProductType::_ActualRhsType _ActualRhsType;
typedef typename ProductType::LhsBlasTraits LhsBlasTraits; typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
typedef typename ProductType::RhsBlasTraits RhsBlasTraits; typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs()); typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs()); typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs()); * RhsBlasTraits::extractScalarFactor(prod.rhs());
enum { enum {
DirectlyUseRhs = ((ei_packet_traits<Scalar>::size==1) || (_ActualRhsType::Flags&ActualPacketAccessBit)) // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
&& (!(_ActualRhsType::Flags & RowMajorBit)) // on, the other hand it is good for the cache to pack the vector anyways...
DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
}; };
Scalar* EIGEN_RESTRICT rhs_data; gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
RhsScalar* actualRhsPtr;
bool freeRhsPtr = false;
if (DirectlyUseRhs) if (DirectlyUseRhs)
rhs_data = reinterpret_cast<Scalar* EIGEN_RESTRICT>(&actualRhs.const_cast_derived().coeffRef(0)); {
actualRhsPtr = const_cast<RhsScalar*>(&actualRhs.coeffRef(0));
}
else else
{ {
rhs_data = ei_aligned_stack_new(Scalar, actualRhs.size()); #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
Map<typename _ActualRhsType::PlainObject>(reinterpret_cast<Scalar*>(rhs_data), actualRhs.size()) = actualRhs; int size = actualRhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if((actualRhsPtr = static_rhs.data())==0)
{
freeRhsPtr = true;
actualRhsPtr = ei_aligned_stack_new(RhsScalar, actualRhs.size());
}
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
} }
ei_cache_friendly_product_rowmajor_times_vector general_matrix_vector_product
<LhsBlasTraits::NeedToConjugate,RhsBlasTraits::NeedToConjugate>( <Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
&actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(), actualLhs.rows(), actualLhs.cols(),
rhs_data, prod.rhs().size(), dest, actualAlpha); &actualLhs.coeffRef(0,0), actualLhs.outerStride(),
actualRhsPtr, 1,
&dest.coeffRef(0,0), dest.innerStride(),
actualAlpha);
if (!DirectlyUseRhs) ei_aligned_stack_delete(Scalar, rhs_data, prod.rhs().size()); if((!DirectlyUseRhs) && freeRhsPtr) ei_aligned_stack_delete(RhsScalar, actualRhsPtr, prod.rhs().size());
} }
}; };
template<> struct ei_gemv_selector<OnTheRight,ColMajor,false> template<> struct gemv_selector<OnTheRight,ColMajor,false>
{ {
template<typename ProductType, typename Dest> template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
@@ -411,7 +535,7 @@ template<> struct ei_gemv_selector<OnTheRight,ColMajor,false>
} }
}; };
template<> struct ei_gemv_selector<OnTheRight,RowMajor,false> template<> struct gemv_selector<OnTheRight,RowMajor,false>
{ {
template<typename ProductType, typename Dest> template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
@@ -424,6 +548,8 @@ template<> struct ei_gemv_selector<OnTheRight,RowMajor,false>
} }
}; };
} // end namespace internal
/*************************************************************************** /***************************************************************************
* Implementation of matrix base methods * Implementation of matrix base methods
***************************************************************************/ ***************************************************************************/
@@ -440,7 +566,7 @@ inline const typename ProductReturnType<Derived,OtherDerived>::Type
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
{ {
// A note regarding the function declaration: In MSVC, this function will sometimes // A note regarding the function declaration: In MSVC, this function will sometimes
// not be inlined since ei_matrix_storage is an unwindable object for dynamic // not be inlined since DenseStorage is an unwindable object for dynamic
// matrices and product types are holding a member to store the result. // matrices and product types are holding a member to store the result.
// Thus it does not help tagging this function with EIGEN_STRONG_INLINE. // Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
enum { enum {
@@ -459,7 +585,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
#ifdef EIGEN_DEBUG_PRODUCT #ifdef EIGEN_DEBUG_PRODUCT
ei_product_type<Derived,OtherDerived>::debug(); internal::product_type<Derived,OtherDerived>::debug();
#endif #endif
return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived()); return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
} }
@@ -477,7 +603,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
*/ */
template<typename Derived> template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
const typename ProductReturnType<Derived,OtherDerived,LazyCoeffBasedProductMode>::Type const typename LazyProductReturnType<Derived,OtherDerived>::Type
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
{ {
enum { enum {
@@ -496,7 +622,7 @@ MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
return typename ProductReturnType<Derived,OtherDerived,LazyCoeffBasedProductMode>::Type(derived(), other.derived()); return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
} }
#endif // EIGEN_PRODUCT_H #endif // EIGEN_PRODUCT_H

View File

@@ -26,31 +26,35 @@
#define EIGEN_PRODUCTBASE_H #define EIGEN_PRODUCTBASE_H
/** \class ProductBase /** \class ProductBase
* \ingroup Core_Module
* *
*/ */
namespace internal {
template<typename Derived, typename _Lhs, typename _Rhs> template<typename Derived, typename _Lhs, typename _Rhs>
struct ei_traits<ProductBase<Derived,_Lhs,_Rhs> > struct traits<ProductBase<Derived,_Lhs,_Rhs> >
{ {
typedef MatrixXpr XprKind; typedef MatrixXpr XprKind;
typedef typename ei_cleantype<_Lhs>::type Lhs; typedef typename remove_all<_Lhs>::type Lhs;
typedef typename ei_cleantype<_Rhs>::type Rhs; typedef typename remove_all<_Rhs>::type Rhs;
typedef typename ei_scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar; typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
typedef typename ei_promote_storage_type<typename ei_traits<Lhs>::StorageKind, typedef typename promote_storage_type<typename traits<Lhs>::StorageKind,
typename ei_traits<Rhs>::StorageKind>::ret StorageKind; typename traits<Rhs>::StorageKind>::ret StorageKind;
typedef typename ei_promote_index_type<typename ei_traits<Lhs>::Index, typedef typename promote_index_type<typename traits<Lhs>::Index,
typename ei_traits<Rhs>::Index>::type Index; typename traits<Rhs>::Index>::type Index;
enum { enum {
RowsAtCompileTime = ei_traits<Lhs>::RowsAtCompileTime, RowsAtCompileTime = traits<Lhs>::RowsAtCompileTime,
ColsAtCompileTime = ei_traits<Rhs>::ColsAtCompileTime, ColsAtCompileTime = traits<Rhs>::ColsAtCompileTime,
MaxRowsAtCompileTime = ei_traits<Lhs>::MaxRowsAtCompileTime, MaxRowsAtCompileTime = traits<Lhs>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = ei_traits<Rhs>::MaxColsAtCompileTime, MaxColsAtCompileTime = traits<Rhs>::MaxColsAtCompileTime,
Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0) Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0)
| EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit, | EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit,
// Note that EvalBeforeNestingBit and NestByRefBit // Note that EvalBeforeNestingBit and NestByRefBit
// are not used in practice because ei_nested is overloaded for products // are not used in practice because nested is overloaded for products
CoeffReadCost = 0 // FIXME why is it needed ? CoeffReadCost = 0 // FIXME why is it needed ?
}; };
}; };
}
#define EIGEN_PRODUCT_PUBLIC_INTERFACE(Derived) \ #define EIGEN_PRODUCT_PUBLIC_INTERFACE(Derived) \
typedef ProductBase<Derived, Lhs, Rhs > Base; \ typedef ProductBase<Derived, Lhs, Rhs > Base; \
@@ -74,18 +78,20 @@ class ProductBase : public MatrixBase<Derived>
public: public:
typedef MatrixBase<Derived> Base; typedef MatrixBase<Derived> Base;
EIGEN_DENSE_PUBLIC_INTERFACE(ProductBase) EIGEN_DENSE_PUBLIC_INTERFACE(ProductBase)
protected:
typedef typename Lhs::Nested LhsNested; typedef typename Lhs::Nested LhsNested;
typedef typename ei_cleantype<LhsNested>::type _LhsNested; typedef typename internal::remove_all<LhsNested>::type _LhsNested;
typedef ei_blas_traits<_LhsNested> LhsBlasTraits; typedef internal::blas_traits<_LhsNested> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef typename ei_cleantype<ActualLhsType>::type _ActualLhsType; typedef typename internal::remove_all<ActualLhsType>::type _ActualLhsType;
typedef typename internal::traits<Lhs>::Scalar LhsScalar;
typedef typename Rhs::Nested RhsNested; typedef typename Rhs::Nested RhsNested;
typedef typename ei_cleantype<RhsNested>::type _RhsNested; typedef typename internal::remove_all<RhsNested>::type _RhsNested;
typedef ei_blas_traits<_RhsNested> RhsBlasTraits; typedef internal::blas_traits<_RhsNested> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef typename ei_cleantype<ActualRhsType>::type _ActualRhsType; typedef typename internal::remove_all<ActualRhsType>::type _ActualRhsType;
typedef typename internal::traits<Rhs>::Scalar RhsScalar;
// Diagonal of a product: no need to evaluate the arguments because they are going to be evaluated only once // Diagonal of a product: no need to evaluate the arguments because they are going to be evaluated only once
typedef CoeffBasedProduct<LhsNested, RhsNested, 0> FullyLazyCoeffBaseProductType; typedef CoeffBasedProduct<LhsNested, RhsNested, 0> FullyLazyCoeffBaseProductType;
@@ -97,7 +103,7 @@ class ProductBase : public MatrixBase<Derived>
ProductBase(const Lhs& lhs, const Rhs& rhs) ProductBase(const Lhs& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs) : m_lhs(lhs), m_rhs(rhs)
{ {
ei_assert(lhs.cols() == rhs.rows() eigen_assert(lhs.cols() == rhs.rows()
&& "invalid matrix product" && "invalid matrix product"
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions"); && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
} }
@@ -124,11 +130,11 @@ class ProductBase : public MatrixBase<Derived>
operator const PlainObject& () const operator const PlainObject& () const
{ {
m_result.resize(m_lhs.rows(), m_rhs.cols()); m_result.resize(m_lhs.rows(), m_rhs.cols());
this->evalTo(m_result); derived().evalTo(m_result);
return m_result; return m_result;
} }
const Diagonal<FullyLazyCoeffBaseProductType,0> diagonal() const const Diagonal<const FullyLazyCoeffBaseProductType,0> diagonal() const
{ return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); } { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); }
template<int Index> template<int Index>
@@ -138,29 +144,56 @@ class ProductBase : public MatrixBase<Derived>
const Diagonal<FullyLazyCoeffBaseProductType,Dynamic> diagonal(Index index) const const Diagonal<FullyLazyCoeffBaseProductType,Dynamic> diagonal(Index index) const
{ return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); } { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); }
// restrict coeff accessors to 1x1 expressions. No need to care about mutators here since this isnt a Lvalue expression
typename Base::CoeffReturnType coeff(Index row, Index col) const
{
#ifdef EIGEN2_SUPPORT
return lhs().row(row).cwiseProduct(rhs().col(col).transpose()).sum();
#else
EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
eigen_assert(this->rows() == 1 && this->cols() == 1);
return derived().coeff(row,col);
#endif
}
typename Base::CoeffReturnType coeff(Index i) const
{
EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
eigen_assert(this->rows() == 1 && this->cols() == 1);
return derived().coeff(i);
}
const Scalar& coeffRef(Index row, Index col) const
{
EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
eigen_assert(this->rows() == 1 && this->cols() == 1);
return derived().coeffRef(row,col);
}
const Scalar& coeffRef(Index i) const
{
EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
eigen_assert(this->rows() == 1 && this->cols() == 1);
return derived().coeffRef(i);
}
protected: protected:
const LhsNested m_lhs; const LhsNested m_lhs;
const RhsNested m_rhs; const RhsNested m_rhs;
mutable PlainObject m_result; mutable PlainObject m_result;
private:
// discard coeff methods
void coeff(Index,Index) const;
void coeffRef(Index,Index);
void coeff(Index) const;
void coeffRef(Index);
}; };
// here we need to overload the nested rule for products // here we need to overload the nested rule for products
// such that the nested type is a const reference to a plain matrix // such that the nested type is a const reference to a plain matrix
namespace internal {
template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject> template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject>
struct ei_nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject> struct nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject>
{ {
typedef PlainObject const& type; typedef PlainObject const& type;
}; };
}
template<typename NestedProduct> template<typename NestedProduct>
class ScaledProduct; class ScaledProduct;
@@ -177,7 +210,7 @@ operator*(const ProductBase<Derived,Lhs,Rhs>& prod, typename Derived::Scalar x)
{ return ScaledProduct<Derived>(prod.derived(), x); } { return ScaledProduct<Derived>(prod.derived(), x); }
template<typename Derived,typename Lhs,typename Rhs> template<typename Derived,typename Lhs,typename Rhs>
typename ei_enable_if<!ei_is_same_type<typename Derived::Scalar,typename Derived::RealScalar>::ret, typename internal::enable_if<!internal::is_same<typename Derived::Scalar,typename Derived::RealScalar>::value,
const ScaledProduct<Derived> >::type const ScaledProduct<Derived> >::type
operator*(const ProductBase<Derived,Lhs,Rhs>& prod, typename Derived::RealScalar x) operator*(const ProductBase<Derived,Lhs,Rhs>& prod, typename Derived::RealScalar x)
{ return ScaledProduct<Derived>(prod.derived(), x); } { return ScaledProduct<Derived>(prod.derived(), x); }
@@ -189,20 +222,21 @@ operator*(typename Derived::Scalar x,const ProductBase<Derived,Lhs,Rhs>& prod)
{ return ScaledProduct<Derived>(prod.derived(), x); } { return ScaledProduct<Derived>(prod.derived(), x); }
template<typename Derived,typename Lhs,typename Rhs> template<typename Derived,typename Lhs,typename Rhs>
typename ei_enable_if<!ei_is_same_type<typename Derived::Scalar,typename Derived::RealScalar>::ret, typename internal::enable_if<!internal::is_same<typename Derived::Scalar,typename Derived::RealScalar>::value,
const ScaledProduct<Derived> >::type const ScaledProduct<Derived> >::type
operator*(typename Derived::RealScalar x,const ProductBase<Derived,Lhs,Rhs>& prod) operator*(typename Derived::RealScalar x,const ProductBase<Derived,Lhs,Rhs>& prod)
{ return ScaledProduct<Derived>(prod.derived(), x); } { return ScaledProduct<Derived>(prod.derived(), x); }
namespace internal {
template<typename NestedProduct> template<typename NestedProduct>
struct ei_traits<ScaledProduct<NestedProduct> > struct traits<ScaledProduct<NestedProduct> >
: ei_traits<ProductBase<ScaledProduct<NestedProduct>, : traits<ProductBase<ScaledProduct<NestedProduct>,
typename NestedProduct::_LhsNested, typename NestedProduct::_LhsNested,
typename NestedProduct::_RhsNested> > typename NestedProduct::_RhsNested> >
{ {
typedef typename ei_traits<NestedProduct>::StorageKind StorageKind; typedef typename traits<NestedProduct>::StorageKind StorageKind;
}; };
}
template<typename NestedProduct> template<typename NestedProduct>
class ScaledProduct class ScaledProduct
@@ -215,6 +249,7 @@ class ScaledProduct
typename NestedProduct::_LhsNested, typename NestedProduct::_LhsNested,
typename NestedProduct::_RhsNested> Base; typename NestedProduct::_RhsNested> Base;
typedef typename Base::Scalar Scalar; typedef typename Base::Scalar Scalar;
typedef typename Base::PlainObject PlainObject;
// EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct) // EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct)
ScaledProduct(const NestedProduct& prod, Scalar x) ScaledProduct(const NestedProduct& prod, Scalar x)
@@ -232,6 +267,8 @@ class ScaledProduct
template<typename Dest> template<typename Dest>
inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { m_prod.derived().scaleAndAddTo(dst,alpha); } inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { m_prod.derived().scaleAndAddTo(dst,alpha); }
const Scalar& alpha() const { return m_alpha; }
protected: protected:
const NestedProduct& m_prod; const NestedProduct& m_prod;
Scalar m_alpha; Scalar m_alpha;

View File

@@ -25,15 +25,20 @@
#ifndef EIGEN_RANDOM_H #ifndef EIGEN_RANDOM_H
#define EIGEN_RANDOM_H #define EIGEN_RANDOM_H
template<typename Scalar> struct ei_scalar_random_op { namespace internal {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_random_op)
template<typename Scalar> struct scalar_random_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op)
template<typename Index> template<typename Index>
inline const Scalar operator() (Index, Index = 0) const { return ei_random<Scalar>(); } inline const Scalar operator() (Index, Index = 0) const { return random<Scalar>(); }
}; };
template<typename Scalar> template<typename Scalar>
struct ei_functor_traits<ei_scalar_random_op<Scalar> > struct functor_traits<scalar_random_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false, IsRepeatable = false }; }; { enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false, IsRepeatable = false }; };
} // end namespace internal
/** \returns a random matrix expression /** \returns a random matrix expression
* *
* The parameters \a rows and \a cols are the number of rows and of columns of * The parameters \a rows and \a cols are the number of rows and of columns of
@@ -53,10 +58,10 @@ struct ei_functor_traits<ei_scalar_random_op<Scalar> >
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index), MatrixBase::Random() * \sa MatrixBase::setRandom(), MatrixBase::Random(Index), MatrixBase::Random()
*/ */
template<typename Derived> template<typename Derived>
inline const CwiseNullaryOp<ei_scalar_random_op<typename ei_traits<Derived>::Scalar>, Derived> inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
DenseBase<Derived>::Random(Index rows, Index cols) DenseBase<Derived>::Random(Index rows, Index cols)
{ {
return NullaryExpr(rows, cols, ei_scalar_random_op<Scalar>()); return NullaryExpr(rows, cols, internal::scalar_random_op<Scalar>());
} }
/** \returns a random vector expression /** \returns a random vector expression
@@ -80,10 +85,10 @@ DenseBase<Derived>::Random(Index rows, Index cols)
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random() * \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random()
*/ */
template<typename Derived> template<typename Derived>
inline const CwiseNullaryOp<ei_scalar_random_op<typename ei_traits<Derived>::Scalar>, Derived> inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
DenseBase<Derived>::Random(Index size) DenseBase<Derived>::Random(Index size)
{ {
return NullaryExpr(size, ei_scalar_random_op<Scalar>()); return NullaryExpr(size, internal::scalar_random_op<Scalar>());
} }
/** \returns a fixed-size random matrix or vector expression /** \returns a fixed-size random matrix or vector expression
@@ -101,10 +106,10 @@ DenseBase<Derived>::Random(Index size)
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random(Index) * \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random(Index)
*/ */
template<typename Derived> template<typename Derived>
inline const CwiseNullaryOp<ei_scalar_random_op<typename ei_traits<Derived>::Scalar>, Derived> inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
DenseBase<Derived>::Random() DenseBase<Derived>::Random()
{ {
return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, ei_scalar_random_op<Scalar>()); return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op<Scalar>());
} }
/** Sets all coefficients in this expression to random values. /** Sets all coefficients in this expression to random values.
@@ -131,7 +136,7 @@ inline Derived& DenseBase<Derived>::setRandom()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& EIGEN_STRONG_INLINE Derived&
DenseStorageBase<Derived>::setRandom(Index size) PlainObjectBase<Derived>::setRandom(Index size)
{ {
resize(size); resize(size);
return setRandom(); return setRandom();
@@ -149,7 +154,7 @@ DenseStorageBase<Derived>::setRandom(Index size)
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& EIGEN_STRONG_INLINE Derived&
DenseStorageBase<Derived>::setRandom(Index rows, Index cols) PlainObjectBase<Derived>::setRandom(Index rows, Index cols)
{ {
resize(rows, cols); resize(rows, cols);
return setRandom(); return setRandom();

View File

@@ -26,6 +26,8 @@
#ifndef EIGEN_REDUX_H #ifndef EIGEN_REDUX_H
#define EIGEN_REDUX_H #define EIGEN_REDUX_H
namespace internal {
// TODO // TODO
// * implement other kind of vectorization // * implement other kind of vectorization
// * factorize code // * factorize code
@@ -35,11 +37,11 @@
***************************************************************************/ ***************************************************************************/
template<typename Func, typename Derived> template<typename Func, typename Derived>
struct ei_redux_traits struct redux_traits
{ {
public: public:
enum { enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size, PacketSize = packet_traits<typename Derived::Scalar>::size,
InnerMaxSize = int(Derived::IsRowMajor) InnerMaxSize = int(Derived::IsRowMajor)
? Derived::MaxColsAtCompileTime ? Derived::MaxColsAtCompileTime
: Derived::MaxRowsAtCompileTime : Derived::MaxRowsAtCompileTime
@@ -47,7 +49,7 @@ public:
enum { enum {
MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit) MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
&& (ei_functor_traits<Func>::PacketAccess), && (functor_traits<Func>::PacketAccess),
MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit), MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
}; };
@@ -63,10 +65,10 @@ public:
enum { enum {
Cost = ( Derived::SizeAtCompileTime == Dynamic Cost = ( Derived::SizeAtCompileTime == Dynamic
|| Derived::CoeffReadCost == Dynamic || Derived::CoeffReadCost == Dynamic
|| (Derived::SizeAtCompileTime!=1 && ei_functor_traits<Func>::Cost == Dynamic) || (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic)
) ? Dynamic ) ? Dynamic
: Derived::SizeAtCompileTime * Derived::CoeffReadCost : Derived::SizeAtCompileTime * Derived::CoeffReadCost
+ (Derived::SizeAtCompileTime-1) * ei_functor_traits<Func>::Cost, + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
}; };
@@ -85,7 +87,7 @@ public:
/*** no vectorization ***/ /*** no vectorization ***/
template<typename Func, typename Derived, int Start, int Length> template<typename Func, typename Derived, int Start, int Length>
struct ei_redux_novec_unroller struct redux_novec_unroller
{ {
enum { enum {
HalfLength = Length/2 HalfLength = Length/2
@@ -95,13 +97,13 @@ struct ei_redux_novec_unroller
EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func& func) EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func& func)
{ {
return func(ei_redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func), return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
ei_redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func)); redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
} }
}; };
template<typename Func, typename Derived, int Start> template<typename Func, typename Derived, int Start>
struct ei_redux_novec_unroller<Func, Derived, Start, 1> struct redux_novec_unroller<Func, Derived, Start, 1>
{ {
enum { enum {
outer = Start / Derived::InnerSizeAtCompileTime, outer = Start / Derived::InnerSizeAtCompileTime,
@@ -120,7 +122,7 @@ struct ei_redux_novec_unroller<Func, Derived, Start, 1>
// to prevent false warnings regarding failed inlining though // to prevent false warnings regarding failed inlining though
// for 0 length run() will never be called at all. // for 0 length run() will never be called at all.
template<typename Func, typename Derived, int Start> template<typename Func, typename Derived, int Start>
struct ei_redux_novec_unroller<Func, Derived, Start, 0> struct redux_novec_unroller<Func, Derived, Start, 0>
{ {
typedef typename Derived::Scalar Scalar; typedef typename Derived::Scalar Scalar;
EIGEN_STRONG_INLINE static Scalar run(const Derived&, const Func&) { return Scalar(); } EIGEN_STRONG_INLINE static Scalar run(const Derived&, const Func&) { return Scalar(); }
@@ -129,36 +131,36 @@ struct ei_redux_novec_unroller<Func, Derived, Start, 0>
/*** vectorization ***/ /*** vectorization ***/
template<typename Func, typename Derived, int Start, int Length> template<typename Func, typename Derived, int Start, int Length>
struct ei_redux_vec_unroller struct redux_vec_unroller
{ {
enum { enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size, PacketSize = packet_traits<typename Derived::Scalar>::size,
HalfLength = Length/2 HalfLength = Length/2
}; };
typedef typename Derived::Scalar Scalar; typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar; typedef typename packet_traits<Scalar>::type PacketScalar;
EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func& func) EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func& func)
{ {
return func.packetOp( return func.packetOp(
ei_redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func), redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
ei_redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) ); redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
} }
}; };
template<typename Func, typename Derived, int Start> template<typename Func, typename Derived, int Start>
struct ei_redux_vec_unroller<Func, Derived, Start, 1> struct redux_vec_unroller<Func, Derived, Start, 1>
{ {
enum { enum {
index = Start * ei_packet_traits<typename Derived::Scalar>::size, index = Start * packet_traits<typename Derived::Scalar>::size,
outer = index / int(Derived::InnerSizeAtCompileTime), outer = index / int(Derived::InnerSizeAtCompileTime),
inner = index % int(Derived::InnerSizeAtCompileTime), inner = index % int(Derived::InnerSizeAtCompileTime),
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
}; };
typedef typename Derived::Scalar Scalar; typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar; typedef typename packet_traits<Scalar>::type PacketScalar;
EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func&) EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func&)
{ {
@@ -171,19 +173,19 @@ struct ei_redux_vec_unroller<Func, Derived, Start, 1>
***************************************************************************/ ***************************************************************************/
template<typename Func, typename Derived, template<typename Func, typename Derived,
int Traversal = ei_redux_traits<Func, Derived>::Traversal, int Traversal = redux_traits<Func, Derived>::Traversal,
int Unrolling = ei_redux_traits<Func, Derived>::Unrolling int Unrolling = redux_traits<Func, Derived>::Unrolling
> >
struct ei_redux_impl; struct redux_impl;
template<typename Func, typename Derived> template<typename Func, typename Derived>
struct ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling> struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
{ {
typedef typename Derived::Scalar Scalar; typedef typename Derived::Scalar Scalar;
typedef typename Derived::Index Index; typedef typename Derived::Index Index;
static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
{ {
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix"); eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
Scalar res; Scalar res;
res = mat.coeffByOuterInner(0, 0); res = mat.coeffByOuterInner(0, 0);
for(Index i = 1; i < mat.innerSize(); ++i) for(Index i = 1; i < mat.innerSize(); ++i)
@@ -196,24 +198,25 @@ struct ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
}; };
template<typename Func, typename Derived> template<typename Func, typename Derived>
struct ei_redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling> struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
: public ei_redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime> : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
{}; {};
template<typename Func, typename Derived> template<typename Func, typename Derived>
struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling> struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
{ {
typedef typename Derived::Scalar Scalar; typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar; typedef typename packet_traits<Scalar>::type PacketScalar;
typedef typename Derived::Index Index; typedef typename Derived::Index Index;
static Scalar run(const Derived& mat, const Func& func) static Scalar run(const Derived& mat, const Func& func)
{ {
const Index size = mat.size(); const Index size = mat.size();
const Index packetSize = ei_packet_traits<Scalar>::size; eigen_assert(size && "you are using an empty matrix");
const Index alignedStart = ei_first_aligned(mat); const Index packetSize = packet_traits<Scalar>::size;
const Index alignedStart = first_aligned(mat);
enum { enum {
alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit) alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
? Aligned : Unaligned ? Aligned : Unaligned
}; };
const Index alignedSize = ((size-alignedStart)/packetSize)*packetSize; const Index alignedSize = ((size-alignedStart)/packetSize)*packetSize;
@@ -245,18 +248,19 @@ struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
}; };
template<typename Func, typename Derived> template<typename Func, typename Derived>
struct ei_redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling> struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
{ {
typedef typename Derived::Scalar Scalar; typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar; typedef typename packet_traits<Scalar>::type PacketScalar;
typedef typename Derived::Index Index; typedef typename Derived::Index Index;
static Scalar run(const Derived& mat, const Func& func) static Scalar run(const Derived& mat, const Func& func)
{ {
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
const Index innerSize = mat.innerSize(); const Index innerSize = mat.innerSize();
const Index outerSize = mat.outerSize(); const Index outerSize = mat.outerSize();
enum { enum {
packetSize = ei_packet_traits<Scalar>::size packetSize = packet_traits<Scalar>::size
}; };
const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize; const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
Scalar res; Scalar res;
@@ -275,7 +279,7 @@ struct ei_redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
else // too small to vectorize anything. else // too small to vectorize anything.
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
{ {
res = ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func); res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
} }
return res; return res;
@@ -283,24 +287,31 @@ struct ei_redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
}; };
template<typename Func, typename Derived> template<typename Func, typename Derived>
struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling> struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
{ {
typedef typename Derived::Scalar Scalar; typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar; typedef typename packet_traits<Scalar>::type PacketScalar;
enum { enum {
PacketSize = ei_packet_traits<Scalar>::size, PacketSize = packet_traits<Scalar>::size,
Size = Derived::SizeAtCompileTime, Size = Derived::SizeAtCompileTime,
VectorizedSize = (Size / PacketSize) * PacketSize VectorizedSize = (Size / PacketSize) * PacketSize
}; };
EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func) EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func)
{ {
Scalar res = func.predux(ei_redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func)); eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
if (VectorizedSize != Size) if (VectorizedSize != Size)
res = func(res,ei_redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func)); res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
return res; return res;
} }
}; };
} // end namespace internal
/***************************************************************************
* Part 4 : public API
***************************************************************************/
/** \returns the result of a full redux operation on the whole matrix or vector using \a func /** \returns the result of a full redux operation on the whole matrix or vector using \a func
* *
@@ -311,30 +322,30 @@ struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling
*/ */
template<typename Derived> template<typename Derived>
template<typename Func> template<typename Func>
EIGEN_STRONG_INLINE typename ei_result_of<Func(typename ei_traits<Derived>::Scalar)>::type EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
DenseBase<Derived>::redux(const Func& func) const DenseBase<Derived>::redux(const Func& func) const
{ {
typedef typename ei_cleantype<typename Derived::Nested>::type ThisNested; typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested;
return ei_redux_impl<Func, ThisNested> return internal::redux_impl<Func, ThisNested>
::run(derived(), func); ::run(derived(), func);
} }
/** \returns the minimum of all coefficients of *this /** \returns the minimum of all coefficients of *this
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff() const DenseBase<Derived>::minCoeff() const
{ {
return this->redux(Eigen::ei_scalar_min_op<Scalar>()); return this->redux(Eigen::internal::scalar_min_op<Scalar>());
} }
/** \returns the maximum of all coefficients of *this /** \returns the maximum of all coefficients of *this
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff() const DenseBase<Derived>::maxCoeff() const
{ {
return this->redux(Eigen::ei_scalar_max_op<Scalar>()); return this->redux(Eigen::internal::scalar_max_op<Scalar>());
} }
/** \returns the sum of all coefficients of *this /** \returns the sum of all coefficients of *this
@@ -342,10 +353,12 @@ DenseBase<Derived>::maxCoeff() const
* \sa trace(), prod(), mean() * \sa trace(), prod(), mean()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::sum() const DenseBase<Derived>::sum() const
{ {
return this->redux(Eigen::ei_scalar_sum_op<Scalar>()); if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
return Scalar(0);
return this->redux(Eigen::internal::scalar_sum_op<Scalar>());
} }
/** \returns the mean of all coefficients of *this /** \returns the mean of all coefficients of *this
@@ -353,10 +366,10 @@ DenseBase<Derived>::sum() const
* \sa trace(), prod(), sum() * \sa trace(), prod(), sum()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::mean() const DenseBase<Derived>::mean() const
{ {
return Scalar(this->redux(Eigen::ei_scalar_sum_op<Scalar>())) / Scalar(this->size()); return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
} }
/** \returns the product of all coefficients of *this /** \returns the product of all coefficients of *this
@@ -367,10 +380,12 @@ DenseBase<Derived>::mean() const
* \sa sum(), mean(), trace() * \sa sum(), mean(), trace()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::prod() const DenseBase<Derived>::prod() const
{ {
return this->redux(Eigen::ei_scalar_product_op<Scalar>()); if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
return Scalar(1);
return this->redux(Eigen::internal::scalar_product_op<Scalar>());
} }
/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal. /** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
@@ -380,7 +395,7 @@ DenseBase<Derived>::prod() const
* \sa diagonal(), sum() * \sa diagonal(), sum()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
MatrixBase<Derived>::trace() const MatrixBase<Derived>::trace() const
{ {
return derived().diagonal().sum(); return derived().diagonal().sum();

View File

@@ -27,6 +27,7 @@
/** /**
* \class Replicate * \class Replicate
* \ingroup Core_Module
* *
* \brief Expression of the multiple replication of a matrix or vector * \brief Expression of the multiple replication of a matrix or vector
* *
@@ -38,15 +39,17 @@
* *
* \sa DenseBase::replicate() * \sa DenseBase::replicate()
*/ */
namespace internal {
template<typename MatrixType,int RowFactor,int ColFactor> template<typename MatrixType,int RowFactor,int ColFactor>
struct ei_traits<Replicate<MatrixType,RowFactor,ColFactor> > struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
: ei_traits<MatrixType> : traits<MatrixType>
{ {
typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Scalar Scalar;
typedef typename ei_traits<MatrixType>::StorageKind StorageKind; typedef typename traits<MatrixType>::StorageKind StorageKind;
typedef typename ei_traits<MatrixType>::XprKind XprKind; typedef typename traits<MatrixType>::XprKind XprKind;
typedef typename ei_nested<MatrixType>::type MatrixTypeNested; typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested; typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
enum { enum {
RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic
? Dynamic ? Dynamic
@@ -64,29 +67,30 @@ struct ei_traits<Replicate<MatrixType,RowFactor,ColFactor> >
CoeffReadCost = _MatrixTypeNested::CoeffReadCost CoeffReadCost = _MatrixTypeNested::CoeffReadCost
}; };
}; };
}
template<typename MatrixType,int RowFactor,int ColFactor> class Replicate template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
: public ei_dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type : public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type
{ {
public: public:
typedef typename ei_dense_xpr_base<Replicate>::type Base; typedef typename internal::dense_xpr_base<Replicate>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Replicate) EIGEN_DENSE_PUBLIC_INTERFACE(Replicate)
template<typename OriginalMatrixType> template<typename OriginalMatrixType>
inline explicit Replicate(const OriginalMatrixType& matrix) inline explicit Replicate(const OriginalMatrixType& matrix)
: m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor) : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
{ {
EIGEN_STATIC_ASSERT((ei_is_same_type<MatrixType,OriginalMatrixType>::ret), EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
ei_assert(RowFactor!=Dynamic && ColFactor!=Dynamic); eigen_assert(RowFactor!=Dynamic && ColFactor!=Dynamic);
} }
template<typename OriginalMatrixType> template<typename OriginalMatrixType>
inline Replicate(const OriginalMatrixType& matrix, int rowFactor, int colFactor) inline Replicate(const OriginalMatrixType& matrix, int rowFactor, int colFactor)
: m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
{ {
EIGEN_STATIC_ASSERT((ei_is_same_type<MatrixType,OriginalMatrixType>::ret), EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
} }
@@ -96,10 +100,10 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
inline Scalar coeff(Index row, Index col) const inline Scalar coeff(Index row, Index col) const
{ {
// try to avoid using modulo; this is a pure optimization strategy // try to avoid using modulo; this is a pure optimization strategy
const Index actual_row = ei_traits<MatrixType>::RowsAtCompileTime==1 ? 0 const Index actual_row = internal::traits<MatrixType>::RowsAtCompileTime==1 ? 0
: RowFactor==1 ? row : RowFactor==1 ? row
: row%m_matrix.rows(); : row%m_matrix.rows();
const Index actual_col = ei_traits<MatrixType>::ColsAtCompileTime==1 ? 0 const Index actual_col = internal::traits<MatrixType>::ColsAtCompileTime==1 ? 0
: ColFactor==1 ? col : ColFactor==1 ? col
: col%m_matrix.cols(); : col%m_matrix.cols();
@@ -108,10 +112,10 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
template<int LoadMode> template<int LoadMode>
inline PacketScalar packet(Index row, Index col) const inline PacketScalar packet(Index row, Index col) const
{ {
const Index actual_row = ei_traits<MatrixType>::RowsAtCompileTime==1 ? 0 const Index actual_row = internal::traits<MatrixType>::RowsAtCompileTime==1 ? 0
: RowFactor==1 ? row : RowFactor==1 ? row
: row%m_matrix.rows(); : row%m_matrix.rows();
const Index actual_col = ei_traits<MatrixType>::ColsAtCompileTime==1 ? 0 const Index actual_col = internal::traits<MatrixType>::ColsAtCompileTime==1 ? 0
: ColFactor==1 ? col : ColFactor==1 ? col
: col%m_matrix.cols(); : col%m_matrix.cols();
@@ -121,8 +125,8 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
protected: protected:
const typename MatrixType::Nested m_matrix; const typename MatrixType::Nested m_matrix;
const ei_variable_if_dynamic<Index, RowFactor> m_rowFactor; const internal::variable_if_dynamic<Index, RowFactor> m_rowFactor;
const ei_variable_if_dynamic<Index, ColFactor> m_colFactor; const internal::variable_if_dynamic<Index, ColFactor> m_colFactor;
}; };
/** /**

View File

@@ -27,45 +27,53 @@
#define EIGEN_RETURNBYVALUE_H #define EIGEN_RETURNBYVALUE_H
/** \class ReturnByValue /** \class ReturnByValue
* \ingroup Core_Module
* *
*/ */
namespace internal {
template<typename Derived> template<typename Derived>
struct ei_traits<ReturnByValue<Derived> > struct traits<ReturnByValue<Derived> >
: public ei_traits<typename ei_traits<Derived>::ReturnType> : public traits<typename traits<Derived>::ReturnType>
{ {
enum { enum {
// We're disabling the DirectAccess because e.g. the constructor of // We're disabling the DirectAccess because e.g. the constructor of
// the Block-with-DirectAccess expression requires to have a coeffRef method. // the Block-with-DirectAccess expression requires to have a coeffRef method.
// Also, we don't want to have to implement the stride stuff. // Also, we don't want to have to implement the stride stuff.
Flags = (ei_traits<typename ei_traits<Derived>::ReturnType>::Flags Flags = (traits<typename traits<Derived>::ReturnType>::Flags
| EvalBeforeNestingBit) & ~DirectAccessBit | EvalBeforeNestingBit) & ~DirectAccessBit
}; };
}; };
/* The ReturnByValue object doesn't even have a coeff() method. /* The ReturnByValue object doesn't even have a coeff() method.
* So the only way that nesting it in an expression can work, is by evaluating it into a plain matrix. * So the only way that nesting it in an expression can work, is by evaluating it into a plain matrix.
* So ei_nested always gives the plain return matrix type. * So internal::nested always gives the plain return matrix type.
*
* FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ??
*/ */
template<typename Derived,int n,typename PlainObject> template<typename Derived,int n,typename PlainObject>
struct ei_nested<ReturnByValue<Derived>, n, PlainObject> struct nested<ReturnByValue<Derived>, n, PlainObject>
{ {
typedef typename ei_traits<Derived>::ReturnType type; typedef typename traits<Derived>::ReturnType type;
}; };
} // end namespace internal
template<typename Derived> class ReturnByValue template<typename Derived> class ReturnByValue
: public ei_dense_xpr_base< ReturnByValue<Derived> >::type : public internal::dense_xpr_base< ReturnByValue<Derived> >::type
{ {
public: public:
typedef typename ei_traits<Derived>::ReturnType ReturnType; typedef typename internal::traits<Derived>::ReturnType ReturnType;
typedef typename ei_dense_xpr_base<ReturnByValue>::type Base; typedef typename internal::dense_xpr_base<ReturnByValue>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue) EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue)
template<typename Dest> template<typename Dest>
inline void evalTo(Dest& dst) const inline void evalTo(Dest& dst) const
{ static_cast<const Derived* const>(this)->evalTo(dst); } { static_cast<const Derived*>(this)->evalTo(dst); }
inline Index rows() const { return static_cast<const Derived* const>(this)->rows(); } inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
inline Index cols() const { return static_cast<const Derived* const>(this)->cols(); } inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
#define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT #define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT

View File

@@ -28,6 +28,7 @@
#define EIGEN_REVERSE_H #define EIGEN_REVERSE_H
/** \class Reverse /** \class Reverse
* \ingroup Core_Module
* *
* \brief Expression of the reverse of a vector or matrix * \brief Expression of the reverse of a vector or matrix
* *
@@ -39,15 +40,18 @@
* *
* \sa MatrixBase::reverse(), VectorwiseOp::reverse() * \sa MatrixBase::reverse(), VectorwiseOp::reverse()
*/ */
namespace internal {
template<typename MatrixType, int Direction> template<typename MatrixType, int Direction>
struct ei_traits<Reverse<MatrixType, Direction> > struct traits<Reverse<MatrixType, Direction> >
: ei_traits<MatrixType> : traits<MatrixType>
{ {
typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Scalar Scalar;
typedef typename ei_traits<MatrixType>::StorageKind StorageKind; typedef typename traits<MatrixType>::StorageKind StorageKind;
typedef typename ei_traits<MatrixType>::XprKind XprKind; typedef typename traits<MatrixType>::XprKind XprKind;
typedef typename ei_nested<MatrixType>::type MatrixTypeNested; typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested; typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
enum { enum {
RowsAtCompileTime = MatrixType::RowsAtCompileTime, RowsAtCompileTime = MatrixType::RowsAtCompileTime,
ColsAtCompileTime = MatrixType::ColsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime,
@@ -58,27 +62,30 @@ struct ei_traits<Reverse<MatrixType, Direction> >
LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) ) LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) )
? LinearAccessBit : 0, ? LinearAccessBit : 0,
Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | PacketAccessBit | LinearAccess), Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess),
CoeffReadCost = _MatrixTypeNested::CoeffReadCost CoeffReadCost = _MatrixTypeNested::CoeffReadCost
}; };
}; };
template<typename PacketScalar, bool ReversePacket> struct ei_reverse_packet_cond template<typename PacketScalar, bool ReversePacket> struct reverse_packet_cond
{ {
static inline PacketScalar run(const PacketScalar& x) { return ei_preverse(x); } static inline PacketScalar run(const PacketScalar& x) { return preverse(x); }
}; };
template<typename PacketScalar> struct ei_reverse_packet_cond<PacketScalar,false>
template<typename PacketScalar> struct reverse_packet_cond<PacketScalar,false>
{ {
static inline PacketScalar run(const PacketScalar& x) { return x; } static inline PacketScalar run(const PacketScalar& x) { return x; }
}; };
} // end namespace internal
template<typename MatrixType, int Direction> class Reverse template<typename MatrixType, int Direction> class Reverse
: public ei_dense_xpr_base< Reverse<MatrixType, Direction> >::type : public internal::dense_xpr_base< Reverse<MatrixType, Direction> >::type
{ {
public: public:
typedef typename ei_dense_xpr_base<Reverse>::type Base; typedef typename internal::dense_xpr_base<Reverse>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Reverse) EIGEN_DENSE_PUBLIC_INTERFACE(Reverse)
using Base::IsRowMajor; using Base::IsRowMajor;
@@ -88,7 +95,7 @@ template<typename MatrixType, int Direction> class Reverse
protected: protected:
enum { enum {
PacketSize = ei_packet_traits<Scalar>::size, PacketSize = internal::packet_traits<Scalar>::size,
IsColMajor = !IsRowMajor, IsColMajor = !IsRowMajor,
ReverseRow = (Direction == Vertical) || (Direction == BothDirections), ReverseRow = (Direction == Vertical) || (Direction == BothDirections),
ReverseCol = (Direction == Horizontal) || (Direction == BothDirections), ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),
@@ -98,7 +105,7 @@ template<typename MatrixType, int Direction> class Reverse
|| ((Direction == Vertical) && IsColMajor) || ((Direction == Vertical) && IsColMajor)
|| ((Direction == Horizontal) && IsRowMajor) || ((Direction == Horizontal) && IsRowMajor)
}; };
typedef ei_reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet; typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
public: public:
inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { }
@@ -108,9 +115,14 @@ template<typename MatrixType, int Direction> class Reverse
inline Index rows() const { return m_matrix.rows(); } inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); } inline Index cols() const { return m_matrix.cols(); }
inline Index innerStride() const
{
return -m_matrix.innerStride();
}
inline Scalar& operator()(Index row, Index col) inline Scalar& operator()(Index row, Index col)
{ {
ei_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
return coeffRef(row, col); return coeffRef(row, col);
} }
@@ -120,13 +132,13 @@ template<typename MatrixType, int Direction> class Reverse
ReverseCol ? m_matrix.cols() - col - 1 : col); ReverseCol ? m_matrix.cols() - col - 1 : col);
} }
inline const Scalar coeff(Index row, Index col) const inline CoeffReturnType coeff(Index row, Index col) const
{ {
return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row, return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row,
ReverseCol ? m_matrix.cols() - col - 1 : col); ReverseCol ? m_matrix.cols() - col - 1 : col);
} }
inline const Scalar coeff(Index index) const inline CoeffReturnType coeff(Index index) const
{ {
return m_matrix.coeff(m_matrix.size() - index - 1); return m_matrix.coeff(m_matrix.size() - index - 1);
} }
@@ -138,7 +150,7 @@ template<typename MatrixType, int Direction> class Reverse
inline Scalar& operator()(Index index) inline Scalar& operator()(Index index)
{ {
ei_assert(index >= 0 && index < m_matrix.size()); eigen_assert(index >= 0 && index < m_matrix.size());
return coeffRef(index); return coeffRef(index);
} }
@@ -162,13 +174,13 @@ template<typename MatrixType, int Direction> class Reverse
template<int LoadMode> template<int LoadMode>
inline const PacketScalar packet(Index index) const inline const PacketScalar packet(Index index) const
{ {
return ei_preverse(m_matrix.template packet<LoadMode>( m_matrix.size() - index - PacketSize )); return internal::preverse(m_matrix.template packet<LoadMode>( m_matrix.size() - index - PacketSize ));
} }
template<int LoadMode> template<int LoadMode>
inline void writePacket(Index index, const PacketScalar& x) inline void writePacket(Index index, const PacketScalar& x)
{ {
m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, ei_preverse(x)); m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x));
} }
protected: protected:
@@ -182,7 +194,7 @@ template<typename MatrixType, int Direction> class Reverse
* *
*/ */
template<typename Derived> template<typename Derived>
inline Reverse<Derived, BothDirections> inline typename DenseBase<Derived>::ReverseReturnType
DenseBase<Derived>::reverse() DenseBase<Derived>::reverse()
{ {
return derived(); return derived();
@@ -190,7 +202,7 @@ DenseBase<Derived>::reverse()
/** This is the const version of reverse(). */ /** This is the const version of reverse(). */
template<typename Derived> template<typename Derived>
inline const Reverse<Derived, BothDirections> inline const typename DenseBase<Derived>::ConstReverseReturnType
DenseBase<Derived>::reverse() const DenseBase<Derived>::reverse() const
{ {
return derived(); return derived();
@@ -204,7 +216,7 @@ DenseBase<Derived>::reverse() const
* the following additional features: * the following additional features:
* - less error prone: doing the same operation with .reverse() requires special care: * - less error prone: doing the same operation with .reverse() requires special care:
* \code m = m.reverse().eval(); \endcode * \code m = m.reverse().eval(); \endcode
* - no temporary object is created (currently there is one created but could be avoided using swap) * - this API allows to avoid creating a temporary (the current implementation creates a temporary, but that could be avoided using swap)
* - it allows future optimizations (cache friendliness, etc.) * - it allows future optimizations (cache friendliness, etc.)
* *
* \sa reverse() */ * \sa reverse() */

View File

@@ -26,6 +26,7 @@
#define EIGEN_SELECT_H #define EIGEN_SELECT_H
/** \class Select /** \class Select
* \ingroup Core_Module
* *
* \brief Expression of a coefficient wise version of the C++ ternary operator ?: * \brief Expression of a coefficient wise version of the C++ ternary operator ?:
* *
@@ -39,13 +40,14 @@
* \sa DenseBase::select(const DenseBase<ThenDerived>&, const DenseBase<ElseDerived>&) const * \sa DenseBase::select(const DenseBase<ThenDerived>&, const DenseBase<ElseDerived>&) const
*/ */
namespace internal {
template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType> template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
struct ei_traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > struct traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
: ei_traits<ThenMatrixType> : traits<ThenMatrixType>
{ {
typedef typename ei_traits<ThenMatrixType>::Scalar Scalar; typedef typename traits<ThenMatrixType>::Scalar Scalar;
typedef Dense StorageKind; typedef Dense StorageKind;
typedef typename ei_traits<ThenMatrixType>::XprKind XprKind; typedef typename traits<ThenMatrixType>::XprKind XprKind;
typedef typename ConditionMatrixType::Nested ConditionMatrixNested; typedef typename ConditionMatrixType::Nested ConditionMatrixNested;
typedef typename ThenMatrixType::Nested ThenMatrixNested; typedef typename ThenMatrixType::Nested ThenMatrixNested;
typedef typename ElseMatrixType::Nested ElseMatrixNested; typedef typename ElseMatrixType::Nested ElseMatrixNested;
@@ -55,19 +57,20 @@ struct ei_traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime, MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime, MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime,
Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits, Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits,
CoeffReadCost = ei_traits<typename ei_cleantype<ConditionMatrixNested>::type>::CoeffReadCost CoeffReadCost = traits<typename remove_all<ConditionMatrixNested>::type>::CoeffReadCost
+ EIGEN_SIZE_MAX(ei_traits<typename ei_cleantype<ThenMatrixNested>::type>::CoeffReadCost, + EIGEN_SIZE_MAX(traits<typename remove_all<ThenMatrixNested>::type>::CoeffReadCost,
ei_traits<typename ei_cleantype<ElseMatrixNested>::type>::CoeffReadCost) traits<typename remove_all<ElseMatrixNested>::type>::CoeffReadCost)
}; };
}; };
}
template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType> template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
class Select : ei_no_assignment_operator, class Select : internal::no_assignment_operator,
public ei_dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type public internal::dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type
{ {
public: public:
typedef typename ei_dense_xpr_base<Select>::type Base; typedef typename internal::dense_xpr_base<Select>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Select) EIGEN_DENSE_PUBLIC_INTERFACE(Select)
Select(const ConditionMatrixType& conditionMatrix, Select(const ConditionMatrixType& conditionMatrix,
@@ -75,8 +78,8 @@ class Select : ei_no_assignment_operator,
const ElseMatrixType& elseMatrix) const ElseMatrixType& elseMatrix)
: m_condition(conditionMatrix), m_then(thenMatrix), m_else(elseMatrix) : m_condition(conditionMatrix), m_then(thenMatrix), m_else(elseMatrix)
{ {
ei_assert(m_condition.rows() == m_then.rows() && m_condition.rows() == m_else.rows()); eigen_assert(m_condition.rows() == m_then.rows() && m_condition.rows() == m_else.rows());
ei_assert(m_condition.cols() == m_then.cols() && m_condition.cols() == m_else.cols()); eigen_assert(m_condition.cols() == m_then.cols() && m_condition.cols() == m_else.cols());
} }
Index rows() const { return m_condition.rows(); } Index rows() const { return m_condition.rows(); }

View File

@@ -26,6 +26,7 @@
#define EIGEN_SELFADJOINTMATRIX_H #define EIGEN_SELFADJOINTMATRIX_H
/** \class SelfAdjointView /** \class SelfAdjointView
* \ingroup Core_Module
* *
* *
* \brief Expression of a selfadjoint matrix from a triangular part of a dense matrix * \brief Expression of a selfadjoint matrix from a triangular part of a dense matrix
@@ -39,19 +40,23 @@
* *
* \sa class TriangularBase, MatrixBase::selfAdjointView() * \sa class TriangularBase, MatrixBase::selfAdjointView()
*/ */
namespace internal {
template<typename MatrixType, unsigned int UpLo> template<typename MatrixType, unsigned int UpLo>
struct ei_traits<SelfAdjointView<MatrixType, UpLo> > : ei_traits<MatrixType> struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
{ {
typedef typename ei_nested<MatrixType>::type MatrixTypeNested; typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested; typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
typedef MatrixType ExpressionType; typedef MatrixType ExpressionType;
typedef typename MatrixType::PlainObject DenseMatrixType;
enum { enum {
Mode = UpLo | SelfAdjoint, Mode = UpLo | SelfAdjoint,
Flags = _MatrixTypeNested::Flags & (HereditaryBits) Flags = MatrixTypeNestedCleaned::Flags & (HereditaryBits)
& (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)), // FIXME these flags should be preserved & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)), // FIXME these flags should be preserved
CoeffReadCost = _MatrixTypeNested::CoeffReadCost CoeffReadCost = MatrixTypeNestedCleaned::CoeffReadCost
}; };
}; };
}
template <typename Lhs, int LhsMode, bool LhsIsVector, template <typename Lhs, int LhsMode, bool LhsIsVector,
typename Rhs, int RhsMode, bool RhsIsVector> typename Rhs, int RhsMode, bool RhsIsVector>
@@ -64,16 +69,21 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
public: public:
typedef TriangularBase<SelfAdjointView> Base; typedef TriangularBase<SelfAdjointView> Base;
typedef typename ei_traits<SelfAdjointView>::Scalar Scalar; typedef typename internal::traits<SelfAdjointView>::MatrixTypeNested MatrixTypeNested;
typedef typename internal::traits<SelfAdjointView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
/** \brief The type of coefficients in this matrix */
typedef typename internal::traits<SelfAdjointView>::Scalar Scalar;
typedef typename MatrixType::Index Index; typedef typename MatrixType::Index Index;
enum { enum {
Mode = ei_traits<SelfAdjointView>::Mode Mode = internal::traits<SelfAdjointView>::Mode
}; };
typedef typename MatrixType::PlainObject PlainObject; typedef typename MatrixType::PlainObject PlainObject;
inline SelfAdjointView(const MatrixType& matrix) : m_matrix(matrix) inline SelfAdjointView(const MatrixType& matrix) : m_matrix(matrix)
{ ei_assert(ei_are_flags_consistent<Mode>::ret); } {}
inline Index rows() const { return m_matrix.rows(); } inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); } inline Index cols() const { return m_matrix.cols(); }
@@ -99,10 +109,10 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
} }
/** \internal */ /** \internal */
const MatrixType& _expression() const { return m_matrix; } const MatrixTypeNestedCleaned& _expression() const { return m_matrix; }
const MatrixType& nestedExpression() const { return m_matrix; } const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
MatrixType& nestedExpression() { return const_cast<MatrixType&>(m_matrix); } MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
/** Efficient self-adjoint matrix times vector/matrix product */ /** Efficient self-adjoint matrix times vector/matrix product */
template<typename OtherDerived> template<typename OtherDerived>
@@ -125,7 +135,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
} }
/** Perform a symmetric rank 2 update of the selfadjoint matrix \c *this: /** Perform a symmetric rank 2 update of the selfadjoint matrix \c *this:
* \f$ this = this + \alpha ( u v^* + v u^*) \f$ * \f$ this = this + \alpha u v^* + conj(\alpha) v u^* \f$
* \returns a reference to \c *this * \returns a reference to \c *this
* *
* The vectors \a u and \c v \b must be column vectors, however they can be * The vectors \a u and \c v \b must be column vectors, however they can be
@@ -160,27 +170,52 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
/** Real part of #Scalar */ /** Real part of #Scalar */
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
/** Return type of eigenvalues() */ /** Return type of eigenvalues() */
typedef Matrix<RealScalar, ei_traits<MatrixType>::ColsAtCompileTime, 1> EigenvaluesReturnType; typedef Matrix<RealScalar, internal::traits<MatrixType>::ColsAtCompileTime, 1> EigenvaluesReturnType;
EigenvaluesReturnType eigenvalues() const; EigenvaluesReturnType eigenvalues() const;
RealScalar operatorNorm() const; RealScalar operatorNorm() const;
#ifdef EIGEN2_SUPPORT
template<typename OtherDerived>
SelfAdjointView& operator=(const MatrixBase<OtherDerived>& other)
{
enum {
OtherPart = UpLo == Upper ? StrictlyLower : StrictlyUpper
};
m_matrix.const_cast_derived().template triangularView<UpLo>() = other;
m_matrix.const_cast_derived().template triangularView<OtherPart>() = other.adjoint();
return *this;
}
template<typename OtherMatrixType, unsigned int OtherMode>
SelfAdjointView& operator=(const TriangularView<OtherMatrixType, OtherMode>& other)
{
enum {
OtherPart = UpLo == Upper ? StrictlyLower : StrictlyUpper
};
m_matrix.const_cast_derived().template triangularView<UpLo>() = other.toDenseMatrix();
m_matrix.const_cast_derived().template triangularView<OtherPart>() = other.toDenseMatrix().adjoint();
return *this;
}
#endif
protected: protected:
const typename MatrixType::Nested m_matrix; const MatrixTypeNested m_matrix;
}; };
// template<typename OtherDerived, typename MatrixType, unsigned int UpLo> // template<typename OtherDerived, typename MatrixType, unsigned int UpLo>
// ei_selfadjoint_matrix_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo> > // internal::selfadjoint_matrix_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo> >
// operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView<MatrixType,UpLo>& rhs) // operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView<MatrixType,UpLo>& rhs)
// { // {
// return ei_matrix_selfadjoint_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo> >(lhs.derived(),rhs); // return internal::matrix_selfadjoint_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo> >(lhs.derived(),rhs);
// } // }
// selfadjoint to dense matrix // selfadjoint to dense matrix
namespace internal {
template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite> template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount, ClearOpposite> struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount, ClearOpposite>
{ {
enum { enum {
col = (UnrollCount-1) / Derived1::RowsAtCompileTime, col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
@@ -189,23 +224,23 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper)
inline static void run(Derived1 &dst, const Derived2 &src) inline static void run(Derived1 &dst, const Derived2 &src)
{ {
ei_triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src); triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);
if(row == col) if(row == col)
dst.coeffRef(row, col) = ei_real(src.coeff(row, col)); dst.coeffRef(row, col) = real(src.coeff(row, col));
else if(row < col) else if(row < col)
dst.coeffRef(col, row) = ei_conj(dst.coeffRef(row, col) = src.coeff(row, col)); dst.coeffRef(col, row) = conj(dst.coeffRef(row, col) = src.coeff(row, col));
} }
}; };
template<typename Derived1, typename Derived2, bool ClearOpposite> template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite> struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
{ {
inline static void run(Derived1 &, const Derived2 &) {} inline static void run(Derived1 &, const Derived2 &) {}
}; };
template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite> template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount, ClearOpposite> struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount, ClearOpposite>
{ {
enum { enum {
col = (UnrollCount-1) / Derived1::RowsAtCompileTime, col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
@@ -214,23 +249,23 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower)
inline static void run(Derived1 &dst, const Derived2 &src) inline static void run(Derived1 &dst, const Derived2 &src)
{ {
ei_triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src); triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);
if(row == col) if(row == col)
dst.coeffRef(row, col) = ei_real(src.coeff(row, col)); dst.coeffRef(row, col) = real(src.coeff(row, col));
else if(row > col) else if(row > col)
dst.coeffRef(col, row) = ei_conj(dst.coeffRef(row, col) = src.coeff(row, col)); dst.coeffRef(col, row) = conj(dst.coeffRef(row, col) = src.coeff(row, col));
} }
}; };
template<typename Derived1, typename Derived2, bool ClearOpposite> template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite> struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
{ {
inline static void run(Derived1 &, const Derived2 &) {} inline static void run(Derived1 &, const Derived2 &) {}
}; };
template<typename Derived1, typename Derived2, bool ClearOpposite> template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite> struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite>
{ {
typedef typename Derived1::Index Index; typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src) inline static void run(Derived1 &dst, const Derived2 &src)
@@ -240,7 +275,7 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper,
for(Index i = 0; i < j; ++i) for(Index i = 0; i < j; ++i)
{ {
dst.copyCoeff(i, j, src); dst.copyCoeff(i, j, src);
dst.coeffRef(j,i) = ei_conj(dst.coeff(i,j)); dst.coeffRef(j,i) = conj(dst.coeff(i,j));
} }
dst.copyCoeff(j, j, src); dst.copyCoeff(j, j, src);
} }
@@ -248,7 +283,7 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper,
}; };
template<typename Derived1, typename Derived2, bool ClearOpposite> template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite> struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
{ {
inline static void run(Derived1 &dst, const Derived2 &src) inline static void run(Derived1 &dst, const Derived2 &src)
{ {
@@ -258,27 +293,31 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower,
for(Index j = 0; j < i; ++j) for(Index j = 0; j < i; ++j)
{ {
dst.copyCoeff(i, j, src); dst.copyCoeff(i, j, src);
dst.coeffRef(j,i) = ei_conj(dst.coeff(i,j)); dst.coeffRef(j,i) = conj(dst.coeff(i,j));
} }
dst.copyCoeff(i, i, src); dst.copyCoeff(i, i, src);
} }
} }
}; };
} // end namespace internal
/*************************************************************************** /***************************************************************************
* Implementation of MatrixBase methods * Implementation of MatrixBase methods
***************************************************************************/ ***************************************************************************/
template<typename Derived> template<typename Derived>
template<unsigned int UpLo> template<unsigned int UpLo>
const SelfAdjointView<Derived, UpLo> MatrixBase<Derived>::selfadjointView() const typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type
MatrixBase<Derived>::selfadjointView() const
{ {
return derived(); return derived();
} }
template<typename Derived> template<typename Derived>
template<unsigned int UpLo> template<unsigned int UpLo>
SelfAdjointView<Derived, UpLo> MatrixBase<Derived>::selfadjointView() typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type
MatrixBase<Derived>::selfadjointView()
{ {
return derived(); return derived();
} }

View File

@@ -26,6 +26,7 @@
#define EIGEN_SELFCWISEBINARYOP_H #define EIGEN_SELFCWISEBINARYOP_H
/** \class SelfCwiseBinaryOp /** \class SelfCwiseBinaryOp
* \ingroup Core_Module
* *
* \internal * \internal
* *
@@ -38,25 +39,33 @@
* *
* \sa class SwapWrapper for a similar trick. * \sa class SwapWrapper for a similar trick.
*/ */
template<typename BinaryOp, typename MatrixType>
struct ei_traits<SelfCwiseBinaryOp<BinaryOp,MatrixType> > : ei_traits<MatrixType>
{
};
template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp namespace internal {
: public ei_dense_xpr_base< SelfCwiseBinaryOp<BinaryOp, MatrixType> >::type template<typename BinaryOp, typename Lhs, typename Rhs>
struct traits<SelfCwiseBinaryOp<BinaryOp,Lhs,Rhs> >
: traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >
{
enum {
// Note that it is still a good idea to preserve the DirectAccessBit
// so that assign can correctly align the data.
Flags = traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit),
OuterStrideAtCompileTime = Lhs::OuterStrideAtCompileTime,
InnerStrideAtCompileTime = Lhs::InnerStrideAtCompileTime
};
};
}
template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
: public internal::dense_xpr_base< SelfCwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
{ {
public: public:
typedef typename ei_dense_xpr_base<SelfCwiseBinaryOp>::type Base; typedef typename internal::dense_xpr_base<SelfCwiseBinaryOp>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(SelfCwiseBinaryOp) EIGEN_DENSE_PUBLIC_INTERFACE(SelfCwiseBinaryOp)
typedef typename ei_packet_traits<Scalar>::type Packet; typedef typename internal::packet_traits<Scalar>::type Packet;
using Base::operator=; inline SelfCwiseBinaryOp(Lhs& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}
inline SelfCwiseBinaryOp(MatrixType& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}
inline Index rows() const { return m_matrix.rows(); } inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); } inline Index cols() const { return m_matrix.cols(); }
@@ -68,12 +77,22 @@ template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
// TODO make Assign use .data() // TODO make Assign use .data()
inline Scalar& coeffRef(Index row, Index col) inline Scalar& coeffRef(Index row, Index col)
{ {
EIGEN_STATIC_ASSERT_LVALUE(Lhs)
return m_matrix.const_cast_derived().coeffRef(row, col); return m_matrix.const_cast_derived().coeffRef(row, col);
} }
inline const Scalar& coeffRef(Index row, Index col) const
{
return m_matrix.coeffRef(row, col);
}
// note that this function is needed by assign to correctly align loads/stores // note that this function is needed by assign to correctly align loads/stores
// TODO make Assign use .data() // TODO make Assign use .data()
inline Scalar& coeffRef(Index index) inline Scalar& coeffRef(Index index)
{
EIGEN_STATIC_ASSERT_LVALUE(Lhs)
return m_matrix.const_cast_derived().coeffRef(index);
}
inline const Scalar& coeffRef(Index index) const
{ {
return m_matrix.const_cast_derived().coeffRef(index); return m_matrix.const_cast_derived().coeffRef(index);
} }
@@ -82,7 +101,7 @@ template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other) void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
{ {
OtherDerived& _other = other.const_cast_derived(); OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(row >= 0 && row < rows() eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols()); && col >= 0 && col < cols());
Scalar& tmp = m_matrix.coeffRef(row,col); Scalar& tmp = m_matrix.coeffRef(row,col);
tmp = m_functor(tmp, _other.coeff(row,col)); tmp = m_functor(tmp, _other.coeff(row,col));
@@ -92,7 +111,7 @@ template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
void copyCoeff(Index index, const DenseBase<OtherDerived>& other) void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
{ {
OtherDerived& _other = other.const_cast_derived(); OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(index >= 0 && index < m_matrix.size()); eigen_internal_assert(index >= 0 && index < m_matrix.size());
Scalar& tmp = m_matrix.coeffRef(index); Scalar& tmp = m_matrix.coeffRef(index);
tmp = m_functor(tmp, _other.coeff(index)); tmp = m_functor(tmp, _other.coeff(index));
} }
@@ -101,7 +120,7 @@ template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other) void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
{ {
OtherDerived& _other = other.const_cast_derived(); OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(row >= 0 && row < rows() eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols()); && col >= 0 && col < cols());
m_matrix.template writePacket<StoreMode>(row, col, m_matrix.template writePacket<StoreMode>(row, col,
m_functor.packetOp(m_matrix.template packet<StoreMode>(row, col),_other.template packet<LoadMode>(row, col)) ); m_functor.packetOp(m_matrix.template packet<StoreMode>(row, col),_other.template packet<LoadMode>(row, col)) );
@@ -111,7 +130,7 @@ template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
void copyPacket(Index index, const DenseBase<OtherDerived>& other) void copyPacket(Index index, const DenseBase<OtherDerived>& other)
{ {
OtherDerived& _other = other.const_cast_derived(); OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(index >= 0 && index < m_matrix.size()); eigen_internal_assert(index >= 0 && index < m_matrix.size());
m_matrix.template writePacket<StoreMode>(index, m_matrix.template writePacket<StoreMode>(index,
m_functor.packetOp(m_matrix.template packet<StoreMode>(index),_other.template packet<LoadMode>(index)) ); m_functor.packetOp(m_matrix.template packet<StoreMode>(index),_other.template packet<LoadMode>(index)) );
} }
@@ -121,26 +140,31 @@ template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
template<typename RhsDerived> template<typename RhsDerived>
EIGEN_STRONG_INLINE SelfCwiseBinaryOp& lazyAssign(const DenseBase<RhsDerived>& rhs) EIGEN_STRONG_INLINE SelfCwiseBinaryOp& lazyAssign(const DenseBase<RhsDerived>& rhs)
{ {
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(MatrixType,RhsDerived) EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs,RhsDerived)
EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename RhsDerived::Scalar);
EIGEN_STATIC_ASSERT((ei_functor_allows_mixing_real_and_complex<BinaryOp>::ret
? int(ei_is_same_type<typename MatrixType::RealScalar, typename RhsDerived::RealScalar>::ret)
: int(ei_is_same_type<typename MatrixType::Scalar, typename RhsDerived::Scalar>::ret)),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
#ifdef EIGEN_DEBUG_ASSIGN #ifdef EIGEN_DEBUG_ASSIGN
ei_assign_traits<SelfCwiseBinaryOp, RhsDerived>::debug(); internal::assign_traits<SelfCwiseBinaryOp, RhsDerived>::debug();
#endif #endif
ei_assert(rows() == rhs.rows() && cols() == rhs.cols()); eigen_assert(rows() == rhs.rows() && cols() == rhs.cols());
ei_assign_impl<SelfCwiseBinaryOp, RhsDerived>::run(*this,rhs.derived()); internal::assign_impl<SelfCwiseBinaryOp, RhsDerived>::run(*this,rhs.derived());
#ifndef EIGEN_NO_DEBUG #ifndef EIGEN_NO_DEBUG
this->checkTransposeAliasing(rhs.derived()); this->checkTransposeAliasing(rhs.derived());
#endif #endif
return *this; return *this;
} }
// overloaded to honor evaluation of special matrices
// maybe another solution would be to not use SelfCwiseBinaryOp
// at first...
SelfCwiseBinaryOp& operator=(const Rhs& _rhs)
{
typename internal::nested<Rhs>::type rhs(_rhs);
return Base::operator=(rhs);
}
protected: protected:
MatrixType& m_matrix; Lhs& m_matrix;
const BinaryOp& m_functor; const BinaryOp& m_functor;
private: private:
@@ -150,8 +174,8 @@ template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
template<typename Derived> template<typename Derived>
inline Derived& DenseBase<Derived>::operator*=(const Scalar& other) inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
{ {
SelfCwiseBinaryOp<ei_scalar_product_op<Scalar>, Derived> tmp(derived());
typedef typename Derived::PlainObject PlainObject; typedef typename Derived::PlainObject PlainObject;
SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
tmp = PlainObject::Constant(rows(),cols(),other); tmp = PlainObject::Constant(rows(),cols(),other);
return derived(); return derived();
} }
@@ -159,10 +183,11 @@ inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
template<typename Derived> template<typename Derived>
inline Derived& DenseBase<Derived>::operator/=(const Scalar& other) inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
{ {
SelfCwiseBinaryOp<typename ei_meta_if<NumTraits<Scalar>::IsInteger, typedef typename internal::conditional<NumTraits<Scalar>::IsInteger,
ei_scalar_quotient_op<Scalar>, internal::scalar_quotient_op<Scalar>,
ei_scalar_product_op<Scalar> >::ret, Derived> tmp(derived()); internal::scalar_product_op<Scalar> >::type BinOp;
typedef typename Derived::PlainObject PlainObject; typedef typename Derived::PlainObject PlainObject;
SelfCwiseBinaryOp<BinOp, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
tmp = PlainObject::Constant(rows(),cols(), NumTraits<Scalar>::IsInteger ? other : Scalar(1)/other); tmp = PlainObject::Constant(rows(),cols(), NumTraits<Scalar>::IsInteger ? other : Scalar(1)/other);
return derived(); return derived();
} }

View File

@@ -25,8 +25,19 @@
#ifndef EIGEN_SOLVETRIANGULAR_H #ifndef EIGEN_SOLVETRIANGULAR_H
#define EIGEN_SOLVETRIANGULAR_H #define EIGEN_SOLVETRIANGULAR_H
namespace internal {
// Forward declarations:
// The following two routines are implemented in the products/TriangularSolver*.h files
template<typename LhsScalar, typename RhsScalar, typename Index, int Side, int Mode, bool Conjugate, int StorageOrder>
struct triangular_solve_vector;
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherStorageOrder>
struct triangular_solve_matrix;
// small helper struct extracting some traits on the underlying solver operation
template<typename Lhs, typename Rhs, int Side> template<typename Lhs, typename Rhs, int Side>
class ei_trsolve_traits class trsolve_traits
{ {
private: private:
enum { enum {
@@ -43,150 +54,63 @@ class ei_trsolve_traits
template<typename Lhs, typename Rhs, template<typename Lhs, typename Rhs,
int Side, // can be OnTheLeft/OnTheRight int Side, // can be OnTheLeft/OnTheRight
int Mode, // can be Upper/Lower | UnitDiag int Mode, // can be Upper/Lower | UnitDiag
int Unrolling = ei_trsolve_traits<Lhs,Rhs,Side>::Unrolling, int Unrolling = trsolve_traits<Lhs,Rhs,Side>::Unrolling,
int StorageOrder = (int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor, int RhsVectors = trsolve_traits<Lhs,Rhs,Side>::RhsVectors
int RhsVectors = ei_trsolve_traits<Lhs,Rhs,Side>::RhsVectors
> >
struct ei_triangular_solver_selector; struct triangular_solver_selector;
// forward and backward substitution, row-major, rhs is a vector template<typename Lhs, typename Rhs, int Side, int Mode>
template<typename Lhs, typename Rhs, int Mode> struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>
struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,RowMajor,1>
{ {
typedef typename Rhs::Scalar Scalar; typedef typename Lhs::Scalar LhsScalar;
typedef ei_blas_traits<Lhs> LhsProductTraits; typedef typename Rhs::Scalar RhsScalar;
typedef blas_traits<Lhs> LhsProductTraits;
typedef typename LhsProductTraits::ExtractType ActualLhsType; typedef typename LhsProductTraits::ExtractType ActualLhsType;
typedef typename Lhs::Index Index; typedef Map<Matrix<RhsScalar,Dynamic,1>, Aligned> MappedRhs;
enum {
IsLower = ((Mode&Lower)==Lower)
};
static void run(const Lhs& lhs, Rhs& other)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
const Index size = lhs.cols();
for(Index pi=IsLower ? 0 : size;
IsLower ? pi<size : pi>0;
IsLower ? pi+=PanelWidth : pi-=PanelWidth)
{
Index actualPanelWidth = std::min(IsLower ? size - pi : pi, PanelWidth);
Index r = IsLower ? pi : size - pi; // remaining size
if (r > 0)
{
// let's directly call the low level product function because:
// 1 - it is faster to compile
// 2 - it is slightly faster at runtime
Index startRow = IsLower ? pi : pi-actualPanelWidth;
Index startCol = IsLower ? 0 : pi;
VectorBlock<Rhs,Dynamic> target(other,startRow,actualPanelWidth);
ei_cache_friendly_product_rowmajor_times_vector<LhsProductTraits::NeedToConjugate,false>(
&(actualLhs.const_cast_derived().coeffRef(startRow,startCol)), actualLhs.outerStride(),
&(other.coeffRef(startCol)), r,
target, Scalar(-1));
}
for(Index k=0; k<actualPanelWidth; ++k)
{
Index i = IsLower ? pi+k : pi-k-1;
Index s = IsLower ? pi : i+1;
if (k>0)
other.coeffRef(i) -= (lhs.row(i).segment(s,k).transpose().cwiseProduct(other.segment(s,k))).sum();
if(!(Mode & UnitDiag))
other.coeffRef(i) /= lhs.coeff(i,i);
}
}
}
};
// forward and backward substitution, column-major, rhs is a vector
template<typename Lhs, typename Rhs, int Mode>
struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,ColMajor,1>
{
typedef typename Rhs::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
typedef ei_blas_traits<Lhs> LhsProductTraits;
typedef typename LhsProductTraits::ExtractType ActualLhsType;
typedef typename Lhs::Index Index;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
IsLower = ((Mode&Lower)==Lower)
};
static void run(const Lhs& lhs, Rhs& other)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
const Index size = lhs.cols();
for(Index pi=IsLower ? 0 : size;
IsLower ? pi<size : pi>0;
IsLower ? pi+=PanelWidth : pi-=PanelWidth)
{
Index actualPanelWidth = std::min(IsLower ? size - pi : pi, PanelWidth);
Index startBlock = IsLower ? pi : pi-actualPanelWidth;
Index endBlock = IsLower ? pi + actualPanelWidth : 0;
for(Index k=0; k<actualPanelWidth; ++k)
{
Index i = IsLower ? pi+k : pi-k-1;
if(!(Mode & UnitDiag))
other.coeffRef(i) /= lhs.coeff(i,i);
Index r = actualPanelWidth - k - 1; // remaining size
Index s = IsLower ? i+1 : i-r;
if (r>0)
other.segment(s,r) -= other.coeffRef(i) * Block<Lhs,Dynamic,1>(lhs, s, i, r, 1);
}
Index r = IsLower ? size - endBlock : startBlock; // remaining size
if (r > 0)
{
// let's directly call the low level product function because:
// 1 - it is faster to compile
// 2 - it is slightly faster at runtime
ei_cache_friendly_product_colmajor_times_vector<LhsProductTraits::NeedToConjugate,false>(
r,
&(actualLhs.const_cast_derived().coeffRef(endBlock,startBlock)), actualLhs.outerStride(),
other.segment(startBlock, actualPanelWidth),
&(other.coeffRef(endBlock, 0)),
Scalar(-1));
}
}
}
};
// transpose OnTheRight cases for vectors
template<typename Lhs, typename Rhs, int Mode, int Unrolling, int StorageOrder>
struct ei_triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,Unrolling,StorageOrder,1>
{
static void run(const Lhs& lhs, Rhs& rhs) static void run(const Lhs& lhs, Rhs& rhs)
{ {
Transpose<Rhs> rhsTr(rhs); ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
Transpose<Lhs> lhsTr(lhs);
ei_triangular_solver_selector<Transpose<Lhs>,Transpose<Rhs>,OnTheLeft,TriangularView<Lhs,Mode>::TransposeMode>::run(lhsTr,rhsTr); // FIXME find a way to allow an inner stride if packet_traits<Scalar>::size==1
bool useRhsDirectly = Rhs::InnerStrideAtCompileTime==1 || rhs.innerStride()==1;
RhsScalar* actualRhs;
if(useRhsDirectly)
{
actualRhs = &rhs.coeffRef(0);
}
else
{
actualRhs = ei_aligned_stack_new(RhsScalar,rhs.size());
MappedRhs(actualRhs,rhs.size()) = rhs;
}
triangular_solve_vector<LhsScalar, RhsScalar, typename Lhs::Index, Side, Mode, LhsProductTraits::NeedToConjugate,
(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor>
::run(actualLhs.cols(), actualLhs.data(), actualLhs.outerStride(), actualRhs);
if(!useRhsDirectly)
{
rhs = MappedRhs(actualRhs, rhs.size());
ei_aligned_stack_delete(RhsScalar, actualRhs, rhs.size());
}
} }
}; };
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherStorageOrder>
struct ei_triangular_solve_matrix;
// the rhs is a matrix // the rhs is a matrix
template<typename Lhs, typename Rhs, int Side, int Mode, int StorageOrder> template<typename Lhs, typename Rhs, int Side, int Mode>
struct ei_triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,StorageOrder,Dynamic> struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
{ {
typedef typename Rhs::Scalar Scalar; typedef typename Rhs::Scalar Scalar;
typedef typename Rhs::Index Index; typedef typename Rhs::Index Index;
typedef ei_blas_traits<Lhs> LhsProductTraits; typedef blas_traits<Lhs> LhsProductTraits;
typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType; typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
static void run(const Lhs& lhs, Rhs& rhs) static void run(const Lhs& lhs, Rhs& rhs)
{ {
const ActualLhsType actualLhs = LhsProductTraits::extract(lhs); const ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
ei_triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,StorageOrder, triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor> (Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor>
::run(lhs.rows(), Side==OnTheLeft? rhs.cols() : rhs.rows(), &actualLhs.coeff(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride()); ::run(lhs.rows(), Side==OnTheLeft? rhs.cols() : rhs.rows(), &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride());
} }
}; };
@@ -196,10 +120,10 @@ struct ei_triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,StorageOrder,
template<typename Lhs, typename Rhs, int Mode, int Index, int Size, template<typename Lhs, typename Rhs, int Mode, int Index, int Size,
bool Stop = Index==Size> bool Stop = Index==Size>
struct ei_triangular_solver_unroller; struct triangular_solver_unroller;
template<typename Lhs, typename Rhs, int Mode, int Index, int Size> template<typename Lhs, typename Rhs, int Mode, int Index, int Size>
struct ei_triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,false> { struct triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,false> {
enum { enum {
IsLower = ((Mode&Lower)==Lower), IsLower = ((Mode&Lower)==Lower),
I = IsLower ? Index : Size - Index - 1, I = IsLower ? Index : Size - Index - 1,
@@ -208,33 +132,47 @@ struct ei_triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,false> {
static void run(const Lhs& lhs, Rhs& rhs) static void run(const Lhs& lhs, Rhs& rhs)
{ {
if (Index>0) if (Index>0)
rhs.coeffRef(I) -= lhs.row(I).template segment<Index>(S).transpose().cwiseProduct(rhs.template segment<Index>(S)).sum(); rhs.coeffRef(I) -= lhs.row(I).template segment<Index>(S).transpose()
.cwiseProduct(rhs.template segment<Index>(S)).sum();
if(!(Mode & UnitDiag)) if(!(Mode & UnitDiag))
rhs.coeffRef(I) /= lhs.coeff(I,I); rhs.coeffRef(I) /= lhs.coeff(I,I);
ei_triangular_solver_unroller<Lhs,Rhs,Mode,Index+1,Size>::run(lhs,rhs); triangular_solver_unroller<Lhs,Rhs,Mode,Index+1,Size>::run(lhs,rhs);
} }
}; };
template<typename Lhs, typename Rhs, int Mode, int Index, int Size> template<typename Lhs, typename Rhs, int Mode, int Index, int Size>
struct ei_triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,true> { struct triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,true> {
static void run(const Lhs&, Rhs&) {} static void run(const Lhs&, Rhs&) {}
}; };
template<typename Lhs, typename Rhs, int Mode, int StorageOrder> template<typename Lhs, typename Rhs, int Mode>
struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,CompleteUnrolling,StorageOrder,1> { struct triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,CompleteUnrolling,1> {
static void run(const Lhs& lhs, Rhs& rhs) static void run(const Lhs& lhs, Rhs& rhs)
{ ei_triangular_solver_unroller<Lhs,Rhs,Mode,0,Rhs::SizeAtCompileTime>::run(lhs,rhs); } { triangular_solver_unroller<Lhs,Rhs,Mode,0,Rhs::SizeAtCompileTime>::run(lhs,rhs); }
}; };
// Fully unrolled triangular solve for a single-vector rhs (RhsVectors==1) when the
// triangular factor is applied on the right (Side==OnTheRight).
// The problem is rewritten as a left-side solve on the transposed operands: both lhs
// and rhs are wrapped in Transpose<>, and the Upper/Lower part of Mode is swapped
// because transposing a triangular matrix flips its triangle. The UnitDiag bit is
// carried over unchanged. The actual work is done by triangular_solver_unroller,
// starting at index 0 and unrolling over Rhs::SizeAtCompileTime steps.
template<typename Lhs, typename Rhs, int Mode>
struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
static void run(const Lhs& lhs, Rhs& rhs)
{
// Transposed expressions of the two operands; rhs stays writable through trRhs.
Transpose<const Lhs> trLhs(lhs);
Transpose<Rhs> trRhs(rhs);
triangular_solver_unroller<Transpose<const Lhs>,Transpose<Rhs>,
// Upper becomes Lower (and vice versa); UnitDiag is preserved.
((Mode&Upper)==Upper ? Lower : Upper) | (Mode&UnitDiag),
0,Rhs::SizeAtCompileTime>::run(trLhs,trRhs);
}
};
} // end namespace internal
/*************************************************************************** /***************************************************************************
* TriangularView methods * TriangularView methods
***************************************************************************/ ***************************************************************************/
/** "in-place" version of TriangularView::solve() where the result is written in \a other /** "in-place" version of TriangularView::solve() where the result is written in \a other
*
*
* *
* \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here. * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
* This function will const_cast it, so constness isn't honored here. * This function will const_cast it, so constness isn't honored here.
@@ -246,17 +184,17 @@ template<int Side, typename OtherDerived>
void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived>& _other) const void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
{ {
OtherDerived& other = _other.const_cast_derived(); OtherDerived& other = _other.const_cast_derived();
ei_assert(cols() == rows()); eigen_assert(cols() == rows());
ei_assert( (Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols()) ); eigen_assert( (Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols()) );
ei_assert(!(Mode & ZeroDiag)); eigen_assert(!(Mode & ZeroDiag));
ei_assert(Mode & (Upper|Lower)); eigen_assert(Mode & (Upper|Lower));
enum { copy = ei_traits<OtherDerived>::Flags & RowMajorBit && OtherDerived::IsVectorAtCompileTime }; enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit && OtherDerived::IsVectorAtCompileTime };
typedef typename ei_meta_if<copy, typedef typename internal::conditional<copy,
typename ei_plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::ret OtherCopy; typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy;
OtherCopy otherCopy(other); OtherCopy otherCopy(other);
ei_triangular_solver_selector<MatrixType, typename ei_unref<OtherCopy>::type, internal::triangular_solver_selector<MatrixType, typename internal::remove_reference<OtherCopy>::type,
Side, Mode>::run(nestedExpression(), otherCopy); Side, Mode>::run(nestedExpression(), otherCopy);
if (copy) if (copy)
@@ -265,43 +203,68 @@ void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived
/** \returns the product of the inverse of \c *this with \a other, \a *this being triangular. /** \returns the product of the inverse of \c *this with \a other, \a *this being triangular.
* *
* This function computes the inverse-matrix matrix product inverse(\c *this) * \a other if
* \a Side==OnTheLeft (the default), or the right-inverse-multiply \a other * inverse(\c *this) if
* \a Side==OnTheRight.
* *
*
* This function computes the inverse-matrix matrix product inverse(\c *this) * \a other.
* The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the * The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the
* diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this * diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this
* is a lower (resp. upper) triangular matrix. * is a lower (resp. upper) triangular matrix.
* *
* It is required that \c *this be marked as either an upper or a lower triangular matrix, which
* can be done by marked(), and that is automatically the case with expressions such as those returned
* by extract().
*
* Example: \include MatrixBase_marked.cpp * Example: \include MatrixBase_marked.cpp
* Output: \verbinclude MatrixBase_marked.out * Output: \verbinclude MatrixBase_marked.out
* *
* This function is essentially a wrapper to the faster solveTriangularInPlace() function creating * This function returns an expression of the inverse-multiply and can work in-place if it is assigned
* a temporary copy of \a other, calling solveTriangularInPlace() on the copy and returning it. * to the same matrix or vector \a other.
* Therefore, if \a other is not needed anymore, it is quite faster to call solveTriangularInPlace()
* instead of solveTriangular().
* *
* For users coming from BLAS, this function (and more specifically solveTriangularInPlace()) offers * For users coming from BLAS, this function (and more specifically solveInPlace()) offers
* all the operations supported by the \c *TRSV and \c *TRSM BLAS routines. * all the operations supported by the \c *TRSV and \c *TRSM BLAS routines.
* *
* \b Tips: to perform a \em "right-inverse-multiply" you can simply transpose the operation, e.g.:
* \code
* M * T^-1 <=> T.transpose().solveInPlace(M.transpose());
* \endcode
*
* \sa TriangularView::solveInPlace() * \sa TriangularView::solveInPlace()
*/ */
template<typename Derived, unsigned int Mode> template<typename Derived, unsigned int Mode>
template<int Side, typename RhsDerived> template<int Side, typename Other>
typename ei_plain_matrix_type_column_major<RhsDerived>::type const internal::triangular_solve_retval<Side,TriangularView<Derived,Mode>,Other>
TriangularView<Derived,Mode>::solve(const MatrixBase<RhsDerived>& rhs) const TriangularView<Derived,Mode>::solve(const MatrixBase<Other>& other) const
{ {
typename ei_plain_matrix_type_column_major<RhsDerived>::type res(rhs); return internal::triangular_solve_retval<Side,TriangularView,Other>(*this, other.derived());
solveInPlace<Side>(res);
return res;
} }
namespace internal {
// Traits specialization for the triangular_solve_retval proxy.
// ReturnType is the type the proxy evaluates to: whatever
// plain_matrix_type_column_major yields for Rhs.
template<int Side, typename TriangularType, typename Rhs>
struct traits<triangular_solve_retval<Side, TriangularType, Rhs> >
{
typedef typename internal::plain_matrix_type_column_major<Rhs>::type ReturnType;
};
// Proxy object returned by TriangularView::solve().
// It records the triangular view and the right-hand side, and performs the solve only
// when evalTo() is called with the destination.
// NOTE: the triangular view is held by reference, so it must outlive this object.
template<int Side, typename TriangularType, typename Rhs> struct triangular_solve_retval
: public ReturnByValue<triangular_solve_retval<Side, TriangularType, Rhs> >
{
// Rhs::Nested with remove_all applied; compared against Dest in evalTo() to detect aliasing.
typedef typename remove_all<typename Rhs::Nested>::type RhsNestedCleaned;
typedef ReturnByValue<triangular_solve_retval> Base;
typedef typename Base::Index Index;
// Stores a reference to the triangular view and nests the right-hand side.
triangular_solve_retval(const TriangularType& tri, const Rhs& rhs)
: m_triangularMatrix(tri), m_rhs(rhs)
{}
// The result has the same dimensions as the right-hand side.
inline Index rows() const { return m_rhs.rows(); }
inline Index cols() const { return m_rhs.cols(); }
// Writes the solution into dst.
// The rhs is first copied into dst, unless dst has the same type as the nested rhs
// and both report the same data pointer (dst already holds the rhs values).
// The system is then solved in place on dst.
template<typename Dest> inline void evalTo(Dest& dst) const
{
if(!(is_same<RhsNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_rhs)))
dst = m_rhs;
m_triangularMatrix.template solveInPlace<Side>(dst);
}
protected:
// Held by reference: not owned by this object.
const TriangularType& m_triangularMatrix;
const typename Rhs::Nested m_rhs;
};
} // namespace internal
#endif // EIGEN_SOLVETRIANGULAR_H #endif // EIGEN_SOLVETRIANGULAR_H

View File

@@ -25,13 +25,14 @@
#ifndef EIGEN_STABLENORM_H #ifndef EIGEN_STABLENORM_H
#define EIGEN_STABLENORM_H #define EIGEN_STABLENORM_H
namespace internal {
template<typename ExpressionType, typename Scalar> template<typename ExpressionType, typename Scalar>
inline void ei_stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale) inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
{ {
Scalar max = bl.cwiseAbs().maxCoeff(); Scalar max = bl.cwiseAbs().maxCoeff();
if (max>scale) if (max>scale)
{ {
ssq = ssq * ei_abs2(scale/max); ssq = ssq * abs2(scale/max);
scale = max; scale = max;
invScale = Scalar(1)/scale; invScale = Scalar(1)/scale;
} }
@@ -39,6 +40,7 @@ inline void ei_stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar&
// then we can neglect this sub vector // then we can neglect this sub vector
ssq += (bl*invScale).squaredNorm(); ssq += (bl*invScale).squaredNorm();
} }
}
/** \returns the \em l2 norm of \c *this avoiding underflow and overflow. /** \returns the \em l2 norm of \c *this avoiding underflow and overflow.
* This version uses a blockwise two-pass algorithm: * This version uses a blockwise two-pass algorithm:
@@ -51,7 +53,7 @@ inline void ei_stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar&
* \sa norm(), blueNorm(), hypotNorm() * \sa norm(), blueNorm(), hypotNorm()
*/ */
template<typename Derived> template<typename Derived>
inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::stableNorm() const MatrixBase<Derived>::stableNorm() const
{ {
const Index blockSize = 4096; const Index blockSize = 4096;
@@ -62,12 +64,12 @@ MatrixBase<Derived>::stableNorm() const
Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0 Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0
}; };
Index n = size(); Index n = size();
Index bi = ei_first_aligned(derived()); Index bi = internal::first_aligned(derived());
if (bi>0) if (bi>0)
ei_stable_norm_kernel(this->head(bi), ssq, scale, invScale); internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale);
for (; bi<n; bi+=blockSize) for (; bi<n; bi+=blockSize)
ei_stable_norm_kernel(this->segment(bi,std::min(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale); internal::stable_norm_kernel(this->segment(bi,std::min(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
return scale * ei_sqrt(ssq); return scale * internal::sqrt(ssq);
} }
/** \returns the \em l2 norm of \c *this using Blue's algorithm. /** \returns the \em l2 norm of \c *this using Blue's algorithm.
@@ -80,7 +82,7 @@ MatrixBase<Derived>::stableNorm() const
* \sa norm(), stableNorm(), hypotNorm() * \sa norm(), stableNorm(), hypotNorm()
*/ */
template<typename Derived> template<typename Derived>
inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::blueNorm() const MatrixBase<Derived>::blueNorm() const
{ {
static Index nmax = -1; static Index nmax = -1;
@@ -116,7 +118,7 @@ MatrixBase<Derived>::blueNorm() const
overfl = rbig*s2m; // overflow boundary for abig overfl = rbig*s2m; // overflow boundary for abig
eps = RealScalar(std::pow(double(ibeta), 1-it)); eps = RealScalar(std::pow(double(ibeta), 1-it));
relerr = ei_sqrt(eps); // tolerance for neglecting asml relerr = internal::sqrt(eps); // tolerance for neglecting asml
abig = RealScalar(1.0/eps - 1.0); abig = RealScalar(1.0/eps - 1.0);
if (RealScalar(nbig)>abig) nmax = int(abig); // largest safe n if (RealScalar(nbig)>abig) nmax = int(abig); // largest safe n
else nmax = nbig; else nmax = nbig;
@@ -128,23 +130,23 @@ MatrixBase<Derived>::blueNorm() const
RealScalar abig = RealScalar(0); RealScalar abig = RealScalar(0);
for(Index j=0; j<n; ++j) for(Index j=0; j<n; ++j)
{ {
RealScalar ax = ei_abs(coeff(j)); RealScalar ax = internal::abs(coeff(j));
if(ax > ab2) abig += ei_abs2(ax*s2m); if(ax > ab2) abig += internal::abs2(ax*s2m);
else if(ax < b1) asml += ei_abs2(ax*s1m); else if(ax < b1) asml += internal::abs2(ax*s1m);
else amed += ei_abs2(ax); else amed += internal::abs2(ax);
} }
if(abig > RealScalar(0)) if(abig > RealScalar(0))
{ {
abig = ei_sqrt(abig); abig = internal::sqrt(abig);
if(abig > overfl) if(abig > overfl)
{ {
ei_assert(false && "overflow"); eigen_assert(false && "overflow");
return rbig; return rbig;
} }
if(amed > RealScalar(0)) if(amed > RealScalar(0))
{ {
abig = abig/s2m; abig = abig/s2m;
amed = ei_sqrt(amed); amed = internal::sqrt(amed);
} }
else else
return abig/s2m; return abig/s2m;
@@ -153,20 +155,20 @@ MatrixBase<Derived>::blueNorm() const
{ {
if (amed > RealScalar(0)) if (amed > RealScalar(0))
{ {
abig = ei_sqrt(amed); abig = internal::sqrt(amed);
amed = ei_sqrt(asml) / s1m; amed = internal::sqrt(asml) / s1m;
} }
else else
return ei_sqrt(asml)/s1m; return internal::sqrt(asml)/s1m;
} }
else else
return ei_sqrt(amed); return internal::sqrt(amed);
asml = std::min(abig, amed); asml = std::min(abig, amed);
abig = std::max(abig, amed); abig = std::max(abig, amed);
if(asml <= abig*relerr) if(asml <= abig*relerr)
return abig; return abig;
else else
return abig * ei_sqrt(RealScalar(1) + ei_abs2(asml/abig)); return abig * internal::sqrt(RealScalar(1) + internal::abs2(asml/abig));
} }
/** \returns the \em l2 norm of \c *this avoiding underflow and overflow. /** \returns the \em l2 norm of \c *this avoiding underflow and overflow.
@@ -175,10 +177,10 @@ MatrixBase<Derived>::blueNorm() const
* \sa norm(), stableNorm() * \sa norm(), stableNorm()
*/ */
template<typename Derived> template<typename Derived>
inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::hypotNorm() const MatrixBase<Derived>::hypotNorm() const
{ {
return this->cwiseAbs().redux(ei_scalar_hypot_op<RealScalar>()); return this->cwiseAbs().redux(internal::scalar_hypot_op<RealScalar>());
} }
#endif // EIGEN_STABLENORM_H #endif // EIGEN_STABLENORM_H

View File

@@ -26,6 +26,7 @@
#define EIGEN_STRIDE_H #define EIGEN_STRIDE_H
/** \class Stride /** \class Stride
* \ingroup Core_Module
* *
* \brief Holds strides information for Map * \brief Holds strides information for Map
* *
@@ -50,7 +51,7 @@
* \include Map_general_stride.cpp * \include Map_general_stride.cpp
* Output: \verbinclude Map_general_stride.out * Output: \verbinclude Map_general_stride.out
* *
* \sa class InnerStride, class OuterStride * \sa class InnerStride, class OuterStride, \ref TopicStorageOrders
*/ */
template<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime> template<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime>
class Stride class Stride
@@ -66,14 +67,14 @@ class Stride
Stride() Stride()
: m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime) : m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
{ {
ei_assert(InnerStrideAtCompileTime != Dynamic && OuterStrideAtCompileTime != Dynamic); eigen_assert(InnerStrideAtCompileTime != Dynamic && OuterStrideAtCompileTime != Dynamic);
} }
/** Constructor allowing to pass the strides at runtime */ /** Constructor allowing to pass the strides at runtime */
Stride(Index outerStride, Index innerStride) Stride(Index outerStride, Index innerStride)
: m_outer(outerStride), m_inner(innerStride) : m_outer(outerStride), m_inner(innerStride)
{ {
ei_assert(innerStride>=0 && outerStride>=0); eigen_assert(innerStride>=0 && outerStride>=0);
} }
/** Copy constructor */ /** Copy constructor */
@@ -87,8 +88,8 @@ class Stride
inline Index inner() const { return m_inner.value(); } inline Index inner() const { return m_inner.value(); }
protected: protected:
ei_variable_if_dynamic<Index, OuterStrideAtCompileTime> m_outer; internal::variable_if_dynamic<Index, OuterStrideAtCompileTime> m_outer;
ei_variable_if_dynamic<Index, InnerStrideAtCompileTime> m_inner; internal::variable_if_dynamic<Index, InnerStrideAtCompileTime> m_inner;
}; };
/** \brief Convenience specialization of Stride to specify only an inner stride /** \brief Convenience specialization of Stride to specify only an inner stride

View File

@@ -26,22 +26,25 @@
#define EIGEN_SWAP_H #define EIGEN_SWAP_H
/** \class SwapWrapper /** \class SwapWrapper
* \ingroup Core_Module
* *
* \internal * \internal
* *
* \brief Internal helper class for swapping two expressions * \brief Internal helper class for swapping two expressions
*/ */
namespace internal {
template<typename ExpressionType> template<typename ExpressionType>
struct ei_traits<SwapWrapper<ExpressionType> > : ei_traits<ExpressionType> {}; struct traits<SwapWrapper<ExpressionType> > : traits<ExpressionType> {};
}
template<typename ExpressionType> class SwapWrapper template<typename ExpressionType> class SwapWrapper
: public ei_dense_xpr_base<SwapWrapper<ExpressionType> >::type : public internal::dense_xpr_base<SwapWrapper<ExpressionType> >::type
{ {
public: public:
typedef typename ei_dense_xpr_base<SwapWrapper>::type Base; typedef typename internal::dense_xpr_base<SwapWrapper>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(SwapWrapper) EIGEN_DENSE_PUBLIC_INTERFACE(SwapWrapper)
typedef typename ei_packet_traits<Scalar>::type Packet; typedef typename internal::packet_traits<Scalar>::type Packet;
inline SwapWrapper(ExpressionType& xpr) : m_expression(xpr) {} inline SwapWrapper(ExpressionType& xpr) : m_expression(xpr) {}
@@ -60,11 +63,21 @@ template<typename ExpressionType> class SwapWrapper
return m_expression.const_cast_derived().coeffRef(index); return m_expression.const_cast_derived().coeffRef(index);
} }
inline Scalar& coeffRef(Index row, Index col) const
{
return m_expression.coeffRef(row, col);
}
inline Scalar& coeffRef(Index index) const
{
return m_expression.coeffRef(index);
}
template<typename OtherDerived> template<typename OtherDerived>
void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other) void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
{ {
OtherDerived& _other = other.const_cast_derived(); OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(row >= 0 && row < rows() eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols()); && col >= 0 && col < cols());
Scalar tmp = m_expression.coeff(row, col); Scalar tmp = m_expression.coeff(row, col);
m_expression.coeffRef(row, col) = _other.coeff(row, col); m_expression.coeffRef(row, col) = _other.coeff(row, col);
@@ -75,7 +88,7 @@ template<typename ExpressionType> class SwapWrapper
void copyCoeff(Index index, const DenseBase<OtherDerived>& other) void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
{ {
OtherDerived& _other = other.const_cast_derived(); OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(index >= 0 && index < m_expression.size()); eigen_internal_assert(index >= 0 && index < m_expression.size());
Scalar tmp = m_expression.coeff(index); Scalar tmp = m_expression.coeff(index);
m_expression.coeffRef(index) = _other.coeff(index); m_expression.coeffRef(index) = _other.coeff(index);
_other.coeffRef(index) = tmp; _other.coeffRef(index) = tmp;
@@ -85,7 +98,7 @@ template<typename ExpressionType> class SwapWrapper
void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other) void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
{ {
OtherDerived& _other = other.const_cast_derived(); OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(row >= 0 && row < rows() eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols()); && col >= 0 && col < cols());
Packet tmp = m_expression.template packet<StoreMode>(row, col); Packet tmp = m_expression.template packet<StoreMode>(row, col);
m_expression.template writePacket<StoreMode>(row, col, m_expression.template writePacket<StoreMode>(row, col,
@@ -98,7 +111,7 @@ template<typename ExpressionType> class SwapWrapper
void copyPacket(Index index, const DenseBase<OtherDerived>& other) void copyPacket(Index index, const DenseBase<OtherDerived>& other)
{ {
OtherDerived& _other = other.const_cast_derived(); OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(index >= 0 && index < m_expression.size()); eigen_internal_assert(index >= 0 && index < m_expression.size());
Packet tmp = m_expression.template packet<StoreMode>(index); Packet tmp = m_expression.template packet<StoreMode>(index);
m_expression.template writePacket<StoreMode>(index, m_expression.template writePacket<StoreMode>(index,
_other.template packet<LoadMode>(index) _other.template packet<LoadMode>(index)
@@ -110,18 +123,4 @@ template<typename ExpressionType> class SwapWrapper
ExpressionType& m_expression; ExpressionType& m_expression;
}; };
/** swaps *this with the expression \a other.
*
* \note \a other is only marked const for internal reasons, but of course
* it gets const-casted. One reason is that one will often call swap
* on temporary objects (hence non-const references are forbidden).
* Another reason is that lazyAssign takes a const argument anyway.
*/
template<typename Derived>
template<typename OtherDerived>
void DenseBase<Derived>::swap(DenseBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other)
{
// Assigning through a SwapWrapper exchanges coefficients instead of overwriting:
// its copyCoeff/copyPacket write other's value into *this and the old value of
// *this back into other.
(SwapWrapper<Derived>(derived())).lazyAssign(other);
}
#endif // EIGEN_SWAP_H #endif // EIGEN_SWAP_H

View File

@@ -27,6 +27,7 @@
#define EIGEN_TRANSPOSE_H #define EIGEN_TRANSPOSE_H
/** \class Transpose /** \class Transpose
* \ingroup Core_Module
* *
* \brief Expression of the transpose of a matrix * \brief Expression of the transpose of a matrix
* *
@@ -38,37 +39,43 @@
* *
* \sa MatrixBase::transpose(), MatrixBase::adjoint() * \sa MatrixBase::transpose(), MatrixBase::adjoint()
*/ */
namespace internal {
template<typename MatrixType> template<typename MatrixType>
struct ei_traits<Transpose<MatrixType> > : ei_traits<MatrixType> struct traits<Transpose<MatrixType> > : traits<MatrixType>
{ {
typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Scalar Scalar;
typedef typename ei_nested<MatrixType>::type MatrixTypeNested; typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested; typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain;
typedef typename ei_traits<MatrixType>::StorageKind StorageKind; typedef typename traits<MatrixType>::StorageKind StorageKind;
typedef typename ei_traits<MatrixType>::XprKind XprKind; typedef typename traits<MatrixType>::XprKind XprKind;
enum { enum {
RowsAtCompileTime = MatrixType::ColsAtCompileTime, RowsAtCompileTime = MatrixType::ColsAtCompileTime,
ColsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::RowsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
Flags = int(_MatrixTypeNested::Flags & ~NestByRefBit) ^ RowMajorBit, FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
CoeffReadCost = _MatrixTypeNested::CoeffReadCost, Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit),
InnerStrideAtCompileTime = ei_inner_stride_at_compile_time<MatrixType>::ret, Flags1 = Flags0 | FlagsLvalueBit,
OuterStrideAtCompileTime = ei_outer_stride_at_compile_time<MatrixType>::ret Flags = Flags1 ^ RowMajorBit,
CoeffReadCost = MatrixTypeNestedPlain::CoeffReadCost,
InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret,
OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
}; };
}; };
}
template<typename MatrixType, typename StorageKind> class TransposeImpl; template<typename MatrixType, typename StorageKind> class TransposeImpl;
template<typename MatrixType> class Transpose template<typename MatrixType> class Transpose
: public TransposeImpl<MatrixType,typename ei_traits<MatrixType>::StorageKind> : public TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>
{ {
public: public:
typedef typename TransposeImpl<MatrixType,typename ei_traits<MatrixType>::StorageKind>::Base Base; typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose) EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
inline Transpose(const MatrixType& matrix) : m_matrix(matrix) {} inline Transpose(MatrixType& matrix) : m_matrix(matrix) {}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
@@ -76,50 +83,73 @@ template<typename MatrixType> class Transpose
inline Index cols() const { return m_matrix.rows(); } inline Index cols() const { return m_matrix.rows(); }
/** \returns the nested expression */ /** \returns the nested expression */
const typename ei_cleantype<typename MatrixType::Nested>::type& const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const { return m_matrix; } nestedExpression() const { return m_matrix; }
/** \returns the nested expression */ /** \returns the nested expression */
typename ei_cleantype<typename MatrixType::Nested>::type& typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() { return m_matrix.const_cast_derived(); } nestedExpression() { return m_matrix.const_cast_derived(); }
protected: protected:
const typename MatrixType::Nested m_matrix; const typename MatrixType::Nested m_matrix;
}; };
template<typename MatrixType, bool HasDirectAccess = ei_has_direct_access<MatrixType>::ret> namespace internal {
struct ei_TransposeImpl_base
template<typename MatrixType, bool HasDirectAccess = has_direct_access<MatrixType>::ret>
struct TransposeImpl_base
{ {
typedef typename ei_dense_xpr_base<Transpose<MatrixType> >::type type; typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;
}; };
template<typename MatrixType> template<typename MatrixType>
struct ei_TransposeImpl_base<MatrixType, false> struct TransposeImpl_base<MatrixType, false>
{ {
typedef typename ei_dense_xpr_base<Transpose<MatrixType> >::type type; typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;
}; };
} // end namespace internal
template<typename MatrixType> class TransposeImpl<MatrixType,Dense> template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
: public ei_TransposeImpl_base<MatrixType>::type : public internal::TransposeImpl_base<MatrixType>::type
{ {
public: public:
typedef typename ei_TransposeImpl_base<MatrixType>::type Base; typedef typename internal::TransposeImpl_base<MatrixType>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>) EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
inline Index innerStride() const { return derived().nestedExpression().innerStride(); } inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
inline Index outerStride() const { return derived().nestedExpression().outerStride(); } inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
inline Scalar* data() { return derived().nestedExpression().data(); }
typedef typename internal::conditional<
internal::is_lvalue<MatrixType>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
inline const Scalar* data() const { return derived().nestedExpression().data(); } inline const Scalar* data() const { return derived().nestedExpression().data(); }
inline Scalar& coeffRef(Index row, Index col) inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)
{ {
return const_cast_derived().nestedExpression().coeffRef(col, row); EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return derived().nestedExpression().const_cast_derived().coeffRef(col, row);
} }
inline Scalar& coeffRef(Index index) inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
{ {
return const_cast_derived().nestedExpression().coeffRef(index); EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return derived().nestedExpression().const_cast_derived().coeffRef(index);
}
inline const Scalar& coeffRef(Index row, Index col) const
{
return derived().nestedExpression().coeffRef(col, row);
}
inline const Scalar& coeffRef(Index index) const
{
return derived().nestedExpression().coeffRef(index);
} }
inline const CoeffReturnType coeff(Index row, Index col) const inline const CoeffReturnType coeff(Index row, Index col) const
@@ -141,7 +171,7 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
template<int LoadMode> template<int LoadMode>
inline void writePacket(Index row, Index col, const PacketScalar& x) inline void writePacket(Index row, Index col, const PacketScalar& x)
{ {
const_cast_derived().nestedExpression().template writePacket<LoadMode>(col, row, x); derived().nestedExpression().const_cast_derived().template writePacket<LoadMode>(col, row, x);
} }
template<int LoadMode> template<int LoadMode>
@@ -153,7 +183,7 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
template<int LoadMode> template<int LoadMode>
inline void writePacket(Index index, const PacketScalar& x) inline void writePacket(Index index, const PacketScalar& x)
{ {
const_cast_derived().nestedExpression().template writePacket<LoadMode>(index, x); derived().nestedExpression().const_cast_derived().template writePacket<LoadMode>(index, x);
} }
}; };
@@ -189,10 +219,10 @@ DenseBase<Derived>::transpose()
* *
* \sa transposeInPlace(), adjoint() */ * \sa transposeInPlace(), adjoint() */
template<typename Derived> template<typename Derived>
inline const Transpose<Derived> inline const typename DenseBase<Derived>::ConstTransposeReturnType
DenseBase<Derived>::transpose() const DenseBase<Derived>::transpose() const
{ {
return derived(); return ConstTransposeReturnType(derived());
} }
/** \returns an expression of the adjoint (i.e. conjugate transpose) of *this. /** \returns an expression of the adjoint (i.e. conjugate transpose) of *this.
@@ -213,31 +243,34 @@ DenseBase<Derived>::transpose() const
* m = m.adjoint().eval(); * m = m.adjoint().eval();
* \endcode * \endcode
* *
* \sa adjointInPlace(), transpose(), conjugate(), class Transpose, class ei_scalar_conjugate_op */ * \sa adjointInPlace(), transpose(), conjugate(), class Transpose, class internal::scalar_conjugate_op */
template<typename Derived> template<typename Derived>
inline const typename MatrixBase<Derived>::AdjointReturnType inline const typename MatrixBase<Derived>::AdjointReturnType
MatrixBase<Derived>::adjoint() const MatrixBase<Derived>::adjoint() const
{ {
return this->transpose(); return this->transpose(); // in the complex case, the .conjugate() is implicit here
// due to implicit conversion to return type
} }
/*************************************************************************** /***************************************************************************
* "in place" transpose implementation * "in place" transpose implementation
***************************************************************************/ ***************************************************************************/
namespace internal {
template<typename MatrixType, template<typename MatrixType,
bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic> bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic>
struct ei_inplace_transpose_selector; struct inplace_transpose_selector;
template<typename MatrixType> template<typename MatrixType>
struct ei_inplace_transpose_selector<MatrixType,true> { // square matrix struct inplace_transpose_selector<MatrixType,true> { // square matrix
static void run(MatrixType& m) { static void run(MatrixType& m) {
m.template triangularView<StrictlyUpper>().swap(m.transpose()); m.template triangularView<StrictlyUpper>().swap(m.transpose());
} }
}; };
template<typename MatrixType> template<typename MatrixType>
struct ei_inplace_transpose_selector<MatrixType,false> { // non square matrix struct inplace_transpose_selector<MatrixType,false> { // non square matrix
static void run(MatrixType& m) { static void run(MatrixType& m) {
if (m.rows()==m.cols()) if (m.rows()==m.cols())
m.template triangularView<StrictlyUpper>().swap(m.transpose()); m.template triangularView<StrictlyUpper>().swap(m.transpose());
@@ -246,6 +279,8 @@ struct ei_inplace_transpose_selector<MatrixType,false> { // non square matrix
} }
}; };
} // end namespace internal
/** This is the "in place" version of transpose(): it replaces \c *this by its own transpose. /** This is the "in place" version of transpose(): it replaces \c *this by its own transpose.
* Thus, doing * Thus, doing
* \code * \code
@@ -267,7 +302,7 @@ struct ei_inplace_transpose_selector<MatrixType,false> { // non square matrix
template<typename Derived> template<typename Derived>
inline void DenseBase<Derived>::transposeInPlace() inline void DenseBase<Derived>::transposeInPlace()
{ {
ei_inplace_transpose_selector<Derived>::run(derived()); internal::inplace_transpose_selector<Derived>::run(derived());
} }
/*************************************************************************** /***************************************************************************
@@ -302,45 +337,47 @@ inline void MatrixBase<Derived>::adjointInPlace()
// The following is to detect aliasing problems in most common cases. // The following is to detect aliasing problems in most common cases.
template<typename BinOp,typename NestedXpr> namespace internal {
struct ei_blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr> >
: ei_blas_traits<NestedXpr> template<typename BinOp,typename NestedXpr,typename Rhs>
struct blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> >
: blas_traits<NestedXpr>
{ {
typedef SelfCwiseBinaryOp<BinOp,NestedXpr> XprType; typedef SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> XprType;
static inline const XprType extract(const XprType& x) { return x; } static inline const XprType extract(const XprType& x) { return x; }
}; };
template<bool DestIsTransposed, typename OtherDerived> template<bool DestIsTransposed, typename OtherDerived>
struct ei_check_transpose_aliasing_compile_time_selector struct check_transpose_aliasing_compile_time_selector
{ {
enum { ret = ei_blas_traits<OtherDerived>::IsTransposed != DestIsTransposed enum { ret = blas_traits<OtherDerived>::IsTransposed != DestIsTransposed
}; };
}; };
template<bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB> template<bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
struct ei_check_transpose_aliasing_compile_time_selector<DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> > struct check_transpose_aliasing_compile_time_selector<DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
{ {
enum { ret = ei_blas_traits<DerivedA>::IsTransposed != DestIsTransposed enum { ret = blas_traits<DerivedA>::IsTransposed != DestIsTransposed
|| ei_blas_traits<DerivedB>::IsTransposed != DestIsTransposed || blas_traits<DerivedB>::IsTransposed != DestIsTransposed
}; };
}; };
template<typename Scalar, bool DestIsTransposed, typename OtherDerived> template<typename Scalar, bool DestIsTransposed, typename OtherDerived>
struct ei_check_transpose_aliasing_run_time_selector struct check_transpose_aliasing_run_time_selector
{ {
static bool run(const Scalar* dest, const OtherDerived& src) static bool run(const Scalar* dest, const OtherDerived& src)
{ {
return (ei_blas_traits<OtherDerived>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(Scalar*)ei_extract_data(src)); return (blas_traits<OtherDerived>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(Scalar*)extract_data(src));
} }
}; };
template<typename Scalar, bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB> template<typename Scalar, bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
struct ei_check_transpose_aliasing_run_time_selector<Scalar,DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> > struct check_transpose_aliasing_run_time_selector<Scalar,DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
{ {
static bool run(const Scalar* dest, const CwiseBinaryOp<BinOp,DerivedA,DerivedB>& src) static bool run(const Scalar* dest, const CwiseBinaryOp<BinOp,DerivedA,DerivedB>& src)
{ {
return ((ei_blas_traits<DerivedA>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(Scalar*)ei_extract_data(src.lhs()))) return ((blas_traits<DerivedA>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(Scalar*)extract_data(src.lhs())))
|| ((ei_blas_traits<DerivedB>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(Scalar*)ei_extract_data(src.rhs()))); || ((blas_traits<DerivedB>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(Scalar*)extract_data(src.rhs())));
} }
}; };
@@ -352,16 +389,16 @@ struct ei_check_transpose_aliasing_run_time_selector<Scalar,DestIsTransposed,Cwi
template<typename Derived, typename OtherDerived, template<typename Derived, typename OtherDerived,
bool MightHaveTransposeAliasing bool MightHaveTransposeAliasing
= ei_check_transpose_aliasing_compile_time_selector = check_transpose_aliasing_compile_time_selector
<ei_blas_traits<Derived>::IsTransposed,OtherDerived>::ret <blas_traits<Derived>::IsTransposed,OtherDerived>::ret
> >
struct checkTransposeAliasing_impl struct checkTransposeAliasing_impl
{ {
static void run(const Derived& dst, const OtherDerived& other) static void run(const Derived& dst, const OtherDerived& other)
{ {
ei_assert((!ei_check_transpose_aliasing_run_time_selector eigen_assert((!check_transpose_aliasing_run_time_selector
<typename Derived::Scalar,ei_blas_traits<Derived>::IsTransposed,OtherDerived> <typename Derived::Scalar,blas_traits<Derived>::IsTransposed,OtherDerived>
::run(ei_extract_data(dst), other)) ::run(extract_data(dst), other))
&& "aliasing detected during tranposition, use transposeInPlace() " && "aliasing detected during tranposition, use transposeInPlace() "
"or evaluate the rhs into a temporary using .eval()"); "or evaluate the rhs into a temporary using .eval()");
@@ -376,12 +413,13 @@ struct checkTransposeAliasing_impl<Derived, OtherDerived, false>
} }
}; };
} // end namespace internal
template<typename Derived> template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
void DenseBase<Derived>::checkTransposeAliasing(const OtherDerived& other) const void DenseBase<Derived>::checkTransposeAliasing(const OtherDerived& other) const
{ {
checkTransposeAliasing_impl<Derived, OtherDerived>::run(derived(), other); internal::checkTransposeAliasing_impl<Derived, OtherDerived>::run(derived(), other);
} }
#endif #endif

View File

@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library // This file is part of Eigen, a lightweight C++ template library
// for linear algebra. // for linear algebra.
// //
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2010-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
// //
// Eigen is free software; you can redistribute it and/or // Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public // modify it under the terms of the GNU Lesser General Public
@@ -26,6 +26,7 @@
#define EIGEN_TRANSPOSITIONS_H #define EIGEN_TRANSPOSITIONS_H
/** \class Transpositions /** \class Transpositions
* \ingroup Core_Module
* *
* \brief Represents a sequence of transpositions (row/column interchange) * \brief Represents a sequence of transpositions (row/column interchange)
* *
@@ -52,90 +53,75 @@
* *
* \sa class PermutationMatrix * \sa class PermutationMatrix
*/ */
template<typename TranspositionType, typename MatrixType, int Side, bool Transposed=false> struct ei_transposition_matrix_product_retval;
template<int SizeAtCompileTime, int MaxSizeAtCompileTime> namespace internal {
class Transpositions template<typename TranspositionType, typename MatrixType, int Side, bool Transposed=false> struct transposition_matrix_product_retval;
}
template<typename Derived>
class TranspositionsBase
{ {
typedef internal::traits<Derived> Traits;
public: public:
typedef Matrix<DenseIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType; typedef typename Traits::IndicesType IndicesType;
typedef typename IndicesType::Index Index; typedef typename IndicesType::Scalar Index;
inline Transpositions() {} Derived& derived() { return *static_cast<Derived*>(this); }
const Derived& derived() const { return *static_cast<const Derived*>(this); }
/** Copy constructor. */
template<int OtherSize, int OtherMaxSize>
inline Transpositions(const Transpositions<OtherSize, OtherMaxSize>& other)
: m_indices(other.indices()) {}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** Standard copy constructor. Defined only to prevent a default copy constructor
* from hiding the other templated constructor */
inline Transpositions(const Transpositions& other) : m_indices(other.indices()) {}
#endif
/** Generic constructor from expression of the transposition indices. */
template<typename Other>
explicit inline Transpositions(const MatrixBase<Other>& indices) : m_indices(indices)
{}
/** Copies the \a other transpositions into \c *this */ /** Copies the \a other transpositions into \c *this */
template<int OtherSize, int OtherMaxSize> template<typename OtherDerived>
Transpositions& operator=(const Transpositions<OtherSize, OtherMaxSize>& other) Derived& operator=(const TranspositionsBase<OtherDerived>& other)
{ {
m_indices = other.indices(); indices() = other.indices();
return *this; return derived();
} }
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to /** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=. * prevent a default operator= from hiding the templated operator=.
*/ */
Transpositions& operator=(const Transpositions& other) Derived& operator=(const TranspositionsBase& other)
{ {
m_indices = other.m_indices; indices() = other.indices();
return *this; return derived();
} }
#endif #endif
/** Constructs an uninitialized permutation matrix of given size.
*/
inline Transpositions(Index size) : m_indices(size)
{}
/** \returns the number of transpositions */ /** \returns the number of transpositions */
inline Index size() const { return m_indices.size(); } inline Index size() const { return indices().size(); }
/** Direct access to the underlying index vector */ /** Direct access to the underlying index vector */
inline const Index& coeff(Index i) const { return m_indices.coeff(i); } inline const Index& coeff(Index i) const { return indices().coeff(i); }
/** Direct access to the underlying index vector */ /** Direct access to the underlying index vector */
inline Index& coeffRef(Index i) { return m_indices.coeffRef(i); } inline Index& coeffRef(Index i) { return indices().coeffRef(i); }
/** Direct access to the underlying index vector */ /** Direct access to the underlying index vector */
inline const Index& operator()(Index i) const { return m_indices(i); } inline const Index& operator()(Index i) const { return indices()(i); }
/** Direct access to the underlying index vector */ /** Direct access to the underlying index vector */
inline Index& operator()(Index i) { return m_indices(i); } inline Index& operator()(Index i) { return indices()(i); }
/** Direct access to the underlying index vector */ /** Direct access to the underlying index vector */
inline const Index& operator[](Index i) const { return m_indices(i); } inline const Index& operator[](Index i) const { return indices()(i); }
/** Direct access to the underlying index vector */ /** Direct access to the underlying index vector */
inline Index& operator[](Index i) { return m_indices(i); } inline Index& operator[](Index i) { return indices()(i); }
/** const version of indices(). */ /** const version of indices(). */
const IndicesType& indices() const { return m_indices; } const IndicesType& indices() const { return derived().indices(); }
/** \returns a reference to the stored array representing the transpositions. */ /** \returns a reference to the stored array representing the transpositions. */
IndicesType& indices() { return m_indices; } IndicesType& indices() { return derived().indices(); }
/** Resizes to given size. */ /** Resizes to given size. */
inline void resize(int size) inline void resize(int size)
{ {
m_indices.resize(size); indices().resize(size);
} }
/** Sets \c *this to represent an identity transformation */ /** Sets \c *this to represent an identity transformation */
void setIdentity() void setIdentity()
{ {
for(int i = 0; i < m_indices.size(); ++i) for(int i = 0; i < indices().size(); ++i)
m_indices.coeffRef(i) = i; coeffRef(i) = i;
} }
// FIXME: do we want such methods ? // FIXME: do we want such methods ?
@@ -160,69 +146,238 @@ class Transpositions
*/ */
/** \returns the inverse transformation */ /** \returns the inverse transformation */
inline Transpose<Transpositions> inverse() const inline Transpose<TranspositionsBase> inverse() const
{ return *this; } { return Transpose<TranspositionsBase>(derived()); }
/** \returns the transpose transformation */ /** \returns the transpose transformation */
inline Transpose<Transpositions> transpose() const inline Transpose<TranspositionsBase> transpose() const
{ return *this; } { return Transpose<TranspositionsBase>(derived()); }
#ifndef EIGEN_PARSED_BY_DOXYGEN protected:
template<int OtherSize, int OtherMaxSize> };
Transpositions(const Transpose<Transpositions<OtherSize,OtherMaxSize> >& other)
: m_indices(other.size()) namespace internal {
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType> >
{
typedef IndexType Index;
typedef Matrix<Index, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
};
}
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType> >
{
typedef internal::traits<Transpositions> Traits;
public:
typedef TranspositionsBase<Transpositions> Base;
typedef typename Traits::IndicesType IndicesType;
typedef typename IndicesType::Scalar Index;
inline Transpositions() {}
/** Copy constructor. */
template<typename OtherDerived>
inline Transpositions(const TranspositionsBase<OtherDerived>& other)
: m_indices(other.indices()) {}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** Standard copy constructor. Defined only to prevent a default copy constructor
* from hiding the other templated constructor */
inline Transpositions(const Transpositions& other) : m_indices(other.indices()) {}
#endif
/** Generic constructor from expression of the transposition indices. */
template<typename Other>
explicit inline Transpositions(const MatrixBase<Other>& indices) : m_indices(indices)
{}
/** Copies the \a other transpositions into \c *this */
template<typename OtherDerived>
Transpositions& operator=(const TranspositionsBase<OtherDerived>& other)
{ {
Index n = size(); return Base::operator=(other);
Index j = size-1;
for(Index i=0; i<n;++i,--j)
m_indices.coeffRef(j) = other.nestedTranspositions().indices().coeff(i);
} }
#endif
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
Transpositions& operator=(const Transpositions& other)
{
m_indices = other.m_indices;
return *this;
}
#endif
/** Constructs an uninitialized sequence of transpositions of the given size.
*/
inline Transpositions(Index size) : m_indices(size)
{}
/** const version of indices(). */
const IndicesType& indices() const { return m_indices; }
/** \returns a reference to the stored array representing the transpositions. */
IndicesType& indices() { return m_indices; }
protected: protected:
IndicesType m_indices; IndicesType m_indices;
}; };
namespace internal {
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,_PacketAccess> >
{
typedef IndexType Index;
typedef Map<const Matrix<Index,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;
};
}
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int PacketAccess>
class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,PacketAccess>
: public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,PacketAccess> >
{
typedef internal::traits<Map> Traits;
public:
typedef TranspositionsBase<Map> Base;
typedef typename Traits::IndicesType IndicesType;
typedef typename IndicesType::Scalar Index;
inline Map(const Index* indices)
: m_indices(indices)
{}
inline Map(const Index* indices, Index size)
: m_indices(indices,size)
{}
/** Copies the \a other transpositions into \c *this */
template<typename OtherDerived>
Map& operator=(const TranspositionsBase<OtherDerived>& other)
{
return Base::operator=(other);
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
Map& operator=(const Map& other)
{
m_indices = other.m_indices;
return *this;
}
#endif
/** const version of indices(). */
const IndicesType& indices() const { return m_indices; }
/** \returns a reference to the stored array representing the transpositions. */
IndicesType& indices() { return m_indices; }
protected:
IndicesType m_indices;
};
namespace internal {
template<typename _IndicesType>
struct traits<TranspositionsWrapper<_IndicesType> >
{
typedef typename _IndicesType::Scalar Index;
typedef _IndicesType IndicesType;
};
}
template<typename _IndicesType>
class TranspositionsWrapper
: public TranspositionsBase<TranspositionsWrapper<_IndicesType> >
{
typedef internal::traits<TranspositionsWrapper> Traits;
public:
typedef TranspositionsBase<TranspositionsWrapper> Base;
typedef typename Traits::IndicesType IndicesType;
typedef typename IndicesType::Scalar Index;
inline TranspositionsWrapper(IndicesType& indices)
: m_indices(indices)
{}
/** Copies the \a other transpositions into \c *this */
template<typename OtherDerived>
TranspositionsWrapper& operator=(const TranspositionsBase<OtherDerived>& other)
{
return Base::operator=(other);
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
TranspositionsWrapper& operator=(const TranspositionsWrapper& other)
{
m_indices = other.m_indices;
return *this;
}
#endif
/** const version of indices(). */
const IndicesType& indices() const { return m_indices; }
/** \returns a reference to the stored array representing the transpositions. */
IndicesType& indices() { return m_indices; }
protected:
const typename IndicesType::Nested m_indices;
};
/** \returns the \a matrix with the \a transpositions applied to the columns. /** \returns the \a matrix with the \a transpositions applied to the columns.
*/ */
template<typename Derived, int SizeAtCompileTime, int MaxSizeAtCompileTime> template<typename Derived, typename TranspositionsDerived>
inline const ei_transposition_matrix_product_retval<Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheRight> inline const internal::transposition_matrix_product_retval<TranspositionsDerived, Derived, OnTheRight>
operator*(const MatrixBase<Derived>& matrix, operator*(const MatrixBase<Derived>& matrix,
const Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime> &transpositions) const TranspositionsBase<TranspositionsDerived> &transpositions)
{ {
return ei_transposition_matrix_product_retval return internal::transposition_matrix_product_retval
<Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheRight> <TranspositionsDerived, Derived, OnTheRight>
(transpositions, matrix.derived()); (transpositions.derived(), matrix.derived());
} }
/** \returns the \a matrix with the \a transpositions applied to the rows. /** \returns the \a matrix with the \a transpositions applied to the rows.
*/ */
template<typename Derived, int SizeAtCompileTime, int MaxSizeAtCompileTime> template<typename Derived, typename TranspositionDerived>
inline const ei_transposition_matrix_product_retval inline const internal::transposition_matrix_product_retval
<Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheLeft> <TranspositionDerived, Derived, OnTheLeft>
operator*(const Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime> &transpositions, operator*(const TranspositionsBase<TranspositionDerived> &transpositions,
const MatrixBase<Derived>& matrix) const MatrixBase<Derived>& matrix)
{ {
return ei_transposition_matrix_product_retval return internal::transposition_matrix_product_retval
<Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheLeft> <TranspositionDerived, Derived, OnTheLeft>
(transpositions, matrix.derived()); (transpositions.derived(), matrix.derived());
} }
namespace internal {
template<typename TranspositionType, typename MatrixType, int Side, bool Transposed> template<typename TranspositionType, typename MatrixType, int Side, bool Transposed>
struct ei_traits<ei_transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> > struct traits<transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> >
{ {
typedef typename MatrixType::PlainObject ReturnType; typedef typename MatrixType::PlainObject ReturnType;
}; };
template<typename TranspositionType, typename MatrixType, int Side, bool Transposed> template<typename TranspositionType, typename MatrixType, int Side, bool Transposed>
struct ei_transposition_matrix_product_retval struct transposition_matrix_product_retval
: public ReturnByValue<ei_transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> > : public ReturnByValue<transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> >
{ {
typedef typename ei_cleantype<typename MatrixType::Nested>::type MatrixTypeNestedCleaned; typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
typedef typename TranspositionType::Index Index; typedef typename TranspositionType::Index Index;
ei_transposition_matrix_product_retval(const TranspositionType& tr, const MatrixType& matrix) transposition_matrix_product_retval(const TranspositionType& tr, const MatrixType& matrix)
: m_transpositions(tr), m_matrix(matrix) : m_transpositions(tr), m_matrix(matrix)
{} {}
@@ -234,7 +389,7 @@ struct ei_transposition_matrix_product_retval
const int size = m_transpositions.size(); const int size = m_transpositions.size();
Index j = 0; Index j = 0;
if(!(ei_is_same_type<MatrixTypeNestedCleaned,Dest>::ret && ei_extract_data(dst) == ei_extract_data(m_matrix))) if(!(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix)))
dst = m_matrix; dst = m_matrix;
for(int k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k) for(int k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
@@ -252,12 +407,14 @@ struct ei_transposition_matrix_product_retval
const typename MatrixType::Nested m_matrix; const typename MatrixType::Nested m_matrix;
}; };
} // end namespace internal
/* Template partial specialization for transposed/inverse transpositions */ /* Template partial specialization for transposed/inverse transpositions */
template<int SizeAtCompileTime, int MaxSizeAtCompileTime> template<typename TranspositionsDerived>
class Transpose<Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime> > class Transpose<TranspositionsBase<TranspositionsDerived> >
{ {
typedef Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime> TranspositionType; typedef TranspositionsDerived TranspositionType;
typedef typename TranspositionType::IndicesType IndicesType; typedef typename TranspositionType::IndicesType IndicesType;
public: public:
@@ -268,23 +425,21 @@ class Transpose<Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime> >
/** \returns the \a matrix with the inverse transpositions applied to the columns. /** \returns the \a matrix with the inverse transpositions applied to the columns.
*/ */
template<typename Derived> friend template<typename Derived> friend
inline const ei_transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true> inline const internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true>
operator*(const MatrixBase<Derived>& matrix, const Transpose& trt) operator*(const MatrixBase<Derived>& matrix, const Transpose& trt)
{ {
return ei_transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true>(trt.m_transpositions, matrix.derived()); return internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true>(trt.m_transpositions, matrix.derived());
} }
/** \returns the \a matrix with the inverse transpositions applied to the rows. /** \returns the \a matrix with the inverse transpositions applied to the rows.
*/ */
template<typename Derived> template<typename Derived>
inline const ei_transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true> inline const internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true>
operator*(const MatrixBase<Derived>& matrix) const operator*(const MatrixBase<Derived>& matrix) const
{ {
return ei_transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true>(m_transpositions, matrix.derived()); return internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true>(m_transpositions, matrix.derived());
} }
const TranspositionType& nestedTranspositions() const { return m_transpositions; }
protected: protected:
const TranspositionType& m_transpositions; const TranspositionType& m_transpositions;
}; };

View File

@@ -26,9 +26,16 @@
#ifndef EIGEN_TRIANGULARMATRIX_H #ifndef EIGEN_TRIANGULARMATRIX_H
#define EIGEN_TRIANGULARMATRIX_H #define EIGEN_TRIANGULARMATRIX_H
namespace internal {
template<int Side, typename TriangularType, typename Rhs> struct triangular_solve_retval;
}
/** \internal /** \internal
* *
* \class TriangularBase * \class TriangularBase
* \ingroup Core_Module
* *
* \brief Base class for triangular part in a matrix * \brief Base class for triangular part in a matrix
*/ */
@@ -37,18 +44,20 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
public: public:
enum { enum {
Mode = ei_traits<Derived>::Mode, Mode = internal::traits<Derived>::Mode,
CoeffReadCost = ei_traits<Derived>::CoeffReadCost, CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
RowsAtCompileTime = ei_traits<Derived>::RowsAtCompileTime, RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
ColsAtCompileTime = ei_traits<Derived>::ColsAtCompileTime, ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
MaxRowsAtCompileTime = ei_traits<Derived>::MaxRowsAtCompileTime, MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = ei_traits<Derived>::MaxColsAtCompileTime MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime
}; };
typedef typename ei_traits<Derived>::Scalar Scalar; typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename ei_traits<Derived>::StorageKind StorageKind; typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename ei_traits<Derived>::Index Index; typedef typename internal::traits<Derived>::Index Index;
typedef typename internal::traits<Derived>::DenseMatrixType DenseMatrixType;
typedef DenseMatrixType DenseType;
inline TriangularBase() { ei_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); } inline TriangularBase() { eigen_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); }
inline Index rows() const { return derived().rows(); } inline Index rows() const { return derived().rows(); }
inline Index cols() const { return derived().cols(); } inline Index cols() const { return derived().cols(); }
@@ -87,31 +96,40 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
template<typename DenseDerived> template<typename DenseDerived>
void evalToLazy(MatrixBase<DenseDerived> &other) const; void evalToLazy(MatrixBase<DenseDerived> &other) const;
DenseMatrixType toDenseMatrix() const
{
DenseMatrixType res(rows(), cols());
evalToLazy(res);
return res;
}
protected: protected:
void check_coordinates(Index row, Index col) void check_coordinates(Index row, Index col) const
{ {
EIGEN_ONLY_USED_FOR_DEBUG(row); EIGEN_ONLY_USED_FOR_DEBUG(row);
EIGEN_ONLY_USED_FOR_DEBUG(col); EIGEN_ONLY_USED_FOR_DEBUG(col);
ei_assert(col>=0 && col<cols() && row>=0 && row<rows()); eigen_assert(col>=0 && col<cols() && row>=0 && row<rows());
ei_assert( (Mode==Upper && col>=row) const int mode = int(Mode) & ~SelfAdjoint;
|| (Mode==Lower && col<=row) eigen_assert((mode==Upper && col>=row)
|| ((Mode==StrictlyUpper || Mode==UnitUpper) && col>row) || (mode==Lower && col<=row)
|| ((Mode==StrictlyLower || Mode==UnitLower) && col<row)); || ((mode==StrictlyUpper || mode==UnitUpper) && col>row)
|| ((mode==StrictlyLower || mode==UnitLower) && col<row));
} }
#ifdef EIGEN_INTERNAL_DEBUGGING #ifdef EIGEN_INTERNAL_DEBUGGING
void check_coordinates_internal(Index row, Index col) void check_coordinates_internal(Index row, Index col) const
{ {
check_coordinates(row, col); check_coordinates(row, col);
} }
#else #else
void check_coordinates_internal(Index , Index ) {} void check_coordinates_internal(Index , Index ) const {}
#endif #endif
}; };
/** \class TriangularView /** \class TriangularView
* \ingroup Core_Module
* *
* \brief Base class for triangular part in a matrix * \brief Base class for triangular part in a matrix
* *
@@ -127,18 +145,22 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
* *
* \sa MatrixBase::triangularView() * \sa MatrixBase::triangularView()
*/ */
namespace internal {
template<typename MatrixType, unsigned int _Mode> template<typename MatrixType, unsigned int _Mode>
struct ei_traits<TriangularView<MatrixType, _Mode> > : ei_traits<MatrixType> struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType>
{ {
typedef typename ei_nested<MatrixType>::type MatrixTypeNested; typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested; typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
typedef MatrixType ExpressionType; typedef MatrixType ExpressionType;
typedef typename MatrixType::PlainObject DenseMatrixType;
enum { enum {
Mode = _Mode, Mode = _Mode,
Flags = (_MatrixTypeNested::Flags & (HereditaryBits) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit))) | Mode, Flags = (MatrixTypeNestedCleaned::Flags & (HereditaryBits) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit))) | Mode,
CoeffReadCost = _MatrixTypeNested::CoeffReadCost CoeffReadCost = MatrixTypeNestedCleaned::CoeffReadCost
}; };
}; };
}
template<int Mode, bool LhsIsTriangular, template<int Mode, bool LhsIsTriangular,
typename Lhs, bool LhsIsVector, typename Lhs, bool LhsIsVector,
@@ -151,15 +173,25 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
public: public:
typedef TriangularBase<TriangularView> Base; typedef TriangularBase<TriangularView> Base;
typedef typename ei_traits<TriangularView>::Scalar Scalar; typedef typename internal::traits<TriangularView>::Scalar Scalar;
typedef _MatrixType MatrixType;
typedef typename MatrixType::PlainObject DenseMatrixType;
typedef typename MatrixType::Nested MatrixTypeNested;
typedef typename ei_cleantype<MatrixTypeNested>::type _MatrixTypeNested;
using Base::evalToLazy;
typedef typename ei_traits<TriangularView>::StorageKind StorageKind; typedef _MatrixType MatrixType;
typedef typename ei_traits<TriangularView>::Index Index; typedef typename internal::traits<TriangularView>::DenseMatrixType DenseMatrixType;
typedef DenseMatrixType PlainObject;
protected:
typedef typename internal::traits<TriangularView>::MatrixTypeNested MatrixTypeNested;
typedef typename internal::traits<TriangularView>::MatrixTypeNestedNonRef MatrixTypeNestedNonRef;
typedef typename internal::traits<TriangularView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
public:
using Base::evalToLazy;
typedef typename internal::traits<TriangularView>::StorageKind StorageKind;
typedef typename internal::traits<TriangularView>::Index Index;
enum { enum {
Mode = _Mode, Mode = _Mode,
@@ -170,7 +202,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
}; };
inline TriangularView(const MatrixType& matrix) : m_matrix(matrix) inline TriangularView(const MatrixType& matrix) : m_matrix(matrix)
{ ei_assert(ei_are_flags_consistent<Mode>::ret); } {}
inline Index rows() const { return m_matrix.rows(); } inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); } inline Index cols() const { return m_matrix.cols(); }
@@ -178,13 +210,13 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
inline Index innerStride() const { return m_matrix.innerStride(); } inline Index innerStride() const { return m_matrix.innerStride(); }
/** \sa MatrixBase::operator+=() */ /** \sa MatrixBase::operator+=() */
template<typename Other> TriangularView& operator+=(const Other& other) { return *this = m_matrix + other; } template<typename Other> TriangularView& operator+=(const DenseBase<Other>& other) { return *this = m_matrix + other.derived(); }
/** \sa MatrixBase::operator-=() */ /** \sa MatrixBase::operator-=() */
template<typename Other> TriangularView& operator-=(const Other& other) { return *this = m_matrix - other; } template<typename Other> TriangularView& operator-=(const DenseBase<Other>& other) { return *this = m_matrix - other.derived(); }
/** \sa MatrixBase::operator*=() */ /** \sa MatrixBase::operator*=() */
TriangularView& operator*=(const typename ei_traits<MatrixType>::Scalar& other) { return *this = m_matrix * other; } TriangularView& operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix * other; }
/** \sa MatrixBase::operator/=() */ /** \sa MatrixBase::operator/=() */
TriangularView& operator/=(const typename ei_traits<MatrixType>::Scalar& other) { return *this = m_matrix / other; } TriangularView& operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix / other; }
/** \sa MatrixBase::fill() */ /** \sa MatrixBase::fill() */
void fill(const Scalar& value) { setConstant(value); } void fill(const Scalar& value) { setConstant(value); }
@@ -214,8 +246,8 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
return m_matrix.const_cast_derived().coeffRef(row, col); return m_matrix.const_cast_derived().coeffRef(row, col);
} }
const MatrixType& nestedExpression() const { return m_matrix; } const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
MatrixType& nestedExpression() { return const_cast<MatrixType&>(m_matrix); } MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
/** Assigns a triangular matrix to a triangular part of a dense matrix */ /** Assigns a triangular matrix to a triangular part of a dense matrix */
template<typename OtherDerived> template<typename OtherDerived>
@@ -233,6 +265,12 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
template<typename OtherDerived> template<typename OtherDerived>
void lazyAssign(const MatrixBase<OtherDerived>& other); void lazyAssign(const MatrixBase<OtherDerived>& other);
/** \sa MatrixBase::conjugate() */
inline TriangularView<MatrixConjugateReturnType,Mode> conjugate()
{ return m_matrix.conjugate(); }
/** \sa MatrixBase::conjugate() const */
inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const
{ return m_matrix.conjugate(); }
/** \sa MatrixBase::adjoint() */ /** \sa MatrixBase::adjoint() */
inline TriangularView<typename MatrixType::AdjointReturnType,TransposeMode> adjoint() inline TriangularView<typename MatrixType::AdjointReturnType,TransposeMode> adjoint()
@@ -243,18 +281,14 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
/** \sa MatrixBase::transpose() */ /** \sa MatrixBase::transpose() */
inline TriangularView<Transpose<MatrixType>,TransposeMode> transpose() inline TriangularView<Transpose<MatrixType>,TransposeMode> transpose()
{ return m_matrix.transpose(); } {
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return m_matrix.const_cast_derived().transpose();
}
/** \sa MatrixBase::transpose() const */ /** \sa MatrixBase::transpose() const */
inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const
{ return m_matrix.transpose(); } { return m_matrix.transpose(); }
DenseMatrixType toDenseMatrix() const
{
DenseMatrixType res(rows(), cols());
evalToLazy(res);
return res;
}
/** Efficient triangular matrix times vector/matrix product */ /** Efficient triangular matrix times vector/matrix product */
template<typename OtherDerived> template<typename OtherDerived>
TriangularProduct<Mode,true,MatrixType,false,OtherDerived,OtherDerived::IsVectorAtCompileTime> TriangularProduct<Mode,true,MatrixType,false,OtherDerived,OtherDerived::IsVectorAtCompileTime>
@@ -275,42 +309,70 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
(lhs.derived(),rhs.m_matrix); (lhs.derived(),rhs.m_matrix);
} }
#ifdef EIGEN2_SUPPORT
template<typename OtherDerived>
struct eigen2_product_return_type
{
typedef typename TriangularView<MatrixType,Mode>::DenseMatrixType DenseMatrixType;
typedef typename OtherDerived::PlainObject::DenseType OtherPlainObject;
typedef typename ProductReturnType<DenseMatrixType, OtherPlainObject>::Type ProdRetType;
typedef typename ProdRetType::PlainObject type;
};
template<typename OtherDerived>
const typename eigen2_product_return_type<OtherDerived>::type
operator*(const EigenBase<OtherDerived>& rhs) const
{
typename OtherDerived::PlainObject::DenseType rhsPlainObject;
rhs.evalTo(rhsPlainObject);
return this->toDenseMatrix() * rhsPlainObject;
}
template<typename OtherMatrixType>
bool isApprox(const TriangularView<OtherMatrixType, Mode>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
{
return this->toDenseMatrix().isApprox(other.toDenseMatrix(), precision);
}
template<typename OtherDerived>
bool isApprox(const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
{
return this->toDenseMatrix().isApprox(other, precision);
}
#endif // EIGEN2_SUPPORT
template<int Side, typename OtherDerived> template<int Side, typename Other>
typename ei_plain_matrix_type_column_major<OtherDerived>::type inline const internal::triangular_solve_retval<Side,TriangularView, Other>
solve(const MatrixBase<OtherDerived>& other) const; solve(const MatrixBase<Other>& other) const;
template<int Side, typename OtherDerived> template<int Side, typename OtherDerived>
void solveInPlace(const MatrixBase<OtherDerived>& other) const; void solveInPlace(const MatrixBase<OtherDerived>& other) const;
template<typename OtherDerived> template<typename Other>
typename ei_plain_matrix_type_column_major<OtherDerived>::type inline const internal::triangular_solve_retval<OnTheLeft,TriangularView, Other>
solve(const MatrixBase<OtherDerived>& other) const solve(const MatrixBase<Other>& other) const
{ return solve<OnTheLeft>(other); } { return solve<OnTheLeft>(other); }
template<typename OtherDerived> template<typename OtherDerived>
void solveInPlace(const MatrixBase<OtherDerived>& other) const void solveInPlace(const MatrixBase<OtherDerived>& other) const
{ return solveInPlace<OnTheLeft>(other); } { return solveInPlace<OnTheLeft>(other); }
const SelfAdjointView<_MatrixTypeNested,Mode> selfadjointView() const const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const
{ {
EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR); EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
return SelfAdjointView<_MatrixTypeNested,Mode>(m_matrix); return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
} }
SelfAdjointView<_MatrixTypeNested,Mode> selfadjointView() SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView()
{ {
EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR); EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
return SelfAdjointView<_MatrixTypeNested,Mode>(m_matrix); return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
} }
template<typename OtherDerived> template<typename OtherDerived>
void swap(TriangularBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other) void swap(TriangularBase<OtherDerived> const & other)
{ {
TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived()); TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived());
} }
template<typename OtherDerived> template<typename OtherDerived>
void swap(MatrixBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other) void swap(MatrixBase<OtherDerived> const & other)
{ {
TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived()); TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived());
} }
@@ -324,8 +386,51 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
else else
return m_matrix.diagonal().prod(); return m_matrix.diagonal().prod();
} }
// TODO simplify the following:
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_STRONG_INLINE TriangularView& operator=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
{
setZero();
return assignProduct(other,1);
}
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_STRONG_INLINE TriangularView& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
{
return assignProduct(other,1);
}
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_STRONG_INLINE TriangularView& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
{
return assignProduct(other,-1);
}
template<typename ProductDerived>
EIGEN_STRONG_INLINE TriangularView& operator=(const ScaledProduct<ProductDerived>& other)
{
setZero();
return assignProduct(other,other.alpha());
}
template<typename ProductDerived>
EIGEN_STRONG_INLINE TriangularView& operator+=(const ScaledProduct<ProductDerived>& other)
{
return assignProduct(other,other.alpha());
}
template<typename ProductDerived>
EIGEN_STRONG_INLINE TriangularView& operator-=(const ScaledProduct<ProductDerived>& other)
{
return assignProduct(other,-other.alpha());
}
protected: protected:
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha);
const MatrixTypeNested m_matrix; const MatrixTypeNested m_matrix;
}; };
@@ -334,8 +439,10 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
* Implementation of triangular evaluation/assignment * Implementation of triangular evaluation/assignment
***************************************************************************/ ***************************************************************************/
namespace internal {
template<typename Derived1, typename Derived2, unsigned int Mode, int UnrollCount, bool ClearOpposite> template<typename Derived1, typename Derived2, unsigned int Mode, int UnrollCount, bool ClearOpposite>
struct ei_triangular_assignment_selector struct triangular_assignment_selector
{ {
enum { enum {
col = (UnrollCount-1) / Derived1::RowsAtCompileTime, col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
@@ -344,9 +451,9 @@ struct ei_triangular_assignment_selector
inline static void run(Derived1 &dst, const Derived2 &src) inline static void run(Derived1 &dst, const Derived2 &src)
{ {
ei_triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src); triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src);
ei_assert( Mode == Upper || Mode == Lower eigen_assert( Mode == Upper || Mode == Lower
|| Mode == StrictlyUpper || Mode == StrictlyLower || Mode == StrictlyUpper || Mode == StrictlyLower
|| Mode == UnitUpper || Mode == UnitLower); || Mode == UnitUpper || Mode == UnitLower);
if((Mode == Upper && row <= col) if((Mode == Upper && row <= col)
@@ -368,13 +475,13 @@ struct ei_triangular_assignment_selector
// prevent buggy user code from causing an infinite recursion // prevent buggy user code from causing an infinite recursion
template<typename Derived1, typename Derived2, unsigned int Mode, bool ClearOpposite> template<typename Derived1, typename Derived2, unsigned int Mode, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite> struct triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite>
{ {
inline static void run(Derived1 &, const Derived2 &) {} inline static void run(Derived1 &, const Derived2 &) {}
}; };
template<typename Derived1, typename Derived2, bool ClearOpposite> template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearOpposite> struct triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearOpposite>
{ {
typedef typename Derived1::Index Index; typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src) inline static void run(Derived1 &dst, const Derived2 &src)
@@ -392,7 +499,7 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, Cle
}; };
template<typename Derived1, typename Derived2, bool ClearOpposite> template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite> struct triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite>
{ {
typedef typename Derived1::Index Index; typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src) inline static void run(Derived1 &dst, const Derived2 &src)
@@ -410,7 +517,7 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, Cle
}; };
template<typename Derived1, typename Derived2, bool ClearOpposite> template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic, ClearOpposite> struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic, ClearOpposite>
{ {
typedef typename Derived1::Index Index; typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src) inline static void run(Derived1 &dst, const Derived2 &src)
@@ -428,7 +535,7 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dyna
}; };
template<typename Derived1, typename Derived2, bool ClearOpposite> template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite> struct triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite>
{ {
typedef typename Derived1::Index Index; typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src) inline static void run(Derived1 &dst, const Derived2 &src)
@@ -446,7 +553,7 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dyna
}; };
template<typename Derived1, typename Derived2, bool ClearOpposite> template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite> struct triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite>
{ {
typedef typename Derived1::Index Index; typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src) inline static void run(Derived1 &dst, const Derived2 &src)
@@ -466,7 +573,7 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic,
} }
}; };
template<typename Derived1, typename Derived2, bool ClearOpposite> template<typename Derived1, typename Derived2, bool ClearOpposite>
struct ei_triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite> struct triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite>
{ {
typedef typename Derived1::Index Index; typedef typename Derived1::Index Index;
inline static void run(Derived1 &dst, const Derived2 &src) inline static void run(Derived1 &dst, const Derived2 &src)
@@ -486,6 +593,8 @@ struct ei_triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic,
} }
}; };
} // end namespace internal
// FIXME should we keep that possibility // FIXME should we keep that possibility
template<typename MatrixType, unsigned int Mode> template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived> template<typename OtherDerived>
@@ -494,7 +603,7 @@ TriangularView<MatrixType, Mode>::operator=(const MatrixBase<OtherDerived>& othe
{ {
if(OtherDerived::Flags & EvalBeforeAssigningBit) if(OtherDerived::Flags & EvalBeforeAssigningBit)
{ {
typename ei_plain_matrix_type<OtherDerived>::type other_evaluated(other.rows(), other.cols()); typename internal::plain_matrix_type<OtherDerived>::type other_evaluated(other.rows(), other.cols());
other_evaluated.template triangularView<Mode>().lazyAssign(other.derived()); other_evaluated.template triangularView<Mode>().lazyAssign(other.derived());
lazyAssign(other_evaluated); lazyAssign(other_evaluated);
} }
@@ -510,12 +619,12 @@ void TriangularView<MatrixType, Mode>::lazyAssign(const MatrixBase<OtherDerived>
{ {
enum { enum {
unroll = MatrixType::SizeAtCompileTime != Dynamic unroll = MatrixType::SizeAtCompileTime != Dynamic
&& ei_traits<OtherDerived>::CoeffReadCost != Dynamic && internal::traits<OtherDerived>::CoeffReadCost != Dynamic
&& MatrixType::SizeAtCompileTime*ei_traits<OtherDerived>::CoeffReadCost/2 <= EIGEN_UNROLLING_LIMIT && MatrixType::SizeAtCompileTime*internal::traits<OtherDerived>::CoeffReadCost/2 <= EIGEN_UNROLLING_LIMIT
}; };
ei_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols()); eigen_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
ei_triangular_assignment_selector internal::triangular_assignment_selector
<MatrixType, OtherDerived, int(Mode), <MatrixType, OtherDerived, int(Mode),
unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic, unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic,
false // do not change the opposite triangular part false // do not change the opposite triangular part
@@ -529,8 +638,8 @@ template<typename OtherDerived>
inline TriangularView<MatrixType, Mode>& inline TriangularView<MatrixType, Mode>&
TriangularView<MatrixType, Mode>::operator=(const TriangularBase<OtherDerived>& other) TriangularView<MatrixType, Mode>::operator=(const TriangularBase<OtherDerived>& other)
{ {
ei_assert(Mode == int(OtherDerived::Mode)); eigen_assert(Mode == int(OtherDerived::Mode));
if(ei_traits<OtherDerived>::Flags & EvalBeforeAssigningBit) if(internal::traits<OtherDerived>::Flags & EvalBeforeAssigningBit)
{ {
typename OtherDerived::DenseMatrixType other_evaluated(other.rows(), other.cols()); typename OtherDerived::DenseMatrixType other_evaluated(other.rows(), other.cols());
other_evaluated.template triangularView<Mode>().lazyAssign(other.derived().nestedExpression()); other_evaluated.template triangularView<Mode>().lazyAssign(other.derived().nestedExpression());
@@ -547,13 +656,13 @@ void TriangularView<MatrixType, Mode>::lazyAssign(const TriangularBase<OtherDeri
{ {
enum { enum {
unroll = MatrixType::SizeAtCompileTime != Dynamic unroll = MatrixType::SizeAtCompileTime != Dynamic
&& ei_traits<OtherDerived>::CoeffReadCost != Dynamic && internal::traits<OtherDerived>::CoeffReadCost != Dynamic
&& MatrixType::SizeAtCompileTime * ei_traits<OtherDerived>::CoeffReadCost / 2 && MatrixType::SizeAtCompileTime * internal::traits<OtherDerived>::CoeffReadCost / 2
<= EIGEN_UNROLLING_LIMIT <= EIGEN_UNROLLING_LIMIT
}; };
ei_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols()); eigen_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
ei_triangular_assignment_selector internal::triangular_assignment_selector
<MatrixType, OtherDerived, int(Mode), <MatrixType, OtherDerived, int(Mode),
unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic, unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic,
false // preserve the opposite triangular part false // preserve the opposite triangular part
@@ -570,9 +679,9 @@ template<typename Derived>
template<typename DenseDerived> template<typename DenseDerived>
void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
{ {
if(ei_traits<Derived>::Flags & EvalBeforeAssigningBit) if(internal::traits<Derived>::Flags & EvalBeforeAssigningBit)
{ {
typename ei_plain_matrix_type<Derived>::type other_evaluated(rows(), cols()); typename internal::plain_matrix_type<Derived>::type other_evaluated(rows(), cols());
evalToLazy(other_evaluated); evalToLazy(other_evaluated);
other.derived().swap(other_evaluated); other.derived().swap(other_evaluated);
} }
@@ -588,14 +697,14 @@ void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
{ {
enum { enum {
unroll = DenseDerived::SizeAtCompileTime != Dynamic unroll = DenseDerived::SizeAtCompileTime != Dynamic
&& ei_traits<Derived>::CoeffReadCost != Dynamic && internal::traits<Derived>::CoeffReadCost != Dynamic
&& DenseDerived::SizeAtCompileTime * ei_traits<Derived>::CoeffReadCost / 2 && DenseDerived::SizeAtCompileTime * internal::traits<Derived>::CoeffReadCost / 2
<= EIGEN_UNROLLING_LIMIT <= EIGEN_UNROLLING_LIMIT
}; };
ei_assert(this->rows() == other.rows() && this->cols() == other.cols()); other.derived().resize(this->rows(), this->cols());
ei_triangular_assignment_selector internal::triangular_assignment_selector
<DenseDerived, typename ei_traits<Derived>::ExpressionType, Derived::Mode, <DenseDerived, typename internal::traits<Derived>::MatrixTypeNestedCleaned, Derived::Mode,
unroll ? int(DenseDerived::SizeAtCompileTime) : Dynamic, unroll ? int(DenseDerived::SizeAtCompileTime) : Dynamic,
true // clear the opposite triangular part true // clear the opposite triangular part
>::run(other.derived(), derived().nestedExpression()); >::run(other.derived(), derived().nestedExpression());
@@ -609,10 +718,28 @@ void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
* Implementation of MatrixBase methods * Implementation of MatrixBase methods
***************************************************************************/ ***************************************************************************/
#ifdef EIGEN2_SUPPORT
// implementation of part<>(), including the SelfAdjoint case.
namespace internal {
template<typename MatrixType, unsigned int Mode>
struct eigen2_part_return_type
{
typedef TriangularView<MatrixType, Mode> type;
};
template<typename MatrixType>
struct eigen2_part_return_type<MatrixType, SelfAdjoint>
{
typedef SelfAdjointView<MatrixType, Upper> type;
};
}
/** \deprecated use MatrixBase::triangularView() */ /** \deprecated use MatrixBase::triangularView() */
template<typename Derived> template<typename Derived>
template<unsigned int Mode> template<unsigned int Mode>
EIGEN_DEPRECATED const TriangularView<Derived, Mode> MatrixBase<Derived>::part() const const typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Derived>::part() const
{ {
return derived(); return derived();
} }
@@ -620,10 +747,11 @@ EIGEN_DEPRECATED const TriangularView<Derived, Mode> MatrixBase<Derived>::part()
/** \deprecated use MatrixBase::triangularView() */ /** \deprecated use MatrixBase::triangularView() */
template<typename Derived> template<typename Derived>
template<unsigned int Mode> template<unsigned int Mode>
EIGEN_DEPRECATED TriangularView<Derived, Mode> MatrixBase<Derived>::part() typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Derived>::part()
{ {
return derived(); return derived();
} }
#endif
/** /**
* \returns an expression of a triangular view extracted from the current matrix * \returns an expression of a triangular view extracted from the current matrix
@@ -638,7 +766,8 @@ EIGEN_DEPRECATED TriangularView<Derived, Mode> MatrixBase<Derived>::part()
*/ */
template<typename Derived> template<typename Derived>
template<unsigned int Mode> template<unsigned int Mode>
TriangularView<Derived, Mode> MatrixBase<Derived>::triangularView() typename MatrixBase<Derived>::template TriangularViewReturnType<Mode>::Type
MatrixBase<Derived>::triangularView()
{ {
return derived(); return derived();
} }
@@ -646,7 +775,8 @@ TriangularView<Derived, Mode> MatrixBase<Derived>::triangularView()
/** This is the const version of MatrixBase::triangularView() */ /** This is the const version of MatrixBase::triangularView() */
template<typename Derived> template<typename Derived>
template<unsigned int Mode> template<unsigned int Mode>
const TriangularView<Derived, Mode> MatrixBase<Derived>::triangularView() const typename MatrixBase<Derived>::template ConstTriangularViewReturnType<Mode>::Type
MatrixBase<Derived>::triangularView() const
{ {
return derived(); return derived();
} }
@@ -654,7 +784,7 @@ const TriangularView<Derived, Mode> MatrixBase<Derived>::triangularView() const
/** \returns true if *this is approximately equal to an upper triangular matrix, /** \returns true if *this is approximately equal to an upper triangular matrix,
* within the precision given by \a prec. * within the precision given by \a prec.
* *
* \sa isLowerTriangular(), extract(), part(), marked() * \sa isLowerTriangular()
*/ */
template<typename Derived> template<typename Derived>
bool MatrixBase<Derived>::isUpperTriangular(RealScalar prec) const bool MatrixBase<Derived>::isUpperTriangular(RealScalar prec) const
@@ -665,21 +795,21 @@ bool MatrixBase<Derived>::isUpperTriangular(RealScalar prec) const
Index maxi = std::min(j, rows()-1); Index maxi = std::min(j, rows()-1);
for(Index i = 0; i <= maxi; ++i) for(Index i = 0; i <= maxi; ++i)
{ {
RealScalar absValue = ei_abs(coeff(i,j)); RealScalar absValue = internal::abs(coeff(i,j));
if(absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue; if(absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue;
} }
} }
RealScalar threshold = maxAbsOnUpperPart * prec; RealScalar threshold = maxAbsOnUpperPart * prec;
for(Index j = 0; j < cols(); ++j) for(Index j = 0; j < cols(); ++j)
for(Index i = j+1; i < rows(); ++i) for(Index i = j+1; i < rows(); ++i)
if(ei_abs(coeff(i, j)) > threshold) return false; if(internal::abs(coeff(i, j)) > threshold) return false;
return true; return true;
} }
/** \returns true if *this is approximately equal to a lower triangular matrix, /** \returns true if *this is approximately equal to a lower triangular matrix,
* within the precision given by \a prec. * within the precision given by \a prec.
* *
* \sa isUpperTriangular(), extract(), part(), marked() * \sa isUpperTriangular()
*/ */
template<typename Derived> template<typename Derived>
bool MatrixBase<Derived>::isLowerTriangular(RealScalar prec) const bool MatrixBase<Derived>::isLowerTriangular(RealScalar prec) const
@@ -688,7 +818,7 @@ bool MatrixBase<Derived>::isLowerTriangular(RealScalar prec) const
for(Index j = 0; j < cols(); ++j) for(Index j = 0; j < cols(); ++j)
for(Index i = j; i < rows(); ++i) for(Index i = j; i < rows(); ++i)
{ {
RealScalar absValue = ei_abs(coeff(i,j)); RealScalar absValue = internal::abs(coeff(i,j));
if(absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue; if(absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue;
} }
RealScalar threshold = maxAbsOnLowerPart * prec; RealScalar threshold = maxAbsOnLowerPart * prec;
@@ -696,7 +826,7 @@ bool MatrixBase<Derived>::isLowerTriangular(RealScalar prec) const
{ {
Index maxi = std::min(j, rows()-1); Index maxi = std::min(j, rows()-1);
for(Index i = 0; i < maxi; ++i) for(Index i = 0; i < maxi; ++i)
if(ei_abs(coeff(i, j)) > threshold) return false; if(internal::abs(coeff(i, j)) > threshold) return false;
} }
return true; return true;
} }

View File

@@ -27,6 +27,7 @@
#define EIGEN_VECTORBLOCK_H #define EIGEN_VECTORBLOCK_H
/** \class VectorBlock /** \class VectorBlock
* \ingroup Core_Module
* *
* \brief Expression of a fixed-size or dynamic-size sub-vector * \brief Expression of a fixed-size or dynamic-size sub-vector
* *
@@ -55,24 +56,27 @@
* *
* \sa class Block, DenseBase::segment(Index,Index,Index,Index), DenseBase::segment(Index,Index) * \sa class Block, DenseBase::segment(Index,Index,Index,Index), DenseBase::segment(Index,Index)
*/ */
namespace internal {
template<typename VectorType, int Size> template<typename VectorType, int Size>
struct ei_traits<VectorBlock<VectorType, Size> > struct traits<VectorBlock<VectorType, Size> >
: public ei_traits<Block<VectorType, : public traits<Block<VectorType,
ei_traits<VectorType>::Flags & RowMajorBit ? 1 : Size, traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
ei_traits<VectorType>::Flags & RowMajorBit ? Size : 1> > traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
{ {
}; };
}
template<typename VectorType, int Size> class VectorBlock template<typename VectorType, int Size> class VectorBlock
: public Block<VectorType, : public Block<VectorType,
ei_traits<VectorType>::Flags & RowMajorBit ? 1 : Size, internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
ei_traits<VectorType>::Flags & RowMajorBit ? Size : 1> internal::traits<VectorType>::Flags & RowMajorBit ? Size : 1>
{ {
typedef Block<VectorType, typedef Block<VectorType,
ei_traits<VectorType>::Flags & RowMajorBit ? 1 : Size, internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
ei_traits<VectorType>::Flags & RowMajorBit ? Size : 1> Base; internal::traits<VectorType>::Flags & RowMajorBit ? Size : 1> Base;
enum { enum {
IsColVector = !(ei_traits<VectorType>::Flags & RowMajorBit) IsColVector = !(internal::traits<VectorType>::Flags & RowMajorBit)
}; };
public: public:
EIGEN_DENSE_PUBLIC_INTERFACE(VectorBlock) EIGEN_DENSE_PUBLIC_INTERFACE(VectorBlock)
@@ -81,7 +85,7 @@ template<typename VectorType, int Size> class VectorBlock
/** Dynamic-size constructor /** Dynamic-size constructor
*/ */
inline VectorBlock(const VectorType& vector, Index start, Index size) inline VectorBlock(VectorType& vector, Index start, Index size)
: Base(vector, : Base(vector,
IsColVector ? start : 0, IsColVector ? 0 : start, IsColVector ? start : 0, IsColVector ? 0 : start,
IsColVector ? size : 1, IsColVector ? 1 : size) IsColVector ? size : 1, IsColVector ? 1 : size)
@@ -91,7 +95,7 @@ template<typename VectorType, int Size> class VectorBlock
/** Fixed-size constructor /** Fixed-size constructor
*/ */
inline VectorBlock(const VectorType& vector, Index start) inline VectorBlock(VectorType& vector, Index start)
: Base(vector, IsColVector ? start : 0, IsColVector ? 0 : start) : Base(vector, IsColVector ? start : 0, IsColVector ? 0 : start)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock); EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock);
@@ -116,20 +120,20 @@ template<typename VectorType, int Size> class VectorBlock
* \sa class Block, segment(Index) * \sa class Block, segment(Index)
*/ */
template<typename Derived> template<typename Derived>
inline VectorBlock<Derived> DenseBase<Derived> inline typename DenseBase<Derived>::SegmentReturnType
::segment(Index start, Index size) DenseBase<Derived>::segment(Index start, Index size)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return VectorBlock<Derived>(derived(), start, size); return SegmentReturnType(derived(), start, size);
} }
/** This is the const version of segment(Index,Index).*/ /** This is the const version of segment(Index,Index).*/
template<typename Derived> template<typename Derived>
inline const VectorBlock<Derived> inline typename DenseBase<Derived>::ConstSegmentReturnType
DenseBase<Derived>::segment(Index start, Index size) const DenseBase<Derived>::segment(Index start, Index size) const
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return VectorBlock<Derived>(derived(), start, size); return ConstSegmentReturnType(derived(), start, size);
} }
/** \returns a dynamic-size expression of the first coefficients of *this. /** \returns a dynamic-size expression of the first coefficients of *this.
@@ -148,20 +152,20 @@ DenseBase<Derived>::segment(Index start, Index size) const
* \sa class Block, block(Index,Index) * \sa class Block, block(Index,Index)
*/ */
template<typename Derived> template<typename Derived>
inline VectorBlock<Derived> inline typename DenseBase<Derived>::SegmentReturnType
DenseBase<Derived>::head(Index size) DenseBase<Derived>::head(Index size)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return VectorBlock<Derived>(derived(), 0, size); return SegmentReturnType(derived(), 0, size);
} }
/** This is the const version of head(Index).*/ /** This is the const version of head(Index).*/
template<typename Derived> template<typename Derived>
inline const VectorBlock<Derived> inline typename DenseBase<Derived>::ConstSegmentReturnType
DenseBase<Derived>::head(Index size) const DenseBase<Derived>::head(Index size) const
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return VectorBlock<Derived>(derived(), 0, size); return ConstSegmentReturnType(derived(), 0, size);
} }
/** \returns a dynamic-size expression of the last coefficients of *this. /** \returns a dynamic-size expression of the last coefficients of *this.
@@ -180,20 +184,20 @@ DenseBase<Derived>::head(Index size) const
* \sa class Block, block(Index,Index) * \sa class Block, block(Index,Index)
*/ */
template<typename Derived> template<typename Derived>
inline VectorBlock<Derived> inline typename DenseBase<Derived>::SegmentReturnType
DenseBase<Derived>::tail(Index size) DenseBase<Derived>::tail(Index size)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return VectorBlock<Derived>(derived(), this->size() - size, size); return SegmentReturnType(derived(), this->size() - size, size);
} }
/** This is the const version of tail(Index).*/ /** This is the const version of tail(Index).*/
template<typename Derived> template<typename Derived>
inline const VectorBlock<Derived> inline typename DenseBase<Derived>::ConstSegmentReturnType
DenseBase<Derived>::tail(Index size) const DenseBase<Derived>::tail(Index size) const
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return VectorBlock<Derived>(derived(), this->size() - size, size); return ConstSegmentReturnType(derived(), this->size() - size, size);
} }
/** \returns a fixed-size expression of a segment (i.e. a vector block) in \c *this /** \returns a fixed-size expression of a segment (i.e. a vector block) in \c *this
@@ -211,21 +215,21 @@ DenseBase<Derived>::tail(Index size) const
*/ */
template<typename Derived> template<typename Derived>
template<int Size> template<int Size>
inline VectorBlock<Derived,Size> inline typename DenseBase<Derived>::template FixedSegmentReturnType<Size>::Type
DenseBase<Derived>::segment(Index start) DenseBase<Derived>::segment(Index start)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return VectorBlock<Derived,Size>(derived(), start); return typename FixedSegmentReturnType<Size>::Type(derived(), start);
} }
/** This is the const version of segment<int>(Index).*/ /** This is the const version of segment<int>(Index).*/
template<typename Derived> template<typename Derived>
template<int Size> template<int Size>
inline const VectorBlock<Derived,Size> inline typename DenseBase<Derived>::template ConstFixedSegmentReturnType<Size>::Type
DenseBase<Derived>::segment(Index start) const DenseBase<Derived>::segment(Index start) const
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return VectorBlock<Derived,Size>(derived(), start); return typename ConstFixedSegmentReturnType<Size>::Type(derived(), start);
} }
/** \returns a fixed-size expression of the first coefficients of *this. /** \returns a fixed-size expression of the first coefficients of *this.
@@ -241,21 +245,21 @@ DenseBase<Derived>::segment(Index start) const
*/ */
template<typename Derived> template<typename Derived>
template<int Size> template<int Size>
inline VectorBlock<Derived,Size> inline typename DenseBase<Derived>::template FixedSegmentReturnType<Size>::Type
DenseBase<Derived>::head() DenseBase<Derived>::head()
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return VectorBlock<Derived,Size>(derived(), 0); return typename FixedSegmentReturnType<Size>::Type(derived(), 0);
} }
/** This is the const version of head<int>().*/ /** This is the const version of head<int>().*/
template<typename Derived> template<typename Derived>
template<int Size> template<int Size>
inline const VectorBlock<Derived,Size> inline typename DenseBase<Derived>::template ConstFixedSegmentReturnType<Size>::Type
DenseBase<Derived>::head() const DenseBase<Derived>::head() const
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return VectorBlock<Derived,Size>(derived(), 0); return typename ConstFixedSegmentReturnType<Size>::Type(derived(), 0);
} }
/** \returns a fixed-size expression of the last coefficients of *this. /** \returns a fixed-size expression of the last coefficients of *this.
@@ -271,21 +275,21 @@ DenseBase<Derived>::head() const
*/ */
template<typename Derived> template<typename Derived>
template<int Size> template<int Size>
inline VectorBlock<Derived,Size> inline typename DenseBase<Derived>::template FixedSegmentReturnType<Size>::Type
DenseBase<Derived>::tail() DenseBase<Derived>::tail()
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return VectorBlock<Derived, Size>(derived(), size() - Size); return typename FixedSegmentReturnType<Size>::Type(derived(), size() - Size);
} }
/** This is the const version of tail<int>.*/ /** This is the const version of tail<int>.*/
template<typename Derived> template<typename Derived>
template<int Size> template<int Size>
inline const VectorBlock<Derived,Size> inline typename DenseBase<Derived>::template ConstFixedSegmentReturnType<Size>::Type
DenseBase<Derived>::tail() const DenseBase<Derived>::tail() const
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return VectorBlock<Derived, Size>(derived(), size() - Size); return typename ConstFixedSegmentReturnType<Size>::Type(derived(), size() - Size);
} }

View File

@@ -27,6 +27,7 @@
#define EIGEN_PARTIAL_REDUX_H #define EIGEN_PARTIAL_REDUX_H
/** \class PartialReduxExpr /** \class PartialReduxExpr
* \ingroup Core_Module
* *
* \brief Generic expression of a partially reduxed matrix * \brief Generic expression of a partially reduxed matrix
* *
@@ -44,16 +45,17 @@
template< typename MatrixType, typename MemberOp, int Direction> template< typename MatrixType, typename MemberOp, int Direction>
class PartialReduxExpr; class PartialReduxExpr;
namespace internal {
template<typename MatrixType, typename MemberOp, int Direction> template<typename MatrixType, typename MemberOp, int Direction>
struct ei_traits<PartialReduxExpr<MatrixType, MemberOp, Direction> > struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
: ei_traits<MatrixType> : traits<MatrixType>
{ {
typedef typename MemberOp::result_type Scalar; typedef typename MemberOp::result_type Scalar;
typedef typename ei_traits<MatrixType>::StorageKind StorageKind; typedef typename traits<MatrixType>::StorageKind StorageKind;
typedef typename ei_traits<MatrixType>::XprKind XprKind; typedef typename traits<MatrixType>::XprKind XprKind;
typedef typename MatrixType::Scalar InputScalar; typedef typename MatrixType::Scalar InputScalar;
typedef typename ei_nested<MatrixType>::type MatrixTypeNested; typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_cleantype<MatrixTypeNested>::type _MatrixTypeNested; typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
enum { enum {
RowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::RowsAtCompileTime, RowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::RowsAtCompileTime,
ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime, ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime,
@@ -69,20 +71,21 @@ struct ei_traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
typedef typename MemberOp::template Cost<InputScalar,TraversalSize> CostOpType; typedef typename MemberOp::template Cost<InputScalar,TraversalSize> CostOpType;
#endif #endif
enum { enum {
CoeffReadCost = TraversalSize * ei_traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value) CoeffReadCost = TraversalSize * traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
}; };
}; };
}
template< typename MatrixType, typename MemberOp, int Direction> template< typename MatrixType, typename MemberOp, int Direction>
class PartialReduxExpr : ei_no_assignment_operator, class PartialReduxExpr : internal::no_assignment_operator,
public ei_dense_xpr_base< PartialReduxExpr<MatrixType, MemberOp, Direction> >::type public internal::dense_xpr_base< PartialReduxExpr<MatrixType, MemberOp, Direction> >::type
{ {
public: public:
typedef typename ei_dense_xpr_base<PartialReduxExpr>::type Base; typedef typename internal::dense_xpr_base<PartialReduxExpr>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(PartialReduxExpr) EIGEN_DENSE_PUBLIC_INTERFACE(PartialReduxExpr)
typedef typename ei_traits<PartialReduxExpr>::MatrixTypeNested MatrixTypeNested; typedef typename internal::traits<PartialReduxExpr>::MatrixTypeNested MatrixTypeNested;
typedef typename ei_traits<PartialReduxExpr>::_MatrixTypeNested _MatrixTypeNested; typedef typename internal::traits<PartialReduxExpr>::_MatrixTypeNested _MatrixTypeNested;
PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp()) PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())
: m_matrix(mat), m_functor(func) {} : m_matrix(mat), m_functor(func) {}
@@ -113,8 +116,8 @@ class PartialReduxExpr : ei_no_assignment_operator,
#define EIGEN_MEMBER_FUNCTOR(MEMBER,COST) \ #define EIGEN_MEMBER_FUNCTOR(MEMBER,COST) \
template <typename ResultType> \ template <typename ResultType> \
struct ei_member_##MEMBER { \ struct member_##MEMBER { \
EIGEN_EMPTY_STRUCT_CTOR(ei_member_##MEMBER) \ EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
typedef ResultType result_type; \ typedef ResultType result_type; \
template<typename Scalar, int Size> struct Cost \ template<typename Scalar, int Size> struct Cost \
{ enum { value = COST }; }; \ { enum { value = COST }; }; \
@@ -123,11 +126,13 @@ class PartialReduxExpr : ei_no_assignment_operator,
{ return mat.MEMBER(); } \ { return mat.MEMBER(); } \
} }
namespace internal {
EIGEN_MEMBER_FUNCTOR(squaredNorm, Size * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost); EIGEN_MEMBER_FUNCTOR(squaredNorm, Size * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(norm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost); EIGEN_MEMBER_FUNCTOR(norm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(stableNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost); EIGEN_MEMBER_FUNCTOR(stableNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(blueNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost); EIGEN_MEMBER_FUNCTOR(blueNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(hypotNorm, (Size-1) * ei_functor_traits<ei_scalar_hypot_op<Scalar> >::Cost ); EIGEN_MEMBER_FUNCTOR(hypotNorm, (Size-1) * functor_traits<scalar_hypot_op<Scalar> >::Cost );
EIGEN_MEMBER_FUNCTOR(sum, (Size-1)*NumTraits<Scalar>::AddCost); EIGEN_MEMBER_FUNCTOR(sum, (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(mean, (Size-1)*NumTraits<Scalar>::AddCost + NumTraits<Scalar>::MulCost); EIGEN_MEMBER_FUNCTOR(mean, (Size-1)*NumTraits<Scalar>::AddCost + NumTraits<Scalar>::MulCost);
EIGEN_MEMBER_FUNCTOR(minCoeff, (Size-1)*NumTraits<Scalar>::AddCost); EIGEN_MEMBER_FUNCTOR(minCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
@@ -138,22 +143,23 @@ EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost); EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost);
/** \internal */
template <typename BinaryOp, typename Scalar> template <typename BinaryOp, typename Scalar>
struct ei_member_redux { struct member_redux {
typedef typename ei_result_of< typedef typename result_of<
BinaryOp(Scalar) BinaryOp(Scalar)
>::type result_type; >::type result_type;
template<typename _Scalar, int Size> struct Cost template<typename _Scalar, int Size> struct Cost
{ enum { value = (Size-1) * ei_functor_traits<BinaryOp>::Cost }; }; { enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };
ei_member_redux(const BinaryOp func) : m_functor(func) {} member_redux(const BinaryOp func) : m_functor(func) {}
template<typename Derived> template<typename Derived>
inline result_type operator()(const DenseBase<Derived>& mat) const inline result_type operator()(const DenseBase<Derived>& mat) const
{ return mat.redux(m_functor); } { return mat.redux(m_functor); }
const BinaryOp m_functor; const BinaryOp m_functor;
}; };
}
/** \class VectorwiseOp /** \class VectorwiseOp
* \ingroup Core_Module
* *
* \brief Pseudo expression providing partial reduction operations * \brief Pseudo expression providing partial reduction operations
* *
@@ -176,11 +182,12 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
typedef typename ExpressionType::Scalar Scalar; typedef typename ExpressionType::Scalar Scalar;
typedef typename ExpressionType::RealScalar RealScalar; typedef typename ExpressionType::RealScalar RealScalar;
typedef typename ExpressionType::Index Index; typedef typename ExpressionType::Index Index;
typedef typename ei_meta_if<ei_must_nest_by_value<ExpressionType>::ret, typedef typename internal::conditional<internal::must_nest_by_value<ExpressionType>::ret,
ExpressionType, const ExpressionType&>::ret ExpressionTypeNested; ExpressionType, ExpressionType&>::type ExpressionTypeNested;
typedef typename internal::remove_all<ExpressionTypeNested>::type ExpressionTypeNestedCleaned;
template<template<typename _Scalar> class Functor, template<template<typename _Scalar> class Functor,
typename Scalar=typename ei_traits<ExpressionType>::Scalar> struct ReturnType typename Scalar=typename internal::traits<ExpressionType>::Scalar> struct ReturnType
{ {
typedef PartialReduxExpr<ExpressionType, typedef PartialReduxExpr<ExpressionType,
Functor<Scalar>, Functor<Scalar>,
@@ -191,7 +198,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
template<typename BinaryOp> struct ReduxReturnType template<typename BinaryOp> struct ReduxReturnType
{ {
typedef PartialReduxExpr<ExpressionType, typedef PartialReduxExpr<ExpressionType,
ei_member_redux<BinaryOp,typename ei_traits<ExpressionType>::Scalar>, internal::member_redux<BinaryOp,typename internal::traits<ExpressionType>::Scalar>,
Direction Direction
> Type; > Type;
}; };
@@ -205,9 +212,9 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \internal /** \internal
* \returns the i-th subvector according to the \c Direction */ * \returns the i-th subvector according to the \c Direction */
typedef typename ei_meta_if<Direction==Vertical, typedef typename internal::conditional<Direction==Vertical,
typename ExpressionType::ColXpr, typename ExpressionType::ColXpr,
typename ExpressionType::RowXpr>::ret SubVector; typename ExpressionType::RowXpr>::type SubVector;
SubVector subVector(Index i) SubVector subVector(Index i)
{ {
return SubVector(m_matrix.derived(),i); return SubVector(m_matrix.derived(),i);
@@ -239,7 +246,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
public: public:
inline VectorwiseOp(const ExpressionType& matrix) : m_matrix(matrix) {} inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {}
/** \internal */ /** \internal */
inline const ExpressionType& _expression() const { return m_matrix; } inline const ExpressionType& _expression() const { return m_matrix; }
@@ -263,7 +270,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_minCoeff.out * Output: \verbinclude PartialRedux_minCoeff.out
* *
* \sa DenseBase::minCoeff() */ * \sa DenseBase::minCoeff() */
const typename ReturnType<ei_member_minCoeff>::Type minCoeff() const const typename ReturnType<internal::member_minCoeff>::Type minCoeff() const
{ return _expression(); } { return _expression(); }
/** \returns a row (or column) vector expression of the largest coefficient /** \returns a row (or column) vector expression of the largest coefficient
@@ -273,7 +280,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_maxCoeff.out * Output: \verbinclude PartialRedux_maxCoeff.out
* *
* \sa DenseBase::maxCoeff() */ * \sa DenseBase::maxCoeff() */
const typename ReturnType<ei_member_maxCoeff>::Type maxCoeff() const const typename ReturnType<internal::member_maxCoeff>::Type maxCoeff() const
{ return _expression(); } { return _expression(); }
/** \returns a row (or column) vector expression of the squared norm /** \returns a row (or column) vector expression of the squared norm
@@ -283,7 +290,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_squaredNorm.out * Output: \verbinclude PartialRedux_squaredNorm.out
* *
* \sa DenseBase::squaredNorm() */ * \sa DenseBase::squaredNorm() */
const typename ReturnType<ei_member_squaredNorm,RealScalar>::Type squaredNorm() const const typename ReturnType<internal::member_squaredNorm,RealScalar>::Type squaredNorm() const
{ return _expression(); } { return _expression(); }
/** \returns a row (or column) vector expression of the norm /** \returns a row (or column) vector expression of the norm
@@ -293,7 +300,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_norm.out * Output: \verbinclude PartialRedux_norm.out
* *
* \sa DenseBase::norm() */ * \sa DenseBase::norm() */
const typename ReturnType<ei_member_norm,RealScalar>::Type norm() const const typename ReturnType<internal::member_norm,RealScalar>::Type norm() const
{ return _expression(); } { return _expression(); }
@@ -302,7 +309,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* blue's algorithm. * blue's algorithm.
* *
* \sa DenseBase::blueNorm() */ * \sa DenseBase::blueNorm() */
const typename ReturnType<ei_member_blueNorm,RealScalar>::Type blueNorm() const const typename ReturnType<internal::member_blueNorm,RealScalar>::Type blueNorm() const
{ return _expression(); } { return _expression(); }
@@ -311,7 +318,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* underflow and overflow. * underflow and overflow.
* *
* \sa DenseBase::stableNorm() */ * \sa DenseBase::stableNorm() */
const typename ReturnType<ei_member_stableNorm,RealScalar>::Type stableNorm() const const typename ReturnType<internal::member_stableNorm,RealScalar>::Type stableNorm() const
{ return _expression(); } { return _expression(); }
@@ -320,7 +327,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* underflow and overflow using a concatenation of hypot() calls. * underflow and overflow using a concatenation of hypot() calls.
* *
* \sa DenseBase::hypotNorm() */ * \sa DenseBase::hypotNorm() */
const typename ReturnType<ei_member_hypotNorm,RealScalar>::Type hypotNorm() const const typename ReturnType<internal::member_hypotNorm,RealScalar>::Type hypotNorm() const
{ return _expression(); } { return _expression(); }
/** \returns a row (or column) vector expression of the sum /** \returns a row (or column) vector expression of the sum
@@ -330,28 +337,28 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_sum.out * Output: \verbinclude PartialRedux_sum.out
* *
* \sa DenseBase::sum() */ * \sa DenseBase::sum() */
const typename ReturnType<ei_member_sum>::Type sum() const const typename ReturnType<internal::member_sum>::Type sum() const
{ return _expression(); } { return _expression(); }
/** \returns a row (or column) vector expression of the mean /** \returns a row (or column) vector expression of the mean
* of each column (or row) of the referenced expression. * of each column (or row) of the referenced expression.
* *
* \sa DenseBase::mean() */ * \sa DenseBase::mean() */
const typename ReturnType<ei_member_mean>::Type mean() const const typename ReturnType<internal::member_mean>::Type mean() const
{ return _expression(); } { return _expression(); }
/** \returns a row (or column) vector expression representing /** \returns a row (or column) vector expression representing
* whether \b all coefficients of each respective column (or row) are \c true. * whether \b all coefficients of each respective column (or row) are \c true.
* *
* \sa DenseBase::all() */ * \sa DenseBase::all() */
const typename ReturnType<ei_member_all>::Type all() const const typename ReturnType<internal::member_all>::Type all() const
{ return _expression(); } { return _expression(); }
/** \returns a row (or column) vector expression representing /** \returns a row (or column) vector expression representing
* whether \b at \b least one coefficient of each respective column (or row) is \c true. * whether \b at \b least one coefficient of each respective column (or row) is \c true.
* *
* \sa DenseBase::any() */ * \sa DenseBase::any() */
const typename ReturnType<ei_member_any>::Type any() const const typename ReturnType<internal::member_any>::Type any() const
{ return _expression(); } { return _expression(); }
/** \returns a row (or column) vector expression representing /** \returns a row (or column) vector expression representing
@@ -361,7 +368,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_count.out * Output: \verbinclude PartialRedux_count.out
* *
* \sa DenseBase::count() */ * \sa DenseBase::count() */
const PartialReduxExpr<ExpressionType, ei_member_count<Index>, Direction> count() const const PartialReduxExpr<ExpressionType, internal::member_count<Index>, Direction> count() const
{ return _expression(); } { return _expression(); }
/** \returns a row (or column) vector expression of the product /** \returns a row (or column) vector expression of the product
@@ -371,7 +378,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_prod.out * Output: \verbinclude PartialRedux_prod.out
* *
* \sa DenseBase::prod() */ * \sa DenseBase::prod() */
const typename ReturnType<ei_member_prod>::Type prod() const const typename ReturnType<internal::member_prod>::Type prod() const
{ return _expression(); } { return _expression(); }
@@ -411,7 +418,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
ExpressionType& operator=(const DenseBase<OtherDerived>& other) ExpressionType& operator=(const DenseBase<OtherDerived>& other)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
//ei_assert((m_matrix.isNull()) == (other.isNull())); FIXME //eigen_assert((m_matrix.isNull()) == (other.isNull())); FIXME
for(Index j=0; j<subVectors(); ++j) for(Index j=0; j<subVectors(); ++j)
subVector(j) = other; subVector(j) = other;
return const_cast<ExpressionType&>(m_matrix); return const_cast<ExpressionType&>(m_matrix);
@@ -438,10 +445,10 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
} }
/** Returns the expression of the sum of the vector \a other to each subvector of \c *this */ /** Returns the expression of the sum of the vector \a other to each subvector of \c *this */
template<typename OtherDerived> template<typename OtherDerived> EIGEN_STRONG_INLINE
CwiseBinaryOp<ei_scalar_sum_op<Scalar>, CwiseBinaryOp<internal::scalar_sum_op<Scalar>,
ExpressionType, const ExpressionTypeNestedCleaned,
typename ExtendedType<OtherDerived>::Type> const typename ExtendedType<OtherDerived>::Type>
operator+(const DenseBase<OtherDerived>& other) const operator+(const DenseBase<OtherDerived>& other) const
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived); EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
@@ -450,9 +457,9 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Returns the expression of the difference between each subvector of \c *this and the vector \a other */ /** Returns the expression of the difference between each subvector of \c *this and the vector \a other */
template<typename OtherDerived> template<typename OtherDerived>
CwiseBinaryOp<ei_scalar_difference_op<Scalar>, CwiseBinaryOp<internal::scalar_difference_op<Scalar>,
ExpressionType, const ExpressionTypeNestedCleaned,
typename ExtendedType<OtherDerived>::Type> const typename ExtendedType<OtherDerived>::Type>
operator-(const DenseBase<OtherDerived>& other) const operator-(const DenseBase<OtherDerived>& other) const
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived); EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
@@ -461,30 +468,32 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/////////// Geometry module /////////// /////////// Geometry module ///////////
const Homogeneous<ExpressionType,Direction> homogeneous() const; #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
Homogeneous<ExpressionType,Direction> homogeneous() const;
#endif
typedef typename ExpressionType::PlainObject CrossReturnType; typedef typename ExpressionType::PlainObject CrossReturnType;
template<typename OtherDerived> template<typename OtherDerived>
const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const; const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const;
enum { enum {
HNormalized_Size = Direction==Vertical ? ei_traits<ExpressionType>::RowsAtCompileTime HNormalized_Size = Direction==Vertical ? internal::traits<ExpressionType>::RowsAtCompileTime
: ei_traits<ExpressionType>::ColsAtCompileTime, : internal::traits<ExpressionType>::ColsAtCompileTime,
HNormalized_SizeMinusOne = HNormalized_Size==Dynamic ? Dynamic : HNormalized_Size-1 HNormalized_SizeMinusOne = HNormalized_Size==Dynamic ? Dynamic : HNormalized_Size-1
}; };
typedef Block<ExpressionType, typedef Block<const ExpressionType,
Direction==Vertical ? int(HNormalized_SizeMinusOne) Direction==Vertical ? int(HNormalized_SizeMinusOne)
: int(ei_traits<ExpressionType>::RowsAtCompileTime), : int(internal::traits<ExpressionType>::RowsAtCompileTime),
Direction==Horizontal ? int(HNormalized_SizeMinusOne) Direction==Horizontal ? int(HNormalized_SizeMinusOne)
: int(ei_traits<ExpressionType>::ColsAtCompileTime)> : int(internal::traits<ExpressionType>::ColsAtCompileTime)>
HNormalized_Block; HNormalized_Block;
typedef Block<ExpressionType, typedef Block<const ExpressionType,
Direction==Vertical ? 1 : int(ei_traits<ExpressionType>::RowsAtCompileTime), Direction==Vertical ? 1 : int(internal::traits<ExpressionType>::RowsAtCompileTime),
Direction==Horizontal ? 1 : int(ei_traits<ExpressionType>::ColsAtCompileTime)> Direction==Horizontal ? 1 : int(internal::traits<ExpressionType>::ColsAtCompileTime)>
HNormalized_Factors; HNormalized_Factors;
typedef CwiseBinaryOp<ei_scalar_quotient_op<typename ei_traits<ExpressionType>::Scalar>, typedef CwiseBinaryOp<internal::scalar_quotient_op<typename internal::traits<ExpressionType>::Scalar>,
HNormalized_Block, const HNormalized_Block,
Replicate<HNormalized_Factors, const Replicate<HNormalized_Factors,
Direction==Vertical ? HNormalized_SizeMinusOne : 1, Direction==Vertical ? HNormalized_SizeMinusOne : 1,
Direction==Horizontal ? HNormalized_SizeMinusOne : 1> > Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >
HNormalizedReturnType; HNormalizedReturnType;
@@ -503,7 +512,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* \sa rowwise(), class VectorwiseOp * \sa rowwise(), class VectorwiseOp
*/ */
template<typename Derived> template<typename Derived>
inline const VectorwiseOp<Derived,Vertical> inline const typename DenseBase<Derived>::ConstColwiseReturnType
DenseBase<Derived>::colwise() const DenseBase<Derived>::colwise() const
{ {
return derived(); return derived();
@@ -514,7 +523,7 @@ DenseBase<Derived>::colwise() const
* \sa rowwise(), class VectorwiseOp * \sa rowwise(), class VectorwiseOp
*/ */
template<typename Derived> template<typename Derived>
inline VectorwiseOp<Derived,Vertical> inline typename DenseBase<Derived>::ColwiseReturnType
DenseBase<Derived>::colwise() DenseBase<Derived>::colwise()
{ {
return derived(); return derived();
@@ -528,7 +537,7 @@ DenseBase<Derived>::colwise()
* \sa colwise(), class VectorwiseOp * \sa colwise(), class VectorwiseOp
*/ */
template<typename Derived> template<typename Derived>
inline const VectorwiseOp<Derived,Horizontal> inline const typename DenseBase<Derived>::ConstRowwiseReturnType
DenseBase<Derived>::rowwise() const DenseBase<Derived>::rowwise() const
{ {
return derived(); return derived();
@@ -539,7 +548,7 @@ DenseBase<Derived>::rowwise() const
* \sa colwise(), class VectorwiseOp * \sa colwise(), class VectorwiseOp
*/ */
template<typename Derived> template<typename Derived>
inline VectorwiseOp<Derived,Horizontal> inline typename DenseBase<Derived>::RowwiseReturnType
DenseBase<Derived>::rowwise() DenseBase<Derived>::rowwise()
{ {
return derived(); return derived();

View File

@@ -25,8 +25,10 @@
#ifndef EIGEN_VISITOR_H #ifndef EIGEN_VISITOR_H
#define EIGEN_VISITOR_H #define EIGEN_VISITOR_H
namespace internal {
template<typename Visitor, typename Derived, int UnrollCount> template<typename Visitor, typename Derived, int UnrollCount>
struct ei_visitor_impl struct visitor_impl
{ {
enum { enum {
col = (UnrollCount-1) / Derived::RowsAtCompileTime, col = (UnrollCount-1) / Derived::RowsAtCompileTime,
@@ -35,13 +37,13 @@ struct ei_visitor_impl
inline static void run(const Derived &mat, Visitor& visitor) inline static void run(const Derived &mat, Visitor& visitor)
{ {
ei_visitor_impl<Visitor, Derived, UnrollCount-1>::run(mat, visitor); visitor_impl<Visitor, Derived, UnrollCount-1>::run(mat, visitor);
visitor(mat.coeff(row, col), row, col); visitor(mat.coeff(row, col), row, col);
} }
}; };
template<typename Visitor, typename Derived> template<typename Visitor, typename Derived>
struct ei_visitor_impl<Visitor, Derived, 1> struct visitor_impl<Visitor, Derived, 1>
{ {
inline static void run(const Derived &mat, Visitor& visitor) inline static void run(const Derived &mat, Visitor& visitor)
{ {
@@ -50,7 +52,7 @@ struct ei_visitor_impl<Visitor, Derived, 1>
}; };
template<typename Visitor, typename Derived> template<typename Visitor, typename Derived>
struct ei_visitor_impl<Visitor, Derived, Dynamic> struct visitor_impl<Visitor, Derived, Dynamic>
{ {
typedef typename Derived::Index Index; typedef typename Derived::Index Index;
inline static void run(const Derived& mat, Visitor& visitor) inline static void run(const Derived& mat, Visitor& visitor)
@@ -64,6 +66,7 @@ struct ei_visitor_impl<Visitor, Derived, Dynamic>
} }
}; };
} // end namespace internal
/** Applies the visitor \a visitor to the whole coefficients of the matrix or vector. /** Applies the visitor \a visitor to the whole coefficients of the matrix or vector.
* *
@@ -88,19 +91,21 @@ void DenseBase<Derived>::visit(Visitor& visitor) const
{ {
enum { unroll = SizeAtCompileTime != Dynamic enum { unroll = SizeAtCompileTime != Dynamic
&& CoeffReadCost != Dynamic && CoeffReadCost != Dynamic
&& (SizeAtCompileTime == 1 || ei_functor_traits<Visitor>::Cost != Dynamic) && (SizeAtCompileTime == 1 || internal::functor_traits<Visitor>::Cost != Dynamic)
&& SizeAtCompileTime * CoeffReadCost + (SizeAtCompileTime-1) * ei_functor_traits<Visitor>::Cost && SizeAtCompileTime * CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost
<= EIGEN_UNROLLING_LIMIT }; <= EIGEN_UNROLLING_LIMIT };
return ei_visitor_impl<Visitor, Derived, return internal::visitor_impl<Visitor, Derived,
unroll ? int(SizeAtCompileTime) : Dynamic unroll ? int(SizeAtCompileTime) : Dynamic
>::run(derived(), visitor); >::run(derived(), visitor);
} }
namespace internal {
/** \internal /** \internal
* \brief Base class to implement min and max visitors * \brief Base class to implement min and max visitors
*/ */
template <typename Derived> template <typename Derived>
struct ei_coeff_visitor struct coeff_visitor
{ {
typedef typename Derived::Index Index; typedef typename Derived::Index Index;
typedef typename Derived::Scalar Scalar; typedef typename Derived::Scalar Scalar;
@@ -120,7 +125,7 @@ struct ei_coeff_visitor
* \sa DenseBase::minCoeff(Index*, Index*) * \sa DenseBase::minCoeff(Index*, Index*)
*/ */
template <typename Derived> template <typename Derived>
struct ei_min_coeff_visitor : ei_coeff_visitor<Derived> struct min_coeff_visitor : coeff_visitor<Derived>
{ {
typedef typename Derived::Index Index; typedef typename Derived::Index Index;
typedef typename Derived::Scalar Scalar; typedef typename Derived::Scalar Scalar;
@@ -136,7 +141,7 @@ struct ei_min_coeff_visitor : ei_coeff_visitor<Derived>
}; };
template<typename Scalar> template<typename Scalar>
struct ei_functor_traits<ei_min_coeff_visitor<Scalar> > { struct functor_traits<min_coeff_visitor<Scalar> > {
enum { enum {
Cost = NumTraits<Scalar>::AddCost Cost = NumTraits<Scalar>::AddCost
}; };
@@ -148,7 +153,7 @@ struct ei_functor_traits<ei_min_coeff_visitor<Scalar> > {
* \sa DenseBase::maxCoeff(Index*, Index*) * \sa DenseBase::maxCoeff(Index*, Index*)
*/ */
template <typename Derived> template <typename Derived>
struct ei_max_coeff_visitor : ei_coeff_visitor<Derived> struct max_coeff_visitor : coeff_visitor<Derived>
{ {
typedef typename Derived::Index Index; typedef typename Derived::Index Index;
typedef typename Derived::Scalar Scalar; typedef typename Derived::Scalar Scalar;
@@ -164,22 +169,25 @@ struct ei_max_coeff_visitor : ei_coeff_visitor<Derived>
}; };
template<typename Scalar> template<typename Scalar>
struct ei_functor_traits<ei_max_coeff_visitor<Scalar> > { struct functor_traits<max_coeff_visitor<Scalar> > {
enum { enum {
Cost = NumTraits<Scalar>::AddCost Cost = NumTraits<Scalar>::AddCost
}; };
}; };
} // end namespace internal
/** \returns the minimum of all coefficients of *this /** \returns the minimum of all coefficients of *this
* and puts in *row and *col its location. * and puts in *row and *col its location.
* *
* \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::minCoeff() * \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::minCoeff()
*/ */
template<typename Derived> template<typename Derived>
typename ei_traits<Derived>::Scalar template<typename IndexType>
DenseBase<Derived>::minCoeff(Index* row, Index* col) const typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff(IndexType* row, IndexType* col) const
{ {
ei_min_coeff_visitor<Derived> minVisitor; internal::min_coeff_visitor<Derived> minVisitor;
this->visit(minVisitor); this->visit(minVisitor);
*row = minVisitor.row; *row = minVisitor.row;
if (col) *col = minVisitor.col; if (col) *col = minVisitor.col;
@@ -189,14 +197,15 @@ DenseBase<Derived>::minCoeff(Index* row, Index* col) const
/** \returns the minimum of all coefficients of *this /** \returns the minimum of all coefficients of *this
* and puts in *index its location. * and puts in *index its location.
* *
* \sa DenseBase::minCoeff(Index*,Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::minCoeff() * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::minCoeff()
*/ */
template<typename Derived> template<typename Derived>
typename ei_traits<Derived>::Scalar template<typename IndexType>
DenseBase<Derived>::minCoeff(Index* index) const typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff(IndexType* index) const
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
ei_min_coeff_visitor<Derived> minVisitor; internal::min_coeff_visitor<Derived> minVisitor;
this->visit(minVisitor); this->visit(minVisitor);
*index = (RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row; *index = (RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row;
return minVisitor.res; return minVisitor.res;
@@ -205,13 +214,14 @@ DenseBase<Derived>::minCoeff(Index* index) const
/** \returns the maximum of all coefficients of *this /** \returns the maximum of all coefficients of *this
* and puts in *row and *col its location. * and puts in *row and *col its location.
* *
* \sa DenseBase::minCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::maxCoeff() * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
*/ */
template<typename Derived> template<typename Derived>
typename ei_traits<Derived>::Scalar template<typename IndexType>
DenseBase<Derived>::maxCoeff(Index* row, Index* col) const typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff(IndexType* row, IndexType* col) const
{ {
ei_max_coeff_visitor<Derived> maxVisitor; internal::max_coeff_visitor<Derived> maxVisitor;
this->visit(maxVisitor); this->visit(maxVisitor);
*row = maxVisitor.row; *row = maxVisitor.row;
if (col) *col = maxVisitor.col; if (col) *col = maxVisitor.col;
@@ -221,14 +231,15 @@ DenseBase<Derived>::maxCoeff(Index* row, Index* col) const
/** \returns the maximum of all coefficients of *this /** \returns the maximum of all coefficients of *this
* and puts in *index its location. * and puts in *index its location.
* *
* \sa DenseBase::maxCoeff(Index*,Index*), DenseBase::minCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::maxCoeff() * \sa DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
*/ */
template<typename Derived> template<typename Derived>
typename ei_traits<Derived>::Scalar template<typename IndexType>
DenseBase<Derived>::maxCoeff(Index* index) const typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff(IndexType* index) const
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
ei_max_coeff_visitor<Derived> maxVisitor; internal::max_coeff_visitor<Derived> maxVisitor;
this->visit(maxVisitor); this->visit(maxVisitor);
*index = (RowsAtCompileTime==1) ? maxVisitor.col : maxVisitor.row; *index = (RowsAtCompileTime==1) ? maxVisitor.col : maxVisitor.row;
return maxVisitor.res; return maxVisitor.res;

View File

@@ -0,0 +1,228 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_COMPLEX_ALTIVEC_H
#define EIGEN_COMPLEX_ALTIVEC_H
namespace internal {
// Bit mask and byte-permutation masks used by the complex<float> packet
// operations in this file. Each one is assembled from vector intrinsics; the
// trailing comment on every line spells out the resulting constant.
// XOR mask with only the sign bit set in 32-bit lanes 1 and 3 (used by pconj and pmul).
static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
// vec_perm mask copying 32-bit lane 0 over lanes 0-1 and lane 2 over lanes 2-3.
static Packet16uc p16uc_COMPLEX_RE = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
// vec_perm mask copying 32-bit lane 1 over lanes 0-1 and lane 3 over lanes 2-3.
static Packet16uc p16uc_COMPLEX_IM = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
// vec_perm mask swapping the two 32-bit lanes inside each 64-bit half.
static Packet16uc p16uc_COMPLEX_REV = vec_sld(p16uc_REVERSE, p16uc_REVERSE, 8);//{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
// vec_perm mask swapping the two 64-bit halves.
static Packet16uc p16uc_COMPLEX_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
// vec_perm mask replicating bytes 0-7 into both 64-bit halves (used by pset1).
static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 1));//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
// vec_perm mask replicating bytes 8-15 into both 64-bit halves.
static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 2), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 3));//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
//---------- float ----------
// Packet holding two std::complex<float> values inside a single Packet4f.
struct Packet2cf
{
  // Underlying four-float vector.
  Packet4f v;

  // Default construction leaves v uninitialized.
  EIGEN_STRONG_INLINE Packet2cf() {}

  // Wraps an existing float packet; explicit, so a Packet4f never converts
  // to a complex packet implicitly.
  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
};
// Packet traits for std::complex<float>: the packet type is Packet2cf and it
// carries two scalars. Enumerators not listed here keep the value inherited
// from default_packet_traits.
template<> struct packet_traits<std::complex<float> > : default_packet_traits
{
  typedef Packet2cf type;

  enum {
    // General packet properties.
    Vectorizable    = 1,
    AlignedOnScalar = 1,
    size            = 2,

    // Operation flags set to 1.
    HasAdd    = 1,
    HasSub    = 1,
    HasMul    = 1,
    HasDiv    = 1,
    HasNegate = 1,

    // Operation flags set to 0.
    HasAbs       = 0,
    HasAbs2      = 0,
    HasMin       = 0,
    HasMax       = 0,
    HasSetLinear = 0
  };
};
// Reverse mapping: a Packet2cf holds two std::complex<float> scalars.
template<> struct unpacket_traits<Packet2cf>
{
  typedef std::complex<float> type;
  enum { size = 2 };
};
// Returns a packet whose two complex slots both hold `from`.
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
  const float* src = (const float *)&from;
  Packet2cf res;
  /* On AltiVec we cannot load 64-bit registers, so we have to take care of alignment:
     a full 16-byte vector is loaded starting at &from, aligned or not. */
  if((ptrdiff_t(&from) % 16) != 0)
    res.v = ploadu<Packet4f>(src);
  else
    res.v = pload<Packet4f>(src);
  // Replicate the first 8 bytes (the value just loaded) into both halves.
  res.v = vec_perm(res.v, res.v, p16uc_PSET_HI);
  return res;
}
// Complex addition is a plain lane-wise float addition.
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4f sum = vec_add(a.v, b.v);
  return Packet2cf(sum);
}
// Complex subtraction is a plain lane-wise float subtraction.
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4f difference = vec_sub(a.v, b.v);
  return Packet2cf(difference);
}
// Negates both complex values by negating the underlying float packet.
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
{
  const Packet4f negated = pnegate(a.v);
  return Packet2cf(negated);
}
// Complex conjugate: XOR with p4ui_CONJ_XOR flips the sign bit of lanes 1 and 3
// (the imaginary parts) and leaves lanes 0 and 2 untouched.
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
{
  const Packet4ui flipped = vec_xor((Packet4ui)a.v, p4ui_CONJ_XOR);
  return Packet2cf((Packet4f)flipped);
}
// Complex multiplication of two packets, one product per complex slot:
//   (a_re*b_re - a_im*b_im, a_re*b_im + a_im*b_re)
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // Broadcast the real part of each complex in a over its (re, im) lane pair...
  const Packet4f a_re = vec_perm(a.v, a.v, p16uc_COMPLEX_RE);
  // ...and do the same with the imaginary part.
  const Packet4f a_im = vec_perm(a.v, a.v, p16uc_COMPLEX_IM);
  // (a_re*b_re, a_re*b_im)
  const Packet4f re_times_b = vec_madd(a_re, b.v, p4f_ZERO);
  // (a_im*b_re, a_im*b_im)
  const Packet4f im_times_b = vec_madd(a_im, b.v, p4f_ZERO);
  // Flip the sign of the second lane of each pair: (a_im*b_re, -a_im*b_im)
  const Packet4f im_times_b_conj = (Packet4f) vec_xor((Packet4ui)im_times_b, p4ui_CONJ_XOR);
  // Swap the lanes of each pair: (-a_im*b_im, a_im*b_re)
  const Packet4f cross_terms = vec_perm(im_times_b_conj, im_times_b_conj, p16uc_COMPLEX_REV);
  return Packet2cf(vec_add(re_times_b, cross_terms));
}
// Bitwise AND of the two underlying vectors.
template<> EIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4f bits = vec_and(a.v, b.v);
  return Packet2cf(bits);
}
// Bitwise OR of the two underlying vectors.
template<> EIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4f bits = vec_or(a.v, b.v);
  return Packet2cf(bits);
}
// Bitwise XOR of the two underlying vectors.
template<> EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4f bits = vec_xor(a.v, b.v);
  return Packet2cf(bits);
}
// Bitwise a AND (NOT b); the complement of b is obtained as nor(b, b).
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4f not_b = vec_nor(b.v, b.v);
  return Packet2cf(vec_and(a.v, not_b));
}
// Aligned load of two complex values: reinterprets the input as four floats
// and forwards to the Packet4f aligned load.
template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  const float* src = (const float*)from;
  return Packet2cf(pload<Packet4f>(src));
}
// Unaligned load of two complex values: reinterprets the input as four floats
// and forwards to the Packet4f unaligned load.
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from)
{
  EIGEN_DEBUG_UNALIGNED_LOAD
  const float* src = (const float*)from;
  return Packet2cf(ploadu<Packet4f>(src));
}
// Loads the single complex value at `from` and duplicates it into both slots.
// With only two slots per packet this is the same as pset1.
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)
{
  // Bind by reference: pset1 inspects the address of its argument.
  const std::complex<float>& value = *from;
  return pset1<Packet2cf>(value);
}
// Aligned store of two complex values: forwards to the float pstore on the
// underlying vector.
template<> EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float> * to, const Packet2cf& from)
{
  EIGEN_DEBUG_ALIGNED_STORE
  float* dst = (float*)to;
  pstore(dst, from.v);
}
// Unaligned store of two complex values: forwards to the float pstoreu on the
// underlying vector.
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from)
{
  EIGEN_DEBUG_UNALIGNED_STORE
  float* dst = (float*)to;
  pstoreu(dst, from.v);
}
// Issues a vec_dstt data-stream hint starting at addr, on channel DST_CHAN,
// with the control word built by DST_CTRL(2,2,32) (size 2, count 2, stride 32).
// NOTE(review): the units of size/stride are defined by the AltiVec data-stream
// instructions — confirm against the PIM before changing these numbers.
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { vec_dstt((float *)addr, DST_CTRL(2,2,32), DST_CHAN); }
// Returns the first complex value of the packet by spilling the whole vector
// to an aligned two-element buffer and reading slot 0.
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
  std::complex<float> EIGEN_ALIGN16 slots[2];
  float* raw = (float *)&slots;
  pstore(raw, a.v);
  return slots[0];
}
// Reverses the order of the two complex values by swapping the 64-bit halves.
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
{
  return Packet2cf(vec_perm(a.v, a.v, p16uc_COMPLEX_REV2));
}
// Horizontal sum: returns the sum of the two complex values held in a.
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
{
  // Rotate by 8 bytes so that the two complex values trade places.
  const Packet4f swapped = (Packet4f) vec_sld(a.v, a.v, 8);
  // Both slots now hold a0 + a1; the first one is returned.
  const Packet4f total = padd(a.v, swapped);
  return pfirst(Packet2cf(total));
}
// Reduces two packets at once: slot j of the result is the sum of the two
// complex values stored in vecs[j].
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
{
  // (vecs[0] slot 1, vecs[1] slot 0)
  const Packet4f mixed = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8);
  // (vecs[1] slot 1, vecs[0] slot 0)
  const Packet4f crossed = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8);
  // (vecs[0] slot 0, vecs[1] slot 1)
  const Packet4f uncrossed = (Packet4f) vec_sld(crossed, crossed, 8);
  return Packet2cf(padd(mixed, uncrossed));
}
// Horizontal product: returns the product of the two complex values held in a.
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
{
  // Packet with the two complex values of a in swapped order.
  const Packet2cf swapped((Packet4f) vec_sld(a.v, a.v, 8));
  // Slot 0 of the complex product is a0 * a1.
  return pfirst(pmul(a, swapped));
}
// Alignment helper for Packet2cf. With Offset==1, `first` becomes
// (first slot 1, second slot 0); any other Offset leaves `first` unchanged.
template<int Offset>
struct palign_impl<Offset,Packet2cf>
{
  EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
  {
    if (Offset!=1)
      return;
    // Shift the concatenation first||second left by one complex (8 bytes).
    first.v = vec_sld(first.v, second.v, 8);
  }
};
// conj_helper<false,true>: the right-hand operand is conjugated.
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
  // x * conj(y) + c
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    const Packet2cf product = pmul(x,y);
    return padd(product,c);
  }

  // a * conj(b)
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  {
    const Packet2cf conj_b = pconj(b);
    return internal::pmul(a, conj_b);
  }
};
// conj_helper<true,false>: the left-hand operand is conjugated.
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
{
  // conj(x) * y + c
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    const Packet2cf product = pmul(x,y);
    return padd(product,c);
  }

  // conj(a) * b
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  {
    const Packet2cf conj_a = pconj(a);
    return internal::pmul(conj_a, b);
  }
};
// conj_helper<true,true>: both operands are conjugated, computed as the
// conjugate of the plain product.
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
{
  // conj(x * y) + c
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    const Packet2cf product = pmul(x,y);
    return padd(product,c);
  }

  // conj(a * b)
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  {
    const Packet2cf plain_product = internal::pmul(a, b);
    return pconj(plain_product);
  }
};
// Complex division, computed per slot as a * conj(b) / (b_re^2 + b_im^2).
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // TODO optimize it for AltiVec
  // Numerator: a * conj(b).
  Packet2cf numerator = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
  // Lane-wise squares of b: (b_re^2, b_im^2).
  Packet4f squares = vec_madd(b.v, b.v, p4f_ZERO);
  // Add the lane-swapped squares so every lane holds b_re^2 + b_im^2.
  const Packet4f norm2 = vec_add(squares, vec_perm(squares, squares, p16uc_COMPLEX_REV));
  return Packet2cf(pdiv(numerator.v, norm2));
}
// Swaps the real and imaginary parts of each complex value in x.
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
{
  const Packet4f flipped = vec_perm(x.v, x.v, p16uc_COMPLEX_REV);
  return Packet2cf(flipped);
}
} // end namespace internal
#endif // EIGEN_COMPLEX_ALTIVEC_H

View File

@@ -25,6 +25,8 @@
#ifndef EIGEN_PACKET_MATH_ALTIVEC_H #ifndef EIGEN_PACKET_MATH_ALTIVEC_H
#define EIGEN_PACKET_MATH_ALTIVEC_H #define EIGEN_PACKET_MATH_ALTIVEC_H
namespace internal {
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
#endif #endif
@@ -33,10 +35,6 @@
#define EIGEN_HAS_FUSE_CJMADD 1 #define EIGEN_HAS_FUSE_CJMADD 1
#endif #endif
#ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE
#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE 8*256*256
#endif
// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16 // NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
@@ -53,40 +51,47 @@ typedef __vector unsigned char Packet16uc;
// and it doesn't really work to declare them global, so we define macros instead // and it doesn't really work to declare them global, so we define macros instead
#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \ #define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
Packet4f ei_p4f_##NAME = (Packet4f) vec_splat_s32(X) Packet4f p4f_##NAME = (Packet4f) vec_splat_s32(X)
#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \ #define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
Packet4i ei_p4i_##NAME = vec_splat_s32(X) Packet4i p4i_##NAME = vec_splat_s32(X)
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
Packet4f ei_p4f_##NAME = ei_pset1<float>(X) Packet4f p4f_##NAME = pset1<Packet4f>(X)
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
Packet4f ei_p4f_##NAME = vreinterpretq_f32_u32(ei_pset1<int>(X)) Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int>(X))
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
Packet4i ei_p4i_##NAME = ei_pset1<int>(X) Packet4i p4i_##NAME = pset1<Packet4i>(X)
#define DST_CHAN 1 #define DST_CHAN 1
#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride)) #define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
// Define global static constants: // Define global static constants:
static Packet4f ei_p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 }; static Packet4f p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 };
static Packet4i ei_p4i_COUNTDOWN = { 3, 2, 1, 0 }; static Packet4i p4i_COUNTDOWN = { 3, 2, 1, 0 };
static Packet16uc ei_p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; static Packet16uc p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
static Packet16uc p16uc_DUPLICATE = {0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7};
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0);
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0);
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1);
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16);
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1);
static Packet4f ei_p4f_ONE = vec_ctf(ei_p4i_ONE, 0); static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0);
static Packet4f ei_p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)ei_p4i_MINUS1, (Packet4ui)ei_p4i_MINUS1); static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1);
template<> struct ei_packet_traits<float> : ei_default_packet_traits template<> struct packet_traits<float> : default_packet_traits
{ {
typedef Packet4f type; enum {size=4}; typedef Packet4f type;
enum { enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=4,
// FIXME check the Has*
HasSin = 0, HasSin = 0,
HasCos = 0, HasCos = 0,
HasLog = 0, HasLog = 0,
@@ -94,11 +99,19 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
HasSqrt = 0 HasSqrt = 0
}; };
}; };
template<> struct ei_packet_traits<int> : ei_default_packet_traits template<> struct packet_traits<int> : default_packet_traits
{ typedef Packet4i type; enum {size=4}; }; {
typedef Packet4i type;
enum {
// FIXME check the Has*
Vectorizable = 1,
AlignedOnScalar = 1,
size=4
};
};
template<> struct ei_unpacket_traits<Packet4f> { typedef float type; enum {size=4}; }; template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size=4}; }; template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
/* /*
inline std::ostream & operator <<(std::ostream & s, const Packet4f & v) inline std::ostream & operator <<(std::ostream & s, const Packet4f & v)
{ {
@@ -144,7 +157,7 @@ inline std::ostream & operator <<(std::ostream & s, const Packetbi & v)
return s; return s;
} }
*/ */
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
float EIGEN_ALIGN16 af[4]; float EIGEN_ALIGN16 af[4];
af[0] = from; af[0] = from;
@@ -153,7 +166,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) {
return vc; return vc;
} }
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) { template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
int EIGEN_ALIGN16 ai[4]; int EIGEN_ALIGN16 ai[4];
ai[0] = from; ai[0] = from;
Packet4i vc = vec_ld(0, ai); Packet4i vc = vec_ld(0, ai);
@@ -161,22 +174,22 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) {
return vc; return vc;
} }
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) { return vec_add(ei_pset1(a), ei_p4f_COUNTDOWN); } template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return vec_add(pset1<Packet4f>(a), p4f_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) { return vec_add(ei_pset1(a), ei_p4i_COUNTDOWN); } template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return vec_add(pset1<Packet4i>(a), p4i_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); } template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); } template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_sub(a,b); } template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_sub(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_sub(a,b); } template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_sub(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pnegate(const Packet4f& a) { return ei_psub<Packet4f>(ei_p4f_ZERO, a); } template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return psub<Packet4f>(p4f_ZERO, a); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pnegate(const Packet4i& a) { return ei_psub<Packet4i>(ei_p4i_ZERO, a); } template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return psub<Packet4i>(p4i_ZERO, a); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b,ei_p4f_ZERO); } template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b,p4f_ZERO); }
/* Commented out: it's actually slower than processing it scalar /* Commented out: it's actually slower than processing it scalar
* *
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b) template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
{ {
// Detailed in: http://freevec.org/content/32bit_signed_integer_multiplication_altivec // Detailed in: http://freevec.org/content/32bit_signed_integer_multiplication_altivec
//Set up constants, variables //Set up constants, variables
@@ -187,21 +200,21 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, con
b1 = vec_abs(b); b1 = vec_abs(b);
// Get the signs using xor // Get the signs using xor
Packet4bi sgn = (Packet4bi) vec_cmplt(vec_xor(a, b), ei_p4i_ZERO); Packet4bi sgn = (Packet4bi) vec_cmplt(vec_xor(a, b), p4i_ZERO);
// Do the multiplication for the asbolute values. // Do the multiplication for the asbolute values.
bswap = (Packet4i) vec_rl((Packet4ui) b1, (Packet4ui) ei_p4i_MINUS16 ); bswap = (Packet4i) vec_rl((Packet4ui) b1, (Packet4ui) p4i_MINUS16 );
low_prod = vec_mulo((Packet8i) a1, (Packet8i)b1); low_prod = vec_mulo((Packet8i) a1, (Packet8i)b1);
high_prod = vec_msum((Packet8i) a1, (Packet8i) bswap, ei_p4i_ZERO); high_prod = vec_msum((Packet8i) a1, (Packet8i) bswap, p4i_ZERO);
high_prod = (Packet4i) vec_sl((Packet4ui) high_prod, (Packet4ui) ei_p4i_MINUS16); high_prod = (Packet4i) vec_sl((Packet4ui) high_prod, (Packet4ui) p4i_MINUS16);
prod = vec_add( low_prod, high_prod ); prod = vec_add( low_prod, high_prod );
// NOR the product and select only the negative elements according to the sign mask // NOR the product and select only the negative elements according to the sign mask
prod_ = vec_nor(prod, prod); prod_ = vec_nor(prod, prod);
prod_ = vec_sel(ei_p4i_ZERO, prod_, sgn); prod_ = vec_sel(p4i_ZERO, prod_, sgn);
// Add 1 to the result to get the negative numbers // Add 1 to the result to get the negative numbers
v1sel = vec_sel(ei_p4i_ZERO, ei_p4i_ONE, sgn); v1sel = vec_sel(p4i_ZERO, p4i_ONE, sgn);
prod_ = vec_add(prod_, v1sel); prod_ = vec_add(prod_, v1sel);
// Merge the results back to the final vector. // Merge the results back to the final vector.
@@ -210,7 +223,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, con
return prod; return prod;
} }
*/ */
template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
{ {
Packet4f t, y_0, y_1, res; Packet4f t, y_0, y_1, res;
@@ -218,45 +231,45 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, con
y_0 = vec_re(b); y_0 = vec_re(b);
// Do one Newton-Raphson iteration to get the needed accuracy // Do one Newton-Raphson iteration to get the needed accuracy
t = vec_nmsub(y_0, b, ei_p4f_ONE); t = vec_nmsub(y_0, b, p4f_ONE);
y_1 = vec_madd(y_0, t, y_0); y_1 = vec_madd(y_0, t, y_0);
res = vec_madd(a, y_1, ei_p4f_ZERO); res = vec_madd(a, y_1, p4f_ZERO);
return res; return res;
} }
template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/) template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
{ ei_assert(false && "packet integer division are not supported by AltiVec"); { eigen_assert(false && "packet integer division are not supported by AltiVec");
return ei_pset1<int>(0); return pset1<Packet4i>(0);
} }
// for some weird raisons, it has to be overloaded for packet of integers // for some weird raisons, it has to be overloaded for packet of integers
template<> EIGEN_STRONG_INLINE Packet4f ei_pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a, b, c); } template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a, b, c); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return ei_padd(ei_pmul(a,b), c); } template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); } template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics // Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics
template<> EIGEN_STRONG_INLINE Packet4f ei_pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); } template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_por<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_or(a, b); } template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_or(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); } template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); } template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); } template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); } template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
{ {
EIGEN_DEBUG_ALIGNED_LOAD EIGEN_DEBUG_ALIGNED_LOAD
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
@@ -268,7 +281,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from)
return (Packet4f) vec_perm(MSQ, LSQ, mask); // align the data return (Packet4f) vec_perm(MSQ, LSQ, mask); // align the data
} }
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
{ {
EIGEN_DEBUG_ALIGNED_LOAD EIGEN_DEBUG_ALIGNED_LOAD
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
@@ -280,10 +293,25 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from)
return (Packet4i) vec_perm(MSQ, LSQ, mask); // align the data return (Packet4i) vec_perm(MSQ, LSQ, mask); // align the data
} }
template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } {
Packet4f p;
if((ptrdiff_t(&from) % 16) == 0) p = pload<Packet4f>(from);
else p = ploadu<Packet4f>(from);
return vec_perm(p, p, p16uc_DUPLICATE);
}
// Loads a packet from `from` (aligned or not) and permutes it with
// p16uc_DUPLICATE.
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
{
  Packet4i p;
  // The alignment test must look at the address stored in `from`, not at the
  // address of the pointer parameter itself (`&from`): otherwise an unaligned
  // buffer may be routed to the aligned load.
  if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from);
  else p = ploadu<Packet4i>(from);
  return vec_perm(p, p, p16uc_DUPLICATE);
}
template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f& from) template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
{ {
EIGEN_DEBUG_UNALIGNED_STORE EIGEN_DEBUG_UNALIGNED_STORE
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
@@ -301,7 +329,7 @@ template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f
vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first
vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part
} }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i& from) template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
{ {
EIGEN_DEBUG_UNALIGNED_STORE EIGEN_DEBUG_UNALIGNED_STORE
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
@@ -320,29 +348,29 @@ template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i
vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part
} }
template<> EIGEN_STRONG_INLINE void ei_prefetch<float>(const float* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); } template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
template<> EIGEN_STRONG_INLINE void ei_prefetch<int>(const int* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); } template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; } template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; } template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, ei_p16uc_REVERSE); } template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); }
template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, ei_p16uc_REVERSE); } template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pabs(const Packet4f& a) { return vec_abs(a); } template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a) { return vec_abs(a); } template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a) template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
{ {
Packet4f b, sum; Packet4f b, sum;
b = (Packet4f) vec_sld(a, a, 8); b = (Packet4f) vec_sld(a, a, 8);
sum = vec_add(a, b); sum = vec_add(a, b);
b = (Packet4f) vec_sld(sum, sum, 4); b = (Packet4f) vec_sld(sum, sum, 4);
sum = vec_add(sum, b); sum = vec_add(sum, b);
return ei_pfirst(sum); return pfirst(sum);
} }
template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs) template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
{ {
Packet4f v[4], sum[4]; Packet4f v[4], sum[4];
@@ -370,15 +398,15 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vec
return sum[0]; return sum[0];
} }
template<> EIGEN_STRONG_INLINE int ei_predux<Packet4i>(const Packet4i& a) template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
{ {
Packet4i sum; Packet4i sum;
sum = vec_sums(a, ei_p4i_ZERO); sum = vec_sums(a, p4i_ZERO);
sum = vec_sld(sum, ei_p4i_ZERO, 12); sum = vec_sld(sum, p4i_ZERO, 12);
return ei_pfirst(sum); return pfirst(sum);
} }
template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vecs) template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
{ {
Packet4i v[4], sum[4]; Packet4i v[4], sum[4];
@@ -408,56 +436,56 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vec
// Other reduction functions: // Other reduction functions:
// mul // mul
template<> EIGEN_STRONG_INLINE float ei_predux_mul<Packet4f>(const Packet4f& a) template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
{ {
Packet4f prod; Packet4f prod;
prod = ei_pmul(a, (Packet4f)vec_sld(a, a, 8)); prod = pmul(a, (Packet4f)vec_sld(a, a, 8));
return ei_pfirst(ei_pmul(prod, (Packet4f)vec_sld(prod, prod, 4))); return pfirst(pmul(prod, (Packet4f)vec_sld(prod, prod, 4)));
} }
template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a) template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
{ {
EIGEN_ALIGN16 int aux[4]; EIGEN_ALIGN16 int aux[4];
ei_pstore(aux, a); pstore(aux, a);
return aux[0] * aux[1] * aux[2] * aux[3]; return aux[0] * aux[1] * aux[2] * aux[3];
} }
// min // min
template<> EIGEN_STRONG_INLINE float ei_predux_min<Packet4f>(const Packet4f& a) template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
{ {
Packet4f b, res; Packet4f b, res;
b = vec_min(a, vec_sld(a, a, 8)); b = vec_min(a, vec_sld(a, a, 8));
res = vec_min(b, vec_sld(b, b, 4)); res = vec_min(b, vec_sld(b, b, 4));
return ei_pfirst(res); return pfirst(res);
} }
template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a) template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
{ {
Packet4i b, res; Packet4i b, res;
b = vec_min(a, vec_sld(a, a, 8)); b = vec_min(a, vec_sld(a, a, 8));
res = vec_min(b, vec_sld(b, b, 4)); res = vec_min(b, vec_sld(b, b, 4));
return ei_pfirst(res); return pfirst(res);
} }
// max // max
template<> EIGEN_STRONG_INLINE float ei_predux_max<Packet4f>(const Packet4f& a) template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
{ {
Packet4f b, res; Packet4f b, res;
b = vec_max(a, vec_sld(a, a, 8)); b = vec_max(a, vec_sld(a, a, 8));
res = vec_max(b, vec_sld(b, b, 4)); res = vec_max(b, vec_sld(b, b, 4));
return ei_pfirst(res); return pfirst(res);
} }
template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a) template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
{ {
Packet4i b, res; Packet4i b, res;
b = vec_max(a, vec_sld(a, a, 8)); b = vec_max(a, vec_sld(a, a, 8));
res = vec_max(b, vec_sld(b, b, 4)); res = vec_max(b, vec_sld(b, b, 4));
return ei_pfirst(res); return pfirst(res);
} }
template<int Offset> template<int Offset>
struct ei_palign_impl<Offset,Packet4f> struct palign_impl<Offset,Packet4f>
{ {
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second) EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
{ {
@@ -467,7 +495,7 @@ struct ei_palign_impl<Offset,Packet4f>
}; };
template<int Offset> template<int Offset>
struct ei_palign_impl<Offset,Packet4i> struct palign_impl<Offset,Packet4i>
{ {
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second) EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
{ {
@@ -475,4 +503,7 @@ struct ei_palign_impl<Offset,Packet4i>
first = vec_sld(first, second, Offset*4); first = vec_sld(first, second, Offset*4);
} }
}; };
} // end namespace internal
#endif // EIGEN_PACKET_MATH_ALTIVEC_H #endif // EIGEN_PACKET_MATH_ALTIVEC_H

View File

@@ -46,15 +46,6 @@
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif #endif
/** Defines the maximal size in Bytes of blocks fitting in CPU cache.
* The current value is set to generate blocks of 256x256 for float
*
* Typically for a single-threaded application you would set that to 25% of the size of your CPU caches in bytes
*/
#ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE
#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (sizeof(float)*512*512)
#endif
/** Defines the maximal width of the blocks used in the triangular product and solver /** Defines the maximal width of the blocks used in the triangular product and solver
* for vectors (level 2 blas xTRMV and xTRSV). The default is 8. * for vectors (level 2 blas xTRMV and xTRSV). The default is 8.
*/ */
@@ -67,12 +58,7 @@
* Currently it must be 8 or 16. Other values will fail. * Currently it must be 8 or 16. Other values will fail.
*/ */
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
#if (defined __i386__)
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8
#else
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
#endif
#endif #endif
#endif // EIGEN_DEFAULT_SETTINGS_H #endif // EIGEN_DEFAULT_SETTINGS_H

View File

@@ -0,0 +1,269 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_COMPLEX_NEON_H
#define EIGEN_COMPLEX_NEON_H
namespace internal {
// XOR masks with only the top bit (0x80000000) set in the odd 32-bit lanes.
// XOR-ing a packet of interleaved (re, im) floats with these masks flips the
// sign bit of lanes 1 and 3 (resp. lane 1), as done by pconj() below.
// 128-bit variant (two complex values).
static uint32x4_t p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
// 64-bit variant (one complex value).
static uint32x2_t p2ui_CONJ_XOR = { 0x00000000, 0x80000000 };
//---------- float ----------
// Thin wrapper giving a distinct type to a Packet4f that is used as a packet
// of two std::complex<float> values.
struct Packet2cf
{
  // Underlying four-float register.
  Packet4f v;

  // Leaves `v` uninitialized.
  EIGEN_STRONG_INLINE Packet2cf() {}
  // Wraps an existing four-float packet.
  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
};
// Vectorization traits of std::complex<float>: packets are Packet2cf and hold
// two complex values.
template<> struct packet_traits<std::complex<float> > : default_packet_traits
{
  typedef Packet2cf type;
  enum {
    Vectorizable = 1,
    size         = 2,

    // operations flagged as available
    HasAdd    = 1,
    HasSub    = 1,
    HasMul    = 1,
    HasDiv    = 1,
    HasNegate = 1,

    // operations flagged as unavailable
    HasAbs       = 0,
    HasAbs2      = 0,
    HasMin       = 0,
    HasMax       = 0,
    HasSetLinear = 0
  };
};
// Reverse mapping: a Packet2cf holds two std::complex<float> scalars.
template<> struct unpacket_traits<Packet2cf>
{
  typedef std::complex<float> type;
  enum { size = 2 };
};
// Broadcasts one complex value into both slots of the packet.
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
  // Load the (re, im) pair as two floats, then duplicate it in both halves.
  const float32x2_t re_im = vld1_f32((float *)&from);
  return Packet2cf(vcombine_f32(re_im, re_im));
}
// Complex addition is the lane-wise float addition of the underlying packets.
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4f lanes = padd<Packet4f>(a.v, b.v);
  return Packet2cf(lanes);
}
// Complex subtraction is the lane-wise float subtraction of the underlying packets.
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4f lanes = psub<Packet4f>(a.v, b.v);
  return Packet2cf(lanes);
}
// Negates both complex values by negating the four underlying floats.
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
{
  const Packet4f lanes = pnegate<Packet4f>(a.v);
  return Packet2cf(lanes);
}
// Complex conjugate: XOR with p4ui_CONJ_XOR flips the sign bit of the odd
// (imaginary) lanes and leaves the even (real) lanes untouched.
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
{
  const Packet4ui bits    = vreinterpretq_u32_f32(a.v);
  const Packet4ui flipped = veorq_u32(bits, p4ui_CONJ_XOR);
  return Packet2cf(vreinterpretq_f32_u32(flipped));
}
// Complex product of two packets, computed per complex value as
//   (a_re*b_re - a_im*b_im, a_re*b_im + a_im*b_re).
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const float32x2_t a_first  = vget_low_f32(a.v);
  const float32x2_t a_second = vget_high_f32(a.v);

  // Real parts of a, broadcast:       | a1_re | a1_re | a2_re | a2_re |
  const Packet4f a_re = vcombine_f32(vdup_lane_f32(a_first, 0), vdup_lane_f32(a_second, 0));
  // Imaginary parts of a, broadcast:  | a1_im | a1_im | a2_im | a2_im |
  const Packet4f a_im = vcombine_f32(vdup_lane_f32(a_first, 1), vdup_lane_f32(a_second, 1));

  // a_re * b  ->  | a_re*b_re | a_re*b_im | ...
  const Packet4f re_term = vmulq_f32(a_re, b.v);
  // a_im * b  ->  | a_im*b_re | a_im*b_im | ...
  Packet4f im_term = vmulq_f32(a_im, b.v);

  // Flip the sign of the odd lanes:  | a_im*b_re | -a_im*b_im | ...
  im_term = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(im_term), p4ui_CONJ_XOR));
  // Swap the two floats of each pair: | -a_im*b_im | a_im*b_re | ...
  im_term = vrev64q_f32(im_term);

  // Sum both contributions.
  return Packet2cf(vaddq_f32(re_term, im_term));
}
// Bitwise AND of the raw bits of two packets.
template<> EIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // Must be vandq_u32: the previous code called vorrq_u32 (bitwise OR), which
  // made pand() behave exactly like por().
  return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
}
// Bitwise OR of the raw bits of two packets.
template<> EIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4ui lhs = vreinterpretq_u32_f32(a.v);
  const Packet4ui rhs = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(lhs, rhs)));
}
// Bitwise XOR of the raw bits of two packets.
template<> EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4ui lhs = vreinterpretq_u32_f32(a.v);
  const Packet4ui rhs = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(veorq_u32(lhs, rhs)));
}
// Bit clear on the raw bits: vbicq_u32(a, b), i.e. a AND (NOT b).
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4ui lhs = vreinterpretq_u32_f32(a.v);
  const Packet4ui rhs = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(lhs, rhs)));
}
// Aligned load: reads the complex array as four consecutive floats.
template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  return Packet2cf(pload<Packet4f>((const float*)from));
}
// Unaligned load: reads the complex array as four consecutive floats.
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from)
{
  EIGEN_DEBUG_UNALIGNED_LOAD
  return Packet2cf(ploadu<Packet4f>((const float*)from));
}
// Duplicating load: with two complex values per packet this is a broadcast of
// the first element.
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)
{
  return pset1<Packet2cf>(from[0]);
}
// Aligned store: writes the packet as four consecutive floats.
template<> EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float> * to, const Packet2cf& from)
{
  EIGEN_DEBUG_ALIGNED_STORE
  pstore((float*)to, from.v);
}
// Unaligned store: writes the packet as four consecutive floats.
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from)
{
  EIGEN_DEBUG_UNALIGNED_STORE
  pstoreu((float*)to, from.v);
}
// Issues a prefetch hint for the given address through the __pld macro.
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr)
{
  __pld((float *)addr);
}
// Returns the first complex value of the packet.
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
  // Spill the whole packet to an aligned buffer and read back element 0.
  std::complex<float> EIGEN_ALIGN16 spilled[2];
  float* const raw = (float *)spilled;
  vst1q_f32(raw, a.v);
  return spilled[0];
}
// Reverses the order of the two complex values by swapping the low and high
// 64-bit halves of the packet.
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
{
  const float32x2_t first  = vget_low_f32(a.v);
  const float32x2_t second = vget_high_f32(a.v);
  return Packet2cf(vcombine_f32(second, first));
}
// Swaps the two floats (real/imaginary) inside each complex value.
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
{
  const Packet4f swapped = vrev64q_f32(a.v);
  return Packet2cf(swapped);
}
// Horizontal sum: adds the two complex values held in the packet.
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
{
  // Low half + high half gives (re0+re1, im0+im1).
  const float32x2_t total = vadd_f32(vget_low_f32(a.v), vget_high_f32(a.v));

  std::complex<float> result;
  vst1_f32((float *)&result, total);
  return result;
}
// Reduces two packets at once: slot i of the result is the sum of the two
// complex values of vecs[i].
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
{
  // First complex value of vecs[0] and of vecs[1], side by side.
  const Packet4f firsts  = vcombine_f32(vget_low_f32(vecs[0].v),  vget_low_f32(vecs[1].v));
  // Second complex value of vecs[0] and of vecs[1], side by side.
  const Packet4f seconds = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));

  return Packet2cf(vaddq_f32(firsts, seconds));
}
// Horizontal product: multiplies the two complex values held in the packet.
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
{
  const float32x2_t x = vget_low_f32(a.v);   // first complex value
  const float32x2_t y = vget_high_f32(a.v);  // second complex value

  // | x_re*y_re | x_re*y_im |
  const float32x2_t re_term = vmul_f32(vdup_lane_f32(x, 0), y);
  // | x_im*y_re | x_im*y_im |
  float32x2_t im_term = vmul_f32(vdup_lane_f32(x, 1), y);

  // Flip the sign of lane 1:  | x_im*y_re | -x_im*y_im |
  im_term = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(im_term), p2ui_CONJ_XOR));
  // Swap the lanes:           | -x_im*y_im | x_im*y_re |
  im_term = vrev64_f32(im_term);

  // Sum both contributions and write them out as one complex value.
  std::complex<float> result;
  vst1_f32((float *)&result, vadd_f32(re_term, im_term));
  return result;
}
// Packet alignment helper for Packet2cf. Only Offset==1 does anything: `first`
// becomes (first[1], second[0]); any other Offset leaves `first` unchanged.
template<int Offset>
struct palign_impl<Offset,Packet2cf>
{
  EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
  {
    if (Offset != 1)
      return;
    // One complex value is two floats, hence an extraction offset of 2.
    first.v = vextq_f32(first.v, second.v, 2);
  }
};
// Product helper where the right-hand operand is conjugated: a * conj(b).
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
  // x * conj(y) + c
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    const Packet2cf product = this->pmul(x, y);
    return padd(product, c);
  }

  // a * conj(b)
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  {
    const Packet2cf b_conj = pconj(b);
    return internal::pmul(a, b_conj);
  }
};
// Product helper where the left-hand operand is conjugated: conj(a) * b.
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
{
  // conj(x) * y + c
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    const Packet2cf product = this->pmul(x, y);
    return padd(product, c);
  }

  // conj(a) * b
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  {
    const Packet2cf a_conj = pconj(a);
    return internal::pmul(a_conj, b);
  }
};
// Product helper where both operands are conjugated, computed as conj(a * b).
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
{
  // conj(x * y) + c
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    const Packet2cf product = this->pmul(x, y);
    return padd(product, c);
  }

  // conj(a * b)
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  {
    const Packet2cf plain = internal::pmul(a, b);
    return pconj(plain);
  }
};
// Complex division, computed as a * conj(b) / |b|^2.
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // TODO optimize it for NEON
  // Numerator: a * conj(b).
  const Packet2cf numer = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);

  // Squared norm of b, replicated in both lanes of each complex value:
  // | re^2 | im^2 | + | im^2 | re^2 |.
  const Packet4f squares = vmulq_f32(b.v, b.v);
  const Packet4f swapped = vrev64q_f32(squares);
  const Packet4f norm2   = vaddq_f32(squares, swapped);

  return Packet2cf(pdiv(numer.v, norm2));
}
} // end namespace internal
#endif // EIGEN_COMPLEX_NEON_H

View File

@@ -27,14 +27,12 @@
#ifndef EIGEN_PACKET_MATH_NEON_H #ifndef EIGEN_PACKET_MATH_NEON_H
#define EIGEN_PACKET_MATH_NEON_H #define EIGEN_PACKET_MATH_NEON_H
namespace internal {
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif #endif
#ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE
#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE 4*192*192
#endif
// FIXME NEON has 16 quad registers, but since the current register allocator // FIXME NEON has 16 quad registers, but since the current register allocator
// is so bad, it is much better to reduce it to 8 // is so bad, it is much better to reduce it to 8
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
@@ -43,24 +41,31 @@
typedef float32x4_t Packet4f; typedef float32x4_t Packet4f;
typedef int32x4_t Packet4i; typedef int32x4_t Packet4i;
typedef uint32x4_t Packet4ui;
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
const Packet4f ei_p4f_##NAME = ei_pset1<float>(X) const Packet4f p4f_##NAME = pset1<Packet4f>(X)
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
const Packet4f ei_p4f_##NAME = vreinterpretq_f32_u32(ei_pset1<int>(X)) const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int>(X))
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i ei_p4i_##NAME = ei_pset1<int>(X) const Packet4i p4i_##NAME = pset1<Packet4i>(X)
#ifndef __pld #ifndef __pld
#define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" ); #define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" );
#endif #endif
template<> struct ei_packet_traits<float> : ei_default_packet_traits template<> struct packet_traits<float> : default_packet_traits
{ {
typedef Packet4f type; enum {size=4}; typedef Packet4f type;
enum { enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
HasDiv = 1,
// FIXME check the Has*
HasSin = 0, HasSin = 0,
HasCos = 0, HasCos = 0,
HasLog = 0, HasLog = 0,
@@ -68,39 +73,55 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
HasSqrt = 0 HasSqrt = 0
}; };
}; };
template<> struct ei_packet_traits<int> : ei_default_packet_traits template<> struct packet_traits<int> : default_packet_traits
{ typedef Packet4i type; enum {size=4}; }; {
typedef Packet4i type;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=4
// FIXME check the Has*
};
};
template<> struct ei_unpacket_traits<Packet4f> { typedef float type; enum {size=4}; }; #if EIGEN_GNUC_AT_MOST(4,4)
template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size=4}; }; // workaround gcc 4.2, 4.3 and 4.4 compilatin issue
EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q_f32((float32_t*)to,from); }
EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
#endif
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { return vdupq_n_f32(from); } template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) { return vdupq_n_s32(from); } template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); }
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a)
{ {
Packet4f countdown = { 3, 2, 1, 0 }; Packet4f countdown = { 3, 2, 1, 0 };
return vaddq_f32(ei_pset1(a), countdown); return vaddq_f32(pset1<Packet4f>(a), countdown);
} }
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a)
{ {
Packet4i countdown = { 3, 2, 1, 0 }; Packet4i countdown = { 3, 2, 1, 0 };
return vaddq_s32(ei_pset1(a), countdown); return vaddq_s32(pset1<Packet4i>(a), countdown);
} }
template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); } template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); } template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); } template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); } template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pnegate(const Packet4f& a) { return vnegq_f32(a); } template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return vnegq_f32(a); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pnegate(const Packet4i& a) { return vnegq_s32(a); } template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return vnegq_s32(a); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); } template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
{ {
Packet4f inv, restep, div; Packet4f inv, restep, div;
@@ -121,90 +142,101 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, con
return div; return div;
} }
template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/) template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
{ ei_assert(false && "packet integer division are not supported by NEON"); { eigen_assert(false && "packet integer division are not supported by NEON");
return ei_pset1<int>(0); return pset1<Packet4i>(0);
} }
// for some weird raisons, it has to be overloaded for packet of integers // for some weird raisons, it has to be overloaded for packet of integers
template<> EIGEN_STRONG_INLINE Packet4i ei_pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return ei_padd(ei_pmul(a,b), c); } template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); } template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); } template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); }
// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics // Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics
template<> EIGEN_STRONG_INLINE Packet4f ei_pand<Packet4f>(const Packet4f& a, const Packet4f& b) template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
{ {
return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
} }
template<> EIGEN_STRONG_INLINE Packet4i ei_pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_por<Packet4f>(const Packet4f& a, const Packet4f& b) template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
{ {
return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
} }
template<> EIGEN_STRONG_INLINE Packet4i ei_por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); } template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pxor<Packet4f>(const Packet4f& a, const Packet4f& b) template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
{ {
return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
} }
template<> EIGEN_STRONG_INLINE Packet4i ei_pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
{ {
return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
} }
template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); }
template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); } template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); } {
float32x2_t lo, hi;
lo = vdup_n_f32(*from);
hi = vdup_n_f32(*from);
return vcombine_f32(lo, hi);
}
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
{
int32x2_t lo, hi;
lo = vdup_n_s32(*from);
hi = vdup_n_s32(*from);
return vcombine_s32(lo, hi);
}
template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
template<> EIGEN_STRONG_INLINE void ei_prefetch<float>(const float* addr) { __pld(addr); } template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
template<> EIGEN_STRONG_INLINE void ei_prefetch<int>(const int* addr) { __pld(addr); } template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { __pld(addr); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { __pld(addr); }
// FIXME only store the 2 first elements ? // FIXME only store the 2 first elements ?
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; } template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; } template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a) { template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
float32x2_t a_lo, a_hi; float32x2_t a_lo, a_hi;
Packet4f a_r64, a_r128; Packet4f a_r64;
a_r64 = vrev64q_f32(a); a_r64 = vrev64q_f32(a);
a_lo = vget_low_f32(a_r64); a_lo = vget_low_f32(a_r64);
a_hi = vget_high_f32(a_r64); a_hi = vget_high_f32(a_r64);
a_r128 = vcombine_f32(a_hi, a_lo); return vcombine_f32(a_hi, a_lo);
return a_r128;
} }
template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a) { template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
int32x2_t a_lo, a_hi; int32x2_t a_lo, a_hi;
Packet4i a_r64, a_r128; Packet4i a_r64;
a_r64 = vrev64q_s32(a); a_r64 = vrev64q_s32(a);
a_lo = vget_low_s32(a_r64); a_lo = vget_low_s32(a_r64);
a_hi = vget_high_s32(a_r64); a_hi = vget_high_s32(a_r64);
a_r128 = vcombine_s32(a_hi, a_lo); return vcombine_s32(a_hi, a_lo);
return a_r128;
} }
template<> EIGEN_STRONG_INLINE Packet4f ei_pabs(const Packet4f& a) { return vabsq_f32(a); } template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a) { return vabsq_s32(a); } template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); }
template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a) template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
{ {
float32x2_t a_lo, a_hi, sum; float32x2_t a_lo, a_hi, sum;
float s[2]; float s[2];
@@ -218,7 +250,7 @@ template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a)
return s[0]; return s[0];
} }
template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs) template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
{ {
float32x4x2_t vtrn1, vtrn2, res1, res2; float32x4x2_t vtrn1, vtrn2, res1, res2;
Packet4f sum1, sum2, sum; Packet4f sum1, sum2, sum;
@@ -238,7 +270,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vec
return sum; return sum;
} }
template<> EIGEN_STRONG_INLINE int ei_predux<Packet4i>(const Packet4i& a) template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
{ {
int32x2_t a_lo, a_hi, sum; int32x2_t a_lo, a_hi, sum;
int32_t s[2]; int32_t s[2];
@@ -252,7 +284,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux<Packet4i>(const Packet4i& a)
return s[0]; return s[0];
} }
template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vecs) template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
{ {
int32x4x2_t vtrn1, vtrn2, res1, res2; int32x4x2_t vtrn1, vtrn2, res1, res2;
Packet4i sum1, sum2, sum; Packet4i sum1, sum2, sum;
@@ -274,7 +306,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vec
// Other reduction functions: // Other reduction functions:
// mul // mul
template<> EIGEN_STRONG_INLINE float ei_predux_mul<Packet4f>(const Packet4f& a) template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
{ {
float32x2_t a_lo, a_hi, prod; float32x2_t a_lo, a_hi, prod;
float s[2]; float s[2];
@@ -290,7 +322,7 @@ template<> EIGEN_STRONG_INLINE float ei_predux_mul<Packet4f>(const Packet4f& a)
return s[0]; return s[0];
} }
template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a) template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
{ {
int32x2_t a_lo, a_hi, prod; int32x2_t a_lo, a_hi, prod;
int32_t s[2]; int32_t s[2];
@@ -308,7 +340,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a)
} }
// min // min
template<> EIGEN_STRONG_INLINE float ei_predux_min<Packet4f>(const Packet4f& a) template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
{ {
float32x2_t a_lo, a_hi, min; float32x2_t a_lo, a_hi, min;
float s[2]; float s[2];
@@ -321,7 +353,7 @@ template<> EIGEN_STRONG_INLINE float ei_predux_min<Packet4f>(const Packet4f& a)
return s[0]; return s[0];
} }
template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a) template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
{ {
int32x2_t a_lo, a_hi, min; int32x2_t a_lo, a_hi, min;
int32_t s[2]; int32_t s[2];
@@ -336,7 +368,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a)
} }
// max // max
template<> EIGEN_STRONG_INLINE float ei_predux_max<Packet4f>(const Packet4f& a) template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
{ {
float32x2_t a_lo, a_hi, max; float32x2_t a_lo, a_hi, max;
float s[2]; float s[2];
@@ -349,7 +381,7 @@ template<> EIGEN_STRONG_INLINE float ei_predux_max<Packet4f>(const Packet4f& a)
return s[0]; return s[0];
} }
template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a) template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
{ {
int32x2_t a_lo, a_hi, max; int32x2_t a_lo, a_hi, max;
int32_t s[2]; int32_t s[2];
@@ -364,7 +396,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
} }
template<int Offset> template<int Offset>
struct ei_palign_impl<Offset,Packet4f> struct palign_impl<Offset,Packet4f>
{ {
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second) EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
{ {
@@ -374,7 +406,7 @@ struct ei_palign_impl<Offset,Packet4f>
}; };
template<int Offset> template<int Offset>
struct ei_palign_impl<Offset,Packet4i> struct palign_impl<Offset,Packet4i>
{ {
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second) EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
{ {
@@ -382,4 +414,7 @@ struct ei_palign_impl<Offset,Packet4i>
first = vextq_s32(first, second, Offset); first = vextq_s32(first, second, Offset);
} }
}; };
} // end namespace internal
#endif // EIGEN_PACKET_MATH_NEON_H #endif // EIGEN_PACKET_MATH_NEON_H

View File

@@ -0,0 +1,447 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_COMPLEX_SSE_H
#define EIGEN_COMPLEX_SSE_H
namespace internal {
//---------- float ----------
// Thin wrapper around one __m128 register, used as the packet type for
// std::complex<float> (see packet_traits<std::complex<float> >).
struct Packet2cf
{
  // Underlying SSE register.
  __m128 v;

  // Default construction leaves `v` uninitialized.
  EIGEN_STRONG_INLINE Packet2cf() {}

  // Wraps an existing register; explicit, so a bare __m128 never converts
  // to a complex packet implicitly.
  EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& reg) : v(reg) {}
};
// Packet traits for std::complex<float>: the associated packet type is
// Packet2cf. The flags below are set explicitly on top of the
// default_packet_traits base (whose values are not visible in this chunk).
template<> struct packet_traits<std::complex<float> > : default_packet_traits
{
typedef Packet2cf type;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
// Number of std::complex<float> values per packet.
size = 2,
// Operations flagged as available for this packet type.
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
// Operations flagged as unavailable for this packet type.
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasSetLinear = 0
};
};
// Reverse mapping for Packet2cf: scalar type std::complex<float>,
// two scalars per packet.
template<> struct unpacket_traits<Packet2cf>
{
  typedef std::complex<float> type;
  enum { size = 2 };
};
// Lane-wise sum of the four floats of a and b (complex addition is
// component-wise, so no shuffling is needed).
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const __m128 sum = _mm_add_ps(a.v, b.v);
  return Packet2cf(sum);
}
// Lane-wise difference a - b of the four floats.
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const __m128 diff = _mm_sub_ps(a.v, b.v);
  return Packet2cf(diff);
}
// Negates both complex values by flipping the sign bit of all four floats.
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
{
  const __m128i sign_bits = _mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000);
  const __m128 flip_all = _mm_castsi128_ps(sign_bits);
  return Packet2cf(_mm_xor_ps(a.v, flip_all));
}
// Complex conjugate: flips the sign bit of lanes 1 and 3 (the imaginary
// parts) and leaves lanes 0 and 2 (the real parts) untouched.
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
{
  const __m128i imag_sign_bits = _mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000);
  const __m128 flip_imag = _mm_castsi128_ps(imag_sign_bits);
  return Packet2cf(_mm_xor_ps(a.v, flip_imag));
}
// Complex product of two packets of std::complex<float>.
// Writing a = (ar, ai) and b = (br, bi) for each complex slot, both branches
// form (ar*br, ar*bi) and (ai*bi, ai*br) and combine them into
// (ar*br - ai*bi, ar*bi + ai*br).
// NOTE(review): assumes vec4f_swizzle1(v,p,q,r,s) returns lanes (p,q,r,s)
// of v -- the macro is defined outside this chunk.
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // TODO optimize it for SSE3 and 4
  const __m128 b_swapped = vec4f_swizzle1(b.v, 1, 0, 3, 2);
#ifdef EIGEN_VECTORIZE_SSE3
  // moveldup/movehdup duplicate the even/odd lanes; the same operands could
  // be produced with vec4f_swizzle1(a.v, 0, 0, 2, 2) / (a.v, 1, 1, 3, 3).
  const __m128 a_re = _mm_moveldup_ps(a.v);
  const __m128 a_im = _mm_movehdup_ps(a.v);
  const __m128 direct = _mm_mul_ps(a_re, b.v);
  const __m128 cross = _mm_mul_ps(a_im, b_swapped);
  // addsub subtracts in even lanes and adds in odd lanes.
  return Packet2cf(_mm_addsub_ps(direct, cross));
#else
  const __m128 a_re = vec4f_swizzle1(a.v, 0, 0, 2, 2);
  const __m128 a_im = vec4f_swizzle1(a.v, 1, 1, 3, 3);
  const __m128 direct = _mm_mul_ps(a_re, b.v);
  const __m128 cross = _mm_mul_ps(a_im, b_swapped);
  // Negate the cross term in the even (real) lanes, then add.
  const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
  return Packet2cf(_mm_add_ps(direct, _mm_xor_ps(cross, mask)));
#endif
}
// Bitwise AND of the raw 128-bit registers.
template<> EIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const __m128 bits = _mm_and_ps(a.v, b.v);
  return Packet2cf(bits);
}
// Bitwise OR of the raw 128-bit registers.
template<> EIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const __m128 bits = _mm_or_ps(a.v, b.v);
  return Packet2cf(bits);
}
// Bitwise XOR of the raw 128-bit registers.
template<> EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const __m128 bits = _mm_xor_ps(a.v, b.v);
  return Packet2cf(bits);
}
// Forwards (a, b) unchanged to _mm_andnot_ps.
// NOTE(review): _mm_andnot_ps complements its FIRST operand, so this yields
// (~a) & b -- confirm that is the operand order callers expect.
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const __m128 bits = _mm_andnot_ps(a.v, b.v);
  return Packet2cf(bits);
}
// Aligned load: reads the packet through pload<Packet4f>, starting at the
// address of the real part of *from.
template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  const float* first_real = &real_ref(*from);
  return Packet2cf(pload<Packet4f>(first_real));
}
// Unaligned counterpart of pload: same access, routed through ploadu<Packet4f>.
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from)
{
  EIGEN_DEBUG_UNALIGNED_LOAD
  const float* first_real = &real_ref(*from);
  return Packet2cf(ploadu<Packet4f>(first_real));
}
// Broadcasts one std::complex<float> into both complex slots of a packet.
//
// The 64-bit (real, imag) pair is loaded into the low half of a register
// whose contents are fully defined (all zeros), then the low half is copied
// into the high half. Previously, on every compiler except gcc <= 4.2, the
// load was merged into the still-uninitialized `res.v`, i.e. an
// indeterminate value was read; the zero-initialized form that used to be
// the gcc 4.2 warning workaround is now used unconditionally.
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
  // The upper two lanes of `low` are overwritten by movelh below, so the
  // zero fill only serves to give the load a defined source operand.
  const __m128 low = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)&from);
  return Packet2cf(_mm_movelh_ps(low, low));
}
// Loads *from and duplicates it into every complex slot; implemented as a
// broadcast of the pointed-to value via pset1<Packet2cf>.
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)
{
  const std::complex<float>& value = *from;
  return pset1<Packet2cf>(value);
}
// Aligned store: writes the raw register through the float pstore, starting
// at the address of the real part of *to.
template<> EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float>* to, const Packet2cf& from)
{
  EIGEN_DEBUG_ALIGNED_STORE
  float* first_real = &real_ref(*to);
  pstore(first_real, from.v);
}
// Unaligned counterpart of pstore, routed through the float pstoreu.
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet2cf& from)
{
  EIGEN_DEBUG_UNALIGNED_STORE
  float* first_real = &real_ref(*to);
  pstoreu(first_real, from.v);
}
// Issues an _mm_prefetch with the _MM_HINT_T0 hint for addr.
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float>* addr)
{
  const char* raw = (const char*)(addr);
  _mm_prefetch(raw, _MM_HINT_T0);
}
// Returns the first std::complex<float> held in the packet (lanes 0 and 1).
// Two code paths exist purely because of compiler issues; both return the
// same value.
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
#if EIGEN_GNUC_AT_MOST(4,3)
// Workaround gcc 4.2 ICE - this is not performance wise ideal, but who cares...
// This workaround also fix invalid code generation with gcc 4.3
// Spill the whole register to a 16-byte aligned array and read element 0.
EIGEN_ALIGN16 std::complex<float> res[2];
_mm_store_ps((float*)res, a.v);
return res[0];
#else
// Store only the low 64 bits (one complex value) directly into the result.
std::complex<float> res;
_mm_storel_pi((__m64*)&res, a.v);
return res;
#endif
}
// Reverses the order of the two complex values. Each complex<float> is 64
// bits wide, so the register is reinterpreted as two doubles and the
// double-packet preverse is reused.
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
{
  const __m128d as_doubles = _mm_castps_pd(a.v);
  const __m128d reversed = preverse(as_doubles);
  return Packet2cf(_mm_castpd_ps(reversed));
}
// Sum of the two complex values in the packet: the high half is moved onto
// the low half, added, and the first complex of the result is returned.
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
{
  const __m128 high_in_low = _mm_movehl_ps(a.v, a.v);
  const Packet2cf folded(_mm_add_ps(a.v, high_in_low));
  return pfirst(folded);
}
// Reduces two packets at once: slot 0 of the result is the sum of the two
// complex values of vecs[0], slot 1 the sum of those of vecs[1].
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
{
  // (vecs[0].low, vecs[1].low)
  const __m128 lows = _mm_movelh_ps(vecs[0].v, vecs[1].v);
  // (vecs[0].high, vecs[1].high)
  const __m128 highs = _mm_movehl_ps(vecs[1].v, vecs[0].v);
  return Packet2cf(_mm_add_ps(lows, highs));
}
// Product of the two complex values in the packet: multiplies the packet by
// a copy whose low slot holds the original high slot, then takes slot 0.
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
{
  const Packet2cf high_in_low(_mm_movehl_ps(a.v, a.v));
  const Packet2cf product = pmul(a, high_in_low);
  return pfirst(product);
}
// Alignment helper for Packet2cf. Only Offset == 1 does any work: `first`
// becomes (high complex of first, low complex of second). Any other Offset
// leaves `first` untouched.
template<int Offset>
struct palign_impl<Offset,Packet2cf>
{
  EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
  {
    if (Offset != 1)
      return;

    // Move the high complex of `first` into its low slot ...
    first.v = _mm_movehl_ps(first.v, first.v);
    // ... then fill the high slot with the low complex of `second`.
    first.v = _mm_movelh_ps(first.v, second.v);
  }
};
// Product helper computing a * conj(b) on Packet2cf operands.
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
  // Returns x * conj(y) + c.
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    return padd(pmul(x,y),c);
  }

  // Returns a * conj(b).
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  {
#ifdef EIGEN_VECTORIZE_SSE3
    return internal::pmul(a, pconj(b));
#else
    // NOTE(review): assumes vec4f_swizzle1(v,p,q,r,s) returns lanes (p,q,r,s) of v.
    // direct = (ar*br, ar*bi), cross = (ai*bi, ai*br); negating the odd
    // lanes of `direct` gives (ar*br + ai*bi, ai*br - ar*bi).
    const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
    const __m128 direct = _mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v);
    const __m128 cross = _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
                                    vec4f_swizzle1(b.v, 1, 0, 3, 2));
    return Packet2cf(_mm_add_ps(_mm_xor_ps(direct, mask), cross));
#endif
  }
};
// Product helper computing conj(a) * b on Packet2cf operands.
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
{
  // Returns conj(x) * y + c.
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    return padd(pmul(x,y),c);
  }

  // Returns conj(a) * b.
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  {
#ifdef EIGEN_VECTORIZE_SSE3
    return internal::pmul(pconj(a), b);
#else
    // NOTE(review): assumes vec4f_swizzle1(v,p,q,r,s) returns lanes (p,q,r,s) of v.
    // direct = (ar*br, ar*bi), cross = (ai*bi, ai*br); negating the odd
    // lanes of `cross` gives (ar*br + ai*bi, ar*bi - ai*br).
    const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
    const __m128 direct = _mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v);
    const __m128 cross = _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
                                    vec4f_swizzle1(b.v, 1, 0, 3, 2));
    return Packet2cf(_mm_add_ps(direct, _mm_xor_ps(cross, mask)));
#endif
  }
};
// Product helper computing conj(a) * conj(b), i.e. conj(a * b), on
// Packet2cf operands.
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
{
  // Returns conj(x * y) + c.
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
  {
    return padd(pmul(x,y),c);
  }

  // Returns conj(a * b).
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
  {
#ifdef EIGEN_VECTORIZE_SSE3
    return pconj(internal::pmul(a, b));
#else
    // NOTE(review): assumes vec4f_swizzle1(v,p,q,r,s) returns lanes (p,q,r,s) of v.
    // direct = (ar*br, ar*bi), cross = (ai*bi, ai*br); negating the odd
    // lanes of `direct` and subtracting `cross` gives
    // (ar*br - ai*bi, -(ar*bi + ai*br)).
    const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
    const __m128 direct = _mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v);
    const __m128 cross = _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
                                    vec4f_swizzle1(b.v, 1, 0, 3, 2));
    return Packet2cf(_mm_sub_ps(_mm_xor_ps(direct, mask), cross));
#endif
  }
};
// Mixed product helper: Packet4f left operand, Packet2cf right operand,
// no conjugation. The product is a plain lane-wise float multiplication.
template<> struct conj_helper<Packet4f, Packet2cf, false,false>
{
  // Returns c + x * y.
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
  {
    return padd(c, pmul(x,y));
  }

  // Lane-wise product of x with the raw register of y.
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
  {
    const Packet4f scaled = Eigen::internal::pmul(x, y.v);
    return Packet2cf(scaled);
  }
};
// Mixed product helper: Packet2cf left operand, Packet4f right operand,
// no conjugation. The product is a plain lane-wise float multiplication.
template<> struct conj_helper<Packet2cf, Packet4f, false,false>
{
  // Returns c + x * y.
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
  {
    return padd(c, pmul(x,y));
  }

  // Lane-wise product of the raw register of x with y.
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
  {
    const Packet4f scaled = Eigen::internal::pmul(x.v, y);
    return Packet2cf(scaled);
  }
};
// Complex division a / b, computed as (a * conj(b)) / |b|^2 per complex slot.
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // TODO optimize it for SSE3 and 4
  const Packet2cf numerator = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
  // squares = (br^2, bi^2) per slot.
  const __m128 squares = _mm_mul_ps(b.v,b.v);
  // 0xb1 selects lanes (1,0,3,2): swaps the two floats inside each slot, so
  // the sum below holds br^2 + bi^2 in every lane.
  const __m128 squares_swapped = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(squares), 0xb1));
  const __m128 norm2 = _mm_add_ps(squares, squares_swapped);
  return Packet2cf(_mm_div_ps(numerator.v, norm2));
}
// Plain (non-template) overload for Packet2cf: applies the (1, 0, 3, 2)
// swizzle to the register.
// NOTE(review): presumably this swaps real and imaginary parts of each
// complex value -- depends on vec4f_swizzle1, defined outside this chunk.
EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
{
  const __m128 flipped = vec4f_swizzle1(x.v, 1, 0, 3, 2);
  return Packet2cf(flipped);
}
//---------- double ----------
// Thin wrapper around one __m128d register, used as the packet type for
// std::complex<double> (see packet_traits<std::complex<double> >).
struct Packet1cd
{
  // Underlying SSE2 register.
  __m128d v;

  // Default construction leaves `v` uninitialized.
  EIGEN_STRONG_INLINE Packet1cd() {}

  // Wraps an existing register; explicit, so a bare __m128d never converts
  // to a complex packet implicitly.
  EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& reg) : v(reg) {}
};
// Packet traits for std::complex<double>: the associated packet type is
// Packet1cd. The flags below are set explicitly on top of the
// default_packet_traits base (whose values are not visible in this chunk).
template<> struct packet_traits<std::complex<double> > : default_packet_traits
{
typedef Packet1cd type;
enum {
Vectorizable = 1,
// Unlike the std::complex<float> traits, this flag is 0 here.
AlignedOnScalar = 0,
// Number of std::complex<double> values per packet.
size = 1,
// Operations flagged as available for this packet type.
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
// Operations flagged as unavailable for this packet type.
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasSetLinear = 0
};
};
// Reverse mapping for Packet1cd: scalar type std::complex<double>,
// one scalar per packet.
template<> struct unpacket_traits<Packet1cd>
{
  typedef std::complex<double> type;
  enum { size = 1 };
};
// Lane-wise sum of the two doubles of a and b.
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const __m128d sum = _mm_add_pd(a.v, b.v);
  return Packet1cd(sum);
}
// Lane-wise difference a - b of the two doubles.
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const __m128d diff = _mm_sub_pd(a.v, b.v);
  return Packet1cd(diff);
}
// Negates the complex value by delegating to pnegate on the underlying
// __m128d register.
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a)
{
  const __m128d negated = pnegate(a.v);
  return Packet1cd(negated);
}
// Complex conjugate: flips only the top bit of the 128-bit register, which
// is the sign bit of the second double (the imaginary part).
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
{
  // _mm_set_epi32 lists elements from most to least significant.
  const __m128i imag_sign_bit = _mm_set_epi32(0x80000000,0x0,0x0,0x0);
  const __m128d flip_imag = _mm_castsi128_pd(imag_sign_bit);
  return Packet1cd(_mm_xor_pd(a.v, flip_imag));
}
// Complex product of two std::complex<double> packets.
// With a = (ar, ai) and b = (br, bi), both branches combine
// direct = (ar*br, ar*bi) and cross = (ai*bi, ai*br) into
// (ar*br - ai*bi, ar*bi + ai*br).
// NOTE(review): assumes vec2d_swizzle1(v,p,q) returns lanes (p,q) of v --
// the macro is defined outside this chunk.
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  // TODO optimize it for SSE3 and 4
  const __m128d a_re = vec2d_swizzle1(a.v, 0, 0);
  const __m128d a_im = vec2d_swizzle1(a.v, 1, 1);
  const __m128d b_swapped = vec2d_swizzle1(b.v, 1, 0);
  const __m128d direct = _mm_mul_pd(a_re, b.v);
  const __m128d cross = _mm_mul_pd(a_im, b_swapped);
#ifdef EIGEN_VECTORIZE_SSE3
  // addsub subtracts in lane 0 and adds in lane 1.
  return Packet1cd(_mm_addsub_pd(direct, cross));
#else
  // Flip the sign of lane 0 (the real part) of the cross term, then add.
  const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
  return Packet1cd(_mm_add_pd(direct, _mm_xor_pd(cross, mask)));
#endif
}
// Bitwise AND of the raw 128-bit registers.
template<> EIGEN_STRONG_INLINE Packet1cd pand<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const __m128d bits = _mm_and_pd(a.v, b.v);
  return Packet1cd(bits);
}
// Bitwise OR of the raw 128-bit registers.
template<> EIGEN_STRONG_INLINE Packet1cd por<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const __m128d bits = _mm_or_pd(a.v, b.v);
  return Packet1cd(bits);
}
// Bitwise XOR of the raw 128-bit registers.
template<> EIGEN_STRONG_INLINE Packet1cd pxor<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const __m128d bits = _mm_xor_pd(a.v, b.v);
  return Packet1cd(bits);
}
// Forwards (a, b) unchanged to _mm_andnot_pd.
// NOTE(review): _mm_andnot_pd complements its FIRST operand, so this yields
// (~a) & b -- confirm that is the operand order callers expect.
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const __m128d bits = _mm_andnot_pd(a.v, b.v);
  return Packet1cd(bits);
}
// FIXME force unaligned load, this is a temporary fix
// NOTE(review): despite the FIXME wording, pload below goes through the
// ALIGNED pload<Packet2d>; only ploadu uses the unaligned path. Confirm
// which behaviour the FIXME refers to.
//
// Aligned load of one std::complex<double>, viewed as two doubles.
template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  const double* as_doubles = (const double*)from;
  return Packet1cd(pload<Packet2d>(as_doubles));
}
// Unaligned load of one std::complex<double>, viewed as two doubles.
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
{
  EIGEN_DEBUG_UNALIGNED_LOAD
  const double* as_doubles = (const double*)from;
  return Packet1cd(ploadu<Packet2d>(as_doubles));
}
// Builds a packet from a single std::complex<double>. The packet holds
// exactly one value, so this is just a load of `from`; the load has to be
// the unaligned one.
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
{
  /* here we really have to use unaligned loads :( */
  const std::complex<double>* source = &from;
  return ploadu<Packet1cd>(source);
}
// With a single complex value per packet there is nothing to duplicate:
// forwards the pointed-to value to pset1<Packet1cd>.
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)
{
  const std::complex<double>& value = *from;
  return pset1<Packet1cd>(value);
}
// FIXME force unaligned store, this is a temporary fix
// NOTE(review): despite the FIXME wording, pstore below goes through the
// ALIGNED double pstore; only pstoreu uses the unaligned path. Confirm
// which behaviour the FIXME refers to.
//
// Aligned store of the packet to *to, viewed as two doubles.
template<> EIGEN_STRONG_INLINE void pstore<std::complex<double> >(std::complex<double>* to, const Packet1cd& from)
{
  EIGEN_DEBUG_ALIGNED_STORE
  double* as_doubles = (double*)to;
  pstore(as_doubles, from.v);
}
// Unaligned store of the packet to *to, viewed as two doubles.
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double>* to, const Packet1cd& from)
{
  EIGEN_DEBUG_UNALIGNED_STORE
  double* as_doubles = (double*)to;
  pstoreu(as_doubles, from.v);
}
// Issues an _mm_prefetch with the _MM_HINT_T0 hint for addr.
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double>* addr)
{
  const char* raw = (const char*)(addr);
  _mm_prefetch(raw, _MM_HINT_T0);
}
// Extracts the single std::complex<double> held in the packet by spilling
// the register to a 16-byte aligned pair of doubles.
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
{
  EIGEN_ALIGN16 double lanes[2];
  _mm_store_pd(lanes, a.v);
  const double re = lanes[0];
  const double im = lanes[1];
  return std::complex<double>(re, im);
}
// A packet with a single complex value is its own reverse.
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a)
{
  return a;
}
// Sum over a one-element packet: simply its only value.
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
{
  const std::complex<double> only_value = pfirst(a);
  return only_value;
}
// Packet-wise reduction for one-element packets: returns the first packet
// of the array unchanged.
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
{
  return *vecs;
}
// Product-reduction over the packet: with a single coefficient the product is
// that coefficient itself.
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
{
  const std::complex<double> only = pfirst(a);
  return only;
}
// palign_impl for Packet1cd: run() is deliberately a no-op and leaves both
// arguments untouched.
template<int Offset>
struct palign_impl<Offset,Packet1cd>
{
  EIGEN_STRONG_INLINE static void run(Packet1cd&, const Packet1cd&)
  {
    // FIXME are we sure a Packet1cd never has to be aligned?
    // A std::complex<double> occupies 16 bytes, yet nothing guarantees that it
    // sits on a 16-byte boundary...
  }
};
// conj_helper<..., false, true>: multiplies the first operand by the complex
// conjugate of the second, i.e. a * conj(b).
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
  // Returns x * conj(y) + c.
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
  {
    const Packet1cd product = pmul(x,y);
    return padd(product,c);
  }

  // Returns a * conj(b).
  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
  {
    #ifdef EIGEN_VECTORIZE_SSE3
    const Packet1cd b_conj = pconj(b);
    return internal::pmul(a, b_conj);
    #else
    // Only the sign bit of the upper (imaginary) lane is set in this mask.
    const __m128d imag_sign = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
    // (ar*br, ar*bi)
    const __m128d re_times_b = _mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v);
    // (ai*bi, ai*br)
    const __m128d im_times_swapped_b = _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
                                                  vec2d_swizzle1(b.v, 1, 0));
    // (ar*br + ai*bi, -ar*bi + ai*br)
    return Packet1cd(_mm_add_pd(_mm_xor_pd(re_times_b, imag_sign), im_times_swapped_b));
    #endif
  }
};
// conj_helper<..., true, false>: multiplies the complex conjugate of the first
// operand by the second, i.e. conj(a) * b.
template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
{
  // Returns conj(x) * y + c.
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
  {
    const Packet1cd product = pmul(x,y);
    return padd(product,c);
  }

  // Returns conj(a) * b.
  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
  {
    #ifdef EIGEN_VECTORIZE_SSE3
    const Packet1cd a_conj = pconj(a);
    return internal::pmul(a_conj, b);
    #else
    // Only the sign bit of the upper (imaginary) lane is set in this mask.
    const __m128d imag_sign = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
    // (ar*br, ar*bi)
    const __m128d re_times_b = _mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v);
    // (ai*bi, ai*br)
    const __m128d im_times_swapped_b = _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
                                                  vec2d_swizzle1(b.v, 1, 0));
    // (ar*br + ai*bi, ar*bi - ai*br)
    return Packet1cd(_mm_add_pd(re_times_b, _mm_xor_pd(im_times_swapped_b, imag_sign)));
    #endif
  }
};
// conj_helper<..., true, true>: both operands are conjugated, which equals the
// conjugate of the plain product: conj(a) * conj(b) == conj(a * b).
template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
{
  // Returns conj(x) * conj(y) + c.
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
  {
    const Packet1cd product = pmul(x,y);
    return padd(product,c);
  }

  // Returns conj(a * b).
  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
  {
    #ifdef EIGEN_VECTORIZE_SSE3
    const Packet1cd plain_product = internal::pmul(a, b);
    return pconj(plain_product);
    #else
    // Only the sign bit of the upper (imaginary) lane is set in this mask.
    const __m128d imag_sign = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
    // (ar*br, ar*bi)
    const __m128d re_times_b = _mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v);
    // (ai*bi, ai*br)
    const __m128d im_times_swapped_b = _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
                                                  vec2d_swizzle1(b.v, 1, 0));
    // (ar*br - ai*bi, -ar*bi - ai*br)
    return Packet1cd(_mm_sub_pd(_mm_xor_pd(re_times_b, imag_sign), im_times_swapped_b));
    #endif
  }
};
// Mixed product helper: a packet of two plain doubles (left) times one
// complex<double> packet (right), no conjugation on either side.
template<> struct conj_helper<Packet2d, Packet1cd, false,false>
{
  // Returns c + x * y.
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
  {
    const Packet1cd product = pmul(x,y);
    return padd(c, product);
  }

  // Lane-wise product of x with the raw (re, im) register of y.
  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
  {
    Packet1cd scaled(Eigen::internal::pmul(x, y.v));
    return scaled;
  }
};
// Mixed product helper: one complex<double> packet (left) times a packet of
// two plain doubles (right), no conjugation on either side.
template<> struct conj_helper<Packet1cd, Packet2d, false,false>
{
  // Returns c + x * y.
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
  {
    const Packet1cd product = pmul(x,y);
    return padd(c, product);
  }

  // Lane-wise product of the raw (re, im) register of x with y.
  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
  {
    Packet1cd scaled(Eigen::internal::pmul(x.v, y));
    return scaled;
  }
};
// Complex division a / b, evaluated as (a * conj(b)) / |b|^2.
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  // TODO optimize it for SSE3 and 4
  // Numerator: a * conj(b).
  const Packet1cd numerator = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
  // (br^2, bi^2)
  const __m128d squares = _mm_mul_pd(b.v,b.v);
  // Adding the lane-swapped copy puts br^2 + bi^2 in both lanes.
  const __m128d squared_norm = _mm_add_pd(squares,_mm_shuffle_pd(squares, squares, 0x1));
  return Packet1cd(_mm_div_pd(numerator.v, squared_norm));
}
// Returns a packet built from the reversed underlying register of x, i.e. the
// result of preverse applied to x.v.
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
{
  Packet1cd flipped(preverse(x.v));
  return flipped;
}
} // end namespace internal
#endif // EIGEN_COMPLEX_SSE_H

View File

@@ -30,8 +30,10 @@
#ifndef EIGEN_MATH_FUNCTIONS_SSE_H #ifndef EIGEN_MATH_FUNCTIONS_SSE_H
#define EIGEN_MATH_FUNCTIONS_SSE_H #define EIGEN_MATH_FUNCTIONS_SSE_H
namespace internal {
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f ei_plog<Packet4f>(const Packet4f& _x) Packet4f plog<Packet4f>(const Packet4f& _x)
{ {
Packet4f x = _x; Packet4f x = _x;
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f); _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
@@ -64,15 +66,15 @@ Packet4f ei_plog<Packet4f>(const Packet4f& _x)
Packet4f invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps()); Packet4f invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps());
x = ei_pmax(x, ei_p4f_min_norm_pos); /* cut off denormalized stuff */ x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23); emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
/* keep only the fractional part */ /* keep only the fractional part */
x = _mm_and_ps(x, ei_p4f_inv_mant_mask); x = _mm_and_ps(x, p4f_inv_mant_mask);
x = _mm_or_ps(x, ei_p4f_half); x = _mm_or_ps(x, p4f_half);
emm0 = _mm_sub_epi32(emm0, ei_p4i_0x7f); emm0 = _mm_sub_epi32(emm0, p4i_0x7f);
Packet4f e = ei_padd(_mm_cvtepi32_ps(emm0), ei_p4f_1); Packet4f e = padd(_mm_cvtepi32_ps(emm0), p4f_1);
/* part2: /* part2:
if( x < SQRTHF ) { if( x < SQRTHF ) {
@@ -80,38 +82,38 @@ Packet4f ei_plog<Packet4f>(const Packet4f& _x)
x = x + x - 1.0; x = x + x - 1.0;
} else { x = x - 1.0; } } else { x = x - 1.0; }
*/ */
Packet4f mask = _mm_cmplt_ps(x, ei_p4f_cephes_SQRTHF); Packet4f mask = _mm_cmplt_ps(x, p4f_cephes_SQRTHF);
Packet4f tmp = _mm_and_ps(x, mask); Packet4f tmp = _mm_and_ps(x, mask);
x = ei_psub(x, ei_p4f_1); x = psub(x, p4f_1);
e = ei_psub(e, _mm_and_ps(ei_p4f_1, mask)); e = psub(e, _mm_and_ps(p4f_1, mask));
x = ei_padd(x, tmp); x = padd(x, tmp);
Packet4f x2 = ei_pmul(x,x); Packet4f x2 = pmul(x,x);
Packet4f x3 = ei_pmul(x2,x); Packet4f x3 = pmul(x2,x);
Packet4f y, y1, y2; Packet4f y, y1, y2;
y = ei_pmadd(ei_p4f_cephes_log_p0, x, ei_p4f_cephes_log_p1); y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
y1 = ei_pmadd(ei_p4f_cephes_log_p3, x, ei_p4f_cephes_log_p4); y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
y2 = ei_pmadd(ei_p4f_cephes_log_p6, x, ei_p4f_cephes_log_p7); y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
y = ei_pmadd(y , x, ei_p4f_cephes_log_p2); y = pmadd(y , x, p4f_cephes_log_p2);
y1 = ei_pmadd(y1, x, ei_p4f_cephes_log_p5); y1 = pmadd(y1, x, p4f_cephes_log_p5);
y2 = ei_pmadd(y2, x, ei_p4f_cephes_log_p8); y2 = pmadd(y2, x, p4f_cephes_log_p8);
y = ei_pmadd(y, x3, y1); y = pmadd(y, x3, y1);
y = ei_pmadd(y, x3, y2); y = pmadd(y, x3, y2);
y = ei_pmul(y, x3); y = pmul(y, x3);
y1 = ei_pmul(e, ei_p4f_cephes_log_q1); y1 = pmul(e, p4f_cephes_log_q1);
tmp = ei_pmul(x2, ei_p4f_half); tmp = pmul(x2, p4f_half);
y = ei_padd(y, y1); y = padd(y, y1);
x = ei_psub(x, tmp); x = psub(x, tmp);
y2 = ei_pmul(e, ei_p4f_cephes_log_q2); y2 = pmul(e, p4f_cephes_log_q2);
x = ei_padd(x, y); x = padd(x, y);
x = ei_padd(x, y2); x = padd(x, y2);
return _mm_or_ps(x, invalid_mask); // negative arg will be NAN return _mm_or_ps(x, invalid_mask); // negative arg will be NAN
} }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f ei_pexp<Packet4f>(const Packet4f& _x) Packet4f pexp<Packet4f>(const Packet4f& _x)
{ {
Packet4f x = _x; Packet4f x = _x;
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f); _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
@@ -137,40 +139,40 @@ Packet4f ei_pexp<Packet4f>(const Packet4f& _x)
Packet4i emm0; Packet4i emm0;
// clamp x // clamp x
x = ei_pmax(ei_pmin(x, ei_p4f_exp_hi), ei_p4f_exp_lo); x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
/* express exp(x) as exp(g + n*log(2)) */ /* express exp(x) as exp(g + n*log(2)) */
fx = ei_pmadd(x, ei_p4f_cephes_LOG2EF, ei_p4f_half); fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
/* how to perform a floorf with SSE: just below */ /* how to perform a floorf with SSE: just below */
emm0 = _mm_cvttps_epi32(fx); emm0 = _mm_cvttps_epi32(fx);
tmp = _mm_cvtepi32_ps(emm0); tmp = _mm_cvtepi32_ps(emm0);
/* if greater, substract 1 */ /* if greater, substract 1 */
Packet4f mask = _mm_cmpgt_ps(tmp, fx); Packet4f mask = _mm_cmpgt_ps(tmp, fx);
mask = _mm_and_ps(mask, ei_p4f_1); mask = _mm_and_ps(mask, p4f_1);
fx = ei_psub(tmp, mask); fx = psub(tmp, mask);
tmp = ei_pmul(fx, ei_p4f_cephes_exp_C1); tmp = pmul(fx, p4f_cephes_exp_C1);
Packet4f z = ei_pmul(fx, ei_p4f_cephes_exp_C2); Packet4f z = pmul(fx, p4f_cephes_exp_C2);
x = ei_psub(x, tmp); x = psub(x, tmp);
x = ei_psub(x, z); x = psub(x, z);
z = ei_pmul(x,x); z = pmul(x,x);
Packet4f y = ei_p4f_cephes_exp_p0; Packet4f y = p4f_cephes_exp_p0;
y = ei_pmadd(y, x, ei_p4f_cephes_exp_p1); y = pmadd(y, x, p4f_cephes_exp_p1);
y = ei_pmadd(y, x, ei_p4f_cephes_exp_p2); y = pmadd(y, x, p4f_cephes_exp_p2);
y = ei_pmadd(y, x, ei_p4f_cephes_exp_p3); y = pmadd(y, x, p4f_cephes_exp_p3);
y = ei_pmadd(y, x, ei_p4f_cephes_exp_p4); y = pmadd(y, x, p4f_cephes_exp_p4);
y = ei_pmadd(y, x, ei_p4f_cephes_exp_p5); y = pmadd(y, x, p4f_cephes_exp_p5);
y = ei_pmadd(y, z, x); y = pmadd(y, z, x);
y = ei_padd(y, ei_p4f_1); y = padd(y, p4f_1);
/* build 2^n */ /* build 2^n */
emm0 = _mm_cvttps_epi32(fx); emm0 = _mm_cvttps_epi32(fx);
emm0 = _mm_add_epi32(emm0, ei_p4i_0x7f); emm0 = _mm_add_epi32(emm0, p4i_0x7f);
emm0 = _mm_slli_epi32(emm0, 23); emm0 = _mm_slli_epi32(emm0, 23);
return ei_pmul(y, _mm_castsi128_ps(emm0)); return pmul(y, _mm_castsi128_ps(emm0));
} }
/* evaluation of 4 sines at onces, using SSE2 intrinsics. /* evaluation of 4 sines at onces, using SSE2 intrinsics.
@@ -186,7 +188,7 @@ Packet4f ei_pexp<Packet4f>(const Packet4f& _x)
*/ */
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f ei_psin<Packet4f>(const Packet4f& _x) Packet4f psin<Packet4f>(const Packet4f& _x)
{ {
Packet4f x = _x; Packet4f x = _x;
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f); _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
@@ -215,24 +217,24 @@ Packet4f ei_psin<Packet4f>(const Packet4f& _x)
Packet4i emm0, emm2; Packet4i emm0, emm2;
sign_bit = x; sign_bit = x;
/* take the absolute value */ /* take the absolute value */
x = ei_pabs(x); x = pabs(x);
/* take the modulo */ /* take the modulo */
/* extract the sign bit (upper one) */ /* extract the sign bit (upper one) */
sign_bit = _mm_and_ps(sign_bit, ei_p4f_sign_mask); sign_bit = _mm_and_ps(sign_bit, p4f_sign_mask);
/* scale by 4/Pi */ /* scale by 4/Pi */
y = ei_pmul(x, ei_p4f_cephes_FOPI); y = pmul(x, p4f_cephes_FOPI);
/* store the integer part of y in mm0 */ /* store the integer part of y in mm0 */
emm2 = _mm_cvttps_epi32(y); emm2 = _mm_cvttps_epi32(y);
/* j=(j+1) & (~1) (see the cephes sources) */ /* j=(j+1) & (~1) (see the cephes sources) */
emm2 = _mm_add_epi32(emm2, ei_p4i_1); emm2 = _mm_add_epi32(emm2, p4i_1);
emm2 = _mm_and_si128(emm2, ei_p4i_not1); emm2 = _mm_and_si128(emm2, p4i_not1);
y = _mm_cvtepi32_ps(emm2); y = _mm_cvtepi32_ps(emm2);
/* get the swap sign flag */ /* get the swap sign flag */
emm0 = _mm_and_si128(emm2, ei_p4i_4); emm0 = _mm_and_si128(emm2, p4i_4);
emm0 = _mm_slli_epi32(emm0, 29); emm0 = _mm_slli_epi32(emm0, 29);
/* get the polynom selection mask /* get the polynom selection mask
there is one polynom for 0 <= x <= Pi/4 there is one polynom for 0 <= x <= Pi/4
@@ -240,7 +242,7 @@ Packet4f ei_psin<Packet4f>(const Packet4f& _x)
Both branches will be computed. Both branches will be computed.
*/ */
emm2 = _mm_and_si128(emm2, ei_p4i_2); emm2 = _mm_and_si128(emm2, p4i_2);
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128()); emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
Packet4f swap_sign_bit = _mm_castsi128_ps(emm0); Packet4f swap_sign_bit = _mm_castsi128_ps(emm0);
@@ -249,33 +251,33 @@ Packet4f ei_psin<Packet4f>(const Packet4f& _x)
/* The magic pass: "Extended precision modular arithmetic" /* The magic pass: "Extended precision modular arithmetic"
x = ((x - y * DP1) - y * DP2) - y * DP3; */ x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1 = ei_pmul(y, ei_p4f_minus_cephes_DP1); xmm1 = pmul(y, p4f_minus_cephes_DP1);
xmm2 = ei_pmul(y, ei_p4f_minus_cephes_DP2); xmm2 = pmul(y, p4f_minus_cephes_DP2);
xmm3 = ei_pmul(y, ei_p4f_minus_cephes_DP3); xmm3 = pmul(y, p4f_minus_cephes_DP3);
x = ei_padd(x, xmm1); x = padd(x, xmm1);
x = ei_padd(x, xmm2); x = padd(x, xmm2);
x = ei_padd(x, xmm3); x = padd(x, xmm3);
/* Evaluate the first polynom (0 <= x <= Pi/4) */ /* Evaluate the first polynom (0 <= x <= Pi/4) */
y = ei_p4f_coscof_p0; y = p4f_coscof_p0;
Packet4f z = _mm_mul_ps(x,x); Packet4f z = _mm_mul_ps(x,x);
y = ei_pmadd(y, z, ei_p4f_coscof_p1); y = pmadd(y, z, p4f_coscof_p1);
y = ei_pmadd(y, z, ei_p4f_coscof_p2); y = pmadd(y, z, p4f_coscof_p2);
y = ei_pmul(y, z); y = pmul(y, z);
y = ei_pmul(y, z); y = pmul(y, z);
Packet4f tmp = ei_pmul(z, ei_p4f_half); Packet4f tmp = pmul(z, p4f_half);
y = ei_psub(y, tmp); y = psub(y, tmp);
y = ei_padd(y, ei_p4f_1); y = padd(y, p4f_1);
/* Evaluate the second polynom (Pi/4 <= x <= 0) */ /* Evaluate the second polynom (Pi/4 <= x <= 0) */
Packet4f y2 = ei_p4f_sincof_p0; Packet4f y2 = p4f_sincof_p0;
y2 = ei_pmadd(y2, z, ei_p4f_sincof_p1); y2 = pmadd(y2, z, p4f_sincof_p1);
y2 = ei_pmadd(y2, z, ei_p4f_sincof_p2); y2 = pmadd(y2, z, p4f_sincof_p2);
y2 = ei_pmul(y2, z); y2 = pmul(y2, z);
y2 = ei_pmul(y2, x); y2 = pmul(y2, x);
y2 = ei_padd(y2, x); y2 = padd(y2, x);
/* select the correct result from the two polynoms */ /* select the correct result from the two polynoms */
y2 = _mm_and_ps(poly_mask, y2); y2 = _mm_and_ps(poly_mask, y2);
@@ -285,9 +287,9 @@ Packet4f ei_psin<Packet4f>(const Packet4f& _x)
return _mm_xor_ps(y, sign_bit); return _mm_xor_ps(y, sign_bit);
} }
/* almost the same as ei_psin */ /* almost the same as psin */
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f ei_pcos<Packet4f>(const Packet4f& _x) Packet4f pcos<Packet4f>(const Packet4f& _x)
{ {
Packet4f x = _x; Packet4f x = _x;
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f); _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
@@ -312,25 +314,25 @@ Packet4f ei_pcos<Packet4f>(const Packet4f& _x)
Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, y; Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
Packet4i emm0, emm2; Packet4i emm0, emm2;
x = ei_pabs(x); x = pabs(x);
/* scale by 4/Pi */ /* scale by 4/Pi */
y = ei_pmul(x, ei_p4f_cephes_FOPI); y = pmul(x, p4f_cephes_FOPI);
/* get the integer part of y */ /* get the integer part of y */
emm2 = _mm_cvttps_epi32(y); emm2 = _mm_cvttps_epi32(y);
/* j=(j+1) & (~1) (see the cephes sources) */ /* j=(j+1) & (~1) (see the cephes sources) */
emm2 = _mm_add_epi32(emm2, ei_p4i_1); emm2 = _mm_add_epi32(emm2, p4i_1);
emm2 = _mm_and_si128(emm2, ei_p4i_not1); emm2 = _mm_and_si128(emm2, p4i_not1);
y = _mm_cvtepi32_ps(emm2); y = _mm_cvtepi32_ps(emm2);
emm2 = _mm_sub_epi32(emm2, ei_p4i_2); emm2 = _mm_sub_epi32(emm2, p4i_2);
/* get the swap sign flag */ /* get the swap sign flag */
emm0 = _mm_andnot_si128(emm2, ei_p4i_4); emm0 = _mm_andnot_si128(emm2, p4i_4);
emm0 = _mm_slli_epi32(emm0, 29); emm0 = _mm_slli_epi32(emm0, 29);
/* get the polynom selection mask */ /* get the polynom selection mask */
emm2 = _mm_and_si128(emm2, ei_p4i_2); emm2 = _mm_and_si128(emm2, p4i_2);
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128()); emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
Packet4f sign_bit = _mm_castsi128_ps(emm0); Packet4f sign_bit = _mm_castsi128_ps(emm0);
@@ -338,31 +340,31 @@ Packet4f ei_pcos<Packet4f>(const Packet4f& _x)
/* The magic pass: "Extended precision modular arithmetic" /* The magic pass: "Extended precision modular arithmetic"
x = ((x - y * DP1) - y * DP2) - y * DP3; */ x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1 = ei_pmul(y, ei_p4f_minus_cephes_DP1); xmm1 = pmul(y, p4f_minus_cephes_DP1);
xmm2 = ei_pmul(y, ei_p4f_minus_cephes_DP2); xmm2 = pmul(y, p4f_minus_cephes_DP2);
xmm3 = ei_pmul(y, ei_p4f_minus_cephes_DP3); xmm3 = pmul(y, p4f_minus_cephes_DP3);
x = ei_padd(x, xmm1); x = padd(x, xmm1);
x = ei_padd(x, xmm2); x = padd(x, xmm2);
x = ei_padd(x, xmm3); x = padd(x, xmm3);
/* Evaluate the first polynom (0 <= x <= Pi/4) */ /* Evaluate the first polynom (0 <= x <= Pi/4) */
y = ei_p4f_coscof_p0; y = p4f_coscof_p0;
Packet4f z = ei_pmul(x,x); Packet4f z = pmul(x,x);
y = ei_pmadd(y,z,ei_p4f_coscof_p1); y = pmadd(y,z,p4f_coscof_p1);
y = ei_pmadd(y,z,ei_p4f_coscof_p2); y = pmadd(y,z,p4f_coscof_p2);
y = ei_pmul(y, z); y = pmul(y, z);
y = ei_pmul(y, z); y = pmul(y, z);
Packet4f tmp = _mm_mul_ps(z, ei_p4f_half); Packet4f tmp = _mm_mul_ps(z, p4f_half);
y = ei_psub(y, tmp); y = psub(y, tmp);
y = ei_padd(y, ei_p4f_1); y = padd(y, p4f_1);
/* Evaluate the second polynom (Pi/4 <= x <= 0) */ /* Evaluate the second polynom (Pi/4 <= x <= 0) */
Packet4f y2 = ei_p4f_sincof_p0; Packet4f y2 = p4f_sincof_p0;
y2 = ei_pmadd(y2, z, ei_p4f_sincof_p1); y2 = pmadd(y2, z, p4f_sincof_p1);
y2 = ei_pmadd(y2, z, ei_p4f_sincof_p2); y2 = pmadd(y2, z, p4f_sincof_p2);
y2 = ei_pmul(y2, z); y2 = pmul(y2, z);
y2 = ei_pmadd(y2, x, x); y2 = pmadd(y2, x, x);
/* select the correct result from the two polynoms */ /* select the correct result from the two polynoms */
y2 = _mm_and_ps(poly_mask, y2); y2 = _mm_and_ps(poly_mask, y2);
@@ -373,19 +375,21 @@ Packet4f ei_pcos<Packet4f>(const Packet4f& _x)
return _mm_xor_ps(y, sign_bit); return _mm_xor_ps(y, sign_bit);
} }
// This is Quake3's fast inverse square root. // This is based on Quake3's fast inverse square root.
// For detail see here: http://www.beyond3d.com/content/articles/8/ // For detail see here: http://www.beyond3d.com/content/articles/8/
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f ei_psqrt<Packet4f>(const Packet4f& _x) Packet4f psqrt<Packet4f>(const Packet4f& _x)
{ {
Packet4f half = ei_pmul(_x, ei_pset1(.5f)); Packet4f half = pmul(_x, pset1<Packet4f>(.5f));
/* select only the inverse sqrt of non-zero inputs */
Packet4f non_zero_mask = _mm_cmpgt_ps(_x, ei_pset1(std::numeric_limits<float>::epsilon()));
Packet4f x = _mm_and_ps(non_zero_mask, _mm_rsqrt_ps(_x));
x = ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x)))); /* select only the inverse sqrt of non-zero inputs */
return ei_pmul(_x,x); Packet4f non_zero_mask = _mm_cmpgt_ps(_x, pset1<Packet4f>(std::numeric_limits<float>::epsilon()));
Packet4f x = _mm_and_ps(non_zero_mask, _mm_rsqrt_ps(_x));
x = pmul(x, psub(pset1<Packet4f>(1.5f), pmul(half, pmul(x,x))));
return pmul(_x,x);
} }
} // end namespace internal
#endif // EIGEN_MATH_FUNCTIONS_SSE_H #endif // EIGEN_MATH_FUNCTIONS_SSE_H

View File

@@ -25,43 +25,58 @@
#ifndef EIGEN_PACKET_MATH_SSE_H #ifndef EIGEN_PACKET_MATH_SSE_H
#define EIGEN_PACKET_MATH_SSE_H #define EIGEN_PACKET_MATH_SSE_H
namespace internal {
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif #endif
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
#endif
typedef __m128 Packet4f; typedef __m128 Packet4f;
typedef __m128i Packet4i; typedef __m128i Packet4i;
typedef __m128d Packet2d; typedef __m128d Packet2d;
template<> struct ei_is_arithmetic<__m128> { enum { ret = true }; }; template<> struct is_arithmetic<__m128> { enum { value = true }; };
template<> struct ei_is_arithmetic<__m128i> { enum { ret = true }; }; template<> struct is_arithmetic<__m128i> { enum { value = true }; };
template<> struct ei_is_arithmetic<__m128d> { enum { ret = true }; }; template<> struct is_arithmetic<__m128d> { enum { value = true }; };
#define ei_vec4f_swizzle1(v,p,q,r,s) \ #define vec4f_swizzle1(v,p,q,r,s) \
(_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p))))) (_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p)))))
#define ei_vec4i_swizzle1(v,p,q,r,s) \ #define vec4i_swizzle1(v,p,q,r,s) \
(_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p)))) (_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
#define ei_vec4f_swizzle2(a,b,p,q,r,s) \ #define vec2d_swizzle1(v,p,q) \
(_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
#define vec4f_swizzle2(a,b,p,q,r,s) \
(_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p)))) (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
#define ei_vec4i_swizzle2(a,b,p,q,r,s) \ #define vec4i_swizzle2(a,b,p,q,r,s) \
(_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p)))))) (_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
const Packet4f ei_p4f_##NAME = ei_pset1<float>(X) const Packet4f p4f_##NAME = pset1<Packet4f>(X)
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
const Packet4f ei_p4f_##NAME = _mm_castsi128_ps(ei_pset1<int>(X)) const Packet4f p4f_##NAME = _mm_castsi128_ps(pset1<Packet4i>(X))
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i ei_p4i_##NAME = ei_pset1<int>(X) const Packet4i p4i_##NAME = pset1<Packet4i>(X)
template<> struct ei_packet_traits<float> : ei_default_packet_traits
template<> struct packet_traits<float> : default_packet_traits
{ {
typedef Packet4f type; enum {size=4}; typedef Packet4f type;
enum { enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=4,
HasDiv = 1,
HasSin = EIGEN_FAST_MATH, HasSin = EIGEN_FAST_MATH,
HasCos = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH,
HasLog = 1, HasLog = 1,
@@ -69,129 +84,146 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
HasSqrt = 1 HasSqrt = 1
}; };
}; };
template<> struct ei_packet_traits<double> : ei_default_packet_traits template<> struct packet_traits<double> : default_packet_traits
{ typedef Packet2d type; enum {size=2}; }; {
template<> struct ei_packet_traits<int> : ei_default_packet_traits typedef Packet2d type;
{ typedef Packet4i type; enum {size=4}; }; enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=2,
template<> struct ei_unpacket_traits<Packet4f> { typedef float type; enum {size=4}; }; HasDiv = 1
template<> struct ei_unpacket_traits<Packet2d> { typedef double type; enum {size=2}; }; };
template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size=4}; }; };
template<> struct packet_traits<int> : default_packet_traits
{
typedef Packet4i type;
enum {
// FIXME check the Has*
Vectorizable = 1,
AlignedOnScalar = 1,
size=4
};
};
#ifdef __GNUC__ template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
// Sometimes GCC implements _mm_set1_p* using multiple moves, template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
// that is inefficient :( (e.g., see ei_gemm_pack_rhs) template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) {
Packet4f res = _mm_set_ss(from);
return _mm_shuffle_ps(res,res,0);
}
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) {
Packet2d res = _mm_set_sd(from);
return _mm_unpacklo_pd(res,res);
}
#else
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { return _mm_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) { return _mm_set1_pd(from); }
#endif
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) { return _mm_set1_epi32(from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) { return _mm_add_ps(ei_pset1(a), _mm_set_ps(3,2,1,0)); } template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d ei_plset<double>(const double& a) { return _mm_add_pd(ei_pset1(a),_mm_set_pd(1,0)); } template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) { return _mm_add_epi32(ei_pset1(a),_mm_set_epi32(3,2,1,0)); } template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet2d ei_padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
template<> EIGEN_STRONG_INLINE Packet4i ei_padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); } template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet4f ei_psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); } template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pnegate(const Packet4f& a) template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
{ {
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000)); const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
return _mm_xor_ps(a,mask); return _mm_xor_ps(a,mask);
} }
template<> EIGEN_STRONG_INLINE Packet2d ei_pnegate(const Packet2d& a) template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
{ {
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000)); const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
return _mm_xor_pd(a,mask); return _mm_xor_pd(a,mask);
} }
template<> EIGEN_STRONG_INLINE Packet4i ei_pnegate(const Packet4i& a) template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
{ {
return ei_psub(_mm_setr_epi32(0,0,0,0), a); return psub(_mm_setr_epi32(0,0,0,0), a);
} }
template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b) template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
{ {
#ifdef EIGEN_VECTORIZE_SSE4_1 #ifdef EIGEN_VECTORIZE_SSE4_1
return _mm_mullo_epi32(a,b); return _mm_mullo_epi32(a,b);
#else #else
// this version is slightly faster than 4 scalar products // this version is slightly faster than 4 scalar products
return ei_vec4i_swizzle1( return vec4i_swizzle1(
ei_vec4i_swizzle2( vec4i_swizzle2(
_mm_mul_epu32(a,b), _mm_mul_epu32(a,b),
_mm_mul_epu32(ei_vec4i_swizzle1(a,1,0,3,2), _mm_mul_epu32(vec4i_swizzle1(a,1,0,3,2),
ei_vec4i_swizzle1(b,1,0,3,2)), vec4i_swizzle1(b,1,0,3,2)),
0,2,0,2), 0,2,0,2),
0,2,1,3); 0,2,1,3);
#endif #endif
} }
template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/) template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
{ ei_assert(false && "packet integer division are not supported by SSE"); { eigen_assert(false && "packet integer division are not supported by SSE");
return ei_pset1<int>(0); return pset1<Packet4i>(0);
} }
// for some weird raisons, it has to be overloaded for packet of integers // for some weird raisons, it has to be overloaded for packet of integers
template<> EIGEN_STRONG_INLINE Packet4i ei_pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return ei_padd(ei_pmul(a,b), c); } template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmin<Packet4i>(const Packet4i& a, const Packet4i& b) template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
{ {
// after some bench, this version *is* faster than a scalar implementation // after some bench, this version *is* faster than a scalar implementation
Packet4i mask = _mm_cmplt_epi32(a,b); Packet4i mask = _mm_cmplt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b)); return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
} }
template<> EIGEN_STRONG_INLINE Packet4f ei_pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmax<Packet4i>(const Packet4i& a, const Packet4i& b) template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
{ {
// after some bench, this version *is* faster than a scalar implementation // after some bench, this version *is* faster than a scalar implementation
Packet4i mask = _mm_cmpgt_epi32(a,b); Packet4i mask = _mm_cmpgt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b)); return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
} }
template<> EIGEN_STRONG_INLINE Packet4f ei_pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); } template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); } template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pload<double>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); } template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); } template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); }
#if defined(_MSC_VER) #if defined(_MSC_VER)
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_ps(from); } template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu<double>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); } EIGEN_DEBUG_UNALIGNED_LOAD
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<int>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); } #if (_MSC_VER==1600)
// NOTE Some version of MSVC10 generates bad code when using _mm_loadu_ps
// (i.e., it does not generate an unaligned load!!
// TODO On most architectures this version should also be faster than a single _mm_loadu_ps
// so we could also enable it for MSVC08 but first we have to make this later does not generate crap when doing so...
__m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from));
res = _mm_loadh_pi(res, (const __m64*)(from+2));
return res;
#else
return _mm_loadu_ps(from);
#endif
}
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); }
#else #else
// Fast unaligned loads. Note that here we cannot directly use intrinsics: this would // Fast unaligned loads. Note that here we cannot directly use intrinsics: this would
// require pointer casting to incompatible pointer types and leads to invalid code // require pointer casting to incompatible pointer types and leads to invalid code
@@ -199,84 +231,133 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_D
// a correct instruction dependency. // a correct instruction dependency.
// TODO: do the same for MSVC (ICC is compatible) // TODO: do the same for MSVC (ICC is compatible)
// NOTE: with the code below, MSVC's compiler crashes! // NOTE: with the code below, MSVC's compiler crashes!
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from)
#if defined(__GNUC__) && defined(__i386__)
// bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
#elif defined(__clang__)
// bug 201: Segfaults in __mm_loadh_pd with clang 2.8
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
#else
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 0
#endif
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
{ {
EIGEN_DEBUG_UNALIGNED_LOAD EIGEN_DEBUG_UNALIGNED_LOAD
#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
return _mm_loadu_ps(from);
#else
__m128d res; __m128d res;
res = _mm_load_sd((const double*)(from)) ; res = _mm_load_sd((const double*)(from)) ;
res = _mm_loadh_pd(res, (const double*)(from+2)) ; res = _mm_loadh_pd(res, (const double*)(from+2)) ;
return _mm_castpd_ps(res); return _mm_castpd_ps(res);
#endif
} }
template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu(const double* from) template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
{ {
EIGEN_DEBUG_UNALIGNED_LOAD EIGEN_DEBUG_UNALIGNED_LOAD
#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
return _mm_loadu_pd(from);
#else
__m128d res; __m128d res;
res = _mm_load_sd(from) ; res = _mm_load_sd(from) ;
res = _mm_loadh_pd(res,from+1); res = _mm_loadh_pd(res,from+1);
return res; return res;
#endif
} }
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
{ {
EIGEN_DEBUG_UNALIGNED_LOAD EIGEN_DEBUG_UNALIGNED_LOAD
#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from));
#else
__m128d res; __m128d res;
res = _mm_load_sd((const double*)(from)) ; res = _mm_load_sd((const double*)(from)) ;
res = _mm_loadh_pd(res, (const double*)(from+2)) ; res = _mm_loadh_pd(res, (const double*)(from+2)) ;
return _mm_castpd_si128(res); return _mm_castpd_si128(res);
#endif
} }
#endif #endif
template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); } template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
template<> EIGEN_STRONG_INLINE void ei_pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); } {
template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<Packet4i*>(to), from); } return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd((const double*)from)), 0, 0, 1, 1);
}
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
{ return pset1<Packet2d>(from[0]); }
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
{
Packet4i tmp;
tmp = _mm_loadl_epi64(reinterpret_cast<const Packet4i*>(from));
return vec4i_swizzle1(tmp, 0, 0, 1, 1);
}
template<> EIGEN_STRONG_INLINE void ei_pstoreu<double>(double* to, const Packet2d& from) { template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<Packet4i*>(to), from); }
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {
EIGEN_DEBUG_UNALIGNED_STORE EIGEN_DEBUG_UNALIGNED_STORE
_mm_storel_pd((to), from); _mm_storel_pd((to), from);
_mm_storeh_pd((to+1), from); _mm_storeh_pd((to+1), from);
} }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((double*)to, _mm_castps_pd(from)); } template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, _mm_castps_pd(from)); }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((double*)to, _mm_castsi128_pd(from)); } template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, _mm_castsi128_pd(from)); }
template<> EIGEN_STRONG_INLINE void ei_prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } // some compilers might be tempted to perform multiple moves instead of using a vector path.
template<> EIGEN_STRONG_INLINE void ei_prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
template<> EIGEN_STRONG_INLINE void ei_prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } {
Packet4f pa = _mm_set_ss(a);
pstore(to, vec4f_swizzle1(pa,0,0,0,0));
}
// some compilers might be tempted to perform multiple moves instead of using a vector path.
template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
{
Packet2d pa = _mm_set_sd(a);
pstore(to, vec2d_swizzle1(pa,0,0));
}
#if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64) template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
// The temporary variable fixes an internal compilation error. template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
#if defined(_MSC_VER) && defined(_WIN64) && !defined(__INTEL_COMPILER)
// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
// Direct of the struct members fixed bug #62. // Direct of the struct members fixed bug #62.
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; } template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; } template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; } template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
#elif defined(_MSC_VER) && (_MSC_VER <= 1500) #elif defined(_MSC_VER) && !defined(__INTEL_COMPILER)
// The temporary variable fixes an internal compilation error. // The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; } template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; } template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; } template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
#else #else
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); } template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); } template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); } template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
#endif #endif
template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a) template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
{ return _mm_shuffle_ps(a,a,0x1B); } { return _mm_shuffle_ps(a,a,0x1B); }
template<> EIGEN_STRONG_INLINE Packet2d ei_preverse(const Packet2d& a) template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
{ return _mm_shuffle_pd(a,a,0x1); } { return _mm_shuffle_pd(a,a,0x1); }
template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a) template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
{ return _mm_shuffle_epi32(a,0x1B); } { return _mm_shuffle_epi32(a,0x1B); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pabs(const Packet4f& a) template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
{ {
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF)); const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
return _mm_and_ps(a,mask); return _mm_and_ps(a,mask);
} }
template<> EIGEN_STRONG_INLINE Packet2d ei_pabs(const Packet2d& a) template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a)
{ {
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF)); const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
return _mm_and_pd(a,mask); return _mm_and_pd(a,mask);
} }
template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a) template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
{ {
#ifdef EIGEN_VECTORIZE_SSSE3 #ifdef EIGEN_VECTORIZE_SSSE3
return _mm_abs_epi32(a); return _mm_abs_epi32(a);
@@ -286,7 +367,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a)
#endif #endif
} }
EIGEN_STRONG_INLINE void ei_punpackp(Packet4f* vecs) EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
{ {
vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55)); vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA)); vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
@@ -296,47 +377,47 @@ EIGEN_STRONG_INLINE void ei_punpackp(Packet4f* vecs)
#ifdef EIGEN_VECTORIZE_SSE3 #ifdef EIGEN_VECTORIZE_SSE3
// TODO implement SSE2 versions as well as integer versions // TODO implement SSE2 versions as well as integer versions
template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs) template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
{ {
return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3])); return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
} }
template<> EIGEN_STRONG_INLINE Packet2d ei_preduxp<Packet2d>(const Packet2d* vecs) template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
{ {
return _mm_hadd_pd(vecs[0], vecs[1]); return _mm_hadd_pd(vecs[0], vecs[1]);
} }
// SSSE3 version: // SSSE3 version:
// EIGEN_STRONG_INLINE Packet4i ei_preduxp(const Packet4i* vecs) // EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs)
// { // {
// return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3])); // return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
// } // }
template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a) template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
{ {
Packet4f tmp0 = _mm_hadd_ps(a,a); Packet4f tmp0 = _mm_hadd_ps(a,a);
return ei_pfirst(_mm_hadd_ps(tmp0, tmp0)); return pfirst(_mm_hadd_ps(tmp0, tmp0));
} }
template<> EIGEN_STRONG_INLINE double ei_predux<Packet2d>(const Packet2d& a) { return ei_pfirst(_mm_hadd_pd(a, a)); } template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return pfirst(_mm_hadd_pd(a, a)); }
// SSSE3 version: // SSSE3 version:
// EIGEN_STRONG_INLINE float ei_predux(const Packet4i& a) // EIGEN_STRONG_INLINE float predux(const Packet4i& a)
// { // {
// Packet4i tmp0 = _mm_hadd_epi32(a,a); // Packet4i tmp0 = _mm_hadd_epi32(a,a);
// return ei_pfirst(_mm_hadd_epi32(tmp0, tmp0)); // return pfirst(_mm_hadd_epi32(tmp0, tmp0));
// } // }
#else #else
// SSE2 versions // SSE2 versions
template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a) template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
{ {
Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a)); Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
return ei_pfirst(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); return pfirst(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
} }
template<> EIGEN_STRONG_INLINE double ei_predux<Packet2d>(const Packet2d& a) template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
{ {
return ei_pfirst(_mm_add_sd(a, _mm_unpackhi_pd(a,a))); return pfirst(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
} }
template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs) template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
{ {
Packet4f tmp0, tmp1, tmp2; Packet4f tmp0, tmp1, tmp2;
tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]); tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
@@ -350,19 +431,19 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vec
return _mm_add_ps(tmp0, tmp2); return _mm_add_ps(tmp0, tmp2);
} }
template<> EIGEN_STRONG_INLINE Packet2d ei_preduxp<Packet2d>(const Packet2d* vecs) template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
{ {
return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1])); return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
} }
#endif // SSE3 #endif // SSE3
template<> EIGEN_STRONG_INLINE int ei_predux<Packet4i>(const Packet4i& a) template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
{ {
Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a)); Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
return ei_pfirst(tmp) + ei_pfirst(_mm_shuffle_epi32(tmp, 1)); return pfirst(tmp) + pfirst(_mm_shuffle_epi32(tmp, 1));
} }
template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vecs) template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
{ {
Packet4i tmp0, tmp1, tmp2; Packet4i tmp0, tmp1, tmp2;
tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]); tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
@@ -379,69 +460,69 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vec
// Other reduction functions: // Other reduction functions:
// mul // mul
template<> EIGEN_STRONG_INLINE float ei_predux_mul<Packet4f>(const Packet4f& a) template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
{ {
Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a)); Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
return ei_pfirst(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); return pfirst(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
} }
template<> EIGEN_STRONG_INLINE double ei_predux_mul<Packet2d>(const Packet2d& a) template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
{ {
return ei_pfirst(_mm_mul_sd(a, _mm_unpackhi_pd(a,a))); return pfirst(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
} }
template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a) template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
{ {
// after some experiments, it is seems this is the fastest way to implement it // after some experiments, it is seems this is the fastest way to implement it
// for GCC (eg., reusing ei_pmul is very slow !) // for GCC (eg., reusing pmul is very slow !)
// TODO try to call _mm_mul_epu32 directly // TODO try to call _mm_mul_epu32 directly
EIGEN_ALIGN16 int aux[4]; EIGEN_ALIGN16 int aux[4];
ei_pstore(aux, a); pstore(aux, a);
return (aux[0] * aux[1]) * (aux[2] * aux[3]);; return (aux[0] * aux[1]) * (aux[2] * aux[3]);;
} }
// min // min
template<> EIGEN_STRONG_INLINE float ei_predux_min<Packet4f>(const Packet4f& a) template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
{ {
Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a)); Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
return ei_pfirst(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); return pfirst(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
} }
template<> EIGEN_STRONG_INLINE double ei_predux_min<Packet2d>(const Packet2d& a) template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
{ {
return ei_pfirst(_mm_min_sd(a, _mm_unpackhi_pd(a,a))); return pfirst(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
} }
template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a) template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
{ {
// after some experiments, it is seems this is the fastest way to implement it // after some experiments, it is seems this is the fastest way to implement it
// for GCC (eg., it does not like using std::min after the ei_pstore !!) // for GCC (eg., it does not like using std::min after the pstore !!)
EIGEN_ALIGN16 int aux[4]; EIGEN_ALIGN16 int aux[4];
ei_pstore(aux, a); pstore(aux, a);
register int aux0 = aux[0]<aux[1] ? aux[0] : aux[1]; register int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
register int aux2 = aux[2]<aux[3] ? aux[2] : aux[3]; register int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
return aux0<aux2 ? aux0 : aux2; return aux0<aux2 ? aux0 : aux2;
} }
// max // max
template<> EIGEN_STRONG_INLINE float ei_predux_max<Packet4f>(const Packet4f& a) template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
{ {
Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a)); Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
return ei_pfirst(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); return pfirst(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
} }
template<> EIGEN_STRONG_INLINE double ei_predux_max<Packet2d>(const Packet2d& a) template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
{ {
return ei_pfirst(_mm_max_sd(a, _mm_unpackhi_pd(a,a))); return pfirst(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
} }
template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a) template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
{ {
// after some experiments, it is seems this is the fastest way to implement it // after some experiments, it is seems this is the fastest way to implement it
// for GCC (eg., it does not like using std::min after the ei_pstore !!) // for GCC (eg., it does not like using std::min after the pstore !!)
EIGEN_ALIGN16 int aux[4]; EIGEN_ALIGN16 int aux[4];
ei_pstore(aux, a); pstore(aux, a);
register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1]; register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3]; register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
return aux0>aux2 ? aux0 : aux2; return aux0>aux2 ? aux0 : aux2;
} }
#if (defined __GNUC__) #if (defined __GNUC__)
// template <> EIGEN_STRONG_INLINE Packet4f ei_pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) // template <> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
// { // {
// Packet4f res = b; // Packet4f res = b;
// asm("mulps %[a], %[b] \n\taddps %[c], %[b]" : [b] "+x" (res) : [a] "x" (a), [c] "x" (c)); // asm("mulps %[a], %[b] \n\taddps %[c], %[b]" : [b] "+x" (res) : [a] "x" (a), [c] "x" (c));
@@ -458,7 +539,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
#ifdef EIGEN_VECTORIZE_SSSE3 #ifdef EIGEN_VECTORIZE_SSSE3
// SSSE3 versions // SSSE3 versions
template<int Offset> template<int Offset>
struct ei_palign_impl<Offset,Packet4f> struct palign_impl<Offset,Packet4f>
{ {
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second) EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
{ {
@@ -468,7 +549,7 @@ struct ei_palign_impl<Offset,Packet4f>
}; };
template<int Offset> template<int Offset>
struct ei_palign_impl<Offset,Packet4i> struct palign_impl<Offset,Packet4i>
{ {
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second) EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
{ {
@@ -478,7 +559,7 @@ struct ei_palign_impl<Offset,Packet4i>
}; };
template<int Offset> template<int Offset>
struct ei_palign_impl<Offset,Packet2d> struct palign_impl<Offset,Packet2d>
{ {
EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second) EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
{ {
@@ -489,7 +570,7 @@ struct ei_palign_impl<Offset,Packet2d>
#else #else
// SSE2 versions // SSE2 versions
template<int Offset> template<int Offset>
struct ei_palign_impl<Offset,Packet4f> struct palign_impl<Offset,Packet4f>
{ {
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second) EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
{ {
@@ -512,7 +593,7 @@ struct ei_palign_impl<Offset,Packet4f>
}; };
template<int Offset> template<int Offset>
struct ei_palign_impl<Offset,Packet4i> struct palign_impl<Offset,Packet4i>
{ {
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second) EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
{ {
@@ -535,7 +616,7 @@ struct ei_palign_impl<Offset,Packet4i>
}; };
template<int Offset> template<int Offset>
struct ei_palign_impl<Offset,Packet2d> struct palign_impl<Offset,Packet2d>
{ {
EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second) EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
{ {
@@ -548,4 +629,6 @@ struct ei_palign_impl<Offset,Packet2d>
}; };
#endif #endif
} // end namespace internal
#endif // EIGEN_PACKET_MATH_SSE_H #endif // EIGEN_PACKET_MATH_SSE_H

View File

@@ -26,6 +26,8 @@
#ifndef EIGEN_COEFFBASED_PRODUCT_H #ifndef EIGEN_COEFFBASED_PRODUCT_H
#define EIGEN_COEFFBASED_PRODUCT_H #define EIGEN_COEFFBASED_PRODUCT_H
namespace internal {
/********************************************************************************* /*********************************************************************************
* Coefficient based product implementation. * Coefficient based product implementation.
* It is designed for the following use cases: * It is designed for the following use cases:
@@ -40,22 +42,22 @@
*/ */
template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl; struct product_coeff_impl;
template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl; struct product_packet_impl;
template<typename LhsNested, typename RhsNested, int NestingFlags> template<typename LhsNested, typename RhsNested, int NestingFlags>
struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> > struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
{ {
typedef MatrixXpr XprKind; typedef MatrixXpr XprKind;
typedef typename ei_cleantype<LhsNested>::type _LhsNested; typedef typename remove_all<LhsNested>::type _LhsNested;
typedef typename ei_cleantype<RhsNested>::type _RhsNested; typedef typename remove_all<RhsNested>::type _RhsNested;
typedef typename ei_scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar; typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
typedef typename ei_promote_storage_type<typename ei_traits<_LhsNested>::StorageKind, typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
typename ei_traits<_RhsNested>::StorageKind>::ret StorageKind; typename traits<_RhsNested>::StorageKind>::ret StorageKind;
typedef typename ei_promote_index_type<typename ei_traits<_LhsNested>::Index, typedef typename promote_index_type<typename traits<_LhsNested>::Index,
typename ei_traits<_RhsNested>::Index>::type Index; typename traits<_RhsNested>::Index>::type Index;
enum { enum {
LhsCoeffReadCost = _LhsNested::CoeffReadCost, LhsCoeffReadCost = _LhsNested::CoeffReadCost,
@@ -73,16 +75,18 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
LhsRowMajor = LhsFlags & RowMajorBit, LhsRowMajor = LhsFlags & RowMajorBit,
RhsRowMajor = RhsFlags & RowMajorBit, RhsRowMajor = RhsFlags & RowMajorBit,
SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
&& (ColsAtCompileTime == Dynamic && (ColsAtCompileTime == Dynamic
|| ( (ColsAtCompileTime % ei_packet_traits<Scalar>::size) == 0 || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
&& (RhsFlags&AlignedBit) && (RhsFlags&AlignedBit)
) )
), ),
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
&& (RowsAtCompileTime == Dynamic && (RowsAtCompileTime == Dynamic
|| ( (RowsAtCompileTime % ei_packet_traits<Scalar>::size) == 0 || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
&& (LhsFlags&AlignedBit) && (LhsFlags&AlignedBit)
) )
), ),
@@ -94,7 +98,9 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
| (EvalToRowMajor ? RowMajorBit : 0) | (EvalToRowMajor ? RowMajorBit : 0)
| NestingFlags | NestingFlags
| (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0), | (LhsFlags & RhsFlags & AlignedBit)
// TODO enable vectorization for mixed types
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
CoeffReadCost = InnerSize == Dynamic ? Dynamic CoeffReadCost = InnerSize == Dynamic ? Dynamic
: InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
@@ -105,17 +111,20 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
* the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
*/ */
CanVectorizeInner = LhsRowMajor CanVectorizeInner = SameType
&& LhsRowMajor
&& (!RhsRowMajor) && (!RhsRowMajor)
&& (LhsFlags & RhsFlags & ActualPacketAccessBit) && (LhsFlags & RhsFlags & ActualPacketAccessBit)
&& (LhsFlags & RhsFlags & AlignedBit) && (LhsFlags & RhsFlags & AlignedBit)
&& (InnerSize % ei_packet_traits<Scalar>::size == 0) && (InnerSize % packet_traits<Scalar>::size == 0)
}; };
}; };
} // end namespace internal
template<typename LhsNested, typename RhsNested, int NestingFlags> template<typename LhsNested, typename RhsNested, int NestingFlags>
class CoeffBasedProduct class CoeffBasedProduct
: ei_no_assignment_operator, : internal::no_assignment_operator,
public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> > public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
{ {
public: public:
@@ -126,19 +135,19 @@ class CoeffBasedProduct
private: private:
typedef typename ei_traits<CoeffBasedProduct>::_LhsNested _LhsNested; typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
typedef typename ei_traits<CoeffBasedProduct>::_RhsNested _RhsNested; typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
enum { enum {
PacketSize = ei_packet_traits<Scalar>::size, PacketSize = internal::packet_traits<Scalar>::size,
InnerSize = ei_traits<CoeffBasedProduct>::InnerSize, InnerSize = internal::traits<CoeffBasedProduct>::InnerSize,
Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
CanVectorizeInner = ei_traits<CoeffBasedProduct>::CanVectorizeInner CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
}; };
typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal, typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
Unroll ? InnerSize-1 : Dynamic, Unroll ? InnerSize-1 : Dynamic,
_LhsNested, _RhsNested, Scalar> ScalarCoeffImpl; _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType; typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
@@ -154,9 +163,9 @@ class CoeffBasedProduct
{ {
// we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable. // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
// We still allow to mix T and complex<T>. // We still allow to mix T and complex<T>.
EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::RealScalar, typename Rhs::RealScalar>::ret), EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
ei_assert(lhs.cols() == rhs.rows() eigen_assert(lhs.cols() == rhs.rows()
&& "invalid matrix product" && "invalid matrix product"
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions"); && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
} }
@@ -187,15 +196,15 @@ class CoeffBasedProduct
EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
{ {
PacketScalar res; PacketScalar res;
ei_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor, internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
Unroll ? InnerSize-1 : Dynamic, Unroll ? InnerSize-1 : Dynamic,
_LhsNested, _RhsNested, PacketScalar, LoadMode> _LhsNested, _RhsNested, PacketScalar, LoadMode>
::run(row, col, m_lhs, m_rhs, res); ::run(row, col, m_lhs, m_rhs, res);
return res; return res;
} }
// Implicit conversion to the nested type (trigger the evaluation of the product) // Implicit conversion to the nested type (trigger the evaluation of the product)
operator const PlainObject& () const EIGEN_STRONG_INLINE operator const PlainObject& () const
{ {
m_result.lazyAssign(*this); m_result.lazyAssign(*this);
return m_result; return m_result;
@@ -204,14 +213,14 @@ class CoeffBasedProduct
const _LhsNested& lhs() const { return m_lhs; } const _LhsNested& lhs() const { return m_lhs; }
const _RhsNested& rhs() const { return m_rhs; } const _RhsNested& rhs() const { return m_rhs; }
const Diagonal<LazyCoeffBasedProductType,0> diagonal() const const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); } { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
template<int DiagonalIndex> template<int DiagonalIndex>
const Diagonal<LazyCoeffBasedProductType,DiagonalIndex> diagonal() const const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); } { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
const Diagonal<LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); } { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
protected: protected:
@@ -221,10 +230,12 @@ class CoeffBasedProduct
mutable PlainObject m_result; mutable PlainObject m_result;
}; };
namespace internal {
// here we need to overload the nested rule for products // here we need to overload the nested rule for products
// such that the nested type is a const reference to a plain matrix // such that the nested type is a const reference to a plain matrix
template<typename Lhs, typename Rhs, int N, typename PlainObject> template<typename Lhs, typename Rhs, int N, typename PlainObject>
struct ei_nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject> struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
{ {
typedef PlainObject const& type; typedef PlainObject const& type;
}; };
@@ -238,18 +249,18 @@ struct ei_nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssign
**************************************/ **************************************/
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{ {
typedef typename Lhs::Index Index; typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{ {
ei_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res); product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
} }
}; };
template<typename Lhs, typename Rhs, typename RetScalar> template<typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar> struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
{ {
typedef typename Lhs::Index Index; typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
@@ -259,12 +270,12 @@ struct ei_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
}; };
template<typename Lhs, typename Rhs, typename RetScalar> template<typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar> struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
{ {
typedef typename Lhs::Index Index; typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
{ {
ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = lhs.coeff(row, 0) * rhs.coeff(0, col); res = lhs.coeff(row, 0) * rhs.coeff(0, col);
for(Index i = 1; i < lhs.cols(); ++i) for(Index i = 1; i < lhs.cols(); ++i)
res += lhs.coeff(row, i) * rhs.coeff(i, col); res += lhs.coeff(row, i) * rhs.coeff(i, col);
@@ -275,92 +286,92 @@ struct ei_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
*** Scalar path with inner vectorization *** *** Scalar path with inner vectorization ***
*******************************************/ *******************************************/
template<int UnrollingIndex, typename Lhs, typename Rhs, typename PacketScalar> template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
struct ei_product_coeff_vectorized_unroller struct product_coeff_vectorized_unroller
{ {
typedef typename Lhs::Index Index; typedef typename Lhs::Index Index;
enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size }; enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
{ {
ei_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres); product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
pres = ei_padd(pres, ei_pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) )); pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
} }
}; };
template<typename Lhs, typename Rhs, typename PacketScalar> template<typename Lhs, typename Rhs, typename Packet>
struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, PacketScalar> struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
{ {
typedef typename Lhs::Index Index; typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
{ {
pres = ei_pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col)); pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
} }
}; };
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{ {
typedef typename Lhs::PacketScalar PacketScalar; typedef typename Lhs::PacketScalar Packet;
typedef typename Lhs::Index Index; typedef typename Lhs::Index Index;
enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size }; enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{ {
PacketScalar pres; Packet pres;
ei_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres); product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
ei_product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res); product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
res = ei_predux(pres); res = predux(pres);
} }
}; };
template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime> template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
struct ei_product_coeff_vectorized_dyn_selector struct product_coeff_vectorized_dyn_selector
{ {
typedef typename Lhs::Index Index; typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{ {
res = lhs.row(row).cwiseProduct(rhs.col(col)).sum(); res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
} }
}; };
// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower // NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
// NOTE maybe they are now useless since we have a specialization for Block<Matrix> // NOTE maybe they are now useless since we have a specialization for Block<Matrix>
template<typename Lhs, typename Rhs, int RhsCols> template<typename Lhs, typename Rhs, int RhsCols>
struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols> struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
{ {
typedef typename Lhs::Index Index; typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{ {
res = lhs.cwiseProduct(rhs.col(col)).sum(); res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
} }
}; };
template<typename Lhs, typename Rhs, int LhsRows> template<typename Lhs, typename Rhs, int LhsRows>
struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1> struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
{ {
typedef typename Lhs::Index Index; typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{ {
res = lhs.row(row).cwiseProduct(rhs).sum(); res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
} }
}; };
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs>
struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1> struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
{ {
typedef typename Lhs::Index Index; typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{ {
res = lhs.cwiseProduct(rhs).sum(); res = lhs.transpose().cwiseProduct(rhs).sum();
} }
}; };
template<typename Lhs, typename Rhs, typename RetScalar> template<typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar> struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
{ {
typedef typename Lhs::Index Index; typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{ {
ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res); product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
} }
}; };
@@ -368,72 +379,74 @@ struct ei_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetSca
*** Packet path *** *** Packet path ***
*******************/ *******************/
template<int UnrollingIndex, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, PacketScalar, LoadMode> struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{ {
typedef typename Lhs::Index Index; typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{ {
ei_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res); product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
res = ei_pmadd(ei_pset1(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res); res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
} }
}; };
template<int UnrollingIndex, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, PacketScalar, LoadMode> struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{ {
typedef typename Lhs::Index Index; typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{ {
ei_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res); product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
res = ei_pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), ei_pset1(rhs.coeff(UnrollingIndex, col)), res); res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
} }
}; };
template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<RowMajor, 0, Lhs, Rhs, PacketScalar, LoadMode> struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{ {
typedef typename Lhs::Index Index; typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{ {
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
} }
}; };
template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<ColMajor, 0, Lhs, Rhs, PacketScalar, LoadMode> struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{ {
typedef typename Lhs::Index Index; typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{ {
res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1(rhs.coeff(0, col))); res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
} }
}; };
template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, PacketScalar, LoadMode> struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{ {
typedef typename Lhs::Index Index; typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
{ {
ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
for(Index i = 1; i < lhs.cols(); ++i) for(Index i = 1; i < lhs.cols(); ++i)
res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res); res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
} }
}; };
template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct ei_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, PacketScalar, LoadMode> struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{ {
typedef typename Lhs::Index Index; typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
{ {
ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1(rhs.coeff(0, col))); res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
for(Index i = 1; i < lhs.cols(); ++i) for(Index i = 1; i < lhs.cols(); ++i)
res = ei_pmadd(lhs.template packet<LoadMode>(row, i), ei_pset1(rhs.coeff(i, col)), res); res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
} }
}; };
} // end namespace internal
#endif // EIGEN_COEFFBASED_PRODUCT_H #endif // EIGEN_COEFFBASED_PRODUCT_H

File diff suppressed because it is too large Load Diff

View File

@@ -25,30 +25,31 @@
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H #ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
#define EIGEN_GENERAL_MATRIX_MATRIX_H #define EIGEN_GENERAL_MATRIX_MATRIX_H
template<typename _LhsScalar, typename _RhsScalar> class ei_level3_blocking; namespace internal {
template<typename _LhsScalar, typename _RhsScalar> class level3_blocking;
/* Specialization for a row-major destination matrix => simple transposition of the product */ /* Specialization for a row-major destination matrix => simple transposition of the product */
template< template<
typename Scalar, typename Index, typename Index,
int LhsStorageOrder, bool ConjugateLhs, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs> typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct ei_general_matrix_matrix_product<Scalar,Index,LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,RowMajor> struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
{ {
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run( static EIGEN_STRONG_INLINE void run(
Index rows, Index cols, Index depth, Index rows, Index cols, Index depth,
const Scalar* lhs, Index lhsStride, const LhsScalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride, const RhsScalar* rhs, Index rhsStride,
Scalar* res, Index resStride, ResScalar* res, Index resStride,
Scalar alpha, ResScalar alpha,
ei_level3_blocking<Scalar,Scalar>& blocking, level3_blocking<RhsScalar,LhsScalar>& blocking,
GemmParallelInfo<Index>* info = 0) GemmParallelInfo<Index>* info = 0)
{ {
// transpose the product such that the result is column major // transpose the product such that the result is column major
ei_general_matrix_matrix_product<Scalar, Index, general_matrix_matrix_product<Index,
RhsStorageOrder==RowMajor ? ColMajor : RowMajor, RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
ConjugateRhs, LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
ConjugateLhs,
ColMajor> ColMajor>
::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking,info); ::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking,info);
} }
@@ -57,35 +58,32 @@ struct ei_general_matrix_matrix_product<Scalar,Index,LhsStorageOrder,ConjugateLh
/* Specialization for a col-major destination matrix /* Specialization for a col-major destination matrix
* => Blocking algorithm following Goto's paper */ * => Blocking algorithm following Goto's paper */
template< template<
typename Scalar, typename Index, typename Index,
int LhsStorageOrder, bool ConjugateLhs, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs> typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct ei_general_matrix_matrix_product<Scalar,Index,LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
{ {
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static void run(Index rows, Index cols, Index depth, static void run(Index rows, Index cols, Index depth,
const Scalar* _lhs, Index lhsStride, const LhsScalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride, const RhsScalar* _rhs, Index rhsStride,
Scalar* res, Index resStride, ResScalar* res, Index resStride,
Scalar alpha, ResScalar alpha,
ei_level3_blocking<Scalar,Scalar>& blocking, level3_blocking<LhsScalar,RhsScalar>& blocking,
GemmParallelInfo<Index>* info = 0) GemmParallelInfo<Index>* info = 0)
{ {
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
if (ConjugateRhs) typedef gebp_traits<LhsScalar,RhsScalar> Traits;
alpha = ei_conj(alpha);
typedef typename ei_packet_traits<Scalar>::type PacketType;
typedef ei_product_blocking_traits<Scalar> Blocking;
Index kc = blocking.kc(); // cache block size along the K direction Index kc = blocking.kc(); // cache block size along the K direction
Index mc = std::min(rows,blocking.mc()); // cache block size along the M direction Index mc = std::min(rows,blocking.mc()); // cache block size along the M direction
//Index nc = blocking.nc(); // cache block size along the N direction //Index nc = blocking.nc(); // cache block size along the N direction
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr, RhsStorageOrder> pack_rhs; gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr, LhsStorageOrder> pack_lhs; gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp; gebp_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
#ifdef EIGEN_HAS_OPENMP #ifdef EIGEN_HAS_OPENMP
if(info) if(info)
@@ -93,12 +91,13 @@ static void run(Index rows, Index cols, Index depth,
// this is the parallel version! // this is the parallel version!
Index tid = omp_get_thread_num(); Index tid = omp_get_thread_num();
Index threads = omp_get_num_threads(); Index threads = omp_get_num_threads();
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); std::size_t sizeA = kc*mc;
std::size_t sizeW = kc*Blocking::PacketSize*Blocking::nr*8; std::size_t sizeW = kc*Traits::WorkSpaceFactor;
Scalar* w = ei_aligned_stack_new(Scalar, sizeW); LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, sizeA);
Scalar* blockB = blocking.blockB(); RhsScalar* w = ei_aligned_stack_new(RhsScalar, sizeW);
ei_internal_assert(blockB!=0); RhsScalar* blockB = blocking.blockB();
eigen_internal_assert(blockB!=0);
// For each horizontal panel of the rhs, and corresponding vertical panel of the lhs... // For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
for(Index k=0; k<depth; k+=kc) for(Index k=0; k<depth; k+=kc)
@@ -118,7 +117,7 @@ static void run(Index rows, Index cols, Index depth,
while(info[tid].users!=0) {} while(info[tid].users!=0) {}
info[tid].users += threads; info[tid].users += threads;
pack_rhs(blockB+info[tid].rhs_start*kc, &rhs(k,info[tid].rhs_start), rhsStride, alpha, actual_kc, info[tid].rhs_length); pack_rhs(blockB+info[tid].rhs_start*actual_kc, &rhs(k,info[tid].rhs_start), rhsStride, actual_kc, info[tid].rhs_length);
// Notify the other threads that the part B'_j is ready to go. // Notify the other threads that the part B'_j is ready to go.
info[tid].sync = k; info[tid].sync = k;
@@ -134,7 +133,7 @@ static void run(Index rows, Index cols, Index depth,
if(shift>0) if(shift>0)
while(info[j].sync!=k) {} while(info[j].sync!=k) {}
gebp(res+info[j].rhs_start*resStride, resStride, blockA, blockB+info[j].rhs_start*kc, mc, actual_kc, info[j].rhs_length, -1,-1,0,0, w); gebp(res+info[j].rhs_start*resStride, resStride, blockA, blockB+info[j].rhs_start*actual_kc, mc, actual_kc, info[j].rhs_length, alpha, -1,-1,0,0, w);
} }
// Then keep going as usual with the remaining A' // Then keep going as usual with the remaining A'
@@ -146,7 +145,7 @@ static void run(Index rows, Index cols, Index depth,
pack_lhs(blockA, &lhs(i,k), lhsStride, actual_kc, actual_mc); pack_lhs(blockA, &lhs(i,k), lhsStride, actual_kc, actual_mc);
// C_i += A' * B' // C_i += A' * B'
gebp(res+i, resStride, blockA, blockB, actual_mc, actual_kc, cols, -1,-1,0,0, w); gebp(res+i, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1,-1,0,0, w);
} }
// Release all the sub blocks B'_j of B' for the current thread, // Release all the sub blocks B'_j of B' for the current thread,
@@ -156,8 +155,8 @@ static void run(Index rows, Index cols, Index depth,
--(info[j].users); --(info[j].users);
} }
ei_aligned_stack_delete(Scalar, blockA, kc*mc); ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
ei_aligned_stack_delete(Scalar, w, sizeW); ei_aligned_stack_delete(RhsScalar, w, sizeW);
} }
else else
#endif // EIGEN_HAS_OPENMP #endif // EIGEN_HAS_OPENMP
@@ -167,10 +166,10 @@ static void run(Index rows, Index cols, Index depth,
// this is the sequential version! // this is the sequential version!
std::size_t sizeA = kc*mc; std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols; std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Blocking::PacketSize*Blocking::nr; std::size_t sizeW = kc*Traits::WorkSpaceFactor;
Scalar *blockA = blocking.blockA()==0 ? ei_aligned_stack_new(Scalar, sizeA) : blocking.blockA(); LhsScalar *blockA = blocking.blockA()==0 ? ei_aligned_stack_new(LhsScalar, sizeA) : blocking.blockA();
Scalar *blockB = blocking.blockB()==0 ? ei_aligned_stack_new(Scalar, sizeB) : blocking.blockB(); RhsScalar *blockB = blocking.blockB()==0 ? ei_aligned_stack_new(RhsScalar, sizeB) : blocking.blockB();
Scalar *blockW = blocking.blockW()==0 ? ei_aligned_stack_new(Scalar, sizeW) : blocking.blockW(); RhsScalar *blockW = blocking.blockW()==0 ? ei_aligned_stack_new(RhsScalar, sizeW) : blocking.blockW();
// For each horizontal panel of the rhs, and corresponding panel of the lhs... // For each horizontal panel of the rhs, and corresponding panel of the lhs...
// (==GEMM_VAR1) // (==GEMM_VAR1)
@@ -182,7 +181,7 @@ static void run(Index rows, Index cols, Index depth,
// => Pack rhs's panel into a sequential chunk of memory (L2 caching) // => Pack rhs's panel into a sequential chunk of memory (L2 caching)
// Note that this panel will be read as many times as the number of blocks in the lhs's // Note that this panel will be read as many times as the number of blocks in the lhs's
// vertical panel which is, in practice, a very low number. // vertical panel which is, in practice, a very low number.
pack_rhs(blockB, &rhs(k2,0), rhsStride, alpha, actual_kc, cols); pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
// For each mc x kc block of the lhs's vertical panel... // For each mc x kc block of the lhs's vertical panel...
@@ -197,14 +196,14 @@ static void run(Index rows, Index cols, Index depth,
pack_lhs(blockA, &lhs(i2,k2), lhsStride, actual_kc, actual_mc); pack_lhs(blockA, &lhs(i2,k2), lhsStride, actual_kc, actual_mc);
// Everything is packed, we can now call the block * panel kernel: // Everything is packed, we can now call the block * panel kernel:
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, -1, -1, 0, 0, blockW); gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
} }
} }
if(blocking.blockA()==0) ei_aligned_stack_delete(Scalar, blockA, kc*mc); if(blocking.blockA()==0) ei_aligned_stack_delete(LhsScalar, blockA, sizeA);
if(blocking.blockB()==0) ei_aligned_stack_delete(Scalar, blockB, sizeB); if(blocking.blockB()==0) ei_aligned_stack_delete(RhsScalar, blockB, sizeB);
if(blocking.blockW()==0) ei_aligned_stack_delete(Scalar, blockW, sizeW); if(blocking.blockW()==0) ei_aligned_stack_delete(RhsScalar, blockW, sizeW);
} }
} }
@@ -212,18 +211,18 @@ static void run(Index rows, Index cols, Index depth,
/********************************************************************************* /*********************************************************************************
* Specialization of GeneralProduct<> for "large" GEMM, i.e., * Specialization of GeneralProduct<> for "large" GEMM, i.e.,
* implementation of the high level wrapper to ei_general_matrix_matrix_product * implementation of the high level wrapper to general_matrix_matrix_product
**********************************************************************************/ **********************************************************************************/
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs>
struct ei_traits<GeneralProduct<Lhs,Rhs,GemmProduct> > struct traits<GeneralProduct<Lhs,Rhs,GemmProduct> >
: ei_traits<ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs> > : traits<ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs> >
{}; {};
template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType> template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
struct ei_gemm_functor struct gemm_functor
{ {
ei_gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, Scalar actualAlpha, gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, Scalar actualAlpha,
BlockingType& blocking) BlockingType& blocking)
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking) : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
{} {}
@@ -237,10 +236,10 @@ struct ei_gemm_functor
{ {
if(cols==-1) if(cols==-1)
cols = m_rhs.cols(); cols = m_rhs.cols();
Gemm::run(rows, cols, m_lhs.cols(), Gemm::run(rows, cols, m_lhs.cols(),
(const Scalar*)&(m_lhs.const_cast_derived().coeffRef(row,0)), m_lhs.outerStride(), /*(const Scalar*)*/&m_lhs.coeffRef(row,0), m_lhs.outerStride(),
(const Scalar*)&(m_rhs.const_cast_derived().coeffRef(0,col)), m_rhs.outerStride(), /*(const Scalar*)*/&m_rhs.coeffRef(0,col), m_rhs.outerStride(),
(Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(), (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
m_actualAlpha, m_blocking, info); m_actualAlpha, m_blocking, info);
} }
@@ -254,10 +253,10 @@ struct ei_gemm_functor
}; };
template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth,
bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class ei_gemm_blocking_space; bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space;
template<typename _LhsScalar, typename _RhsScalar> template<typename _LhsScalar, typename _RhsScalar>
class ei_level3_blocking class level3_blocking
{ {
typedef _LhsScalar LhsScalar; typedef _LhsScalar LhsScalar;
typedef _RhsScalar RhsScalar; typedef _RhsScalar RhsScalar;
@@ -273,7 +272,7 @@ class ei_level3_blocking
public: public:
ei_level3_blocking() level3_blocking()
: m_blockA(0), m_blockB(0), m_blockW(0), m_mc(0), m_nc(0), m_kc(0) : m_blockA(0), m_blockB(0), m_blockW(0), m_mc(0), m_nc(0), m_kc(0)
{} {}
@@ -287,23 +286,23 @@ class ei_level3_blocking
}; };
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth> template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth>
class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, true> class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, true>
: public ei_level3_blocking< : public level3_blocking<
typename ei_meta_if<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::ret, typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
typename ei_meta_if<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::ret> typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
{ {
enum { enum {
Transpose = StorageOrder==RowMajor, Transpose = StorageOrder==RowMajor,
ActualRows = Transpose ? MaxCols : MaxRows, ActualRows = Transpose ? MaxCols : MaxRows,
ActualCols = Transpose ? MaxRows : MaxCols ActualCols = Transpose ? MaxRows : MaxCols
}; };
typedef typename ei_meta_if<Transpose,_RhsScalar,_LhsScalar>::ret LhsScalar; typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
typedef typename ei_meta_if<Transpose,_LhsScalar,_RhsScalar>::ret RhsScalar; typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
typedef ei_product_blocking_traits<RhsScalar> Blocking; typedef gebp_traits<LhsScalar,RhsScalar> Traits;
enum { enum {
SizeA = ActualRows * MaxDepth, SizeA = ActualRows * MaxDepth,
SizeB = ActualCols * MaxDepth, SizeB = ActualCols * MaxDepth,
SizeW = MaxDepth * Blocking::nr * ei_packet_traits<RhsScalar>::size SizeW = MaxDepth * Traits::WorkSpaceFactor
}; };
EIGEN_ALIGN16 LhsScalar m_staticA[SizeA]; EIGEN_ALIGN16 LhsScalar m_staticA[SizeA];
@@ -312,7 +311,7 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
public: public:
ei_gemm_blocking_space(DenseIndex /*rows*/, DenseIndex /*cols*/, DenseIndex /*depth*/) gemm_blocking_space(DenseIndex /*rows*/, DenseIndex /*cols*/, DenseIndex /*depth*/)
{ {
this->m_mc = ActualRows; this->m_mc = ActualRows;
this->m_nc = ActualCols; this->m_nc = ActualCols;
@@ -329,17 +328,17 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
}; };
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth> template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth>
class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, false> class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, false>
: public ei_level3_blocking< : public level3_blocking<
typename ei_meta_if<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::ret, typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
typename ei_meta_if<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::ret> typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
{ {
enum { enum {
Transpose = StorageOrder==RowMajor Transpose = StorageOrder==RowMajor
}; };
typedef typename ei_meta_if<Transpose,_RhsScalar,_LhsScalar>::ret LhsScalar; typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
typedef typename ei_meta_if<Transpose,_LhsScalar,_RhsScalar>::ret RhsScalar; typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
typedef ei_product_blocking_traits<RhsScalar> Blocking; typedef gebp_traits<LhsScalar,RhsScalar> Traits;
DenseIndex m_sizeA; DenseIndex m_sizeA;
DenseIndex m_sizeB; DenseIndex m_sizeB;
@@ -347,7 +346,7 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
public: public:
ei_gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth) gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth)
{ {
this->m_mc = Transpose ? cols : rows; this->m_mc = Transpose ? cols : rows;
this->m_nc = Transpose ? rows : cols; this->m_nc = Transpose ? rows : cols;
@@ -356,25 +355,25 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
computeProductBlockingSizes<LhsScalar,RhsScalar>(this->m_kc, this->m_mc, this->m_nc); computeProductBlockingSizes<LhsScalar,RhsScalar>(this->m_kc, this->m_mc, this->m_nc);
m_sizeA = this->m_mc * this->m_kc; m_sizeA = this->m_mc * this->m_kc;
m_sizeB = this->m_kc * this->m_nc; m_sizeB = this->m_kc * this->m_nc;
m_sizeW = this->m_kc*ei_packet_traits<RhsScalar>::size*Blocking::nr; m_sizeW = this->m_kc*Traits::WorkSpaceFactor;
} }
void allocateA() void allocateA()
{ {
if(this->m_blockA==0) if(this->m_blockA==0)
this->m_blockA = ei_aligned_new<LhsScalar>(m_sizeA); this->m_blockA = aligned_new<LhsScalar>(m_sizeA);
} }
void allocateB() void allocateB()
{ {
if(this->m_blockB==0) if(this->m_blockB==0)
this->m_blockB = ei_aligned_new<RhsScalar>(m_sizeB); this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
} }
void allocateW() void allocateW()
{ {
if(this->m_blockW==0) if(this->m_blockW==0)
this->m_blockW = ei_aligned_new<RhsScalar>(m_sizeW); this->m_blockW = aligned_new<RhsScalar>(m_sizeW);
} }
void allocateAll() void allocateAll()
@@ -384,14 +383,16 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
allocateW(); allocateW();
} }
~ei_gemm_blocking_space() ~gemm_blocking_space()
{ {
ei_aligned_delete(this->m_blockA, m_sizeA); aligned_delete(this->m_blockA, m_sizeA);
ei_aligned_delete(this->m_blockB, m_sizeB); aligned_delete(this->m_blockB, m_sizeB);
ei_aligned_delete(this->m_blockW, m_sizeW); aligned_delete(this->m_blockW, m_sizeW);
} }
}; };
} // end namespace internal
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, GemmProduct> class GeneralProduct<Lhs, Rhs, GemmProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs> : public ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs>
@@ -401,16 +402,20 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
}; };
public: public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef Scalar ResScalar;
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
{ {
EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret), typedef internal::scalar_product_op<LhsScalar,RhsScalar> BinOp;
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
} }
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{ {
ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
@@ -418,21 +423,21 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs); * RhsBlasTraits::extractScalarFactor(m_rhs);
typedef ei_gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar, typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType; Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
typedef ei_gemm_functor< typedef internal::gemm_functor<
Scalar, Index, Scalar, Index,
ei_general_matrix_matrix_product< internal::general_matrix_matrix_product<
Scalar, Index, Index,
(_ActualLhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate), LhsScalar, (_ActualLhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
(_ActualRhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate), RhsScalar, (_ActualRhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>, (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
_ActualLhsType, _ActualRhsType, Dest, BlockingType> GemmFunctor; _ActualLhsType, _ActualRhsType, Dest, BlockingType> GemmFunctor;
BlockingType blocking(dst.rows(), dst.cols(), lhs.cols()); BlockingType blocking(dst.rows(), dst.cols(), lhs.cols());
ei_parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit); internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit);
} }
}; };

View File

@@ -0,0 +1,227 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
namespace internal {
/**********************************************************************
* This file implements a general A * B product while
* evaluating only one triangular part of the product.
* This is a more general version of the self-adjoint product (C += A A^T)
* computed by the level 3 SYRK BLAS routine.
**********************************************************************/
// Forward declaration of the kernel that handles the blocks overlapping the
// diagonal of the result (defined later in this file).
//  - mr, nr: forwarded as-is to gebp_kernel by the definition.
//  - ConjLhs, ConjRhs: conjugation flags, also forwarded to gebp_kernel.
//  - UpLo: compared against Upper/Lower to select the triangular half that is updated.
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
struct tribb_kernel;
/* Optimized matrix-matrix product evaluating only one triangular half.
 *
 * This primary template is only declared: the implementation is provided by the
 * partial specializations on ResStorageOrder (RowMajor and ColMajor) that follow.
 * UpLo selects which triangular half (Upper or Lower) of the result is computed.
 */
template <typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder, int UpLo>
struct general_matrix_matrix_triangular_product;
// Row-major destination: the product is evaluated through its transpose.
// The two operands are exchanged, each storage order is flipped, the opposite
// triangular half is requested, and the column-major implementation does the work.
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo>
{
  typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;

  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
                                      const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
  {
    // compile-time parameters of the transposed problem
    enum {
      FlippedRhsOrder = RhsStorageOrder==RowMajor ? ColMajor : RowMajor,
      FlippedLhsOrder = LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
      FlippedUpLo     = UpLo==Lower ? Upper : Lower
    };

    // the original rhs plays the role of the lhs, and vice versa
    typedef general_matrix_matrix_triangular_product<Index,
              RhsScalar, FlippedRhsOrder, ConjugateRhs,
              LhsScalar, FlippedLhsOrder, ConjugateLhs,
              ColMajor, FlippedUpLo> TransposedProduct;

    TransposedProduct::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha);
  }
};
// Column-major destination: blocked driver updating only the UpLo triangular half
// of the size x size result `res` with the product of `_lhs` and `_rhs` scaled by alpha.
//  - size:  number of rows and columns of the result
//  - depth: common inner dimension of the two operands
//  - _lhs/_rhs with lhsStride/rhsStride: raw operand data, wrapped below in
//    const_blas_data_mapper so that they can be addressed as lhs(row,col) / rhs(row,col)
//  - res/resStride: raw destination data; column j starts at res + j*resStride
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
{
const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
// The block sizes start as the full problem dimensions and are then handed to
// computeProductBlockingSizes (presumably taken by reference and reduced to
// cache-friendly values -- TODO confirm against its declaration).
Index kc = depth; // cache block size along the K direction
Index mc = size; // cache block size along the M direction
Index nc = size; // cache block size along the N direction
computeProductBlockingSizes<LhsScalar,RhsScalar>(kc, mc, nc);
// !!! mc must be a multiple of nr:
if(mc > Traits::nr)
mc = (mc/Traits::nr)*Traits::nr;
// packed lhs block (at most mc x kc coefficients)
LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, kc*mc);
// A single allocation holds both the kernels' workspace (first sizeW entries)
// and the packed rhs panel (kc*size entries starting at blockB).
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*size;
RhsScalar* allocatedBlockB = ei_aligned_stack_new(RhsScalar, sizeB);
RhsScalar* blockB = allocatedBlockB + sizeW;
gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
gebp_kernel <LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, UpLo> sybb;
// loop over the inner dimension by slices of at most kc
for(Index k2=0; k2<depth; k2+=kc)
{
const Index actual_kc = std::min(k2+kc,depth)-k2;
// pack the actual_kc x size horizontal panel of the rhs starting at row k2
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, size);
// loop over the rows of the result by blocks of at most mc
for(Index i2=0; i2<size; i2+=mc)
{
const Index actual_mc = std::min(i2+mc,size)-i2;
pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
// the selected actual_mc * size panel of res is split into three different part:
// 1 - before the diagonal => processed with gebp or skipped
// 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel
// 3 - after the diagonal => processed with gebp or skipped
// part 1: columns [0,i2) of the panel, only needed for the lower half
if (UpLo==Lower)
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, std::min(size,i2), alpha,
-1, -1, 0, 0, allocatedBlockB);
// part 2: diagonal block starting at (i2,i2); the matching packed rhs columns start at blockB + actual_kc*i2
sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB);
// part 3: columns [j2,size) of the panel, only needed for the upper half
if (UpLo==Upper)
{
Index j2 = i2+actual_mc;
gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, std::max(Index(0), size-j2), alpha,
-1, -1, 0, 0, allocatedBlockB);
}
}
}
// release the temporary buffers with the same sizes used for their allocation
ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
ei_aligned_stack_delete(RhsScalar, allocatedBlockB, sizeB);
}
};
// Optimized packed Block * packed Block product kernel evaluating only one given triangular part
// This kernel is built on top of the gebp kernel:
// - the current destination block is processed per panel of actual_mc x BlockSize
// where BlockSize is set to the minimal value allowing gebp to be as fast as possible
// - then, as usual, each panel is split into three parts along the diagonal,
// the sub blocks above and below the diagonal are processed as usual,
// while the triangular block overlapping the diagonal is evaluated into a
// small temporary buffer which is then accumulated into the result using a
// triangular traversal.
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
struct tribb_kernel
{
typedef gebp_traits<LhsScalar,RhsScalar,ConjLhs,ConjRhs> Traits;
typedef typename Traits::ResScalar ResScalar;
enum {
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
};
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, ResScalar alpha, RhsScalar* workspace)
{
gebp_kernel<LhsScalar, RhsScalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer;
// let's process the block per panel of actual_mc x BlockSize,
// again, each is split into three parts, etc.
for (Index j=0; j<size; j+=BlockSize)
{
Index actualBlockSize = std::min<Index>(BlockSize,size - j);
const RhsScalar* actual_b = blockB+j*depth;
if(UpLo==Upper)
gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha,
-1, -1, 0, 0, workspace);
// selfadjoint micro block
{
Index i = j;
buffer.setZero();
// 1 - apply the kernel on the temporary buffer
gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha,
-1, -1, 0, 0, workspace);
// 2 - triangular accumulation
for(Index j1=0; j1<actualBlockSize; ++j1)
{
ResScalar* r = res + (j+j1)*resStride + i;
for(Index i1=UpLo==Lower ? j1 : 0;
UpLo==Lower ? i1<actualBlockSize : i1<=j1; ++i1)
r[i1] += buffer(i1,j1);
}
}
if(UpLo==Lower)
{
Index i = j+actualBlockSize;
gebp_kernel(res+j*resStride+i, resStride, blockA+depth*i, actual_b, size-i, depth, actualBlockSize, alpha,
-1, -1, 0, 0, workspace);
}
}
}
};
} // end namespace internal
// high level API
//
// Updates the triangular half UpLo of the viewed matrix with alpha times the product
// `prod`, by forwarding to internal::general_matrix_matrix_triangular_product.
// The scalar factors that blas_traits extracts from the two operands are folded into alpha.
// NOTE(review): only m_matrix.cols() is passed as the size of the result, so the
// destination is presumably expected to be square -- nothing is checked here.
template<typename MatrixType, unsigned int UpLo>
template<typename ProductDerived, typename _Lhs, typename _Rhs>
TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha)
{
  // operand types, stripped of their nesting qualifiers
  typedef typename internal::remove_all<typename ProductDerived::LhsNested>::type Lhs;
  typedef typename internal::remove_all<typename ProductDerived::RhsNested>::type Rhs;

  // types of the underlying directly accessible operands
  typedef internal::blas_traits<Lhs> LhsBlasTraits;
  typedef internal::blas_traits<Rhs> RhsBlasTraits;
  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
  typedef typename internal::remove_all<ActualLhs>::type ActualLhsPlain;
  typedef typename internal::remove_all<ActualRhs>::type ActualRhsPlain;

  // storage orders of the two operands and of the destination
  enum {
    LhsOrder = (ActualLhsPlain::Flags&RowMajorBit) ? RowMajor : ColMajor,
    RhsOrder = (ActualRhsPlain::Flags&RowMajorBit) ? RowMajor : ColMajor,
    DstOrder = (MatrixType::Flags&RowMajorBit) ? RowMajor : ColMajor
  };

  typedef internal::general_matrix_matrix_triangular_product<Index,
            typename Lhs::Scalar, LhsOrder, LhsBlasTraits::NeedToConjugate,
            typename Rhs::Scalar, RhsOrder, RhsBlasTraits::NeedToConjugate,
            DstOrder, UpLo> TriangularProduct;

  const ActualLhs actualLhs = LhsBlasTraits::extract(prod.lhs());
  const ActualRhs actualRhs = RhsBlasTraits::extract(prod.rhs());

  typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());

  TriangularProduct::run(m_matrix.cols(), actualLhs.cols(),
                         &actualLhs.coeffRef(0,0), actualLhs.outerStride(),
                         &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
                         const_cast<Scalar*>(m_matrix.data()), m_matrix.outerStride(), actualAlpha);

  return *this;
}
#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H

View File

@@ -25,78 +25,108 @@
#ifndef EIGEN_GENERAL_MATRIX_VECTOR_H #ifndef EIGEN_GENERAL_MATRIX_VECTOR_H
#define EIGEN_GENERAL_MATRIX_VECTOR_H #define EIGEN_GENERAL_MATRIX_VECTOR_H
namespace internal {
/* Optimized col-major matrix * vector product: /* Optimized col-major matrix * vector product:
* This algorithm processes 4 columns at onces that allows to both reduce * This algorithm processes 4 columns at onces that allows to both reduce
* the number of load/stores of the result by a factor 4 and to reduce * the number of load/stores of the result by a factor 4 and to reduce
* the instruction dependency. Moreover, we know that all bands have the * the instruction dependency. Moreover, we know that all bands have the
* same alignment pattern. * same alignment pattern.
* TODO: since rhs gets evaluated only once, no need to evaluate it *
* Mixing type logic: C += alpha * A * B
* | A | B |alpha| comments
* |real |cplx |cplx | no vectorization
* |real |cplx |real | alpha is converted to a cplx when calling the run function, no vectorization
* |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp
* |cplx |real |real | optimal case, vectorization possible via real-cplx mul
*/ */
template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index, typename RhsType> template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
static EIGEN_DONT_INLINE struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
void ei_cache_friendly_product_colmajor_times_vector(
Index size,
const Scalar* lhs, Index lhsStride,
const RhsType& rhs,
Scalar* res,
Scalar alpha)
{ {
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
&& int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
};
typedef typename packet_traits<LhsScalar>::type _LhsPacket;
typedef typename packet_traits<RhsScalar>::type _RhsPacket;
typedef typename packet_traits<ResScalar>::type _ResPacket;
typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
EIGEN_DONT_INLINE static void run(
Index rows, Index cols,
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsIncr,
ResScalar* res, Index
#ifdef EIGEN_INTERNAL_DEBUGGING
resIncr
#endif
, RhsScalar alpha)
{
eigen_internal_assert(resIncr==1);
#ifdef _EIGEN_ACCUMULATE_PACKETS #ifdef _EIGEN_ACCUMULATE_PACKETS
#error _EIGEN_ACCUMULATE_PACKETS has already been defined #error _EIGEN_ACCUMULATE_PACKETS has already been defined
#endif #endif
#define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) \ #define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) \
ei_pstore(&res[j], \ pstore(&res[j], \
ei_padd(ei_pload(&res[j]), \ padd(pload<ResPacket>(&res[j]), \
ei_padd( \ padd( \
ei_padd(cj.pmul(EIGEN_CAT(ei_ploa , A0)(&lhs0[j]), ptmp0), \ padd(pcj.pmul(EIGEN_CAT(ploa , A0)<LhsPacket>(&lhs0[j]), ptmp0), \
cj.pmul(EIGEN_CAT(ei_ploa , A13)(&lhs1[j]), ptmp1)), \ pcj.pmul(EIGEN_CAT(ploa , A13)<LhsPacket>(&lhs1[j]), ptmp1)), \
ei_padd(cj.pmul(EIGEN_CAT(ei_ploa , A2)(&lhs2[j]), ptmp2), \ padd(pcj.pmul(EIGEN_CAT(ploa , A2)<LhsPacket>(&lhs2[j]), ptmp2), \
cj.pmul(EIGEN_CAT(ei_ploa , A13)(&lhs3[j]), ptmp3)) ))) pcj.pmul(EIGEN_CAT(ploa , A13)<LhsPacket>(&lhs3[j]), ptmp3)) )))
ei_conj_helper<ConjugateLhs,ConjugateRhs> cj; conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
if(ConjugateRhs) if(ConjugateRhs)
alpha = ei_conj(alpha); alpha = conj(alpha);
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned }; enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned };
const Index columnsAtOnce = 4; const Index columnsAtOnce = 4;
const Index peels = 2; const Index peels = 2;
const Index PacketAlignedMask = PacketSize-1; const Index LhsPacketAlignedMask = LhsPacketSize-1;
const Index PeelAlignedMask = PacketSize*peels-1; const Index ResPacketAlignedMask = ResPacketSize-1;
const Index PeelAlignedMask = ResPacketSize*peels-1;
const Index size = rows;
// How many coeffs of the result do we have to skip to be aligned. // How many coeffs of the result do we have to skip to be aligned.
// Here we assume data are at least aligned on the base scalar type. // Here we assume data are at least aligned on the base scalar type.
Index alignedStart = ei_first_aligned(res,size); Index alignedStart = first_aligned(res,size);
Index alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0; Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart; const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
const Index alignmentStep = PacketSize>1 ? (PacketSize - lhsStride % PacketSize) & PacketAlignedMask : 0; const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
Index alignmentPattern = alignmentStep==0 ? AllAligned Index alignmentPattern = alignmentStep==0 ? AllAligned
: alignmentStep==(PacketSize/2) ? EvenAligned : alignmentStep==(LhsPacketSize/2) ? EvenAligned
: FirstAligned; : FirstAligned;
// we cannot assume the first element is aligned because of sub-matrices // we cannot assume the first element is aligned because of sub-matrices
const Index lhsAlignmentOffset = ei_first_aligned(lhs,size); const Index lhsAlignmentOffset = first_aligned(lhs,size);
// find how many columns do we have to skip to be aligned with the result (if possible) // find how many columns do we have to skip to be aligned with the result (if possible)
Index skipColumns = 0; Index skipColumns = 0;
// if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats) // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
if( (size_t(lhs)%sizeof(RealScalar)) || (size_t(res)%sizeof(RealScalar)) ) if( (size_t(lhs)%sizeof(LhsScalar)) || (size_t(res)%sizeof(ResScalar)) )
{ {
alignedSize = 0; alignedSize = 0;
alignedStart = 0; alignedStart = 0;
} }
else if (PacketSize>1) else if (LhsPacketSize>1)
{ {
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || size<PacketSize); eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize);
while (skipColumns<PacketSize && while (skipColumns<LhsPacketSize &&
alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%PacketSize)) alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%LhsPacketSize))
++skipColumns; ++skipColumns;
if (skipColumns==PacketSize) if (skipColumns==LhsPacketSize)
{ {
// nothing can be aligned, no need to skip any column // nothing can be aligned, no need to skip any column
alignmentPattern = NoneAligned; alignmentPattern = NoneAligned;
@@ -104,39 +134,47 @@ void ei_cache_friendly_product_colmajor_times_vector(
} }
else else
{ {
skipColumns = std::min(skipColumns,rhs.size()); skipColumns = std::min(skipColumns,cols);
// note that the skiped columns are processed later. // note that the skiped columns are processed later.
} }
ei_internal_assert( (alignmentPattern==NoneAligned) eigen_internal_assert( (alignmentPattern==NoneAligned)
|| (skipColumns + columnsAtOnce >= rhs.size()) || (skipColumns + columnsAtOnce >= cols)
|| PacketSize > size || LhsPacketSize > size
|| (size_t(lhs+alignedStart+lhsStride*skipColumns)%sizeof(Packet))==0); || (size_t(lhs+alignedStart+lhsStride*skipColumns)%sizeof(LhsPacket))==0);
}
else if(Vectorizable)
{
alignedStart = 0;
alignedSize = size;
alignmentPattern = AllAligned;
} }
Index offset1 = (FirstAligned && alignmentStep==1?3:1); Index offset1 = (FirstAligned && alignmentStep==1?3:1);
Index offset3 = (FirstAligned && alignmentStep==1?1:3); Index offset3 = (FirstAligned && alignmentStep==1?1:3);
Index columnBound = ((rhs.size()-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns; Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce) for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
{ {
Packet ptmp0 = ei_pset1(alpha*rhs[i]), ptmp1 = ei_pset1(alpha*rhs[i+offset1]), RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs[i*rhsIncr]),
ptmp2 = ei_pset1(alpha*rhs[i+2]), ptmp3 = ei_pset1(alpha*rhs[i+offset3]); ptmp1 = pset1<RhsPacket>(alpha*rhs[(i+offset1)*rhsIncr]),
ptmp2 = pset1<RhsPacket>(alpha*rhs[(i+2)*rhsIncr]),
ptmp3 = pset1<RhsPacket>(alpha*rhs[(i+offset3)*rhsIncr]);
// this helps a lot generating better binary code // this helps a lot generating better binary code
const Scalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride, const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
*lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride; *lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride;
if (PacketSize>1) if (Vectorizable)
{ {
/* explicit vectorization */ /* explicit vectorization */
// process initial unaligned coeffs // process initial unaligned coeffs
for (Index j=0; j<alignedStart; ++j) for (Index j=0; j<alignedStart; ++j)
{ {
res[j] = cj.pmadd(lhs0[j], ei_pfirst(ptmp0), res[j]); res[j] = cj.pmadd(lhs0[j], pfirst(ptmp0), res[j]);
res[j] = cj.pmadd(lhs1[j], ei_pfirst(ptmp1), res[j]); res[j] = cj.pmadd(lhs1[j], pfirst(ptmp1), res[j]);
res[j] = cj.pmadd(lhs2[j], ei_pfirst(ptmp2), res[j]); res[j] = cj.pmadd(lhs2[j], pfirst(ptmp2), res[j]);
res[j] = cj.pmadd(lhs3[j], ei_pfirst(ptmp3), res[j]); res[j] = cj.pmadd(lhs3[j], pfirst(ptmp3), res[j]);
} }
if (alignedSize>alignedStart) if (alignedSize>alignedStart)
@@ -144,51 +182,52 @@ void ei_cache_friendly_product_colmajor_times_vector(
switch(alignmentPattern) switch(alignmentPattern)
{ {
case AllAligned: case AllAligned:
for (Index j = alignedStart; j<alignedSize; j+=PacketSize) for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
_EIGEN_ACCUMULATE_PACKETS(d,d,d); _EIGEN_ACCUMULATE_PACKETS(d,d,d);
break; break;
case EvenAligned: case EvenAligned:
for (Index j = alignedStart; j<alignedSize; j+=PacketSize) for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
_EIGEN_ACCUMULATE_PACKETS(d,du,d); _EIGEN_ACCUMULATE_PACKETS(d,du,d);
break; break;
case FirstAligned: case FirstAligned:
if(peels>1) if(peels>1)
{ {
Packet A00, A01, A02, A03, A10, A11, A12, A13; LhsPacket A00, A01, A02, A03, A10, A11, A12, A13;
ResPacket T0, T1;
A01 = ei_pload(&lhs1[alignedStart-1]); A01 = pload<LhsPacket>(&lhs1[alignedStart-1]);
A02 = ei_pload(&lhs2[alignedStart-2]); A02 = pload<LhsPacket>(&lhs2[alignedStart-2]);
A03 = ei_pload(&lhs3[alignedStart-3]); A03 = pload<LhsPacket>(&lhs3[alignedStart-3]);
for (Index j = alignedStart; j<peeledSize; j+=peels*PacketSize) for (Index j = alignedStart; j<peeledSize; j+=peels*ResPacketSize)
{ {
A11 = ei_pload(&lhs1[j-1+PacketSize]); ei_palign<1>(A01,A11); A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11);
A12 = ei_pload(&lhs2[j-2+PacketSize]); ei_palign<2>(A02,A12); A12 = pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); palign<2>(A02,A12);
A13 = ei_pload(&lhs3[j-3+PacketSize]); ei_palign<3>(A03,A13); A13 = pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); palign<3>(A03,A13);
A00 = ei_pload (&lhs0[j]); A00 = pload<LhsPacket>(&lhs0[j]);
A10 = ei_pload (&lhs0[j+PacketSize]); A10 = pload<LhsPacket>(&lhs0[j+LhsPacketSize]);
A00 = cj.pmadd(A00, ptmp0, ei_pload(&res[j])); T0 = pcj.pmadd(A00, ptmp0, pload<ResPacket>(&res[j]));
A10 = cj.pmadd(A10, ptmp0, ei_pload(&res[j+PacketSize])); T1 = pcj.pmadd(A10, ptmp0, pload<ResPacket>(&res[j+ResPacketSize]));
A00 = cj.pmadd(A01, ptmp1, A00); T0 = pcj.pmadd(A01, ptmp1, T0);
A01 = ei_pload(&lhs1[j-1+2*PacketSize]); ei_palign<1>(A11,A01); A01 = pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); palign<1>(A11,A01);
A00 = cj.pmadd(A02, ptmp2, A00); T0 = pcj.pmadd(A02, ptmp2, T0);
A02 = ei_pload(&lhs2[j-2+2*PacketSize]); ei_palign<2>(A12,A02); A02 = pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); palign<2>(A12,A02);
A00 = cj.pmadd(A03, ptmp3, A00); T0 = pcj.pmadd(A03, ptmp3, T0);
ei_pstore(&res[j],A00); pstore(&res[j],T0);
A03 = ei_pload(&lhs3[j-3+2*PacketSize]); ei_palign<3>(A13,A03); A03 = pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); palign<3>(A13,A03);
A10 = cj.pmadd(A11, ptmp1, A10); T1 = pcj.pmadd(A11, ptmp1, T1);
A10 = cj.pmadd(A12, ptmp2, A10); T1 = pcj.pmadd(A12, ptmp2, T1);
A10 = cj.pmadd(A13, ptmp3, A10); T1 = pcj.pmadd(A13, ptmp3, T1);
ei_pstore(&res[j+PacketSize],A10); pstore(&res[j+ResPacketSize],T1);
} }
} }
for (Index j = peeledSize; j<alignedSize; j+=PacketSize) for (Index j = peeledSize; j<alignedSize; j+=ResPacketSize)
_EIGEN_ACCUMULATE_PACKETS(d,du,du); _EIGEN_ACCUMULATE_PACKETS(d,du,du);
break; break;
default: default:
for (Index j = alignedStart; j<alignedSize; j+=PacketSize) for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
_EIGEN_ACCUMULATE_PACKETS(du,du,du); _EIGEN_ACCUMULATE_PACKETS(du,du,du);
break; break;
} }
@@ -198,42 +237,41 @@ void ei_cache_friendly_product_colmajor_times_vector(
/* process remaining coeffs (or all if there is no explicit vectorization) */ /* process remaining coeffs (or all if there is no explicit vectorization) */
for (Index j=alignedSize; j<size; ++j) for (Index j=alignedSize; j<size; ++j)
{ {
res[j] = cj.pmadd(lhs0[j], ei_pfirst(ptmp0), res[j]); res[j] = cj.pmadd(lhs0[j], pfirst(ptmp0), res[j]);
res[j] = cj.pmadd(lhs1[j], ei_pfirst(ptmp1), res[j]); res[j] = cj.pmadd(lhs1[j], pfirst(ptmp1), res[j]);
res[j] = cj.pmadd(lhs2[j], ei_pfirst(ptmp2), res[j]); res[j] = cj.pmadd(lhs2[j], pfirst(ptmp2), res[j]);
res[j] = cj.pmadd(lhs3[j], ei_pfirst(ptmp3), res[j]); res[j] = cj.pmadd(lhs3[j], pfirst(ptmp3), res[j]);
} }
} }
// process remaining first and last columns (at most columnsAtOnce-1) // process remaining first and last columns (at most columnsAtOnce-1)
Index end = rhs.size(); Index end = cols;
Index start = columnBound; Index start = columnBound;
do do
{ {
for (Index i=start; i<end; ++i) for (Index k=start; k<end; ++k)
{ {
Packet ptmp0 = ei_pset1(alpha*rhs[i]); RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs[k*rhsIncr]);
const Scalar* lhs0 = lhs + i*lhsStride; const LhsScalar* lhs0 = lhs + k*lhsStride;
if (PacketSize>1) if (Vectorizable)
{ {
/* explicit vectorization */ /* explicit vectorization */
// process first unaligned result's coeffs // process first unaligned result's coeffs
for (Index j=0; j<alignedStart; ++j) for (Index j=0; j<alignedStart; ++j)
res[j] += cj.pmul(lhs0[j], ei_pfirst(ptmp0)); res[j] += cj.pmul(lhs0[j], pfirst(ptmp0));
// process aligned result's coeffs // process aligned result's coeffs
if ((size_t(lhs0+alignedStart)%sizeof(Packet))==0) if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0)
for (Index j = alignedStart;j<alignedSize;j+=PacketSize) for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
ei_pstore(&res[j], cj.pmadd(ei_pload(&lhs0[j]), ptmp0, ei_pload(&res[j]))); pstore(&res[i], pcj.pmadd(ploadu<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
else else
for (Index j = alignedStart;j<alignedSize;j+=PacketSize) for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
ei_pstore(&res[j], cj.pmadd(ei_ploadu(&lhs0[j]), ptmp0, ei_pload(&res[j]))); pstore(&res[i], pcj.pmadd(ploadu<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
} }
// process remaining scalars (or all if no explicit vectorization) // process remaining scalars (or all if no explicit vectorization)
for (Index j=alignedSize; j<size; ++j) for (Index i=alignedSize; i<size; ++i)
res[j] += cj.pmul(lhs0[j], ei_pfirst(ptmp0)); res[i] += cj.pmul(lhs0[i], pfirst(ptmp0));
} }
if (skipColumns) if (skipColumns)
{ {
@@ -243,73 +281,104 @@ void ei_cache_friendly_product_colmajor_times_vector(
} }
else else
break; break;
} while(PacketSize>1); } while(Vectorizable);
#undef _EIGEN_ACCUMULATE_PACKETS #undef _EIGEN_ACCUMULATE_PACKETS
} }
};
// TODO add peeling to mask unaligned load/stores /* Optimized row-major matrix * vector product:
template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index, typename ResType> * This algorithm processes 4 rows at onces that allows to both reduce
static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( * the number of load/stores of the result by a factor 4 and to reduce
const Scalar* lhs, Index lhsStride, * the instruction dependency. Moreover, we know that all bands have the
const Scalar* rhs, Index rhsSize, * same alignment pattern.
ResType& res, *
Scalar alpha) * Mixing type logic:
* - alpha is always a complex (or converted to a complex)
* - no vectorization
*/
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
{ {
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
&& int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
};
typedef typename packet_traits<LhsScalar>::type _LhsPacket;
typedef typename packet_traits<RhsScalar>::type _RhsPacket;
typedef typename packet_traits<ResScalar>::type _ResPacket;
typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
EIGEN_DONT_INLINE static void run(
Index rows, Index cols,
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsIncr,
ResScalar* res, Index resIncr,
ResScalar alpha)
{
EIGEN_UNUSED_VARIABLE(rhsIncr);
eigen_internal_assert(rhsIncr==1);
#ifdef _EIGEN_ACCUMULATE_PACKETS #ifdef _EIGEN_ACCUMULATE_PACKETS
#error _EIGEN_ACCUMULATE_PACKETS has already been defined #error _EIGEN_ACCUMULATE_PACKETS has already been defined
#endif #endif
#define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) {\ #define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) {\
Packet b = ei_pload(&rhs[j]); \ RhsPacket b = pload<RhsPacket>(&rhs[j]); \
ptmp0 = cj.pmadd(EIGEN_CAT(ei_ploa,A0) (&lhs0[j]), b, ptmp0); \ ptmp0 = pcj.pmadd(EIGEN_CAT(ploa,A0) <LhsPacket>(&lhs0[j]), b, ptmp0); \
ptmp1 = cj.pmadd(EIGEN_CAT(ei_ploa,A13)(&lhs1[j]), b, ptmp1); \ ptmp1 = pcj.pmadd(EIGEN_CAT(ploa,A13)<LhsPacket>(&lhs1[j]), b, ptmp1); \
ptmp2 = cj.pmadd(EIGEN_CAT(ei_ploa,A2) (&lhs2[j]), b, ptmp2); \ ptmp2 = pcj.pmadd(EIGEN_CAT(ploa,A2) <LhsPacket>(&lhs2[j]), b, ptmp2); \
ptmp3 = cj.pmadd(EIGEN_CAT(ei_ploa,A13)(&lhs3[j]), b, ptmp3); } ptmp3 = pcj.pmadd(EIGEN_CAT(ploa,A13)<LhsPacket>(&lhs3[j]), b, ptmp3); }
ei_conj_helper<ConjugateLhs,ConjugateRhs> cj; conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 }; enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 };
const Index rowsAtOnce = 4; const Index rowsAtOnce = 4;
const Index peels = 2; const Index peels = 2;
const Index PacketAlignedMask = PacketSize-1; const Index RhsPacketAlignedMask = RhsPacketSize-1;
const Index PeelAlignedMask = PacketSize*peels-1; const Index LhsPacketAlignedMask = LhsPacketSize-1;
const Index size = rhsSize; const Index PeelAlignedMask = RhsPacketSize*peels-1;
const Index depth = cols;
// How many coeffs of the result do we have to skip to be aligned. // How many coeffs of the result do we have to skip to be aligned.
// Here we assume data are at least aligned on the base scalar type // Here we assume data are at least aligned on the base scalar type
// if that's not the case then vectorization is discarded, see below. // if that's not the case then vectorization is discarded, see below.
Index alignedStart = ei_first_aligned(rhs, size); Index alignedStart = first_aligned(rhs, depth);
Index alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0; Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart; const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
const Index alignmentStep = PacketSize>1 ? (PacketSize - lhsStride % PacketSize) & PacketAlignedMask : 0; const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
Index alignmentPattern = alignmentStep==0 ? AllAligned Index alignmentPattern = alignmentStep==0 ? AllAligned
: alignmentStep==(PacketSize/2) ? EvenAligned : alignmentStep==(LhsPacketSize/2) ? EvenAligned
: FirstAligned; : FirstAligned;
// we cannot assume the first element is aligned because of sub-matrices // we cannot assume the first element is aligned because of sub-matrices
const Index lhsAlignmentOffset = ei_first_aligned(lhs,size); const Index lhsAlignmentOffset = first_aligned(lhs,depth);
// find how many rows do we have to skip to be aligned with rhs (if possible) // find how many rows do we have to skip to be aligned with rhs (if possible)
Index skipRows = 0; Index skipRows = 0;
// if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats) // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
if( (size_t(lhs)%sizeof(RealScalar)) || (size_t(rhs)%sizeof(RealScalar)) ) if( (sizeof(LhsScalar)!=sizeof(RhsScalar)) || (size_t(lhs)%sizeof(LhsScalar)) || (size_t(rhs)%sizeof(RhsScalar)) )
{ {
alignedSize = 0; alignedSize = 0;
alignedStart = 0; alignedStart = 0;
} }
else if (PacketSize>1) else if (LhsPacketSize>1)
{ {
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || size<PacketSize); eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize);
while (skipRows<PacketSize && while (skipRows<LhsPacketSize &&
alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%PacketSize)) alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%LhsPacketSize))
++skipRows; ++skipRows;
if (skipRows==PacketSize) if (skipRows==LhsPacketSize)
{ {
// nothing can be aligned, no need to skip any column // nothing can be aligned, no need to skip any column
alignmentPattern = NoneAligned; alignmentPattern = NoneAligned;
@@ -317,38 +386,46 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
} }
else else
{ {
skipRows = std::min(skipRows,Index(res.size())); skipRows = std::min(skipRows,Index(rows));
// note that the skiped columns are processed later. // note that the skiped columns are processed later.
} }
ei_internal_assert( alignmentPattern==NoneAligned eigen_internal_assert( alignmentPattern==NoneAligned
|| PacketSize==1 || LhsPacketSize==1
|| (skipRows + rowsAtOnce >= res.size()) || (skipRows + rowsAtOnce >= rows)
|| PacketSize > rhsSize || LhsPacketSize > depth
|| (size_t(lhs+alignedStart+lhsStride*skipRows)%sizeof(Packet))==0); || (size_t(lhs+alignedStart+lhsStride*skipRows)%sizeof(LhsPacket))==0);
}
else if(Vectorizable)
{
alignedStart = 0;
alignedSize = depth;
alignmentPattern = AllAligned;
} }
Index offset1 = (FirstAligned && alignmentStep==1?3:1); Index offset1 = (FirstAligned && alignmentStep==1?3:1);
Index offset3 = (FirstAligned && alignmentStep==1?1:3); Index offset3 = (FirstAligned && alignmentStep==1?1:3);
Index rowBound = ((res.size()-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows; Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
for (Index i=skipRows; i<rowBound; i+=rowsAtOnce) for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
{ {
Scalar tmp0 = Scalar(0), tmp1 = Scalar(0), tmp2 = Scalar(0), tmp3 = Scalar(0); EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0);
ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
// this helps the compiler generating good binary code // this helps the compiler generating good binary code
const Scalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride, const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
*lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride; *lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride;
if (PacketSize>1) if (Vectorizable)
{ {
/* explicit vectorization */ /* explicit vectorization */
Packet ptmp0 = ei_pset1(Scalar(0)), ptmp1 = ei_pset1(Scalar(0)), ptmp2 = ei_pset1(Scalar(0)), ptmp3 = ei_pset1(Scalar(0)); ResPacket ptmp0 = pset1<ResPacket>(ResScalar(0)), ptmp1 = pset1<ResPacket>(ResScalar(0)),
ptmp2 = pset1<ResPacket>(ResScalar(0)), ptmp3 = pset1<ResPacket>(ResScalar(0));
// process initial unaligned coeffs // process initial unaligned coeffs
// FIXME this loop get vectorized by the compiler ! // FIXME this loop get vectorized by the compiler !
for (Index j=0; j<alignedStart; ++j) for (Index j=0; j<alignedStart; ++j)
{ {
Scalar b = rhs[j]; RhsScalar b = rhs[j];
tmp0 += cj.pmul(lhs0[j],b); tmp1 += cj.pmul(lhs1[j],b); tmp0 += cj.pmul(lhs0[j],b); tmp1 += cj.pmul(lhs1[j],b);
tmp2 += cj.pmul(lhs2[j],b); tmp3 += cj.pmul(lhs3[j],b); tmp2 += cj.pmul(lhs2[j],b); tmp3 += cj.pmul(lhs3[j],b);
} }
@@ -358,11 +435,11 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
switch(alignmentPattern) switch(alignmentPattern)
{ {
case AllAligned: case AllAligned:
for (Index j = alignedStart; j<alignedSize; j+=PacketSize) for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
_EIGEN_ACCUMULATE_PACKETS(d,d,d); _EIGEN_ACCUMULATE_PACKETS(d,d,d);
break; break;
case EvenAligned: case EvenAligned:
for (Index j = alignedStart; j<alignedSize; j+=PacketSize) for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
_EIGEN_ACCUMULATE_PACKETS(d,du,d); _EIGEN_ACCUMULATE_PACKETS(d,du,d);
break; break;
case FirstAligned: case FirstAligned:
@@ -374,69 +451,72 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
* overlaping the desired unaligned packet. This is *much* more efficient * overlaping the desired unaligned packet. This is *much* more efficient
* than basic unaligned loads. * than basic unaligned loads.
*/ */
Packet A01, A02, A03, b, A11, A12, A13; LhsPacket A01, A02, A03, A11, A12, A13;
A01 = ei_pload(&lhs1[alignedStart-1]); A01 = pload<LhsPacket>(&lhs1[alignedStart-1]);
A02 = ei_pload(&lhs2[alignedStart-2]); A02 = pload<LhsPacket>(&lhs2[alignedStart-2]);
A03 = ei_pload(&lhs3[alignedStart-3]); A03 = pload<LhsPacket>(&lhs3[alignedStart-3]);
for (Index j = alignedStart; j<peeledSize; j+=peels*PacketSize) for (Index j = alignedStart; j<peeledSize; j+=peels*RhsPacketSize)
{ {
b = ei_pload(&rhs[j]); RhsPacket b = pload<RhsPacket>(&rhs[j]);
A11 = ei_pload(&lhs1[j-1+PacketSize]); ei_palign<1>(A01,A11); A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11);
A12 = ei_pload(&lhs2[j-2+PacketSize]); ei_palign<2>(A02,A12); A12 = pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); palign<2>(A02,A12);
A13 = ei_pload(&lhs3[j-3+PacketSize]); ei_palign<3>(A03,A13); A13 = pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); palign<3>(A03,A13);
ptmp0 = cj.pmadd(ei_pload (&lhs0[j]), b, ptmp0); ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j]), b, ptmp0);
ptmp1 = cj.pmadd(A01, b, ptmp1); ptmp1 = pcj.pmadd(A01, b, ptmp1);
A01 = ei_pload(&lhs1[j-1+2*PacketSize]); ei_palign<1>(A11,A01); A01 = pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); palign<1>(A11,A01);
ptmp2 = cj.pmadd(A02, b, ptmp2); ptmp2 = pcj.pmadd(A02, b, ptmp2);
A02 = ei_pload(&lhs2[j-2+2*PacketSize]); ei_palign<2>(A12,A02); A02 = pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); palign<2>(A12,A02);
ptmp3 = cj.pmadd(A03, b, ptmp3); ptmp3 = pcj.pmadd(A03, b, ptmp3);
A03 = ei_pload(&lhs3[j-3+2*PacketSize]); ei_palign<3>(A13,A03); A03 = pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); palign<3>(A13,A03);
b = ei_pload(&rhs[j+PacketSize]); b = pload<RhsPacket>(&rhs[j+RhsPacketSize]);
ptmp0 = cj.pmadd(ei_pload (&lhs0[j+PacketSize]), b, ptmp0); ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j+LhsPacketSize]), b, ptmp0);
ptmp1 = cj.pmadd(A11, b, ptmp1); ptmp1 = pcj.pmadd(A11, b, ptmp1);
ptmp2 = cj.pmadd(A12, b, ptmp2); ptmp2 = pcj.pmadd(A12, b, ptmp2);
ptmp3 = cj.pmadd(A13, b, ptmp3); ptmp3 = pcj.pmadd(A13, b, ptmp3);
} }
} }
for (Index j = peeledSize; j<alignedSize; j+=PacketSize) for (Index j = peeledSize; j<alignedSize; j+=RhsPacketSize)
_EIGEN_ACCUMULATE_PACKETS(d,du,du); _EIGEN_ACCUMULATE_PACKETS(d,du,du);
break; break;
default: default:
for (Index j = alignedStart; j<alignedSize; j+=PacketSize) for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
_EIGEN_ACCUMULATE_PACKETS(du,du,du); _EIGEN_ACCUMULATE_PACKETS(du,du,du);
break; break;
} }
tmp0 += ei_predux(ptmp0); tmp0 += predux(ptmp0);
tmp1 += ei_predux(ptmp1); tmp1 += predux(ptmp1);
tmp2 += ei_predux(ptmp2); tmp2 += predux(ptmp2);
tmp3 += ei_predux(ptmp3); tmp3 += predux(ptmp3);
} }
} // end explicit vectorization } // end explicit vectorization
// process remaining coeffs (or all if no explicit vectorization) // process remaining coeffs (or all if no explicit vectorization)
// FIXME this loop get vectorized by the compiler ! // FIXME this loop get vectorized by the compiler !
for (Index j=alignedSize; j<size; ++j) for (Index j=alignedSize; j<depth; ++j)
{ {
Scalar b = rhs[j]; RhsScalar b = rhs[j];
tmp0 += cj.pmul(lhs0[j],b); tmp1 += cj.pmul(lhs1[j],b); tmp0 += cj.pmul(lhs0[j],b); tmp1 += cj.pmul(lhs1[j],b);
tmp2 += cj.pmul(lhs2[j],b); tmp3 += cj.pmul(lhs3[j],b); tmp2 += cj.pmul(lhs2[j],b); tmp3 += cj.pmul(lhs3[j],b);
} }
res[i] += alpha*tmp0; res[i+offset1] += alpha*tmp1; res[i+2] += alpha*tmp2; res[i+offset3] += alpha*tmp3; res[i*resIncr] += alpha*tmp0;
res[(i+offset1)*resIncr] += alpha*tmp1;
res[(i+2)*resIncr] += alpha*tmp2;
res[(i+offset3)*resIncr] += alpha*tmp3;
} }
// process remaining first and last rows (at most columnsAtOnce-1) // process remaining first and last rows (at most columnsAtOnce-1)
Index end = res.size(); Index end = rows;
Index start = rowBound; Index start = rowBound;
do do
{ {
for (Index i=start; i<end; ++i) for (Index i=start; i<end; ++i)
{ {
Scalar tmp0 = Scalar(0); EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0);
Packet ptmp0 = ei_pset1(tmp0); ResPacket ptmp0 = pset1<ResPacket>(tmp0);
const Scalar* lhs0 = lhs + i*lhsStride; const LhsScalar* lhs0 = lhs + i*lhsStride;
// process first unaligned result's coeffs // process first unaligned result's coeffs
// FIXME this loop get vectorized by the compiler ! // FIXME this loop get vectorized by the compiler !
for (Index j=0; j<alignedStart; ++j) for (Index j=0; j<alignedStart; ++j)
@@ -445,20 +525,20 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
if (alignedSize>alignedStart) if (alignedSize>alignedStart)
{ {
// process aligned rhs coeffs // process aligned rhs coeffs
if ((size_t(lhs0+alignedStart)%sizeof(Packet))==0) if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0)
for (Index j = alignedStart;j<alignedSize;j+=PacketSize) for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
ptmp0 = cj.pmadd(ei_pload(&lhs0[j]), ei_pload(&rhs[j]), ptmp0); ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j]), pload<RhsPacket>(&rhs[j]), ptmp0);
else else
for (Index j = alignedStart;j<alignedSize;j+=PacketSize) for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
ptmp0 = cj.pmadd(ei_ploadu(&lhs0[j]), ei_pload(&rhs[j]), ptmp0); ptmp0 = pcj.pmadd(ploadu<LhsPacket>(&lhs0[j]), pload<RhsPacket>(&rhs[j]), ptmp0);
tmp0 += ei_predux(ptmp0); tmp0 += predux(ptmp0);
} }
// process remaining scalars // process remaining scalars
// FIXME this loop get vectorized by the compiler ! // FIXME this loop get vectorized by the compiler !
for (Index j=alignedSize; j<size; ++j) for (Index j=alignedSize; j<depth; ++j)
tmp0 += cj.pmul(lhs0[j], rhs[j]); tmp0 += cj.pmul(lhs0[j], rhs[j]);
res[i] += alpha*tmp0; res[i*resIncr] += alpha*tmp0;
} }
if (skipRows) if (skipRows)
{ {
@@ -468,9 +548,12 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
} }
else else
break; break;
} while(PacketSize>1); } while(Vectorizable);
#undef _EIGEN_ACCUMULATE_PACKETS #undef _EIGEN_ACCUMULATE_PACKETS
} }
};
} // end namespace internal
#endif // EIGEN_GENERAL_MATRIX_VECTOR_H #endif // EIGEN_GENERAL_MATRIX_VECTOR_H

View File

@@ -25,19 +25,21 @@
#ifndef EIGEN_PARALLELIZER_H #ifndef EIGEN_PARALLELIZER_H
#define EIGEN_PARALLELIZER_H #define EIGEN_PARALLELIZER_H
namespace internal {
/** \internal */ /** \internal */
inline void ei_manage_multi_threading(Action action, int* v) inline void manage_multi_threading(Action action, int* v)
{ {
static int m_maxThreads = -1; static EIGEN_UNUSED int m_maxThreads = -1;
if(action==SetAction) if(action==SetAction)
{ {
ei_internal_assert(v!=0); eigen_internal_assert(v!=0);
m_maxThreads = *v; m_maxThreads = *v;
} }
else if(action==GetAction) else if(action==GetAction)
{ {
ei_internal_assert(v!=0); eigen_internal_assert(v!=0);
#ifdef EIGEN_HAS_OPENMP #ifdef EIGEN_HAS_OPENMP
if(m_maxThreads>0) if(m_maxThreads>0)
*v = m_maxThreads; *v = m_maxThreads;
@@ -49,7 +51,7 @@ inline void ei_manage_multi_threading(Action action, int* v)
} }
else else
{ {
ei_internal_assert(false); eigen_internal_assert(false);
} }
} }
@@ -58,7 +60,7 @@ inline void ei_manage_multi_threading(Action action, int* v)
inline int nbThreads() inline int nbThreads()
{ {
int ret; int ret;
ei_manage_multi_threading(GetAction, &ret); manage_multi_threading(GetAction, &ret);
return ret; return ret;
} }
@@ -66,7 +68,7 @@ inline int nbThreads()
* \sa nbThreads */ * \sa nbThreads */
inline void setNbThreads(int v) inline void setNbThreads(int v)
{ {
ei_manage_multi_threading(SetAction, &v); manage_multi_threading(SetAction, &v);
} }
template<typename Index> struct GemmParallelInfo template<typename Index> struct GemmParallelInfo
@@ -81,7 +83,7 @@ template<typename Index> struct GemmParallelInfo
}; };
template<bool Condition, typename Functor, typename Index> template<bool Condition, typename Functor, typename Index>
void ei_parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose) void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
{ {
#ifndef EIGEN_HAS_OPENMP #ifndef EIGEN_HAS_OPENMP
// FIXME the transpose variable is only needed to properly split // FIXME the transpose variable is only needed to properly split
@@ -122,7 +124,7 @@ void ei_parallelize_gemm(const Functor& func, Index rows, Index cols, bool trans
Index blockCols = (cols / threads) & ~Index(0x3); Index blockCols = (cols / threads) & ~Index(0x3);
Index blockRows = (rows / threads) & ~Index(0x7); Index blockRows = (rows / threads) & ~Index(0x7);
GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads]; GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads];
#pragma omp parallel for schedule(static,1) num_threads(threads) #pragma omp parallel for schedule(static,1) num_threads(threads)
@@ -147,4 +149,6 @@ void ei_parallelize_gemm(const Functor& func, Index rows, Index cols, bool trans
#endif #endif
} }
} // end namespace internal
#endif // EIGEN_PARALLELIZER_H #endif // EIGEN_PARALLELIZER_H

View File

@@ -25,13 +25,14 @@
#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
namespace internal {
// pack a selfadjoint block diagonal for use with the gebp_kernel // pack a selfadjoint block diagonal for use with the gebp_kernel
template<typename Scalar, typename Index, int mr, int StorageOrder> template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
struct ei_symm_pack_lhs struct symm_pack_lhs
{ {
enum { PacketSize = ei_packet_traits<Scalar>::size };
template<int BlockRows> inline template<int BlockRows> inline
void pack(Scalar* blockA, const ei_const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count) void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
{ {
// normal copy // normal copy
for(Index k=0; k<i; k++) for(Index k=0; k<i; k++)
@@ -42,9 +43,9 @@ struct ei_symm_pack_lhs
for(Index k=i; k<i+BlockRows; k++) for(Index k=i; k<i+BlockRows; k++)
{ {
for(Index w=0; w<h; w++) for(Index w=0; w<h; w++)
blockA[count++] = ei_conj(lhs(k, i+w)); // transposed blockA[count++] = conj(lhs(k, i+w)); // transposed
blockA[count++] = ei_real(lhs(k,k)); // real (diagonal) blockA[count++] = real(lhs(k,k)); // real (diagonal)
for(Index w=h+1; w<BlockRows; w++) for(Index w=h+1; w<BlockRows; w++)
blockA[count++] = lhs(i+w, k); // normal blockA[count++] = lhs(i+w, k); // normal
@@ -53,22 +54,22 @@ struct ei_symm_pack_lhs
// transposed copy // transposed copy
for(Index k=i+BlockRows; k<cols; k++) for(Index k=i+BlockRows; k<cols; k++)
for(Index w=0; w<BlockRows; w++) for(Index w=0; w<BlockRows; w++)
blockA[count++] = ei_conj(lhs(k, i+w)); // transposed blockA[count++] = conj(lhs(k, i+w)); // transposed
} }
void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows) void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
{ {
ei_const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride); const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
Index count = 0; Index count = 0;
Index peeled_mc = (rows/mr)*mr; Index peeled_mc = (rows/Pack1)*Pack1;
for(Index i=0; i<peeled_mc; i+=mr) for(Index i=0; i<peeled_mc; i+=Pack1)
{ {
pack<mr>(blockA, lhs, cols, i, count); pack<Pack1>(blockA, lhs, cols, i, count);
} }
if(rows-peeled_mc>=PacketSize) if(rows-peeled_mc>=Pack2)
{ {
pack<PacketSize>(blockA, lhs, cols, peeled_mc, count); pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
peeled_mc += PacketSize; peeled_mc += Pack2;
} }
// do the same with mr==1 // do the same with mr==1
@@ -77,23 +78,23 @@ struct ei_symm_pack_lhs
for(Index k=0; k<i; k++) for(Index k=0; k<i; k++)
blockA[count++] = lhs(i, k); // normal blockA[count++] = lhs(i, k); // normal
blockA[count++] = ei_real(lhs(i, i)); // real (diagonal) blockA[count++] = real(lhs(i, i)); // real (diagonal)
for(Index k=i+1; k<cols; k++) for(Index k=i+1; k<cols; k++)
blockA[count++] = ei_conj(lhs(k, i)); // transposed blockA[count++] = conj(lhs(k, i)); // transposed
} }
} }
}; };
template<typename Scalar, typename Index, int nr, int StorageOrder> template<typename Scalar, typename Index, int nr, int StorageOrder>
struct ei_symm_pack_rhs struct symm_pack_rhs
{ {
enum { PacketSize = ei_packet_traits<Scalar>::size }; enum { PacketSize = packet_traits<Scalar>::size };
void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Scalar alpha, Index rows, Index cols, Index k2) void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
{ {
Index end_k = k2 + rows; Index end_k = k2 + rows;
Index count = 0; Index count = 0;
ei_const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride); const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
Index packet_cols = (cols/nr)*nr; Index packet_cols = (cols/nr)*nr;
// first part: normal case // first part: normal case
@@ -101,12 +102,12 @@ struct ei_symm_pack_rhs
{ {
for(Index k=k2; k<end_k; k++) for(Index k=k2; k<end_k; k++)
{ {
blockB[count+0] = alpha*rhs(k,j2+0); blockB[count+0] = rhs(k,j2+0);
blockB[count+1] = alpha*rhs(k,j2+1); blockB[count+1] = rhs(k,j2+1);
if (nr==4) if (nr==4)
{ {
blockB[count+2] = alpha*rhs(k,j2+2); blockB[count+2] = rhs(k,j2+2);
blockB[count+3] = alpha*rhs(k,j2+3); blockB[count+3] = rhs(k,j2+3);
} }
count += nr; count += nr;
} }
@@ -119,12 +120,12 @@ struct ei_symm_pack_rhs
// transpose // transpose
for(Index k=k2; k<j2; k++) for(Index k=k2; k<j2; k++)
{ {
blockB[count+0] = alpha*ei_conj(rhs(j2+0,k)); blockB[count+0] = conj(rhs(j2+0,k));
blockB[count+1] = alpha*ei_conj(rhs(j2+1,k)); blockB[count+1] = conj(rhs(j2+1,k));
if (nr==4) if (nr==4)
{ {
blockB[count+2] = alpha*ei_conj(rhs(j2+2,k)); blockB[count+2] = conj(rhs(j2+2,k));
blockB[count+3] = alpha*ei_conj(rhs(j2+3,k)); blockB[count+3] = conj(rhs(j2+3,k));
} }
count += nr; count += nr;
} }
@@ -134,25 +135,25 @@ struct ei_symm_pack_rhs
{ {
// normal // normal
for (Index w=0 ; w<h; ++w) for (Index w=0 ; w<h; ++w)
blockB[count+w] = alpha*rhs(k,j2+w); blockB[count+w] = rhs(k,j2+w);
blockB[count+h] = alpha*rhs(k,k); blockB[count+h] = real(rhs(k,k));
// transpose // transpose
for (Index w=h+1 ; w<nr; ++w) for (Index w=h+1 ; w<nr; ++w)
blockB[count+w] = alpha*ei_conj(rhs(j2+w,k)); blockB[count+w] = conj(rhs(j2+w,k));
count += nr; count += nr;
++h; ++h;
} }
// normal // normal
for(Index k=j2+nr; k<end_k; k++) for(Index k=j2+nr; k<end_k; k++)
{ {
blockB[count+0] = alpha*rhs(k,j2+0); blockB[count+0] = rhs(k,j2+0);
blockB[count+1] = alpha*rhs(k,j2+1); blockB[count+1] = rhs(k,j2+1);
if (nr==4) if (nr==4)
{ {
blockB[count+2] = alpha*rhs(k,j2+2); blockB[count+2] = rhs(k,j2+2);
blockB[count+3] = alpha*rhs(k,j2+3); blockB[count+3] = rhs(k,j2+3);
} }
count += nr; count += nr;
} }
@@ -163,12 +164,12 @@ struct ei_symm_pack_rhs
{ {
for(Index k=k2; k<end_k; k++) for(Index k=k2; k<end_k; k++)
{ {
blockB[count+0] = alpha*ei_conj(rhs(j2+0,k)); blockB[count+0] = conj(rhs(j2+0,k));
blockB[count+1] = alpha*ei_conj(rhs(j2+1,k)); blockB[count+1] = conj(rhs(j2+1,k));
if (nr==4) if (nr==4)
{ {
blockB[count+2] = alpha*ei_conj(rhs(j2+2,k)); blockB[count+2] = conj(rhs(j2+2,k));
blockB[count+3] = alpha*ei_conj(rhs(j2+3,k)); blockB[count+3] = conj(rhs(j2+3,k));
} }
count += nr; count += nr;
} }
@@ -181,13 +182,13 @@ struct ei_symm_pack_rhs
Index half = std::min(end_k,j2); Index half = std::min(end_k,j2);
for(Index k=k2; k<half; k++) for(Index k=k2; k<half; k++)
{ {
blockB[count] = alpha*ei_conj(rhs(j2,k)); blockB[count] = conj(rhs(j2,k));
count += 1; count += 1;
} }
if(half==j2 && half<k2+rows) if(half==j2 && half<k2+rows)
{ {
blockB[count] = alpha*ei_real(rhs(j2,j2)); blockB[count] = real(rhs(j2,j2));
count += 1; count += 1;
} }
else else
@@ -196,7 +197,7 @@ struct ei_symm_pack_rhs
// normal // normal
for(Index k=half+1; k<k2+rows; k++) for(Index k=half+1; k<k2+rows; k++)
{ {
blockB[count] = alpha*rhs(k,j2); blockB[count] = rhs(k,j2);
count += 1; count += 1;
} }
} }
@@ -210,12 +211,12 @@ template <typename Scalar, typename Index,
int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs, int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs, int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
int ResStorageOrder> int ResStorageOrder>
struct ei_product_selfadjoint_matrix; struct product_selfadjoint_matrix;
template <typename Scalar, typename Index, template <typename Scalar, typename Index,
int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs, int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs> int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor> struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
{ {
static EIGEN_STRONG_INLINE void run( static EIGEN_STRONG_INLINE void run(
@@ -225,7 +226,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint
Scalar* res, Index resStride, Scalar* res, Index resStride,
Scalar alpha) Scalar alpha)
{ {
ei_product_selfadjoint_matrix<Scalar, Index, product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor, EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs), RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor, EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
@@ -238,7 +239,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint
template <typename Scalar, typename Index, template <typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs, int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs> int RhsStorageOrder, bool ConjugateRhs>
struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor> struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
{ {
static EIGEN_DONT_INLINE void run( static EIGEN_DONT_INLINE void run(
@@ -250,15 +251,12 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
{ {
Index size = rows; Index size = rows;
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
if (ConjugateRhs) typedef gebp_traits<Scalar,Scalar> Traits;
alpha = ei_conj(alpha);
typedef ei_product_blocking_traits<Scalar> Blocking; Index kc = size; // cache block size along the K direction
Index kc = size; // cache block size along the K direction
Index mc = rows; // cache block size along the M direction Index mc = rows; // cache block size along the M direction
Index nc = cols; // cache block size along the N direction Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc); computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
@@ -266,14 +264,15 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
kc = std::min(kc,mc); kc = std::min(kc,mc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols; std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB); Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr; Scalar* blockB = allocatedBlockB + sizeW;
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp_kernel; gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
ei_symm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder> pack_lhs; symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr,RhsStorageOrder> pack_rhs; gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed; gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
for(Index k2=0; k2<size; k2+=kc) for(Index k2=0; k2<size; k2+=kc)
{ {
@@ -282,7 +281,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
// we have selected one row panel of rhs and one column panel of lhs // we have selected one row panel of rhs and one column panel of lhs
// pack rhs's panel into a sequential chunk of memory // pack rhs's panel into a sequential chunk of memory
// and expand each coeff to a constant packet for further reuse // and expand each coeff to a constant packet for further reuse
pack_rhs(blockB, &rhs(k2,0), rhsStride, alpha, actual_kc, cols); pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
// the select lhs's panel has to be split in three different parts: // the select lhs's panel has to be split in three different parts:
// 1 - the transposed panel above the diagonal block => transposed packed copy // 1 - the transposed panel above the diagonal block => transposed packed copy
@@ -294,7 +293,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
// transposed packed copy // transposed packed copy
pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc); pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols); gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
} }
// the block diagonal // the block diagonal
{ {
@@ -302,16 +301,16 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
// symmetric packed copy // symmetric packed copy
pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc); pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols); gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
} }
for(Index i2=k2+kc; i2<size; i2+=mc) for(Index i2=k2+kc; i2<size; i2+=mc)
{ {
const Index actual_mc = std::min(i2+mc,size)-i2; const Index actual_mc = std::min(i2+mc,size)-i2;
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder,false>() gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc); (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols); gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
} }
} }
@@ -324,7 +323,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
template <typename Scalar, typename Index, template <typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs, int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs> int RhsStorageOrder, bool ConjugateRhs>
struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor> struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
{ {
static EIGEN_DONT_INLINE void run( static EIGEN_DONT_INLINE void run(
@@ -336,12 +335,9 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
{ {
Index size = cols; Index size = cols;
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
if (ConjugateRhs) typedef gebp_traits<Scalar,Scalar> Traits;
alpha = ei_conj(alpha);
typedef ei_product_blocking_traits<Scalar> Blocking;
Index kc = size; // cache block size along the K direction Index kc = size; // cache block size along the K direction
Index mc = rows; // cache block size along the M direction Index mc = rows; // cache block size along the M direction
@@ -349,19 +345,20 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc); computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols; std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB); Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr; Scalar* blockB = allocatedBlockB + sizeW;
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp_kernel; gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder> pack_lhs; gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
ei_symm_pack_rhs<Scalar, Index, Blocking::nr,RhsStorageOrder> pack_rhs; symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
for(Index k2=0; k2<size; k2+=kc) for(Index k2=0; k2<size; k2+=kc)
{ {
const Index actual_kc = std::min(k2+kc,size)-k2; const Index actual_kc = std::min(k2+kc,size)-k2;
pack_rhs(blockB, _rhs, rhsStride, alpha, actual_kc, cols, k2); pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
// => GEPP // => GEPP
for(Index i2=0; i2<rows; i2+=mc) for(Index i2=0; i2<rows; i2+=mc)
@@ -369,7 +366,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
const Index actual_mc = std::min(i2+mc,rows)-i2; const Index actual_mc = std::min(i2+mc,rows)-i2;
pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc); pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols); gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
} }
} }
@@ -378,14 +375,18 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
} }
}; };
} // end namespace internal
/*************************************************************************** /***************************************************************************
* Wrapper to ei_product_selfadjoint_matrix * Wrapper to product_selfadjoint_matrix
***************************************************************************/ ***************************************************************************/
namespace internal {
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode> template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct ei_traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> > struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
: ei_traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> > : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
{}; {};
}
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode> template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
@@ -404,7 +405,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{ {
ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
@@ -412,18 +413,18 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs); * RhsBlasTraits::extractScalarFactor(m_rhs);
ei_product_selfadjoint_matrix<Scalar, Index, internal::product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(LhsIsUpper, EIGEN_LOGICAL_XOR(LhsIsUpper,
ei_traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
EIGEN_LOGICAL_XOR(RhsIsUpper, EIGEN_LOGICAL_XOR(RhsIsUpper,
ei_traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint, internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)), NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
ei_traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor> internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
::run( ::run(
lhs.rows(), rhs.cols(), // sizes lhs.rows(), rhs.cols(), // sizes
&lhs.coeff(0,0), lhs.outerStride(), // lhs info &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
&rhs.coeff(0,0), rhs.outerStride(), // rhs info &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
&dst.coeffRef(0,0), dst.outerStride(), // result info &dst.coeffRef(0,0), dst.outerStride(), // result info
actualAlpha // alpha actualAlpha // alpha
); );

View File

@@ -25,19 +25,23 @@
#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H #ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H
#define EIGEN_SELFADJOINT_MATRIX_VECTOR_H #define EIGEN_SELFADJOINT_MATRIX_VECTOR_H
namespace internal {
/* Optimized selfadjoint matrix * vector product: /* Optimized selfadjoint matrix * vector product:
* This algorithm processes 2 columns at onces that allows to both reduce * This algorithm processes 2 columns at onces that allows to both reduce
* the number of load/stores of the result by a factor 2 and to reduce * the number of load/stores of the result by a factor 2 and to reduce
* the instruction dependency. * the instruction dependency.
*/ */
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector( static EIGEN_DONT_INLINE void product_selfadjoint_vector(
Index size, Index size,
const Scalar* lhs, Index lhsStride, const Scalar* lhs, Index lhsStride,
const Scalar* _rhs, Index rhsIncr, const Scalar* _rhs, Index rhsIncr,
Scalar* res, Scalar alpha) Scalar* res,
Scalar alpha)
{ {
typedef typename ei_packet_traits<Scalar>::type Packet; typedef typename packet_traits<Scalar>::type Packet;
typedef typename NumTraits<Scalar>::Real RealScalar;
const Index PacketSize = sizeof(Packet)/sizeof(Scalar); const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
enum { enum {
@@ -46,11 +50,16 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
FirstTriangular = IsRowMajor == IsLower FirstTriangular = IsRowMajor == IsLower
}; };
ei_conj_helper<NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0; conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0;
ei_conj_helper<NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1; conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex, ConjugateRhs> cjd;
Scalar cjAlpha = ConjugateRhs ? ei_conj(alpha) : alpha; conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0;
conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;
Scalar cjAlpha = ConjugateRhs ? conj(alpha) : alpha;
// FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed.
// if the rhs is not sequentially stored in memory we copy it to a temporary buffer, // if the rhs is not sequentially stored in memory we copy it to a temporary buffer,
// this is because we need to extract packets // this is because we need to extract packets
const Scalar* EIGEN_RESTRICT rhs = _rhs; const Scalar* EIGEN_RESTRICT rhs = _rhs;
@@ -74,39 +83,39 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride; register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
Scalar t0 = cjAlpha * rhs[j]; Scalar t0 = cjAlpha * rhs[j];
Packet ptmp0 = ei_pset1(t0); Packet ptmp0 = pset1<Packet>(t0);
Scalar t1 = cjAlpha * rhs[j+1]; Scalar t1 = cjAlpha * rhs[j+1];
Packet ptmp1 = ei_pset1(t1); Packet ptmp1 = pset1<Packet>(t1);
Scalar t2 = 0; Scalar t2 = 0;
Packet ptmp2 = ei_pset1(t2); Packet ptmp2 = pset1<Packet>(t2);
Scalar t3 = 0; Scalar t3 = 0;
Packet ptmp3 = ei_pset1(t3); Packet ptmp3 = pset1<Packet>(t3);
size_t starti = FirstTriangular ? 0 : j+2; size_t starti = FirstTriangular ? 0 : j+2;
size_t endi = FirstTriangular ? j : size; size_t endi = FirstTriangular ? j : size;
size_t alignedEnd = starti; size_t alignedStart = (starti) + first_aligned(&res[starti], endi-starti);
size_t alignedStart = (starti) + ei_first_aligned(&res[starti], endi-starti); size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
res[j] += cj0.pmul(A0[j], t0); // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
res[j] += cjd.pmul(internal::real(A0[j]), t0);
res[j+1] += cjd.pmul(internal::real(A1[j+1]), t1);
if(FirstTriangular) if(FirstTriangular)
{ {
res[j+1] += cj0.pmul(A1[j+1], t1);
res[j] += cj0.pmul(A1[j], t1); res[j] += cj0.pmul(A1[j], t1);
t3 += cj1.pmul(A1[j], rhs[j]); t3 += cj1.pmul(A1[j], rhs[j]);
} }
else else
{ {
res[j+1] += cj0.pmul(A0[j+1],t0) + cj0.pmul(A1[j+1],t1); res[j+1] += cj0.pmul(A0[j+1],t0);
t2 += cj1.pmul(A0[j+1], rhs[j+1]); t2 += cj1.pmul(A0[j+1], rhs[j+1]);
} }
for (size_t i=starti; i<alignedStart; ++i) for (size_t i=starti; i<alignedStart; ++i)
{ {
res[i] += t0 * A0[i] + t1 * A1[i]; res[i] += t0 * A0[i] + t1 * A1[i];
t2 += ei_conj(A0[i]) * rhs[i]; t2 += conj(A0[i]) * rhs[i];
t3 += ei_conj(A1[i]) * rhs[i]; t3 += conj(A1[i]) * rhs[i];
} }
// Yes this an optimization for gcc 4.3 and 4.4 (=> huge speed up) // Yes this an optimization for gcc 4.3 and 4.4 (=> huge speed up)
// gcc 4.2 does this optimization automatically. // gcc 4.2 does this optimization automatically.
@@ -116,15 +125,15 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
Scalar* EIGEN_RESTRICT resIt = res + alignedStart; Scalar* EIGEN_RESTRICT resIt = res + alignedStart;
for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize) for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize)
{ {
Packet A0i = ei_ploadu(a0It); a0It += PacketSize; Packet A0i = ploadu<Packet>(a0It); a0It += PacketSize;
Packet A1i = ei_ploadu(a1It); a1It += PacketSize; Packet A1i = ploadu<Packet>(a1It); a1It += PacketSize;
Packet Bi = ei_ploadu(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases Packet Bi = ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases
Packet Xi = ei_pload (resIt); Packet Xi = pload <Packet>(resIt);
Xi = cj0.pmadd(A0i,ptmp0, cj0.pmadd(A1i,ptmp1,Xi)); Xi = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi));
ptmp2 = cj1.pmadd(A0i, Bi, ptmp2); ptmp2 = pcj1.pmadd(A0i, Bi, ptmp2);
ptmp3 = cj1.pmadd(A1i, Bi, ptmp3); ptmp3 = pcj1.pmadd(A1i, Bi, ptmp3);
ei_pstore(resIt,Xi); resIt += PacketSize; pstore(resIt,Xi); resIt += PacketSize;
} }
for (size_t i=alignedEnd; i<endi; i++) for (size_t i=alignedEnd; i<endi; i++)
{ {
@@ -133,8 +142,8 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
t3 += cj1.pmul(A1[i], rhs[i]); t3 += cj1.pmul(A1[i], rhs[i]);
} }
res[j] += alpha * (t2 + ei_predux(ptmp2)); res[j] += alpha * (t2 + predux(ptmp2));
res[j+1] += alpha * (t3 + ei_predux(ptmp3)); res[j+1] += alpha * (t3 + predux(ptmp3));
} }
for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++) for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
{ {
@@ -142,8 +151,10 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
Scalar t1 = cjAlpha * rhs[j]; Scalar t1 = cjAlpha * rhs[j];
Scalar t2 = 0; Scalar t2 = 0;
res[j] += cj0.pmul(A0[j],t1); // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++) { res[j] += cjd.pmul(internal::real(A0[j]), t1);
for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
{
res[i] += cj0.pmul(A0[i], t1); res[i] += cj0.pmul(A0[i], t1);
t2 += cj1.pmul(A0[i], rhs[i]); t2 += cj1.pmul(A0[i], rhs[i]);
} }
@@ -154,14 +165,18 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
ei_aligned_stack_delete(Scalar, const_cast<Scalar*>(rhs), size); ei_aligned_stack_delete(Scalar, const_cast<Scalar*>(rhs), size);
} }
} // end namespace internal
/*************************************************************************** /***************************************************************************
* Wrapper to ei_product_selfadjoint_vector * Wrapper to product_selfadjoint_vector
***************************************************************************/ ***************************************************************************/
namespace internal {
template<typename Lhs, int LhsMode, typename Rhs> template<typename Lhs, int LhsMode, typename Rhs>
struct ei_traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> > struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> >
: ei_traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> > : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> >
{}; {};
}
template<typename Lhs, int LhsMode, typename Rhs> template<typename Lhs, int LhsMode, typename Rhs>
struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
@@ -175,9 +190,13 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
{ {
ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); typedef typename Dest::Scalar ResScalar;
typedef typename Base::RhsScalar RhsScalar;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols());
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
@@ -185,16 +204,94 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs); * RhsBlasTraits::extractScalarFactor(m_rhs);
ei_assert(dst.innerStride()==1 && "not implemented yet"); enum {
EvalToDest = (Dest::InnerStrideAtCompileTime==1),
ei_product_selfadjoint_vector<Scalar, Index, (ei_traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)> UseRhs = (_ActualRhsType::InnerStrideAtCompileTime==1)
};
internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;
internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs;
bool freeDestPtr = false;
ResScalar* actualDestPtr;
if(EvalToDest)
actualDestPtr = dest.data();
else
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if((actualDestPtr=static_dest.data())==0)
{
freeDestPtr = true;
actualDestPtr = ei_aligned_stack_new(ResScalar,dest.size());
}
MappedDest(actualDestPtr, dest.size()) = dest;
}
bool freeRhsPtr = false;
RhsScalar* actualRhsPtr;
if(UseRhs)
actualRhsPtr = const_cast<RhsScalar*>(rhs.data());
else
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = rhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if((actualRhsPtr=static_rhs.data())==0)
{
freeRhsPtr = true;
actualRhsPtr = ei_aligned_stack_new(RhsScalar,rhs.size());
}
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
}
internal::product_selfadjoint_vector<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>
( (
lhs.rows(), // size lhs.rows(), // size
&lhs.coeff(0,0), lhs.outerStride(), // lhs info &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
&rhs.coeff(0), rhs.innerStride(), // rhs info actualRhsPtr, 1, // rhs info
&dst.coeffRef(0), // result info actualDestPtr, // result info
actualAlpha // scale factor actualAlpha // scale factor
); );
if(!EvalToDest)
{
dest = MappedDest(actualDestPtr, dest.size());
if(freeDestPtr) ei_aligned_stack_delete(ResScalar, actualDestPtr, dest.size());
}
if(freeRhsPtr) ei_aligned_stack_delete(RhsScalar, actualRhsPtr, rhs.size());
}
};
namespace internal {
template<typename Lhs, typename Rhs, int RhsMode>
struct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> >
: traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> >
{};
}
template<typename Lhs, typename Rhs, int RhsMode>
struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
: public ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs >
{
EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
enum {
RhsUpLo = RhsMode&(Upper|Lower)
};
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
{
// let's simply transpose the product
Transpose<Dest> destT(dest);
SelfadjointProductMatrix<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false,
Transpose<const Lhs>, 0, true>(m_rhs.transpose(), m_lhs.transpose()).scaleAndAddTo(destT, alpha);
} }
}; };

Some files were not shown because too many files have changed in this diff Show More