mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Compare commits
638 Commits
3.2.8
...
3.3-alpha1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f9303cc7c5 | ||
|
|
b20a55a608 | ||
|
|
ed265258e4 | ||
|
|
a835dfca73 | ||
|
|
941a99ac1a | ||
|
|
d91db41a31 | ||
|
|
3942db9d7c | ||
|
|
5c9ee73eb9 | ||
|
|
5a1cc5d24c | ||
|
|
2795ffd6a0 | ||
|
|
ef2b54f422 | ||
|
|
5ad7981f73 | ||
|
|
aa768add0b | ||
|
|
51455824ea | ||
|
|
f8976fdbe0 | ||
|
|
92b9f0e102 | ||
|
|
cda55ab245 | ||
|
|
14458ec0a0 | ||
|
|
6b99afa5ae | ||
|
|
b5ad3d2cf7 | ||
|
|
6522c3a6f0 | ||
|
|
be5e2ecc21 | ||
|
|
aba8c9ee17 | ||
|
|
a75616887e | ||
|
|
280f93ff65 | ||
|
|
6059188f9d | ||
|
|
0b2412df50 | ||
|
|
9001f4a46b | ||
|
|
f41831e445 | ||
|
|
2ab603316a | ||
|
|
2ed1495eec | ||
|
|
d4c24eb016 | ||
|
|
78358a7241 | ||
|
|
a09cfe650f | ||
|
|
e5c78d85c8 | ||
|
|
1bdd06a199 | ||
|
|
0721690dbb | ||
|
|
8097d8d028 | ||
|
|
d2e0927127 | ||
|
|
dc2c103b3b | ||
|
|
d6a4805fdf | ||
|
|
a40f6ab276 | ||
|
|
61e0977e10 | ||
|
|
712e2fed17 | ||
|
|
a5d1bb2be8 | ||
|
|
93635cafee | ||
|
|
23aab82c0c | ||
|
|
0d5e673baa | ||
|
|
cac6b23033 | ||
|
|
febcce34f1 | ||
|
|
6245591349 | ||
|
|
60e4260d0d | ||
|
|
e68c7b8368 | ||
|
|
65bfa5fce7 | ||
|
|
3602926ed5 | ||
|
|
ce57dbd937 | ||
|
|
2afdef6a54 | ||
|
|
1f5024332e | ||
|
|
65186ef18d | ||
|
|
becd89df29 | ||
|
|
d4f5efc51a | ||
|
|
7e0d7a76b8 | ||
|
|
e31fc50280 | ||
|
|
9a4713e505 | ||
|
|
506964fc29 | ||
|
|
db0f5c9d90 | ||
|
|
b986c147cd | ||
|
|
cbce0e3b12 | ||
|
|
a5dc49e7e8 | ||
|
|
e1d28b7ea7 | ||
|
|
0570594f2c | ||
|
|
099597406f | ||
|
|
6db3a557f4 | ||
|
|
aec4814370 | ||
|
|
f7d5b9323d | ||
|
|
175ed636ea | ||
|
|
76874b128e | ||
|
|
41e1f3498c | ||
|
|
b9db19aec4 | ||
|
|
f84417d97b | ||
|
|
1a30a8e7a2 | ||
|
|
a44d022caf | ||
|
|
2195822df6 | ||
|
|
f6282e451a | ||
|
|
4b3052c54d | ||
|
|
a446020b78 | ||
|
|
3d951df223 | ||
|
|
6d6e6d0b88 | ||
|
|
ce65c2922a | ||
|
|
4200bdec24 | ||
|
|
3b0ad02c10 | ||
|
|
815fa0dbf6 | ||
|
|
d259b719d1 | ||
|
|
0dda72316f | ||
|
|
586d10f7e0 | ||
|
|
d3e5db9a80 | ||
|
|
5e635f9ca1 | ||
|
|
45ee14a13a | ||
|
|
87f3e533f5 | ||
|
|
ab8b497a7e | ||
|
|
6544b49e59 | ||
|
|
2d93060291 | ||
|
|
c11971de37 | ||
|
|
88e352adac | ||
|
|
6799c26cd6 | ||
|
|
7a39439904 | ||
|
|
e94f9eb637 | ||
|
|
513e357b48 | ||
|
|
943035e5bd | ||
|
|
06a22ca5bd | ||
|
|
3275eddc24 | ||
|
|
979b73cebf | ||
|
|
a5ec25f11c | ||
|
|
7a243959b4 | ||
|
|
b756f6af5e | ||
|
|
05787f8367 | ||
|
|
b900fe47d5 | ||
|
|
4b3d697e12 | ||
|
|
8315e025e1 | ||
|
|
e892524efe | ||
|
|
f5aa640862 | ||
|
|
7cecd39a84 | ||
|
|
592ee2a4b4 | ||
|
|
6527dbb9f8 | ||
|
|
b80036abec | ||
|
|
3912ca0d53 | ||
|
|
ea87561564 | ||
|
|
b8df8815f4 | ||
|
|
002c2923c2 | ||
|
|
dbb3e2cf8a | ||
|
|
0d09845562 | ||
|
|
20b96025fd | ||
|
|
1dd6a329e8 | ||
|
|
bc40eb745d | ||
|
|
e6297741c9 | ||
|
|
6de6fa9483 | ||
|
|
7b7df7b6b8 | ||
|
|
6e55284e51 | ||
|
|
a93af65938 | ||
|
|
7fa6fe8d8c | ||
|
|
fa17358c4b | ||
|
|
3f2101b03b | ||
|
|
0485a2468d | ||
|
|
ebdacfc5ea | ||
|
|
81f9e968fd | ||
|
|
66b30728f8 | ||
|
|
5359e5cdb2 | ||
|
|
864318e508 | ||
|
|
c2019dfeb3 | ||
|
|
ea7113dd0c | ||
|
|
9115896590 | ||
|
|
95ef94f1ee | ||
|
|
8f1d547c92 | ||
|
|
1e911b276c | ||
|
|
4ed213f97b | ||
|
|
56e155dd60 | ||
|
|
925d0d375a | ||
|
|
44eedd8915 | ||
|
|
6021b68d8b | ||
|
|
f1f480b116 | ||
|
|
dc31fcb9ba | ||
|
|
f587075987 | ||
|
|
28b36632ec | ||
|
|
109005c6c9 | ||
|
|
a4aa7c6217 | ||
|
|
7d41e97fa9 | ||
|
|
fffe63045c | ||
|
|
db9dbbda32 | ||
|
|
f0ce85b757 | ||
|
|
670c71d906 | ||
|
|
d8098ee7d5 | ||
|
|
3625734bc8 | ||
|
|
392a30db82 | ||
|
|
c911fc8dee | ||
|
|
98ff17eb9e | ||
|
|
e102ddbf1f | ||
|
|
555b9c6843 | ||
|
|
53b930887d | ||
|
|
3f49cf4c90 | ||
|
|
7f824dd613 | ||
|
|
c5f9eafcbc | ||
|
|
33e699c9fe | ||
|
|
6b4d255cab | ||
|
|
973b0a90db | ||
|
|
84264ceebc | ||
|
|
b4ab72678c | ||
|
|
788941d3b1 | ||
|
|
4c8cd13b35 | ||
|
|
c38c195321 | ||
|
|
23535ed31c | ||
|
|
62f21e2d11 | ||
|
|
763c833637 | ||
|
|
36643eec0c | ||
|
|
02db7c9bc6 | ||
|
|
53a61a067b | ||
|
|
95e19be381 | ||
|
|
2a33075aeb | ||
|
|
23da99492f | ||
|
|
6441befbb3 | ||
|
|
c3e398d138 | ||
|
|
b0d08869a9 | ||
|
|
18c9d155f3 | ||
|
|
71523a2e25 | ||
|
|
d9778f3391 | ||
|
|
5f9630d7f9 | ||
|
|
793e4c6d77 | ||
|
|
307c4fc292 | ||
|
|
bb3a9b4941 | ||
|
|
476beed7f8 | ||
|
|
9fc1c92137 | ||
|
|
9c7cfa7dab | ||
|
|
3ccd23efc0 | ||
|
|
0848ba0a6e | ||
|
|
b3b3dcad05 | ||
|
|
ad5fdc7ddd | ||
|
|
40821876ea | ||
|
|
7043083be4 | ||
|
|
84aaef93ba | ||
|
|
6b33b29f00 | ||
|
|
846b227bb7 | ||
|
|
368ea23406 | ||
|
|
4cc0c961f3 | ||
|
|
386d9e5ebd | ||
|
|
a5a7b68b76 | ||
|
|
6fc5438205 | ||
|
|
e9edb085c0 | ||
|
|
6318d53b41 | ||
|
|
5c84dd5665 | ||
|
|
0c8b0e007b | ||
|
|
3f6aa4cd5d | ||
|
|
4a8888dfbc | ||
|
|
3af4c6c1c9 | ||
|
|
82b6ac0864 | ||
|
|
fad36cc814 | ||
|
|
06036d8bb1 | ||
|
|
d2db15016b | ||
|
|
6a9a29e96f | ||
|
|
bb6acc561e | ||
|
|
40f326ef2e | ||
|
|
ab5db86fe9 | ||
|
|
ea160a898c | ||
|
|
367794e668 | ||
|
|
736a805883 | ||
|
|
9ab8ac5c8b | ||
|
|
38874b1651 | ||
|
|
e2e66930c6 | ||
|
|
7baa1ba03e | ||
|
|
97cbe28829 | ||
|
|
972a535288 | ||
|
|
e5b490b654 | ||
|
|
a546be56e0 | ||
|
|
3946c981b1 | ||
|
|
2212e40e95 | ||
|
|
321a2cbe3d | ||
|
|
2f2a441a4d | ||
|
|
91b64a9c65 | ||
|
|
84d103bee8 | ||
|
|
916ef52fff | ||
|
|
d93ba137f2 | ||
|
|
9756c7fb4d | ||
|
|
93a62265dc | ||
|
|
b0d5aaafcc | ||
|
|
25a98be948 | ||
|
|
192bce2795 | ||
|
|
e6832ce93d | ||
|
|
0b2cbb2bdc | ||
|
|
feaf76c001 | ||
|
|
f899aeb301 | ||
|
|
785b9c0127 | ||
|
|
0eb06f1391 | ||
|
|
64753af3b7 | ||
|
|
cacbc5679d | ||
|
|
04665ef9e1 | ||
|
|
d4c574707e | ||
|
|
f9350e70eb | ||
|
|
4aba24a1b2 | ||
|
|
302cf8ffe2 | ||
|
|
3a4299b245 | ||
|
|
9aef0db992 | ||
|
|
9a2447b0c9 | ||
|
|
cd8b996f99 | ||
|
|
913a61870d | ||
|
|
8f031a3cee | ||
|
|
e6c5723dcd | ||
|
|
274b1f5d7e | ||
|
|
cbe3a1a83e | ||
|
|
a7ae628c9f | ||
|
|
0a9b5d1396 | ||
|
|
d0b7b5cb55 | ||
|
|
56d4ef7ad6 | ||
|
|
98a8d43457 | ||
|
|
b685660b22 | ||
|
|
8bc26562f4 | ||
|
|
3e7bc8d686 | ||
|
|
acc761cf0c | ||
|
|
ea1190486f | ||
|
|
0e5fed74e7 | ||
|
|
f13b3d4433 | ||
|
|
abec18bae0 | ||
|
|
9df186c140 | ||
|
|
466bcc589e | ||
|
|
d457734a19 | ||
|
|
6b800744ce | ||
|
|
48f6b274e2 | ||
|
|
2451679951 | ||
|
|
a81d17b73a | ||
|
|
051d5325cc | ||
|
|
ebea530782 | ||
|
|
c88e1abaf3 | ||
|
|
807793ec3b | ||
|
|
140f85bb99 | ||
|
|
a852001196 | ||
|
|
e66caf48e8 | ||
|
|
ef81730625 | ||
|
|
a605a1d7df | ||
|
|
447e060b81 | ||
|
|
494fa991c3 | ||
|
|
4a936974a5 | ||
|
|
c2107d30ce | ||
|
|
ebf8ca4fa8 | ||
|
|
dd698e6680 | ||
|
|
1dded10cb7 | ||
|
|
6273aca9b1 | ||
|
|
4dd7d0b5dc | ||
|
|
4b9eddaef8 | ||
|
|
28a4c92cbf | ||
|
|
173b34e9ab | ||
|
|
da2baf685d | ||
|
|
8c6a3b5ace | ||
|
|
de18cd413d | ||
|
|
1681a665d9 | ||
|
|
834f66e9fc | ||
|
|
40258078c6 | ||
|
|
c460af414e | ||
|
|
fd1d4bd86c | ||
|
|
91359e1d0a | ||
|
|
8838ed39f4 | ||
|
|
e7457e419d | ||
|
|
dfa991cbae | ||
|
|
dbd12b4cda | ||
|
|
d6a8b43b39 | ||
|
|
e709488361 | ||
|
|
10a1f81822 | ||
|
|
e5048b5501 | ||
|
|
249c48ba00 | ||
|
|
0250f4a9f2 | ||
|
|
0339502a4f | ||
|
|
43eb2ca6e1 | ||
|
|
016c29f207 | ||
|
|
70bc3b0668 | ||
|
|
3220eb2b93 | ||
|
|
fc2d5b86ce | ||
|
|
5a3c48e3c6 | ||
|
|
3b429b71e6 | ||
|
|
9c6b82bcd5 | ||
|
|
4f126b862d | ||
|
|
da5b98a94d | ||
|
|
d19d09ae6a | ||
|
|
9d7843d0d0 | ||
|
|
3be9f5c4d7 | ||
|
|
e0cff9ae0d | ||
|
|
5dbe758dc3 | ||
|
|
04c8c5d9ef | ||
|
|
0f82399fe9 | ||
|
|
761691f18d | ||
|
|
5401fbcc50 | ||
|
|
085aa8e601 | ||
|
|
0eb220c00d | ||
|
|
d7f51feb07 | ||
|
|
0e9753c8df | ||
|
|
1de49ef4c2 | ||
|
|
a1f1e1e51d | ||
|
|
7c18ab921c | ||
|
|
15b5adb327 | ||
|
|
74e558cfa8 | ||
|
|
03a0df2010 | ||
|
|
b8b7807269 | ||
|
|
383b6dfafe | ||
|
|
5861cfb55e | ||
|
|
3105986e71 | ||
|
|
39dcd01b0a | ||
|
|
8481dc21ea | ||
|
|
79b4e6acaf | ||
|
|
3c38589984 | ||
|
|
8313fb7df7 | ||
|
|
dfb674a25e | ||
|
|
20d030f207 | ||
|
|
678207e02a | ||
|
|
68d4afe985 | ||
|
|
f873686602 | ||
|
|
73cdeae1d3 | ||
|
|
0cbd5ae3cb | ||
|
|
ae01c05e18 | ||
|
|
bd76d837e6 | ||
|
|
35d3053d55 | ||
|
|
731d7b84b4 | ||
|
|
7bd578d11d | ||
|
|
3b169d792d | ||
|
|
3238ca6abc | ||
|
|
1efae98fee | ||
|
|
35722fa022 | ||
|
|
71950f02e5 | ||
|
|
58af8bf90c | ||
|
|
2adbf6b8ca | ||
|
|
41e20248f8 | ||
|
|
09a5361d1b | ||
|
|
266a84558f | ||
|
|
1b4bb20cf1 | ||
|
|
eb7e4c2b9c | ||
|
|
ad044008da | ||
|
|
79cb875249 | ||
|
|
7e225b6fa4 | ||
|
|
1b8cc9af43 | ||
|
|
3d59ae0203 | ||
|
|
4df8b5a75e | ||
|
|
b3343bfdae | ||
|
|
ccf290a65c | ||
|
|
d3f7915aeb | ||
|
|
abdbe8562e | ||
|
|
29eaa2b0f1 | ||
|
|
f42b105f73 | ||
|
|
d27968eb7e | ||
|
|
4472f3e578 | ||
|
|
83e5b7656b | ||
|
|
4bab4790c0 | ||
|
|
4e2b18d909 | ||
|
|
8d9bfb3a7b | ||
|
|
a6a628ca6b | ||
|
|
e134226a03 | ||
|
|
9ee62fdcd5 | ||
|
|
d6b2f300db | ||
|
|
61c45d7cfd | ||
|
|
d7698c18b7 | ||
|
|
f329d0908a | ||
|
|
2ab4922431 | ||
|
|
41b717de25 | ||
|
|
cc0f89eb3b | ||
|
|
dc04f12967 | ||
|
|
8878e1c1de | ||
|
|
596be3cd86 | ||
|
|
e26134ed62 | ||
|
|
e21e29a088 | ||
|
|
447a5a6b01 | ||
|
|
f52b78491c | ||
|
|
1c78d6f2a6 | ||
|
|
85da0c2281 | ||
|
|
364cfd529d | ||
|
|
25664afacd | ||
|
|
e1d6e6c972 | ||
|
|
577056aa94 | ||
|
|
5144f66728 | ||
|
|
0fd6d52724 | ||
|
|
eb6929cb19 | ||
|
|
f218c0181d | ||
|
|
fef4e071d7 | ||
|
|
46cf9cda32 | ||
|
|
7b829940d1 | ||
|
|
1d76ceab55 | ||
|
|
717b7954ce | ||
|
|
fb68b149cb | ||
|
|
35c3a8bb84 | ||
|
|
e274607d7f | ||
|
|
151b8b95c6 | ||
|
|
02babb9c0f | ||
|
|
3589a9c115 | ||
|
|
1dd3d89818 | ||
|
|
ca5c12587b | ||
|
|
e56aabf205 | ||
|
|
b6b88c0808 | ||
|
|
488c15615a | ||
|
|
9f58524cbd | ||
|
|
1330f8bbd1 | ||
|
|
d99ab35f9e | ||
|
|
8580eb6808 | ||
|
|
a9df28c95b | ||
|
|
5ffe29cb9f | ||
|
|
d73ccd717e | ||
|
|
2f6f8bf31c | ||
|
|
f2c3e2b10f | ||
|
|
657407227e | ||
|
|
f89fcefa79 | ||
|
|
a5e49976f5 | ||
|
|
19a71056ae | ||
|
|
31fdd67756 | ||
|
|
fd78874888 | ||
|
|
d4317a85e8 | ||
|
|
9e885fb766 | ||
|
|
224a1fe4c6 | ||
|
|
cf9940e17b | ||
|
|
39228cb224 | ||
|
|
a4f956b1da | ||
|
|
19bf13aa62 | ||
|
|
0ee391863e | ||
|
|
14a5f135a3 | ||
|
|
d23fcc0672 | ||
|
|
87681e508f | ||
|
|
4c8eeeaed6 | ||
|
|
cd3bbffa73 | ||
|
|
eedd5063fd | ||
|
|
58740ce4c6 | ||
|
|
4ab01f7c21 | ||
|
|
5db2baa573 | ||
|
|
4c8b95d5c5 | ||
|
|
7550107028 | ||
|
|
2dc968e453 | ||
|
|
2231b3dece | ||
|
|
00ea121881 | ||
|
|
0196141938 | ||
|
|
b0f2b6f297 | ||
|
|
d9cb604a5d | ||
|
|
4fd7f47692 | ||
|
|
ae73859a0a | ||
|
|
131449298f | ||
|
|
56ea45ff0f | ||
|
|
bb483313f6 | ||
|
|
fb53384b0f | ||
|
|
61409d9449 | ||
|
|
1a7b84dc75 | ||
|
|
37357a310f | ||
|
|
cf1eea11de | ||
|
|
78732186ee | ||
|
|
4250a0cab0 | ||
|
|
a4e37b0617 | ||
|
|
306fceccbe | ||
|
|
75e7f381c8 | ||
|
|
2386fc8528 | ||
|
|
e1f6a45b14 | ||
|
|
90893bbe18 | ||
|
|
473e6d4c3d | ||
|
|
4369538227 | ||
|
|
99cfbd6e84 | ||
|
|
6466fa63be | ||
|
|
05089aba75 | ||
|
|
bf9877a92a | ||
|
|
90f4e90f1d | ||
|
|
573b377110 | ||
|
|
2fc3b484d7 | ||
|
|
33669348c4 | ||
|
|
f5ff4d826f | ||
|
|
b7fc8746e0 | ||
|
|
f074bb4b5f | ||
|
|
57154fdb32 | ||
|
|
f41b1f1666 | ||
|
|
2fffe69b1b | ||
|
|
bcf9bb5c1f | ||
|
|
4ec3f04b3a | ||
|
|
2e9cb06a87 | ||
|
|
a46061ab7b | ||
|
|
a8ad8887bf | ||
|
|
400becc591 | ||
|
|
bffb6bdf45 | ||
|
|
27f3fb2bcc | ||
|
|
f8fbb3f9a6 | ||
|
|
8e817b65d0 | ||
|
|
410070e5ab | ||
|
|
1cfd51908c | ||
|
|
eb21a8173e | ||
|
|
8afce86e64 | ||
|
|
692136350b | ||
|
|
531fa9de77 | ||
|
|
26275b250a | ||
|
|
488874781b | ||
|
|
052b6b40f1 | ||
|
|
ecbf2a6656 | ||
|
|
6af6cf0c2e | ||
|
|
3cf642baa3 | ||
|
|
458cf91cd9 | ||
|
|
03ec601ff7 | ||
|
|
333b497383 | ||
|
|
2da1594750 | ||
|
|
01b8440579 | ||
|
|
3594451ee0 | ||
|
|
b192e29eae | ||
|
|
ab41652d81 | ||
|
|
7765039f1c | ||
|
|
a66f5fc2fd | ||
|
|
ece6b440f9 | ||
|
|
1b7e12847d | ||
|
|
0f4dd15dfc | ||
|
|
92ceb02c6d | ||
|
|
110fb90250 | ||
|
|
829dddd0fd | ||
|
|
db05f2d01e | ||
|
|
0ed00d5438 | ||
|
|
9bd8a4bab5 | ||
|
|
ee27d50633 | ||
|
|
73a24de424 | ||
|
|
63eb0f6fe6 | ||
|
|
fc5c3e85e2 | ||
|
|
4a3e6c8be1 | ||
|
|
c7bb1e8ea8 | ||
|
|
168ceb271e | ||
|
|
8fdcaded5e | ||
|
|
c43154bbc5 | ||
|
|
1ce0178363 | ||
|
|
3dca4a1efc | ||
|
|
05274219a7 | ||
|
|
2aa09e6b4e | ||
|
|
5d2fd64a1a | ||
|
|
f64b4480af | ||
|
|
eae8e27b7d | ||
|
|
37a93c4263 | ||
|
|
ccc1277a42 | ||
|
|
f839099512 | ||
|
|
9930e9583b | ||
|
|
1ec0f4fadf | ||
|
|
3109f0e74e | ||
|
|
ef09ce4552 | ||
|
|
3a4b6827b4 | ||
|
|
31e2ffe82c | ||
|
|
73dd95e7b0 | ||
|
|
682196e9fc | ||
|
|
33f40b2883 | ||
|
|
0f82a1d7b7 | ||
|
|
9aee1e300a | ||
|
|
b10cd3afd2 | ||
|
|
4084dce038 | ||
|
|
548b781380 | ||
|
|
6f4adc9e94 | ||
|
|
371d3bef36 | ||
|
|
63464754ef | ||
|
|
eb563049f7 | ||
|
|
dc7e6acc05 | ||
|
|
d4eda01488 | ||
|
|
24d65ac0b0 | ||
|
|
20cac72b82 | ||
|
|
36c9d08274 | ||
|
|
f77054f43c | ||
|
|
1d3b64d32b | ||
|
|
00f048d44f | ||
|
|
97a36ecba4 | ||
|
|
159fb181c2 | ||
|
|
91ab2489dd | ||
|
|
9daf8eba6f | ||
|
|
3373c903b3 | ||
|
|
9f49f00feb | ||
|
|
f56d452c7e | ||
|
|
af79b158a1 |
@@ -147,6 +147,12 @@ if(NOT MSVC)
|
||||
ei_add_cxx_compiler_flag("-Wenum-conversion")
|
||||
ei_add_cxx_compiler_flag("-Wc++11-extensions")
|
||||
|
||||
# -Wshadow is insanely too strict with gcc, hopefully it will become usable with gcc 6
|
||||
# if(NOT CMAKE_COMPILER_IS_GNUCXX OR (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "5.0.0"))
|
||||
if(NOT CMAKE_COMPILER_IS_GNUCXX)
|
||||
ei_add_cxx_compiler_flag("-Wshadow")
|
||||
endif()
|
||||
|
||||
ei_add_cxx_compiler_flag("-Wno-psabi")
|
||||
ei_add_cxx_compiler_flag("-Wno-variadic-macros")
|
||||
ei_add_cxx_compiler_flag("-Wno-long-long")
|
||||
@@ -168,6 +174,11 @@ if(NOT MSVC)
|
||||
else()
|
||||
ei_add_cxx_compiler_flag("-ansi")
|
||||
endif()
|
||||
|
||||
if(ANDROID_NDK)
|
||||
ei_add_cxx_compiler_flag("-pie")
|
||||
ei_add_cxx_compiler_flag("-fPIE")
|
||||
endif()
|
||||
|
||||
set(CMAKE_REQUIRED_FLAGS "")
|
||||
|
||||
@@ -208,7 +219,7 @@ if(NOT MSVC)
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_FMA "Enable/Disable FMA in tests/examples" OFF)
|
||||
if(EIGEN_TEST_FMA)
|
||||
if(EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma")
|
||||
message(STATUS "Enabling FMA in tests/examples")
|
||||
endif()
|
||||
@@ -227,7 +238,12 @@ if(NOT MSVC)
|
||||
|
||||
option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
|
||||
if(EIGEN_TEST_NEON)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a8")
|
||||
if(EIGEN_TEST_FMA)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon-vfpv4")
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp")
|
||||
message(STATUS "Enabling NEON in tests/examples")
|
||||
endif()
|
||||
|
||||
@@ -321,7 +337,7 @@ if(EIGEN_TEST_NO_EXCEPTIONS)
|
||||
message(STATUS "Disabling exceptions in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_C++0x "Enables all C++0x features." OFF)
|
||||
option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tensor module)." OFF)
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
|
||||
28
Eigen/Core
28
Eigen/Core
@@ -24,9 +24,15 @@
|
||||
#ifdef EIGEN_INTERNAL_DEBUGGING
|
||||
#undef EIGEN_INTERNAL_DEBUGGING
|
||||
#endif
|
||||
|
||||
|
||||
// Do not try to vectorize on CUDA!
|
||||
#ifndef EIGEN_DONT_VECTORIZE
|
||||
#define EIGEN_DONT_VECTORIZE
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_EXCEPTIONS
|
||||
#undef EIGEN_EXCEPTIONS
|
||||
#endif
|
||||
|
||||
// All functions callable from CUDA code must be qualified with __device__
|
||||
#define EIGEN_DEVICE_FUNC __host__ __device__
|
||||
@@ -67,9 +73,9 @@
|
||||
// and inclusion of their respective header files
|
||||
#include "src/Core/util/MKL_support.h"
|
||||
|
||||
// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into
|
||||
// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks
|
||||
#if !EIGEN_ALIGN
|
||||
// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
|
||||
// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
|
||||
#if EIGEN_MAX_ALIGN_BYTES==0
|
||||
#ifndef EIGEN_DONT_VECTORIZE
|
||||
#define EIGEN_DONT_VECTORIZE
|
||||
#endif
|
||||
@@ -125,6 +131,12 @@
|
||||
#define EIGEN_VECTORIZE_SSE4_1
|
||||
#define EIGEN_VECTORIZE_SSE4_2
|
||||
#endif
|
||||
#ifdef __AVX2__
|
||||
#define EIGEN_VECTORIZE_AVX2
|
||||
#endif
|
||||
#ifdef __FMA__
|
||||
#define EIGEN_VECTORIZE_FMA
|
||||
#endif
|
||||
|
||||
// include files
|
||||
|
||||
@@ -178,7 +190,7 @@
|
||||
#undef bool
|
||||
#undef vector
|
||||
#undef pixel
|
||||
#elif defined __ARM_NEON
|
||||
#elif (defined __ARM_NEON) || (defined __ARM_NEON__)
|
||||
#define EIGEN_VECTORIZE
|
||||
#define EIGEN_VECTORIZE_NEON
|
||||
#include <arm_neon.h>
|
||||
@@ -294,15 +306,19 @@ using std::ptrdiff_t;
|
||||
// Use AVX for floats and doubles, SSE for integers
|
||||
#include "src/Core/arch/SSE/PacketMath.h"
|
||||
#include "src/Core/arch/SSE/Complex.h"
|
||||
#include "src/Core/arch/SSE/MathFunctions.h"
|
||||
#include "src/Core/arch/AVX/PacketMath.h"
|
||||
#include "src/Core/arch/AVX/MathFunctions.h"
|
||||
#include "src/Core/arch/AVX/Complex.h"
|
||||
#include "src/Core/arch/AVX/TypeCasting.h"
|
||||
#elif defined EIGEN_VECTORIZE_SSE
|
||||
#include "src/Core/arch/SSE/PacketMath.h"
|
||||
#include "src/Core/arch/SSE/MathFunctions.h"
|
||||
#include "src/Core/arch/SSE/Complex.h"
|
||||
#include "src/Core/arch/SSE/TypeCasting.h"
|
||||
#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
|
||||
#include "src/Core/arch/AltiVec/PacketMath.h"
|
||||
#include "src/Core/arch/AltiVec/MathFunctions.h"
|
||||
#include "src/Core/arch/AltiVec/Complex.h"
|
||||
#elif defined EIGEN_VECTORIZE_NEON
|
||||
#include "src/Core/arch/NEON/PacketMath.h"
|
||||
@@ -343,7 +359,6 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/NestByValue.h"
|
||||
|
||||
// #include "src/Core/ForceAlignedAccess.h"
|
||||
// #include "src/Core/Flagged.h"
|
||||
|
||||
#include "src/Core/ReturnByValue.h"
|
||||
#include "src/Core/NoAlias.h"
|
||||
@@ -375,7 +390,6 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/IO.h"
|
||||
#include "src/Core/Swap.h"
|
||||
#include "src/Core/CommaInitializer.h"
|
||||
#include "src/Core/ProductBase.h"
|
||||
#include "src/Core/GeneralProduct.h"
|
||||
#include "src/Core/Solve.h"
|
||||
#include "src/Core/Inverse.h"
|
||||
|
||||
@@ -9,10 +9,6 @@
|
||||
#include "LU"
|
||||
#include <limits>
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.14159265358979323846
|
||||
#endif
|
||||
|
||||
/** \defgroup Geometry_Module Geometry module
|
||||
*
|
||||
*
|
||||
|
||||
@@ -12,24 +12,26 @@
|
||||
* This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a squared matrix, usually very large and sparse.
|
||||
* Those solvers are accessible via the following classes:
|
||||
* - ConjugateGradient for selfadjoint (hermitian) matrices,
|
||||
* - LeastSquaresConjugateGradient for rectangular least-square problems,
|
||||
* - BiCGSTAB for general square matrices.
|
||||
*
|
||||
* These iterative solvers are associated with some preconditioners:
|
||||
* - IdentityPreconditioner - not really useful
|
||||
* - DiagonalPreconditioner - also called JAcobi preconditioner, work very well on diagonal dominant matrices.
|
||||
* - IncompleteILUT - incomplete LU factorization with dual thresholding
|
||||
* - DiagonalPreconditioner - also called Jacobi preconditioner, work very well on diagonal dominant matrices.
|
||||
* - IncompleteLUT - incomplete LU factorization with dual thresholding
|
||||
*
|
||||
* Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport.
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/IterativeLinearSolvers>
|
||||
* \endcode
|
||||
\code
|
||||
#include <Eigen/IterativeLinearSolvers>
|
||||
\endcode
|
||||
*/
|
||||
|
||||
#include "src/IterativeLinearSolvers/SolveWithGuess.h"
|
||||
#include "src/IterativeLinearSolvers/IterativeSolverBase.h"
|
||||
#include "src/IterativeLinearSolvers/BasicPreconditioners.h"
|
||||
#include "src/IterativeLinearSolvers/ConjugateGradient.h"
|
||||
#include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h"
|
||||
#include "src/IterativeLinearSolvers/BiCGSTAB.h"
|
||||
#include "src/IterativeLinearSolvers/IncompleteLUT.h"
|
||||
|
||||
|
||||
@@ -11,9 +11,9 @@
|
||||
* - \ref SparseQR_Module
|
||||
* - \ref IterativeLinearSolvers_Module
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/Sparse>
|
||||
* \endcode
|
||||
\code
|
||||
#include <Eigen/Sparse>
|
||||
\endcode
|
||||
*/
|
||||
|
||||
#include "SparseCore"
|
||||
|
||||
@@ -226,6 +226,11 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
/** \internal
|
||||
* Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U.
|
||||
@@ -309,9 +314,9 @@ template<> struct ldlt_inplace<Lower>
|
||||
}
|
||||
|
||||
// In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot
|
||||
// was smaller than the cutoff value. However, soince LDLT is not rank-revealing
|
||||
// we should only make sure we do not introduce INF or NaN values.
|
||||
// LAPACK also uses 0 as the cutoff value.
|
||||
// was smaller than the cutoff value. However, since LDLT is not rank-revealing
|
||||
// we should only make sure that we do not introduce INF or NaN values.
|
||||
// Remark that LAPACK also uses 0 as the cutoff value.
|
||||
RealScalar realAkk = numext::real(mat.coeffRef(k,k));
|
||||
if((rs>0) && (abs(realAkk) > RealScalar(0)))
|
||||
A21 /= realAkk;
|
||||
@@ -424,6 +429,8 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
|
||||
template<typename MatrixType, int _UpLo>
|
||||
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
eigen_assert(a.rows()==a.cols());
|
||||
const Index size = a.rows();
|
||||
|
||||
@@ -447,7 +454,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
|
||||
*/
|
||||
template<typename MatrixType, int _UpLo>
|
||||
template<typename Derived>
|
||||
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename NumTraits<typename MatrixType::Scalar>::Real& sigma)
|
||||
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename LDLT<MatrixType,_UpLo>::RealScalar& sigma)
|
||||
{
|
||||
typedef typename TranspositionType::StorageIndex IndexType;
|
||||
const Index size = w.rows();
|
||||
@@ -490,9 +497,9 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) cons
|
||||
const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD());
|
||||
// In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon
|
||||
// as motivated by LAPACK's xGELSS:
|
||||
// RealScalar tolerance = numext::maxi(vectorD.array().abs().maxCoeff() *NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
|
||||
// RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
|
||||
// However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
|
||||
// diagonal element is not well justified and to numerical issues in some cases.
|
||||
// diagonal element is not well justified and leads to numerical issues in some cases.
|
||||
// Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
|
||||
RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
|
||||
|
||||
|
||||
@@ -170,6 +170,12 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
/** \internal
|
||||
* Used to compute and store L
|
||||
* The strict upper part is not used and even not initialized.
|
||||
@@ -377,6 +383,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
|
||||
template<typename MatrixType, int _UpLo>
|
||||
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
eigen_assert(a.rows()==a.cols());
|
||||
const Index size = a.rows();
|
||||
m_matrix.resize(size, size);
|
||||
|
||||
@@ -60,7 +60,7 @@ template<> struct mkl_llt<EIGTYPE> \
|
||||
lda = m.outerStride(); \
|
||||
\
|
||||
info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \
|
||||
info = (info==0) ? Success : NumericalIssue; \
|
||||
info = (info==0) ? -1 : info>0 ? info-1 : size; \
|
||||
return info; \
|
||||
} \
|
||||
}; \
|
||||
|
||||
@@ -277,6 +277,7 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
if(!x_cd)
|
||||
{
|
||||
this->m_info = NumericalIssue;
|
||||
return;
|
||||
}
|
||||
// TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
|
||||
dest = Matrix<Scalar,Dest::RowsAtCompileTime,Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),b.rows(),b.cols());
|
||||
@@ -298,6 +299,7 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
if(!x_cs)
|
||||
{
|
||||
this->m_info = NumericalIssue;
|
||||
return;
|
||||
}
|
||||
// TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
|
||||
dest = viewAsEigen<DestScalar,DestOptions,DestIndex>(*x_cs);
|
||||
@@ -367,7 +369,7 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
|
||||
CholmodSimplicialLLT(const MatrixType& matrix) : Base()
|
||||
{
|
||||
init();
|
||||
compute(matrix);
|
||||
this->compute(matrix);
|
||||
}
|
||||
|
||||
~CholmodSimplicialLLT() {}
|
||||
@@ -414,7 +416,7 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
|
||||
CholmodSimplicialLDLT(const MatrixType& matrix) : Base()
|
||||
{
|
||||
init();
|
||||
compute(matrix);
|
||||
this->compute(matrix);
|
||||
}
|
||||
|
||||
~CholmodSimplicialLDLT() {}
|
||||
@@ -459,7 +461,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
|
||||
CholmodSupernodalLLT(const MatrixType& matrix) : Base()
|
||||
{
|
||||
init();
|
||||
compute(matrix);
|
||||
this->compute(matrix);
|
||||
}
|
||||
|
||||
~CholmodSupernodalLLT() {}
|
||||
@@ -506,7 +508,7 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom
|
||||
CholmodDecomposition(const MatrixType& matrix) : Base()
|
||||
{
|
||||
init();
|
||||
compute(matrix);
|
||||
this->compute(matrix);
|
||||
}
|
||||
|
||||
~CholmodDecomposition() {}
|
||||
|
||||
@@ -24,6 +24,9 @@ namespace Eigen {
|
||||
* API for the %Matrix class provides easy access to linear-algebra
|
||||
* operations.
|
||||
*
|
||||
* See documentation of class Matrix for detailed information on the template parameters
|
||||
* storage layout.
|
||||
*
|
||||
* This class can be extended with the help of the plugin mechanism described on the page
|
||||
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
|
||||
*
|
||||
@@ -74,7 +77,7 @@ class Array
|
||||
{
|
||||
return Base::operator=(other);
|
||||
}
|
||||
|
||||
|
||||
/** Set all the entries to \a value.
|
||||
* \sa DenseBase::setConstant(), DenseBase::fill()
|
||||
*/
|
||||
@@ -101,7 +104,7 @@ class Array
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array& operator=(const ArrayBase<OtherDerived>& other)
|
||||
EIGEN_STRONG_INLINE Array& operator=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
return Base::_set(other);
|
||||
}
|
||||
@@ -145,6 +148,7 @@ class Array
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
Array(Array&& other)
|
||||
: Base(std::move(other))
|
||||
{
|
||||
@@ -152,6 +156,7 @@ class Array
|
||||
if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
|
||||
Base::_set_noalias(other);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
Array& operator=(Array&& other)
|
||||
{
|
||||
other.swap(*this);
|
||||
@@ -220,43 +225,18 @@ class Array
|
||||
m_storage.data()[3] = val3;
|
||||
}
|
||||
|
||||
/** Constructor copying the value of the expression \a other */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const ArrayBase<OtherDerived>& other)
|
||||
: Base(other.rows() * other.cols(), other.rows(), other.cols())
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::_set_noalias(other);
|
||||
}
|
||||
/** Copy constructor */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const Array& other)
|
||||
: Base(other.rows() * other.cols(), other.rows(), other.cols())
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::_set_noalias(other);
|
||||
}
|
||||
/** Copy constructor with in-place evaluation */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const ReturnByValue<OtherDerived>& other)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::resize(other.rows(), other.cols());
|
||||
other.evalTo(*this);
|
||||
}
|
||||
: Base(other)
|
||||
{ }
|
||||
|
||||
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other)
|
||||
: Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::_resize_to_match(other);
|
||||
*this = other;
|
||||
}
|
||||
: Base(other.derived())
|
||||
{ }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
|
||||
|
||||
@@ -83,22 +83,10 @@ template<typename Derived> class ArrayBase
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \internal the plain matrix type corresponding to this expression. Note that is not necessarily
|
||||
* exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const
|
||||
* reference to a matrix, not a matrix! It is however guaranteed that the return type of eval() is either
|
||||
* PlainObject or const PlainObject&.
|
||||
*/
|
||||
typedef Array<typename internal::traits<Derived>::Scalar,
|
||||
internal::traits<Derived>::RowsAtCompileTime,
|
||||
internal::traits<Derived>::ColsAtCompileTime,
|
||||
AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
|
||||
internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
internal::traits<Derived>::MaxColsAtCompileTime
|
||||
> PlainObject;
|
||||
|
||||
typedef typename Base::PlainObject PlainObject;
|
||||
|
||||
/** \internal Represents a matrix with all coefficients equal to one another*/
|
||||
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
|
||||
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase
|
||||
|
||||
@@ -52,7 +52,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
|
||||
typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
||||
@@ -149,7 +149,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index,Index)*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }
|
||||
void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); }
|
||||
|
||||
protected:
|
||||
NestedExpressionType m_expression;
|
||||
@@ -195,10 +195,10 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
|
||||
typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {}
|
||||
explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_expression.rows(); }
|
||||
@@ -288,7 +288,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index,Index)*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }
|
||||
void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); }
|
||||
|
||||
protected:
|
||||
NestedExpressionType m_expression;
|
||||
|
||||
@@ -28,18 +28,22 @@ template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
|
||||
struct copy_using_evaluator_traits
|
||||
{
|
||||
typedef typename DstEvaluator::XprType Dst;
|
||||
typedef typename Dst::Scalar DstScalar;
|
||||
// TODO distinguish between linear traversal and inner-traversals
|
||||
typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type PacketType;
|
||||
|
||||
enum {
|
||||
DstFlags = DstEvaluator::Flags,
|
||||
SrcFlags = SrcEvaluator::Flags
|
||||
SrcFlags = SrcEvaluator::Flags,
|
||||
RequiredAlignment = unpacket_traits<PacketType>::alignment
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
DstIsAligned = DstFlags & AlignedBit,
|
||||
DstAlignment = DstEvaluator::Alignment,
|
||||
SrcAlignment = SrcEvaluator::Alignment,
|
||||
DstHasDirectAccess = DstFlags & DirectAccessBit,
|
||||
SrcIsAligned = SrcFlags & AlignedBit,
|
||||
JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned
|
||||
JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
|
||||
};
|
||||
|
||||
private:
|
||||
@@ -51,7 +55,7 @@ private:
|
||||
: int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
|
||||
: int(Dst::MaxRowsAtCompileTime),
|
||||
MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
|
||||
PacketSize = packet_traits<typename Dst::Scalar>::size
|
||||
PacketSize = unpacket_traits<PacketType>::size
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -62,10 +66,10 @@ private:
|
||||
&& (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
|
||||
&& (functor_traits<AssignFunc>::PacketAccess),
|
||||
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
|
||||
&& int(DstIsAligned) && int(SrcIsAligned),
|
||||
&& int(JointAlignment)>=int(RequiredAlignment),
|
||||
MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
|
||||
MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
|
||||
&& (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
|
||||
&& ((int(DstAlignment)>=int(RequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
|
||||
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
||||
so it's only good for large enough sizes. */
|
||||
MaySliceVectorize = MightVectorize && DstHasDirectAccess
|
||||
@@ -107,8 +111,8 @@ public:
|
||||
: int(NoUnrolling)
|
||||
)
|
||||
: int(Traversal) == int(LinearVectorizedTraversal)
|
||||
? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling)
|
||||
: int(NoUnrolling) )
|
||||
? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(RequiredAlignment)) ? int(CompleteUnrolling)
|
||||
: int(NoUnrolling) )
|
||||
: int(Traversal) == int(LinearTraversal)
|
||||
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
|
||||
: int(NoUnrolling) )
|
||||
@@ -124,8 +128,9 @@ public:
|
||||
EIGEN_DEBUG_VAR(DstFlags)
|
||||
EIGEN_DEBUG_VAR(SrcFlags)
|
||||
std::cerr.unsetf(std::ios::hex);
|
||||
EIGEN_DEBUG_VAR(DstIsAligned)
|
||||
EIGEN_DEBUG_VAR(SrcIsAligned)
|
||||
EIGEN_DEBUG_VAR(DstAlignment)
|
||||
EIGEN_DEBUG_VAR(SrcAlignment)
|
||||
EIGEN_DEBUG_VAR(RequiredAlignment)
|
||||
EIGEN_DEBUG_VAR(JointAlignment)
|
||||
EIGEN_DEBUG_VAR(InnerSize)
|
||||
EIGEN_DEBUG_VAR(InnerMaxSize)
|
||||
@@ -225,6 +230,7 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
|
||||
// FIXME: this is not very clean, perhaps this information should be provided by the kernel?
|
||||
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
|
||||
typedef typename DstEvaluatorType::XprType DstXprType;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
|
||||
enum {
|
||||
outer = Index / DstXprType::InnerSizeAtCompileTime,
|
||||
@@ -234,8 +240,8 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
|
||||
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
kernel.template assignPacketByOuterInner<Aligned, JointAlignment>(outer, inner);
|
||||
enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
|
||||
kernel.template assignPacketByOuterInner<Aligned, JointAlignment, PacketType>(outer, inner);
|
||||
enum { NextIndex = Index + unpacket_traits<PacketType>::size };
|
||||
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
|
||||
}
|
||||
};
|
||||
@@ -249,10 +255,11 @@ struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
|
||||
template<typename Kernel, int Index_, int Stop>
|
||||
struct copy_using_evaluator_innervec_InnerUnrolling
|
||||
{
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
|
||||
{
|
||||
kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, Index_);
|
||||
enum { NextIndex = Index_ + packet_traits<typename Kernel::Scalar>::size };
|
||||
kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, Index_);
|
||||
enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
|
||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
|
||||
}
|
||||
};
|
||||
@@ -360,20 +367,23 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
const Index size = kernel.size();
|
||||
typedef packet_traits<typename Kernel::Scalar> PacketTraits;
|
||||
typedef typename Kernel::Scalar Scalar;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
enum {
|
||||
packetSize = PacketTraits::size,
|
||||
dstIsAligned = int(Kernel::AssignmentTraits::DstIsAligned),
|
||||
dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned,
|
||||
requestedAlignment = Kernel::AssignmentTraits::RequiredAlignment,
|
||||
packetSize = unpacket_traits<PacketType>::size,
|
||||
dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
|
||||
dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
|
||||
: int(Kernel::AssignmentTraits::DstAlignment),
|
||||
srcAlignment = Kernel::AssignmentTraits::JointAlignment
|
||||
};
|
||||
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&kernel.dstEvaluator().coeffRef(0), size);
|
||||
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(&kernel.dstEvaluator().coeffRef(0), size);
|
||||
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
|
||||
|
||||
unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
|
||||
|
||||
for(Index index = alignedStart; index < alignedEnd; index += packetSize)
|
||||
kernel.template assignPacket<dstAlignment, srcAlignment>(index);
|
||||
kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
|
||||
|
||||
unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
|
||||
}
|
||||
@@ -403,14 +413,15 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
|
||||
{
|
||||
const Index innerSize = kernel.innerSize();
|
||||
const Index outerSize = kernel.outerSize();
|
||||
const Index packetSize = packet_traits<typename Kernel::Scalar>::size;
|
||||
const Index packetSize = unpacket_traits<PacketType>::size;
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
for(Index inner = 0; inner < innerSize; inner+=packetSize)
|
||||
kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, inner);
|
||||
kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, inner);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -471,18 +482,27 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
|
||||
{
|
||||
typedef packet_traits<typename Kernel::Scalar> PacketTraits;
|
||||
typedef typename Kernel::Scalar Scalar;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
enum {
|
||||
packetSize = PacketTraits::size,
|
||||
alignable = PacketTraits::AlignedOnScalar,
|
||||
dstAlignment = alignable ? Aligned : int(Kernel::AssignmentTraits::DstIsAligned)
|
||||
packetSize = unpacket_traits<PacketType>::size,
|
||||
requestedAlignment = int(Kernel::AssignmentTraits::RequiredAlignment),
|
||||
alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
|
||||
dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
|
||||
dstAlignment = alignable ? int(requestedAlignment)
|
||||
: int(Kernel::AssignmentTraits::DstAlignment)
|
||||
};
|
||||
const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0);
|
||||
if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0)
|
||||
{
|
||||
// the pointer is not aligend-on scalar, so alignment is not possible
|
||||
return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
|
||||
}
|
||||
const Index packetAlignedMask = packetSize - 1;
|
||||
const Index innerSize = kernel.innerSize();
|
||||
const Index outerSize = kernel.outerSize();
|
||||
const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
|
||||
Index alignedStart = ((!alignable) || Kernel::AssignmentTraits::DstIsAligned) ? 0
|
||||
: internal::first_aligned(&kernel.dstEvaluator().coeffRef(0,0), innerSize);
|
||||
Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
|
||||
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
{
|
||||
@@ -493,7 +513,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
||||
|
||||
// do the vectorizable part of the assignment
|
||||
for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
|
||||
kernel.template assignPacketByOuterInner<dstAlignment, Unaligned>(outer, inner);
|
||||
kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
|
||||
|
||||
// do the non-vectorizable part of the assignment
|
||||
for(Index inner = alignedEnd; inner<innerSize ; ++inner)
|
||||
@@ -527,6 +547,7 @@ public:
|
||||
typedef typename DstEvaluatorType::Scalar Scalar;
|
||||
typedef typename DstEvaluatorType::StorageIndex StorageIndex;
|
||||
typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
|
||||
typedef typename AssignmentTraits::PacketType PacketType;
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
|
||||
@@ -571,24 +592,24 @@ public:
|
||||
}
|
||||
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
EIGEN_DEVICE_FUNC void assignPacket(Index row, Index col)
|
||||
{
|
||||
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode>(row,col));
|
||||
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
|
||||
}
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
EIGEN_DEVICE_FUNC void assignPacket(Index index)
|
||||
{
|
||||
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode>(index));
|
||||
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
|
||||
}
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
EIGEN_DEVICE_FUNC void assignPacketByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
Index row = rowIndexByOuterInner(outer, inner);
|
||||
Index col = colIndexByOuterInner(outer, inner);
|
||||
assignPacket<StoreMode,LoadMode>(row, col);
|
||||
assignPacket<StoreMode,LoadMode,PacketType>(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC static Index rowIndexByOuterInner(Index outer, Index inner)
|
||||
@@ -626,8 +647,8 @@ EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const S
|
||||
{
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
|
||||
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
|
||||
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
|
||||
typedef evaluator<DstXprType> DstEvaluatorType;
|
||||
typedef evaluator<SrcXprType> SrcEvaluatorType;
|
||||
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
SrcEvaluatorType srcEvaluator(src);
|
||||
@@ -749,6 +770,26 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src)
|
||||
call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>());
|
||||
}
|
||||
|
||||
template<typename Dst, typename Src, typename Func>
|
||||
EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
// TODO check whether this is the right place to perform these checks:
|
||||
EIGEN_STATIC_ASSERT_LVALUE(Dst)
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
|
||||
|
||||
Assignment<Dst,Src,Func>::run(dst, src, func);
|
||||
}
|
||||
template<typename Dst, typename Src>
|
||||
EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
|
||||
{
|
||||
call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>());
|
||||
}
|
||||
|
||||
// forward declaration
|
||||
template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
|
||||
|
||||
@@ -776,7 +817,6 @@ struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar>
|
||||
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
|
||||
{
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
|
||||
src.evalTo(dst);
|
||||
}
|
||||
};
|
||||
|
||||
254
Eigen/src/Core/Assign_MKL.h
Normal file → Executable file
254
Eigen/src/Core/Assign_MKL.h
Normal file → Executable file
@@ -1,6 +1,7 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation. All rights reserved.
|
||||
|
||||
Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
@@ -37,17 +38,13 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Op> struct vml_call
|
||||
{ enum { IsSupported = 0 }; };
|
||||
|
||||
template<typename Dst, typename Src, typename UnaryOp>
|
||||
template<typename Dst, typename Src>
|
||||
class vml_assign_traits
|
||||
{
|
||||
private:
|
||||
enum {
|
||||
DstHasDirectAccess = Dst::Flags & DirectAccessBit,
|
||||
SrcHasDirectAccess = Src::Flags & DirectAccessBit,
|
||||
|
||||
StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
|
||||
InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
|
||||
: int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
|
||||
@@ -57,165 +54,118 @@ class vml_assign_traits
|
||||
: int(Dst::MaxRowsAtCompileTime),
|
||||
MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
|
||||
|
||||
MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
|
||||
&& Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
|
||||
MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
|
||||
MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
|
||||
VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
|
||||
LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
|
||||
MayEnableVml = MightEnableVml && LargeEnough,
|
||||
MayLinearize = MayEnableVml && MightLinearize
|
||||
LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD
|
||||
};
|
||||
public:
|
||||
enum {
|
||||
Traversal = MayLinearize ? LinearVectorizedTraversal
|
||||
: MayEnableVml ? InnerVectorizedTraversal
|
||||
: DefaultTraversal
|
||||
EnableVml = MightEnableVml && LargeEnough,
|
||||
Traversal = MightLinearize ? LinearTraversal : DefaultTraversal
|
||||
};
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
|
||||
int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
|
||||
struct vml_assign_impl
|
||||
: assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
|
||||
{
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
|
||||
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
|
||||
{
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
|
||||
{
|
||||
// in case we want to (or have to) skip VML at runtime we can call:
|
||||
// assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
|
||||
const Index innerSize = dst.innerSize();
|
||||
const Index outerSize = dst.outerSize();
|
||||
for(Index outer = 0; outer < outerSize; ++outer) {
|
||||
const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :
|
||||
&(src.nestedExpression().coeffRef(0, outer));
|
||||
Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
|
||||
vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
|
||||
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
|
||||
{
|
||||
static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
|
||||
{
|
||||
// in case we want to (or have to) skip VML at runtime we can call:
|
||||
// assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
|
||||
vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
|
||||
}
|
||||
};
|
||||
|
||||
// Macroses
|
||||
|
||||
#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \
|
||||
template<typename Derived1, typename Derived2, typename UnaryOp> \
|
||||
struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> { \
|
||||
static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \
|
||||
vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)
|
||||
|
||||
|
||||
#define EIGEN_PP_EXPAND(ARG) ARG
|
||||
#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
|
||||
#define EIGEN_MKL_VML_MODE VML_HA
|
||||
#define EIGEN_VMLMODE_EXPAND_LA , VML_HA
|
||||
#else
|
||||
#define EIGEN_MKL_VML_MODE VML_LA
|
||||
#define EIGEN_VMLMODE_EXPAND_LA , VML_LA
|
||||
#endif
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
|
||||
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
|
||||
enum { IsSupported = 1 }; \
|
||||
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \
|
||||
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
|
||||
VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \
|
||||
} \
|
||||
#define EIGEN_VMLMODE_EXPAND__
|
||||
|
||||
#define EIGEN_VMLMODE_PREFIX_LA vm
|
||||
#define EIGEN_VMLMODE_PREFIX__ v
|
||||
#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_,VMLMODE)
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
|
||||
template< typename DstXprType, typename SrcXprNested> \
|
||||
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE>, \
|
||||
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml,EIGENTYPE>::type> { \
|
||||
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE> &/*func*/) { \
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
|
||||
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
|
||||
VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
|
||||
(VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \
|
||||
} else { \
|
||||
const Index outerSize = dst.outerSize(); \
|
||||
for(Index outer = 0; outer < outerSize; ++outer) { \
|
||||
const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \
|
||||
&(src.nestedExpression().coeffRef(0, outer)); \
|
||||
EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
|
||||
VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, \
|
||||
(VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}; \
|
||||
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE)
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE)
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
|
||||
|
||||
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin, Sin, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin, Asin, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh, Sinh, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos, Cos, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos, Acos, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh, Cosh, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan, Tan, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan, Atan, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh, Tanh, LA)
|
||||
// EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs, _)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp, Exp, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log, Ln, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt, Sqrt, _)
|
||||
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr, _)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg, _)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round, _)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor, _)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
|
||||
template< typename DstXprType, typename SrcXprNested> \
|
||||
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE>, \
|
||||
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml,EIGENTYPE>::type> { \
|
||||
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE> &/*func*/) { \
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
|
||||
VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.functor().m_exponent); \
|
||||
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \
|
||||
{ \
|
||||
VMLOP( dst.size(), (const VMLTYPE*)src.nestedExpression().data(), exponent, \
|
||||
(VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \
|
||||
} else { \
|
||||
const Index outerSize = dst.outerSize(); \
|
||||
for(Index outer = 0; outer < outerSize; ++outer) { \
|
||||
const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \
|
||||
&(src.nestedExpression().coeffRef(0, outer)); \
|
||||
EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
|
||||
VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \
|
||||
(VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
};
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
|
||||
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
|
||||
enum { IsSupported = 1 }; \
|
||||
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \
|
||||
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
|
||||
MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
|
||||
VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \
|
||||
} \
|
||||
};
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
|
||||
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
|
||||
enum { IsSupported = 1 }; \
|
||||
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \
|
||||
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
|
||||
EIGENTYPE exponent = func.m_exponent; \
|
||||
MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
|
||||
VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \
|
||||
(VMLTYPE*)dst, &vmlMode); \
|
||||
} \
|
||||
};
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)
|
||||
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
|
||||
|
||||
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(atan, Atan)
|
||||
//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)
|
||||
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
|
||||
|
||||
// The vm*powx functions are not avaibale in the windows version of MKL.
|
||||
#ifndef _WIN32
|
||||
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
|
||||
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
|
||||
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
|
||||
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
|
||||
#endif
|
||||
|
||||
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float, float, LA)
|
||||
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double, double, LA)
|
||||
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8, LA)
|
||||
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA)
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
@@ -55,7 +55,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
|
||||
typedef typename traits<XprType>::Scalar Scalar;
|
||||
typedef typename traits<XprType>::StorageKind StorageKind;
|
||||
typedef typename traits<XprType>::XprKind XprKind;
|
||||
typedef typename nested<XprType>::type XprTypeNested;
|
||||
typedef typename ref_selector<XprType>::type XprTypeNested;
|
||||
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
|
||||
enum{
|
||||
MatrixRows = traits<XprType>::RowsAtCompileTime,
|
||||
@@ -81,14 +81,16 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
|
||||
OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
|
||||
? int(outer_stride_at_compile_time<XprType>::ret)
|
||||
: int(inner_stride_at_compile_time<XprType>::ret),
|
||||
// IsAligned is needed by MapBase's assertions
|
||||
// We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
|
||||
IsAligned = 0,
|
||||
|
||||
// FIXME, this traits is rather specialized for dense object and it needs to be cleaned further
|
||||
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
|
||||
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
|
||||
Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit
|
||||
Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit,
|
||||
// FIXME DirectAccessBit should not be handled by expressions
|
||||
//
|
||||
// Alignment is needed by MapBase's assertions
|
||||
// We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
|
||||
Alignment = 0
|
||||
};
|
||||
};
|
||||
|
||||
@@ -124,26 +126,26 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
|
||||
/** Fixed-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Block(XprType& xpr, Index a_startRow, Index a_startCol)
|
||||
: Impl(xpr, a_startRow, a_startCol)
|
||||
inline Block(XprType& xpr, Index startRow, Index startCol)
|
||||
: Impl(xpr, startRow, startCol)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
|
||||
eigen_assert(a_startRow >= 0 && BlockRows >= 1 && a_startRow + BlockRows <= xpr.rows()
|
||||
&& a_startCol >= 0 && BlockCols >= 1 && a_startCol + BlockCols <= xpr.cols());
|
||||
eigen_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows()
|
||||
&& startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols());
|
||||
}
|
||||
|
||||
/** Dynamic-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Block(XprType& xpr,
|
||||
Index a_startRow, Index a_startCol,
|
||||
Index startRow, Index startCol,
|
||||
Index blockRows, Index blockCols)
|
||||
: Impl(xpr, a_startRow, a_startCol, blockRows, blockCols)
|
||||
: Impl(xpr, startRow, startCol, blockRows, blockCols)
|
||||
{
|
||||
eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
|
||||
&& (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
|
||||
eigen_assert(a_startRow >= 0 && blockRows >= 0 && a_startRow <= xpr.rows() - blockRows
|
||||
&& a_startCol >= 0 && blockCols >= 0 && a_startCol <= xpr.cols() - blockCols);
|
||||
eigen_assert(startRow >= 0 && blockRows >= 0 && startRow <= xpr.rows() - blockRows
|
||||
&& startCol >= 0 && blockCols >= 0 && startCol <= xpr.cols() - blockCols);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -159,10 +161,10 @@ class BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, Dense>
|
||||
typedef Impl Base;
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
|
||||
EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
|
||||
EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol) : Impl(xpr, a_startRow, a_startCol) {}
|
||||
EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {}
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol, Index blockRows, Index blockCols)
|
||||
: Impl(xpr, a_startRow, a_startCol, blockRows, blockCols) {}
|
||||
inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
|
||||
: Impl(xpr, startRow, startCol, blockRows, blockCols) {}
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
@@ -198,8 +200,8 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
/** Fixed-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr, Index a_startRow, Index a_startCol)
|
||||
: m_xpr(xpr), m_startRow(a_startRow), m_startCol(a_startCol),
|
||||
inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
|
||||
: m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
|
||||
m_blockRows(BlockRows), m_blockCols(BlockCols)
|
||||
{}
|
||||
|
||||
@@ -207,9 +209,9 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr,
|
||||
Index a_startRow, Index a_startCol,
|
||||
Index startRow, Index startCol,
|
||||
Index blockRows, Index blockCols)
|
||||
: m_xpr(xpr), m_startRow(a_startRow), m_startCol(a_startCol),
|
||||
: m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
|
||||
m_blockRows(blockRows), m_blockCols(blockCols)
|
||||
{}
|
||||
|
||||
|
||||
@@ -80,7 +80,7 @@ struct any_unroller<Derived, Dynamic>
|
||||
template<typename Derived>
|
||||
inline bool DenseBase<Derived>::all() const
|
||||
{
|
||||
typedef typename internal::evaluator<Derived>::type Evaluator;
|
||||
typedef internal::evaluator<Derived> Evaluator;
|
||||
enum {
|
||||
unroll = SizeAtCompileTime != Dynamic
|
||||
&& Evaluator::CoeffReadCost != Dynamic
|
||||
@@ -106,7 +106,7 @@ inline bool DenseBase<Derived>::all() const
|
||||
template<typename Derived>
|
||||
inline bool DenseBase<Derived>::any() const
|
||||
{
|
||||
typedef typename internal::evaluator<Derived>::type Evaluator;
|
||||
typedef internal::evaluator<Derived> Evaluator;
|
||||
enum {
|
||||
unroll = SizeAtCompileTime != Dynamic
|
||||
&& Evaluator::CoeffReadCost != Dynamic
|
||||
|
||||
@@ -105,6 +105,9 @@ struct CommaInitializer
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ~CommaInitializer()
|
||||
#if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS
|
||||
throw(Eigen::eigen_assert_exception)
|
||||
#endif
|
||||
{
|
||||
eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows()
|
||||
&& m_col == m_xpr.cols()
|
||||
|
||||
@@ -28,13 +28,9 @@ struct storage_kind_to_evaluator_kind {
|
||||
// It can be Dense, Sparse, Triangular, Diagonal, SelfAdjoint, Band, etc.
|
||||
template<typename StorageKind> struct storage_kind_to_shape;
|
||||
|
||||
|
||||
template<> struct storage_kind_to_shape<Dense> { typedef DenseShape Shape; };
|
||||
|
||||
|
||||
// FIXME Is this necessary? And why was it not before refactoring???
|
||||
template<> struct storage_kind_to_shape<PermutationStorage> { typedef PermutationShape Shape; };
|
||||
|
||||
template<> struct storage_kind_to_shape<Dense> { typedef DenseShape Shape; };
|
||||
template<> struct storage_kind_to_shape<PermutationStorage> { typedef PermutationShape Shape; };
|
||||
template<> struct storage_kind_to_shape<TranspositionsStorage> { typedef TranspositionsShape Shape; };
|
||||
|
||||
// Evaluators have to be specialized with respect to various criteria such as:
|
||||
// - storage/structure/shape
|
||||
@@ -63,11 +59,6 @@ template< typename T,
|
||||
template<typename T>
|
||||
struct evaluator_traits_base
|
||||
{
|
||||
// TODO check whether these two indirections are really needed.
|
||||
// Basically, if nobody overwrite type and nestedType, then, they can be dropped
|
||||
// typedef evaluator<T> type;
|
||||
// typedef evaluator<T> nestedType;
|
||||
|
||||
// by default, get evaluator kind and shape from storage
|
||||
typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind;
|
||||
typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape;
|
||||
@@ -94,27 +85,28 @@ struct evaluator : public unary_evaluator<T>
|
||||
|
||||
|
||||
// TODO: Think about const-correctness
|
||||
|
||||
template<typename T>
|
||||
struct evaluator<const T>
|
||||
: evaluator<T>
|
||||
{ };
|
||||
|
||||
// ---------- base class for all writable evaluators ----------
|
||||
|
||||
// TODO this class does not seem to be necessary anymore
|
||||
template<typename ExpressionType>
|
||||
struct evaluator_base
|
||||
{
|
||||
// typedef typename evaluator_traits<ExpressionType>::type type;
|
||||
// typedef typename evaluator_traits<ExpressionType>::nestedType nestedType;
|
||||
typedef evaluator<ExpressionType> type;
|
||||
typedef evaluator<ExpressionType> nestedType;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit evaluator(const T& xpr) : evaluator<T>(xpr) {}
|
||||
};
|
||||
|
||||
// ---------- base class for all evaluators ----------
|
||||
|
||||
template<typename ExpressionType>
|
||||
struct evaluator_base : public noncopyable
|
||||
{
|
||||
// FIXME is it really usefull?
|
||||
typedef typename traits<ExpressionType>::StorageIndex StorageIndex;
|
||||
|
||||
// TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices.
|
||||
typedef traits<ExpressionType> ExpressionTraits;
|
||||
|
||||
enum {
|
||||
Alignment = 0
|
||||
};
|
||||
};
|
||||
|
||||
// -------------------- Matrix and Array --------------------
|
||||
@@ -131,8 +123,6 @@ struct evaluator<PlainObjectBase<Derived> >
|
||||
typedef PlainObjectBase<Derived> PlainObjectType;
|
||||
typedef typename PlainObjectType::Scalar Scalar;
|
||||
typedef typename PlainObjectType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PlainObjectType::PacketScalar PacketScalar;
|
||||
typedef typename PlainObjectType::PacketReturnType PacketReturnType;
|
||||
|
||||
enum {
|
||||
IsRowMajor = PlainObjectType::IsRowMajor,
|
||||
@@ -141,8 +131,8 @@ struct evaluator<PlainObjectBase<Derived> >
|
||||
ColsAtCompileTime = PlainObjectType::ColsAtCompileTime,
|
||||
|
||||
CoeffReadCost = NumTraits<Scalar>::ReadCost,
|
||||
Flags = compute_matrix_evaluator_flags< Scalar,Derived::RowsAtCompileTime,Derived::ColsAtCompileTime,
|
||||
Derived::Options,Derived::MaxRowsAtCompileTime,Derived::MaxColsAtCompileTime>::ret
|
||||
Flags = traits<Derived>::EvaluatorFlags,
|
||||
Alignment = traits<Derived>::Alignment
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC evaluator()
|
||||
@@ -182,36 +172,36 @@ struct evaluator<PlainObjectBase<Derived> >
|
||||
return const_cast<Scalar*>(m_data)[index];
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
if (IsRowMajor)
|
||||
return ploadt<PacketScalar, LoadMode>(m_data + row * m_outerStride.value() + col);
|
||||
return ploadt<PacketType, LoadMode>(m_data + row * m_outerStride.value() + col);
|
||||
else
|
||||
return ploadt<PacketScalar, LoadMode>(m_data + row + col * m_outerStride.value());
|
||||
return ploadt<PacketType, LoadMode>(m_data + row + col * m_outerStride.value());
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return ploadt<PacketScalar, LoadMode>(m_data + index);
|
||||
return ploadt<PacketType, LoadMode>(m_data + index);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index row, Index col, const PacketScalar& x)
|
||||
template<int StoreMode,typename PacketType>
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
if (IsRowMajor)
|
||||
return pstoret<Scalar, PacketScalar, StoreMode>
|
||||
return pstoret<Scalar, PacketType, StoreMode>
|
||||
(const_cast<Scalar*>(m_data) + row * m_outerStride.value() + col, x);
|
||||
else
|
||||
return pstoret<Scalar, PacketScalar, StoreMode>
|
||||
return pstoret<Scalar, PacketType, StoreMode>
|
||||
(const_cast<Scalar*>(m_data) + row + col * m_outerStride.value(), x);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index index, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
return pstoret<Scalar, PacketScalar, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
|
||||
return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
|
||||
}
|
||||
|
||||
protected:
|
||||
@@ -229,7 +219,7 @@ struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
|
||||
{
|
||||
typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
|
||||
|
||||
evaluator() {}
|
||||
EIGEN_DEVICE_FUNC evaluator() {}
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m)
|
||||
: evaluator<PlainObjectBase<XprType> >(m)
|
||||
@@ -242,7 +232,7 @@ struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
|
||||
{
|
||||
typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
|
||||
|
||||
evaluator() {}
|
||||
EIGEN_DEVICE_FUNC evaluator() {}
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m)
|
||||
: evaluator<PlainObjectBase<XprType> >(m)
|
||||
@@ -259,15 +249,14 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased>
|
||||
|
||||
enum {
|
||||
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
|
||||
Flags = evaluator<ArgType>::Flags ^ RowMajorBit
|
||||
Flags = evaluator<ArgType>::Flags ^ RowMajorBit,
|
||||
Alignment = evaluator<ArgType>::Alignment
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {}
|
||||
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
@@ -289,32 +278,32 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased>
|
||||
return m_argImpl.coeffRef(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_argImpl.template packet<LoadMode>(col, row);
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(col, row);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return m_argImpl.template packet<LoadMode>(index);
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(index);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index row, Index col, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
m_argImpl.template writePacket<StoreMode>(col, row, x);
|
||||
m_argImpl.template writePacket<StoreMode,PacketType>(col, row, x);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index index, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
m_argImpl.template writePacket<StoreMode>(index, x);
|
||||
m_argImpl.template writePacket<StoreMode,PacketType>(index, x);
|
||||
}
|
||||
|
||||
protected:
|
||||
typename evaluator<ArgType>::nestedType m_argImpl;
|
||||
evaluator<ArgType> m_argImpl;
|
||||
};
|
||||
|
||||
// -------------------- CwiseNullaryOp --------------------
|
||||
@@ -335,7 +324,8 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
|
||||
& ( HereditaryBits
|
||||
| (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
|
||||
| (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
|
||||
| (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit) // FIXME EvalBeforeNestingBit should be needed anymore
|
||||
| (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit), // FIXME EvalBeforeNestingBit should be needed anymore
|
||||
Alignment = 0 // FIXME alignment should not matter here, perhaps we could set it to AlignMax??
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n)
|
||||
@@ -343,7 +333,6 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
|
||||
{ }
|
||||
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
@@ -355,16 +344,16 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
|
||||
return m_functor(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_functor.packetOp(row, col);
|
||||
return m_functor.template packetOp<Index,PacketType>(row, col);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return m_functor.packetOp(index);
|
||||
return m_functor.template packetOp<Index,PacketType>(index);
|
||||
}
|
||||
|
||||
protected:
|
||||
@@ -382,9 +371,9 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
|
||||
enum {
|
||||
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
|
||||
|
||||
Flags = evaluator<ArgType>::Flags & (
|
||||
HereditaryBits | LinearAccessBit | AlignedBit
|
||||
| (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0))
|
||||
Flags = evaluator<ArgType>::Flags
|
||||
& (HereditaryBits | LinearAccessBit | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
|
||||
Alignment = evaluator<ArgType>::Alignment
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
|
||||
@@ -393,7 +382,6 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
|
||||
{ }
|
||||
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
@@ -405,21 +393,21 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
|
||||
return m_functor(m_argImpl.coeff(index));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_functor.packetOp(m_argImpl.template packet<LoadMode>(row, col));
|
||||
return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
|
||||
return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index));
|
||||
}
|
||||
|
||||
protected:
|
||||
const UnaryOp m_functor;
|
||||
typename evaluator<ArgType>::nestedType m_argImpl;
|
||||
evaluator<ArgType> m_argImpl;
|
||||
};
|
||||
|
||||
// -------------------- CwiseBinaryOp --------------------
|
||||
@@ -451,13 +439,13 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
|
||||
Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
|
||||
HereditaryBits
|
||||
| (int(LhsFlags) & int(RhsFlags) &
|
||||
( AlignedBit
|
||||
| (StorageOrdersAgree ? LinearAccessBit : 0)
|
||||
( (StorageOrdersAgree ? LinearAccessBit : 0)
|
||||
| (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
|
||||
)
|
||||
)
|
||||
),
|
||||
Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit)
|
||||
Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
|
||||
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<Lhs>::Alignment,evaluator<Rhs>::Alignment)
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr)
|
||||
@@ -467,7 +455,6 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
|
||||
{ }
|
||||
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
@@ -479,24 +466,24 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
|
||||
return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode>(row, col),
|
||||
m_rhsImpl.template packet<LoadMode>(row, col));
|
||||
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col),
|
||||
m_rhsImpl.template packet<LoadMode,PacketType>(row, col));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode>(index),
|
||||
m_rhsImpl.template packet<LoadMode>(index));
|
||||
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index),
|
||||
m_rhsImpl.template packet<LoadMode,PacketType>(index));
|
||||
}
|
||||
|
||||
protected:
|
||||
const BinaryOp m_functor;
|
||||
typename evaluator<Lhs>::nestedType m_lhsImpl;
|
||||
typename evaluator<Rhs>::nestedType m_rhsImpl;
|
||||
evaluator<Lhs> m_lhsImpl;
|
||||
evaluator<Rhs> m_rhsImpl;
|
||||
};
|
||||
|
||||
// -------------------- CwiseUnaryView --------------------
|
||||
@@ -510,7 +497,9 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
|
||||
enum {
|
||||
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
|
||||
|
||||
Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit))
|
||||
Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)),
|
||||
|
||||
Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost...
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
|
||||
@@ -543,7 +532,7 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
|
||||
|
||||
protected:
|
||||
const UnaryOp m_unaryOp;
|
||||
typename evaluator<ArgType>::nestedType m_argImpl;
|
||||
evaluator<ArgType> m_argImpl;
|
||||
};
|
||||
|
||||
// -------------------- Map --------------------
|
||||
@@ -560,8 +549,6 @@ struct mapbase_evaluator : evaluator_base<Derived>
|
||||
typedef typename XprType::PointerType PointerType;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
|
||||
enum {
|
||||
IsRowMajor = XprType::RowsAtCompileTime,
|
||||
@@ -597,30 +584,30 @@ struct mapbase_evaluator : evaluator_base<Derived>
|
||||
return m_data[index * m_xpr.innerStride()];
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
|
||||
return internal::ploadt<PacketScalar, LoadMode>(ptr);
|
||||
return internal::ploadt<PacketType, LoadMode>(ptr);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return internal::ploadt<PacketScalar, LoadMode>(m_data + index * m_xpr.innerStride());
|
||||
return internal::ploadt<PacketType, LoadMode>(m_data + index * m_xpr.innerStride());
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index row, Index col, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
|
||||
return internal::pstoret<Scalar, PacketScalar, StoreMode>(ptr, x);
|
||||
return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index index, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
internal::pstoret<Scalar, PacketScalar, StoreMode>(m_data + index * m_xpr.innerStride(), x);
|
||||
internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_xpr.innerStride(), x);
|
||||
}
|
||||
|
||||
protected:
|
||||
@@ -634,6 +621,8 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
|
||||
{
|
||||
typedef Map<PlainObjectType, MapOptions, StrideType> XprType;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
// TODO: should check for smaller packet types once we can handle multi-sized packet types
|
||||
typedef typename packet_traits<Scalar>::type PacketScalar;
|
||||
|
||||
enum {
|
||||
InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
|
||||
@@ -645,18 +634,21 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
|
||||
HasNoInnerStride = InnerStrideAtCompileTime == 1,
|
||||
HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
|
||||
HasNoStride = HasNoInnerStride && HasNoOuterStride,
|
||||
IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
|
||||
IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
|
||||
|
||||
PacketAlignment = unpacket_traits<PacketScalar>::alignment,
|
||||
|
||||
KeepsPacketAccess = bool(HasNoInnerStride)
|
||||
&& ( bool(IsDynamicSize)
|
||||
|| HasNoOuterStride
|
||||
|| ( OuterStrideAtCompileTime!=Dynamic
|
||||
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ),
|
||||
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime) % PacketAlignment)==0 ) ),
|
||||
Flags0 = evaluator<PlainObjectType>::Flags,
|
||||
Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
|
||||
Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
|
||||
? int(Flags1) : int(Flags1 & ~LinearAccessBit),
|
||||
Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit)
|
||||
Flags1 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
|
||||
? int(Flags0) : int(Flags0 & ~LinearAccessBit),
|
||||
Flags = KeepsPacketAccess ? int(Flags1) : (int(Flags1) & ~PacketAccessBit),
|
||||
|
||||
Alignment = int(MapOptions)&int(AlignedMask)
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map)
|
||||
@@ -673,7 +665,8 @@ struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> >
|
||||
typedef Ref<PlainObjectType, RefOptions, StrideType> XprType;
|
||||
|
||||
enum {
|
||||
Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags
|
||||
Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags,
|
||||
Alignment = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Alignment
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref)
|
||||
@@ -691,7 +684,9 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
|
||||
: block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel>
|
||||
{
|
||||
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
// TODO: should check for smaller packet types once we can handle multi-sized packet types
|
||||
typedef typename packet_traits<Scalar>::type PacketScalar;
|
||||
|
||||
enum {
|
||||
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
|
||||
@@ -717,14 +712,16 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
|
||||
&& (InnerStrideAtCompileTime == 1)
|
||||
? PacketAccessBit : 0,
|
||||
|
||||
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0,
|
||||
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
|
||||
FlagsRowMajorBit = XprType::Flags&RowMajorBit,
|
||||
Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
|
||||
DirectAccessBit |
|
||||
MaskPacketAccessBit |
|
||||
MaskAlignedBit),
|
||||
Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit
|
||||
MaskPacketAccessBit),
|
||||
Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
|
||||
|
||||
PacketAlignment = unpacket_traits<PacketScalar>::alignment,
|
||||
Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
|
||||
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
|
||||
};
|
||||
typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) {}
|
||||
@@ -756,8 +753,6 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
|
||||
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
|
||||
enum {
|
||||
RowsAtCompileTime = XprType::RowsAtCompileTime
|
||||
@@ -783,35 +778,35 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
|
||||
return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_argImpl.template packet<LoadMode>(m_startRow.value() + row, m_startCol.value() + col);
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return packet<LoadMode>(RowsAtCompileTime == 1 ? 0 : index,
|
||||
RowsAtCompileTime == 1 ? index : 0);
|
||||
return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
|
||||
RowsAtCompileTime == 1 ? index : 0);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index row, Index col, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
return m_argImpl.template writePacket<StoreMode>(m_startRow.value() + row, m_startCol.value() + col, x);
|
||||
return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index index, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
return writePacket<StoreMode>(RowsAtCompileTime == 1 ? 0 : index,
|
||||
RowsAtCompileTime == 1 ? index : 0,
|
||||
x);
|
||||
return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
|
||||
RowsAtCompileTime == 1 ? index : 0,
|
||||
x);
|
||||
}
|
||||
|
||||
protected:
|
||||
typename evaluator<ArgType>::nestedType m_argImpl;
|
||||
evaluator<ArgType> m_argImpl;
|
||||
const variable_if_dynamic<Index, ArgType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
|
||||
const variable_if_dynamic<Index, ArgType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
|
||||
};
|
||||
@@ -825,12 +820,13 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc
|
||||
typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
|
||||
{
|
||||
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
|
||||
: mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
|
||||
{
|
||||
// FIXME this should be an internal assertion
|
||||
eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned");
|
||||
eigen_assert(((size_t(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
|
||||
}
|
||||
};
|
||||
|
||||
@@ -849,7 +845,9 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
|
||||
+ EIGEN_SIZE_MAX(evaluator<ThenMatrixType>::CoeffReadCost,
|
||||
evaluator<ElseMatrixType>::CoeffReadCost),
|
||||
|
||||
Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits
|
||||
Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits,
|
||||
|
||||
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment)
|
||||
};
|
||||
|
||||
inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
|
||||
@@ -877,9 +875,9 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
|
||||
}
|
||||
|
||||
protected:
|
||||
typename evaluator<ConditionMatrixType>::nestedType m_conditionImpl;
|
||||
typename evaluator<ThenMatrixType>::nestedType m_thenImpl;
|
||||
typename evaluator<ElseMatrixType>::nestedType m_elseImpl;
|
||||
evaluator<ConditionMatrixType> m_conditionImpl;
|
||||
evaluator<ThenMatrixType> m_thenImpl;
|
||||
evaluator<ElseMatrixType> m_elseImpl;
|
||||
};
|
||||
|
||||
|
||||
@@ -891,7 +889,6 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
|
||||
{
|
||||
typedef Replicate<ArgType, RowFactor, ColFactor> XprType;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
enum {
|
||||
Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor
|
||||
};
|
||||
@@ -901,7 +898,9 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
|
||||
enum {
|
||||
CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
|
||||
|
||||
Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit)
|
||||
Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit),
|
||||
|
||||
Alignment = evaluator<ArgTypeNestedCleaned>::Alignment
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate)
|
||||
@@ -923,9 +922,19 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
|
||||
|
||||
return m_argImpl.coeff(actual_row, actual_col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
// try to avoid using modulo; this is a pure optimization strategy
|
||||
const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
|
||||
? (ColFactor==1 ? index : index%m_cols.value())
|
||||
: (RowFactor==1 ? index : index%m_rows.value());
|
||||
|
||||
return m_argImpl.coeff(actual_index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
|
||||
: RowFactor==1 ? row
|
||||
@@ -934,12 +943,22 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
|
||||
: ColFactor==1 ? col
|
||||
: col % m_cols.value();
|
||||
|
||||
return m_argImpl.template packet<LoadMode>(actual_row, actual_col);
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(actual_row, actual_col);
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
|
||||
? (ColFactor==1 ? index : index%m_cols.value())
|
||||
: (RowFactor==1 ? index : index%m_rows.value());
|
||||
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(actual_index);
|
||||
}
|
||||
|
||||
protected:
|
||||
const ArgTypeNested m_arg; // FIXME is it OK to store both the argument and its evaluator?? (we have the same situation in evaluator_product)
|
||||
typename evaluator<ArgTypeNestedCleaned>::nestedType m_argImpl;
|
||||
evaluator<ArgTypeNestedCleaned> m_argImpl;
|
||||
const variable_if_dynamic<Index, ArgType::RowsAtCompileTime> m_rows;
|
||||
const variable_if_dynamic<Index, ArgType::ColsAtCompileTime> m_cols;
|
||||
};
|
||||
@@ -965,7 +984,9 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
|
||||
CoeffReadCost = TraversalSize==Dynamic ? Dynamic
|
||||
: TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
|
||||
|
||||
Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits)
|
||||
Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits),
|
||||
|
||||
Alignment = 0 // FIXME this could be improved
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType expr)
|
||||
@@ -1001,15 +1022,14 @@ struct evaluator_wrapper_base
|
||||
typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType;
|
||||
enum {
|
||||
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
|
||||
Flags = evaluator<ArgType>::Flags
|
||||
Flags = evaluator<ArgType>::Flags,
|
||||
Alignment = evaluator<ArgType>::Alignment
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
|
||||
|
||||
typedef typename ArgType::Scalar Scalar;
|
||||
typedef typename ArgType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename ArgType::PacketScalar PacketScalar;
|
||||
typedef typename ArgType::PacketReturnType PacketReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
@@ -1031,32 +1051,32 @@ struct evaluator_wrapper_base
|
||||
return m_argImpl.coeffRef(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_argImpl.template packet<LoadMode>(row, col);
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(row, col);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return m_argImpl.template packet<LoadMode>(index);
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(index);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index row, Index col, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
m_argImpl.template writePacket<StoreMode>(row, col, x);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index index, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
m_argImpl.template writePacket<StoreMode>(index, x);
|
||||
}
|
||||
|
||||
protected:
|
||||
typename evaluator<ArgType>::nestedType m_argImpl;
|
||||
evaluator<ArgType> m_argImpl;
|
||||
};
|
||||
|
||||
template<typename TArgType>
|
||||
@@ -1085,7 +1105,7 @@ struct unary_evaluator<ArrayWrapper<TArgType> >
|
||||
// -------------------- Reverse --------------------
|
||||
|
||||
// defined in Reverse.h:
|
||||
template<typename PacketScalar, bool ReversePacket> struct reverse_packet_cond;
|
||||
template<typename PacketType, bool ReversePacket> struct reverse_packet_cond;
|
||||
|
||||
template<typename ArgType, int Direction>
|
||||
struct unary_evaluator<Reverse<ArgType, Direction> >
|
||||
@@ -1094,17 +1114,12 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
|
||||
typedef Reverse<ArgType, Direction> XprType;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
|
||||
enum {
|
||||
PacketSize = internal::packet_traits<Scalar>::size,
|
||||
IsRowMajor = XprType::IsRowMajor,
|
||||
IsColMajor = !IsRowMajor,
|
||||
ReverseRow = (Direction == Vertical) || (Direction == BothDirections),
|
||||
ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),
|
||||
OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
|
||||
OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1,
|
||||
ReversePacket = (Direction == BothDirections)
|
||||
|| ((Direction == Vertical) && IsColMajor)
|
||||
|| ((Direction == Horizontal) && IsRowMajor),
|
||||
@@ -1117,9 +1132,10 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
|
||||
LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) )
|
||||
? LinearAccessBit : 0,
|
||||
|
||||
Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess)
|
||||
Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess),
|
||||
|
||||
Alignment = 0 // FIXME in some rare cases, Alignment could be preserved, like a Vector4f.
|
||||
};
|
||||
typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse)
|
||||
: m_argImpl(reverse.nestedExpression()),
|
||||
@@ -1149,38 +1165,53 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
|
||||
return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return reverse_packet::run(m_argImpl.template packet<LoadMode>(
|
||||
enum {
|
||||
PacketSize = unpacket_traits<PacketType>::size,
|
||||
OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
|
||||
OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1
|
||||
};
|
||||
typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;
|
||||
return reverse_packet::run(m_argImpl.template packet<LoadMode,PacketType>(
|
||||
ReverseRow ? m_rows.value() - row - OffsetRow : row,
|
||||
ReverseCol ? m_cols.value() - col - OffsetCol : col));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return preverse(m_argImpl.template packet<LoadMode>(m_rows.value() * m_cols.value() - index - PacketSize));
|
||||
enum { PacketSize = unpacket_traits<PacketType>::size };
|
||||
return preverse(m_argImpl.template packet<LoadMode,PacketType>(m_rows.value() * m_cols.value() - index - PacketSize));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
void writePacket(Index row, Index col, const PacketScalar& x)
|
||||
template<int LoadMode, typename PacketType>
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
// FIXME we could factorize some code with packet(i,j)
|
||||
enum {
|
||||
PacketSize = unpacket_traits<PacketType>::size,
|
||||
OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
|
||||
OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1
|
||||
};
|
||||
typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;
|
||||
m_argImpl.template writePacket<LoadMode>(
|
||||
ReverseRow ? m_rows.value() - row - OffsetRow : row,
|
||||
ReverseCol ? m_cols.value() - col - OffsetCol : col,
|
||||
reverse_packet::run(x));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
void writePacket(Index index, const PacketScalar& x)
|
||||
template<int LoadMode, typename PacketType>
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
enum { PacketSize = unpacket_traits<PacketType>::size };
|
||||
m_argImpl.template writePacket<LoadMode>
|
||||
(m_rows.value() * m_cols.value() - index - PacketSize, preverse(x));
|
||||
}
|
||||
|
||||
protected:
|
||||
typename evaluator<ArgType>::nestedType m_argImpl;
|
||||
evaluator<ArgType> m_argImpl;
|
||||
|
||||
// If we do not reverse rows, then we do not need to know the number of rows; same for columns
|
||||
const variable_if_dynamic<Index, ReverseRow ? ArgType::RowsAtCompileTime : 0> m_rows;
|
||||
@@ -1199,7 +1230,9 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
|
||||
enum {
|
||||
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
|
||||
|
||||
Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit
|
||||
Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit,
|
||||
|
||||
Alignment = 0
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal)
|
||||
@@ -1233,7 +1266,7 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
|
||||
}
|
||||
|
||||
protected:
|
||||
typename evaluator<ArgType>::nestedType m_argImpl;
|
||||
evaluator<ArgType> m_argImpl;
|
||||
const internal::variable_if_dynamicindex<Index, XprType::DiagIndex> m_index;
|
||||
|
||||
private:
|
||||
@@ -1291,15 +1324,12 @@ class EvalToTemp
|
||||
|
||||
template<typename ArgType>
|
||||
struct evaluator<EvalToTemp<ArgType> >
|
||||
: public evaluator<typename ArgType::PlainObject>::type
|
||||
: public evaluator<typename ArgType::PlainObject>
|
||||
{
|
||||
typedef EvalToTemp<ArgType> XprType;
|
||||
typedef typename ArgType::PlainObject PlainObject;
|
||||
typedef typename evaluator<PlainObject>::type Base;
|
||||
typedef evaluator<PlainObject> Base;
|
||||
|
||||
typedef evaluator type;
|
||||
typedef evaluator nestedType;
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
|
||||
: m_result(xpr.rows(), xpr.cols())
|
||||
{
|
||||
|
||||
@@ -34,7 +34,7 @@ class InnerIterator
|
||||
{
|
||||
protected:
|
||||
typedef internal::inner_iterator_selector<XprType, typename internal::evaluator_traits<XprType>::Kind> IteratorType;
|
||||
typedef typename internal::evaluator<XprType>::type EvaluatorType;
|
||||
typedef internal::evaluator<XprType> EvaluatorType;
|
||||
typedef typename internal::traits<XprType>::Scalar Scalar;
|
||||
public:
|
||||
/** Construct an iterator over the \a outerId -th row or column of \a xpr */
|
||||
@@ -74,7 +74,7 @@ template<typename XprType>
|
||||
class inner_iterator_selector<XprType, IndexBased>
|
||||
{
|
||||
protected:
|
||||
typedef typename evaluator<XprType>::type EvaluatorType;
|
||||
typedef evaluator<XprType> EvaluatorType;
|
||||
typedef typename traits<XprType>::Scalar Scalar;
|
||||
enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit };
|
||||
|
||||
@@ -112,7 +112,7 @@ class inner_iterator_selector<XprType, IteratorBased>
|
||||
{
|
||||
protected:
|
||||
typedef typename evaluator<XprType>::InnerIterator Base;
|
||||
typedef typename evaluator<XprType>::type EvaluatorType;
|
||||
typedef evaluator<XprType> EvaluatorType;
|
||||
|
||||
public:
|
||||
EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &/*innerSize*/)
|
||||
|
||||
@@ -95,8 +95,8 @@ class CwiseBinaryOp :
|
||||
BinaryOp>::ret>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp)
|
||||
|
||||
typedef typename internal::nested<LhsType>::type LhsNested;
|
||||
typedef typename internal::nested<RhsType>::type RhsNested;
|
||||
typedef typename internal::ref_selector<LhsType>::type LhsNested;
|
||||
typedef typename internal::ref_selector<RhsType>::type RhsNested;
|
||||
typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
|
||||
typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
|
||||
|
||||
|
||||
@@ -49,13 +49,13 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
CwiseNullaryOp(Index nbRows, Index nbCols, const NullaryOp& func = NullaryOp())
|
||||
: m_rows(nbRows), m_cols(nbCols), m_functor(func)
|
||||
CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp())
|
||||
: m_rows(rows), m_cols(cols), m_functor(func)
|
||||
{
|
||||
eigen_assert(nbRows >= 0
|
||||
&& (RowsAtCompileTime == Dynamic || RowsAtCompileTime == nbRows)
|
||||
&& nbCols >= 0
|
||||
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols));
|
||||
eigen_assert(rows >= 0
|
||||
&& (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
|
||||
&& cols >= 0
|
||||
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -113,10 +113,10 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename CustomNullaryOp>
|
||||
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
|
||||
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
|
||||
DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
|
||||
{
|
||||
return CwiseNullaryOp<CustomNullaryOp, Derived>(rows, cols, func);
|
||||
return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);
|
||||
}
|
||||
|
||||
/** \returns an expression of a matrix defined by a custom functor \a func
|
||||
@@ -139,12 +139,12 @@ DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename CustomNullaryOp>
|
||||
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
|
||||
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
|
||||
DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, Derived>(1, size, func);
|
||||
else return CwiseNullaryOp<CustomNullaryOp, Derived>(size, 1, func);
|
||||
if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, PlainObject>(1, size, func);
|
||||
else return CwiseNullaryOp<CustomNullaryOp, PlainObject>(size, 1, func);
|
||||
}
|
||||
|
||||
/** \returns an expression of a matrix defined by a custom functor \a func
|
||||
@@ -158,19 +158,19 @@ DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename CustomNullaryOp>
|
||||
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
|
||||
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
|
||||
DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
|
||||
{
|
||||
return CwiseNullaryOp<CustomNullaryOp, Derived>(RowsAtCompileTime, ColsAtCompileTime, func);
|
||||
return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);
|
||||
}
|
||||
|
||||
/** \returns an expression of a constant matrix of value \a value
|
||||
*
|
||||
* The parameters \a nbRows and \a nbCols are the number of rows and of columns of
|
||||
* The parameters \a rows and \a cols are the number of rows and of columns of
|
||||
* the returned matrix. Must be compatible with this DenseBase type.
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
|
||||
* it is redundant to pass \a nbRows and \a nbCols as arguments, so Zero() should be used
|
||||
* it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used
|
||||
* instead.
|
||||
*
|
||||
* The template parameter \a CustomNullaryOp is the type of the functor.
|
||||
@@ -179,9 +179,9 @@ DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Constant(Index nbRows, Index nbCols, const Scalar& value)
|
||||
DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
|
||||
{
|
||||
return DenseBase<Derived>::NullaryExpr(nbRows, nbCols, internal::scalar_constant_op<Scalar>(value));
|
||||
return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_constant_op<Scalar>(value));
|
||||
}
|
||||
|
||||
/** \returns an expression of a constant matrix of value \a value
|
||||
@@ -245,7 +245,7 @@ EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturn
|
||||
DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,false>(low,high,size));
|
||||
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,size));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -258,7 +258,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,false>(low,high,Derived::SizeAtCompileTime));
|
||||
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,Derived::SizeAtCompileTime));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -279,7 +279,7 @@ EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedRetu
|
||||
DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,true>(low,high,size));
|
||||
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar,true>(low,high,size));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -292,7 +292,7 @@ DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,true>(low,high,Derived::SizeAtCompileTime));
|
||||
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar,true>(low,high,Derived::SizeAtCompileTime));
|
||||
}
|
||||
|
||||
/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
|
||||
@@ -300,9 +300,10 @@ template<typename Derived>
|
||||
bool DenseBase<Derived>::isApproxToConstant
|
||||
(const Scalar& val, const RealScalar& prec) const
|
||||
{
|
||||
typename internal::nested_eval<Derived,1>::type self(derived());
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
for(Index i = 0; i < rows(); ++i)
|
||||
if(!internal::isApprox(this->coeff(i, j), val, prec))
|
||||
if(!internal::isApprox(self.coeff(i, j), val, prec))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
@@ -356,8 +357,8 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
|
||||
|
||||
/** Resizes to the given size, and sets all coefficients in this expression to the given \a value.
|
||||
*
|
||||
* \param nbRows the new number of rows
|
||||
* \param nbCols the new number of columns
|
||||
* \param rows the new number of rows
|
||||
* \param cols the new number of columns
|
||||
* \param val the value to which all coefficients are set
|
||||
*
|
||||
* Example: \include Matrix_setConstant_int_int.cpp
|
||||
@@ -367,9 +368,9 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setConstant(Index nbRows, Index nbCols, const Scalar& val)
|
||||
PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
|
||||
{
|
||||
resize(nbRows, nbCols);
|
||||
resize(rows, cols);
|
||||
return setConstant(val);
|
||||
}
|
||||
|
||||
@@ -390,7 +391,7 @@ template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,false>(low,high,newSize));
|
||||
return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,newSize));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -428,9 +429,9 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low,
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Zero(Index nbRows, Index nbCols)
|
||||
DenseBase<Derived>::Zero(Index rows, Index cols)
|
||||
{
|
||||
return Constant(nbRows, nbCols, Scalar(0));
|
||||
return Constant(rows, cols, Scalar(0));
|
||||
}
|
||||
|
||||
/** \returns an expression of a zero vector.
|
||||
@@ -484,9 +485,10 @@ DenseBase<Derived>::Zero()
|
||||
template<typename Derived>
|
||||
bool DenseBase<Derived>::isZero(const RealScalar& prec) const
|
||||
{
|
||||
typename internal::nested_eval<Derived,1>::type self(derived());
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
for(Index i = 0; i < rows(); ++i)
|
||||
if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<Scalar>(1), prec))
|
||||
if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<Scalar>(1), prec))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
@@ -523,8 +525,8 @@ PlainObjectBase<Derived>::setZero(Index newSize)
|
||||
|
||||
/** Resizes to the given size, and sets all coefficients in this expression to zero.
|
||||
*
|
||||
* \param nbRows the new number of rows
|
||||
* \param nbCols the new number of columns
|
||||
* \param rows the new number of rows
|
||||
* \param cols the new number of columns
|
||||
*
|
||||
* Example: \include Matrix_setZero_int_int.cpp
|
||||
* Output: \verbinclude Matrix_setZero_int_int.out
|
||||
@@ -533,9 +535,9 @@ PlainObjectBase<Derived>::setZero(Index newSize)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setZero(Index nbRows, Index nbCols)
|
||||
PlainObjectBase<Derived>::setZero(Index rows, Index cols)
|
||||
{
|
||||
resize(nbRows, nbCols);
|
||||
resize(rows, cols);
|
||||
return setConstant(Scalar(0));
|
||||
}
|
||||
|
||||
@@ -543,7 +545,7 @@ PlainObjectBase<Derived>::setZero(Index nbRows, Index nbCols)
|
||||
|
||||
/** \returns an expression of a matrix where all coefficients equal one.
|
||||
*
|
||||
* The parameters \a nbRows and \a nbCols are the number of rows and of columns of
|
||||
* The parameters \a rows and \a cols are the number of rows and of columns of
|
||||
* the returned matrix. Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
|
||||
@@ -557,9 +559,9 @@ PlainObjectBase<Derived>::setZero(Index nbRows, Index nbCols)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Ones(Index nbRows, Index nbCols)
|
||||
DenseBase<Derived>::Ones(Index rows, Index cols)
|
||||
{
|
||||
return Constant(nbRows, nbCols, Scalar(1));
|
||||
return Constant(rows, cols, Scalar(1));
|
||||
}
|
||||
|
||||
/** \returns an expression of a vector where all coefficients equal one.
|
||||
@@ -649,8 +651,8 @@ PlainObjectBase<Derived>::setOnes(Index newSize)
|
||||
|
||||
/** Resizes to the given size, and sets all coefficients in this expression to one.
|
||||
*
|
||||
* \param nbRows the new number of rows
|
||||
* \param nbCols the new number of columns
|
||||
* \param rows the new number of rows
|
||||
* \param cols the new number of columns
|
||||
*
|
||||
* Example: \include Matrix_setOnes_int_int.cpp
|
||||
* Output: \verbinclude Matrix_setOnes_int_int.out
|
||||
@@ -659,9 +661,9 @@ PlainObjectBase<Derived>::setOnes(Index newSize)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setOnes(Index nbRows, Index nbCols)
|
||||
PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
|
||||
{
|
||||
resize(nbRows, nbCols);
|
||||
resize(rows, cols);
|
||||
return setConstant(Scalar(1));
|
||||
}
|
||||
|
||||
@@ -669,7 +671,7 @@ PlainObjectBase<Derived>::setOnes(Index nbRows, Index nbCols)
|
||||
|
||||
/** \returns an expression of the identity matrix (not necessarily square).
|
||||
*
|
||||
* The parameters \a nbRows and \a nbCols are the number of rows and of columns of
|
||||
* The parameters \a rows and \a cols are the number of rows and of columns of
|
||||
* the returned matrix. Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
|
||||
@@ -683,9 +685,9 @@ PlainObjectBase<Derived>::setOnes(Index nbRows, Index nbCols)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
|
||||
MatrixBase<Derived>::Identity(Index nbRows, Index nbCols)
|
||||
MatrixBase<Derived>::Identity(Index rows, Index cols)
|
||||
{
|
||||
return DenseBase<Derived>::NullaryExpr(nbRows, nbCols, internal::scalar_identity_op<Scalar>());
|
||||
return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());
|
||||
}
|
||||
|
||||
/** \returns an expression of the identity matrix (not necessarily square).
|
||||
@@ -719,18 +721,19 @@ template<typename Derived>
|
||||
bool MatrixBase<Derived>::isIdentity
|
||||
(const RealScalar& prec) const
|
||||
{
|
||||
typename internal::nested_eval<Derived,1>::type self(derived());
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
{
|
||||
for(Index i = 0; i < rows(); ++i)
|
||||
{
|
||||
if(i == j)
|
||||
{
|
||||
if(!internal::isApprox(this->coeff(i, j), static_cast<Scalar>(1), prec))
|
||||
if(!internal::isApprox(self.coeff(i, j), static_cast<Scalar>(1), prec))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<RealScalar>(1), prec))
|
||||
if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<RealScalar>(1), prec))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -780,8 +783,8 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
|
||||
|
||||
/** \brief Resizes to the given size, and writes the identity expression (not necessarily square) into *this.
|
||||
*
|
||||
* \param nbRows the new number of rows
|
||||
* \param nbCols the new number of columns
|
||||
* \param rows the new number of rows
|
||||
* \param cols the new number of columns
|
||||
*
|
||||
* Example: \include Matrix_setIdentity_int_int.cpp
|
||||
* Output: \verbinclude Matrix_setIdentity_int_int.out
|
||||
@@ -789,9 +792,9 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
|
||||
* \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index nbRows, Index nbCols)
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
|
||||
{
|
||||
derived().resize(nbRows, nbCols);
|
||||
derived().resize(rows, cols);
|
||||
return setIdentity();
|
||||
}
|
||||
|
||||
|
||||
@@ -84,8 +84,7 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
|
||||
nestedExpression() { return m_matrix.const_cast_derived(); }
|
||||
|
||||
protected:
|
||||
// FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC
|
||||
typename internal::nested<MatrixType>::type m_matrix;
|
||||
typename internal::ref_selector<MatrixType>::type m_matrix;
|
||||
ViewOp m_functor;
|
||||
};
|
||||
|
||||
|
||||
@@ -49,6 +49,8 @@ template<typename Derived> class DenseBase
|
||||
public:
|
||||
using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
|
||||
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
|
||||
using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
|
||||
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator/;
|
||||
|
||||
|
||||
/** Inner iterator type to iterate over the coefficients of a row or column.
|
||||
@@ -66,8 +68,14 @@ template<typename Derived> class DenseBase
|
||||
*/
|
||||
typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
|
||||
|
||||
/** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc. */
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
|
||||
|
||||
/** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc.
|
||||
*
|
||||
* It is an alias for the Scalar type */
|
||||
typedef Scalar value_type;
|
||||
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
|
||||
typedef internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
|
||||
@@ -169,8 +177,39 @@ template<typename Derived> class DenseBase
|
||||
InnerStrideAtCompileTime = internal::inner_stride_at_compile_time<Derived>::ret,
|
||||
OuterStrideAtCompileTime = internal::outer_stride_at_compile_time<Derived>::ret
|
||||
};
|
||||
|
||||
typedef typename internal::find_best_packet<Scalar,SizeAtCompileTime>::type PacketScalar;
|
||||
|
||||
enum { IsPlainObjectBase = 0 };
|
||||
|
||||
/** The plain matrix type corresponding to this expression.
|
||||
* \sa PlainObject */
|
||||
typedef Matrix<typename internal::traits<Derived>::Scalar,
|
||||
internal::traits<Derived>::RowsAtCompileTime,
|
||||
internal::traits<Derived>::ColsAtCompileTime,
|
||||
AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
|
||||
internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
internal::traits<Derived>::MaxColsAtCompileTime
|
||||
> PlainMatrix;
|
||||
|
||||
/** The plain array type corresponding to this expression.
|
||||
* \sa PlainObject */
|
||||
typedef Array<typename internal::traits<Derived>::Scalar,
|
||||
internal::traits<Derived>::RowsAtCompileTime,
|
||||
internal::traits<Derived>::ColsAtCompileTime,
|
||||
AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
|
||||
internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
internal::traits<Derived>::MaxColsAtCompileTime
|
||||
> PlainArray;
|
||||
|
||||
/** \brief The plain matrix or array type corresponding to this expression.
|
||||
*
|
||||
* This is not necessarily exactly the return type of eval(). In the case of plain matrices,
|
||||
* the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
|
||||
* that the return type of eval() is either PlainObject or const PlainObject&.
|
||||
*/
|
||||
typedef typename internal::conditional<internal::is_same<typename internal::traits<Derived>::XprKind,MatrixXpr >::value,
|
||||
PlainMatrix, PlainArray>::type PlainObject;
|
||||
|
||||
/** \returns the number of nonzero coefficients which is in practice the number
|
||||
* of stored coefficients. */
|
||||
@@ -221,22 +260,21 @@ template<typename Derived> class DenseBase
|
||||
* nothing else.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index nbRows, Index nbCols)
|
||||
void resize(Index rows, Index cols)
|
||||
{
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(nbRows);
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(nbCols);
|
||||
eigen_assert(nbRows == this->rows() && nbCols == this->cols()
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(rows);
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(cols);
|
||||
eigen_assert(rows == this->rows() && cols == this->cols()
|
||||
&& "DenseBase::resize() does not actually allow to resize.");
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
/** \internal Represents a matrix with all coefficients equal to one another*/
|
||||
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
|
||||
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
|
||||
/** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */
|
||||
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,false>,Derived> SequentialLinSpacedReturnType;
|
||||
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar,false>,PlainObject> SequentialLinSpacedReturnType;
|
||||
/** \internal Represents a vector with linearly spaced coefficients that allows random access. */
|
||||
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,true>,Derived> RandomAccessLinSpacedReturnType;
|
||||
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar,true>,PlainObject> RandomAccessLinSpacedReturnType;
|
||||
/** \internal the return type of MatrixBase::eigenvalues() */
|
||||
typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
|
||||
|
||||
@@ -269,18 +307,17 @@ template<typename Derived> class DenseBase
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const ReturnByValue<OtherDerived>& func);
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** Copies \a other into *this without evaluating other. \returns a reference to *this.
|
||||
/** \ínternal
|
||||
* Copies \a other into *this without evaluating other. \returns a reference to *this.
|
||||
* \deprecated */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& lazyAssign(const DenseBase<OtherDerived>& other);
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
CommaInitializer<Derived> operator<< (const Scalar& s);
|
||||
|
||||
// TODO flagged is temporarly disabled. It seems useless now
|
||||
/** \deprecated it now returns \c *this */
|
||||
template<unsigned int Added,unsigned int Removed>
|
||||
EIGEN_DEPRECATED
|
||||
const Derived& flagged() const
|
||||
@@ -316,13 +353,13 @@ template<typename Derived> class DenseBase
|
||||
LinSpaced(const Scalar& low, const Scalar& high);
|
||||
|
||||
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
|
||||
static const CwiseNullaryOp<CustomNullaryOp, Derived>
|
||||
static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
|
||||
NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
|
||||
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
|
||||
static const CwiseNullaryOp<CustomNullaryOp, Derived>
|
||||
static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
|
||||
NullaryExpr(Index size, const CustomNullaryOp& func);
|
||||
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
|
||||
static const CwiseNullaryOp<CustomNullaryOp, Derived>
|
||||
static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
|
||||
NullaryExpr(const CustomNullaryOp& func);
|
||||
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
|
||||
@@ -368,6 +405,8 @@ template<typename Derived> class DenseBase
|
||||
*
|
||||
* Notice that in the case of a plain matrix or vector (not an expression) this function just returns
|
||||
* a const reference, in order to avoid a useless copy.
|
||||
*
|
||||
* \warning Be carefull with eval() and the auto C++ keyword, as detailed in this \link TopicPitfalls_auto_keyword page \endlink.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE EvalReturnType eval() const
|
||||
@@ -429,8 +468,7 @@ template<typename Derived> class DenseBase
|
||||
|
||||
template<typename BinaryOp>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::result_of<BinaryOp(typename internal::traits<Derived>::Scalar)>::type
|
||||
redux(const BinaryOp& func) const;
|
||||
Scalar redux(const BinaryOp& func) const;
|
||||
|
||||
template<typename Visitor>
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -456,14 +494,35 @@ template<typename Derived> class DenseBase
|
||||
typedef VectorwiseOp<Derived, Vertical> ColwiseReturnType;
|
||||
typedef const VectorwiseOp<const Derived, Vertical> ConstColwiseReturnType;
|
||||
|
||||
ConstRowwiseReturnType rowwise() const;
|
||||
RowwiseReturnType rowwise();
|
||||
ConstColwiseReturnType colwise() const;
|
||||
ColwiseReturnType colwise();
|
||||
/** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
|
||||
*
|
||||
* Example: \include MatrixBase_rowwise.cpp
|
||||
* Output: \verbinclude MatrixBase_rowwise.out
|
||||
*
|
||||
* \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
|
||||
*/
|
||||
//Code moved here due to a CUDA compiler bug
|
||||
EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const {
|
||||
return ConstRowwiseReturnType(derived());
|
||||
}
|
||||
EIGEN_DEVICE_FUNC RowwiseReturnType rowwise();
|
||||
|
||||
static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index rows, Index cols);
|
||||
static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index size);
|
||||
static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random();
|
||||
/** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
|
||||
*
|
||||
* Example: \include MatrixBase_colwise.cpp
|
||||
* Output: \verbinclude MatrixBase_colwise.out
|
||||
*
|
||||
* \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const {
|
||||
return ConstColwiseReturnType(derived());
|
||||
}
|
||||
EIGEN_DEVICE_FUNC ColwiseReturnType colwise();
|
||||
|
||||
typedef CwiseNullaryOp<internal::scalar_random_op<Scalar>,PlainObject> RandomReturnType;
|
||||
static const RandomReturnType Random(Index rows, Index cols);
|
||||
static const RandomReturnType Random(Index size);
|
||||
static const RandomReturnType Random();
|
||||
|
||||
template<typename ThenDerived,typename ElseDerived>
|
||||
const Select<Derived,ThenDerived,ElseDerived>
|
||||
@@ -481,14 +540,33 @@ template<typename Derived> class DenseBase
|
||||
template<int p> RealScalar lpNorm() const;
|
||||
|
||||
template<int RowFactor, int ColFactor>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Replicate<Derived,RowFactor,ColFactor> replicate() const;
|
||||
const Replicate<Derived,Dynamic,Dynamic> replicate(Index rowFacor,Index colFactor) const;
|
||||
/**
|
||||
* \return an expression of the replication of \c *this
|
||||
*
|
||||
* Example: \include MatrixBase_replicate_int_int.cpp
|
||||
* Output: \verbinclude MatrixBase_replicate_int_int.out
|
||||
*
|
||||
* \sa VectorwiseOp::replicate(), DenseBase::replicate<int,int>(), class Replicate
|
||||
*/
|
||||
//Code moved here due to a CUDA compiler bug
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Replicate<Derived, Dynamic, Dynamic> replicate(Index rowFactor, Index colFactor) const
|
||||
{
|
||||
return Replicate<Derived, Dynamic, Dynamic>(derived(), rowFactor, colFactor);
|
||||
}
|
||||
|
||||
typedef Reverse<Derived, BothDirections> ReverseReturnType;
|
||||
typedef const Reverse<const Derived, BothDirections> ConstReverseReturnType;
|
||||
ReverseReturnType reverse();
|
||||
ConstReverseReturnType reverse() const;
|
||||
void reverseInPlace();
|
||||
EIGEN_DEVICE_FUNC ReverseReturnType reverse();
|
||||
/** This is the const version of reverse(). */
|
||||
//Code moved here due to a CUDA compiler bug
|
||||
EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const
|
||||
{
|
||||
return ConstReverseReturnType(derived());
|
||||
}
|
||||
EIGEN_DEVICE_FUNC void reverseInPlace();
|
||||
|
||||
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
|
||||
# include "../plugins/BlockMethods.h"
|
||||
|
||||
@@ -97,7 +97,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
{
|
||||
eigen_internal_assert(row >= 0 && row < rows()
|
||||
&& col >= 0 && col < cols());
|
||||
return typename internal::evaluator<Derived>::type(derived()).coeff(row,col);
|
||||
return internal::evaluator<Derived>(derived()).coeff(row,col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -139,7 +139,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
coeff(Index index) const
|
||||
{
|
||||
eigen_internal_assert(index >= 0 && index < size());
|
||||
return typename internal::evaluator<Derived>::type(derived()).coeff(index);
|
||||
return internal::evaluator<Derived>(derived()).coeff(index);
|
||||
}
|
||||
|
||||
|
||||
@@ -216,8 +216,9 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const
|
||||
{
|
||||
typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
|
||||
eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
|
||||
return typename internal::evaluator<Derived>::type(derived()).template packet<LoadMode>(row,col);
|
||||
return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(row,col);
|
||||
}
|
||||
|
||||
|
||||
@@ -242,8 +243,9 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
|
||||
eigen_internal_assert(index >= 0 && index < size());
|
||||
return typename internal::evaluator<Derived>::type(derived()).template packet<LoadMode>(index);
|
||||
return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(index);
|
||||
}
|
||||
|
||||
protected:
|
||||
@@ -323,7 +325,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
{
|
||||
eigen_internal_assert(row >= 0 && row < rows()
|
||||
&& col >= 0 && col < cols());
|
||||
return typename internal::evaluator<Derived>::type(derived()).coeffRef(row,col);
|
||||
return internal::evaluator<Derived>(derived()).coeffRef(row,col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -369,7 +371,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
coeffRef(Index index)
|
||||
{
|
||||
eigen_internal_assert(index >= 0 && index < size());
|
||||
return typename internal::evaluator<Derived>::type(derived()).coeffRef(index);
|
||||
return internal::evaluator<Derived>(derived()).coeffRef(index);
|
||||
}
|
||||
|
||||
/** \returns a reference to the coefficient at given index.
|
||||
@@ -580,33 +582,42 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Derived, bool JustReturnZero>
|
||||
template<int Alignment, typename Derived, bool JustReturnZero>
|
||||
struct first_aligned_impl
|
||||
{
|
||||
static inline Index run(const Derived&)
|
||||
{ return 0; }
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct first_aligned_impl<Derived, false>
|
||||
template<int Alignment, typename Derived>
|
||||
struct first_aligned_impl<Alignment, Derived, false>
|
||||
{
|
||||
static inline Index run(const Derived& m)
|
||||
{
|
||||
return internal::first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
|
||||
return internal::first_aligned<Alignment>(&m.const_cast_derived().coeffRef(0,0), m.size());
|
||||
}
|
||||
};
|
||||
|
||||
/** \internal \returns the index of the first element of the array that is well aligned for vectorization.
|
||||
/** \internal \returns the index of the first element of the array stored by \a m that is properly aligned with respect to \a Alignment for vectorization.
|
||||
*
|
||||
* \tparam Alignment requested alignment in Bytes.
|
||||
*
|
||||
* There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more
|
||||
* documentation.
|
||||
*/
|
||||
template<typename Derived>
|
||||
static inline Index first_aligned(const Derived& m)
|
||||
template<int Alignment, typename Derived>
|
||||
static inline Index first_aligned(const DenseBase<Derived>& m)
|
||||
{
|
||||
return first_aligned_impl
|
||||
<Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
|
||||
::run(m);
|
||||
enum { ReturnZero = (int(evaluator<Derived>::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) };
|
||||
return first_aligned_impl<Alignment, Derived, ReturnZero>::run(m.derived());
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
static inline Index first_default_aligned(const DenseBase<Derived>& m)
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename packet_traits<Scalar>::type DefaultPacketType;
|
||||
return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(m);
|
||||
}
|
||||
|
||||
template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
|
||||
|
||||
@@ -40,8 +40,7 @@ void check_static_allocation_size()
|
||||
*/
|
||||
template <typename T, int Size, int MatrixOrArrayOptions,
|
||||
int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
|
||||
: (((Size*sizeof(T))%EIGEN_ALIGN_BYTES)==0) ? EIGEN_ALIGN_BYTES
|
||||
: 0 >
|
||||
: compute_default_alignment<T,Size>::value >
|
||||
struct plain_array
|
||||
{
|
||||
T array[Size];
|
||||
@@ -81,14 +80,71 @@ struct plain_array
|
||||
#endif
|
||||
|
||||
template <typename T, int Size, int MatrixOrArrayOptions>
|
||||
struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES>
|
||||
struct plain_array<T, Size, MatrixOrArrayOptions, 8>
|
||||
{
|
||||
EIGEN_USER_ALIGN_DEFAULT T array[Size];
|
||||
EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array()
|
||||
{
|
||||
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array(constructor_without_unaligned_array_assert)
|
||||
{
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, int Size, int MatrixOrArrayOptions>
|
||||
struct plain_array<T, Size, MatrixOrArrayOptions, 16>
|
||||
{
|
||||
EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array()
|
||||
{
|
||||
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(EIGEN_ALIGN_BYTES-1);
|
||||
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array(constructor_without_unaligned_array_assert)
|
||||
{
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, int Size, int MatrixOrArrayOptions>
|
||||
struct plain_array<T, Size, MatrixOrArrayOptions, 32>
|
||||
{
|
||||
EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array()
|
||||
{
|
||||
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array(constructor_without_unaligned_array_assert)
|
||||
{
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, int Size, int MatrixOrArrayOptions>
|
||||
struct plain_array<T, Size, MatrixOrArrayOptions, 64>
|
||||
{
|
||||
EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array()
|
||||
{
|
||||
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
|
||||
@@ -102,7 +158,7 @@ struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES>
|
||||
template <typename T, int MatrixOrArrayOptions, int Alignment>
|
||||
struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
|
||||
{
|
||||
EIGEN_USER_ALIGN_DEFAULT T array[1];
|
||||
T array[1];
|
||||
EIGEN_DEVICE_FUNC plain_array() {}
|
||||
EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
|
||||
};
|
||||
@@ -140,7 +196,13 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
|
||||
if (this != &other) m_data = other.m_data;
|
||||
return *this;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) {
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols);
|
||||
EIGEN_UNUSED_VARIABLE(size);
|
||||
EIGEN_UNUSED_VARIABLE(rows);
|
||||
EIGEN_UNUSED_VARIABLE(cols);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
|
||||
EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
|
||||
EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
|
||||
@@ -186,10 +248,10 @@ template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic
|
||||
Index m_cols;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
|
||||
explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
|
||||
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
@@ -199,13 +261,13 @@ template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
DenseStorage(Index, Index nbRows, Index nbCols) : m_rows(nbRows), m_cols(nbCols) {}
|
||||
void swap(DenseStorage& other)
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
|
||||
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
|
||||
EIGEN_DEVICE_FUNC Index rows() const {return m_rows;}
|
||||
EIGEN_DEVICE_FUNC Index cols() const {return m_cols;}
|
||||
void conservativeResize(Index, Index nbRows, Index nbCols) { m_rows = nbRows; m_cols = nbCols; }
|
||||
void resize(Index, Index nbRows, Index nbCols) { m_rows = nbRows; m_cols = nbCols; }
|
||||
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
|
||||
EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
|
||||
};
|
||||
@@ -217,10 +279,10 @@ template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Si
|
||||
Index m_rows;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
|
||||
explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
|
||||
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
@@ -229,12 +291,12 @@ template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Si
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
DenseStorage(Index, Index nbRows, Index) : m_rows(nbRows) {}
|
||||
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
|
||||
EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
|
||||
EIGEN_DEVICE_FUNC Index cols(void) const {return _Cols;}
|
||||
void conservativeResize(Index, Index nbRows, Index) { m_rows = nbRows; }
|
||||
void resize(Index, Index nbRows, Index) { m_rows = nbRows; }
|
||||
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
|
||||
EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; }
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
|
||||
};
|
||||
@@ -246,10 +308,10 @@ template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Si
|
||||
Index m_cols;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
|
||||
explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
|
||||
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
@@ -258,12 +320,12 @@ template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Si
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
DenseStorage(Index, Index, Index nbCols) : m_cols(nbCols) {}
|
||||
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {}
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
|
||||
EIGEN_DEVICE_FUNC Index rows(void) const {return _Rows;}
|
||||
EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
|
||||
void conservativeResize(Index, Index, Index nbCols) { m_cols = nbCols; }
|
||||
void resize(Index, Index, Index nbCols) { m_cols = nbCols; }
|
||||
void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
|
||||
void resize(Index, Index, Index cols) { m_cols = cols; }
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
|
||||
};
|
||||
@@ -276,19 +338,22 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
Index m_cols;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
|
||||
explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(0), m_rows(0), m_cols(0) {}
|
||||
DenseStorage(Index size, Index nbRows, Index nbCols)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows), m_cols(nbCols)
|
||||
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
|
||||
DenseStorage(const DenseStorage& other)
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols))
|
||||
, m_rows(other.m_rows)
|
||||
, m_cols(other.m_cols)
|
||||
{
|
||||
internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data);
|
||||
}
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
@@ -298,6 +363,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
return *this;
|
||||
}
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage(DenseStorage&& other)
|
||||
: m_data(std::move(other.m_data))
|
||||
, m_rows(std::move(other.m_rows))
|
||||
@@ -307,6 +373,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
other.m_rows = 0;
|
||||
other.m_cols = 0;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage& operator=(DenseStorage&& other)
|
||||
{
|
||||
using std::swap;
|
||||
@@ -316,18 +383,18 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
|
||||
void swap(DenseStorage& other)
|
||||
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
|
||||
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
|
||||
EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
|
||||
EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
|
||||
void conservativeResize(Index size, Index nbRows, Index nbCols)
|
||||
void conservativeResize(Index size, Index rows, Index cols)
|
||||
{
|
||||
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
|
||||
m_rows = nbRows;
|
||||
m_cols = nbCols;
|
||||
m_rows = rows;
|
||||
m_cols = cols;
|
||||
}
|
||||
void resize(Index size, Index nbRows, Index nbCols)
|
||||
EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols)
|
||||
{
|
||||
if(size != m_rows*m_cols)
|
||||
{
|
||||
@@ -338,8 +405,8 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
m_data = 0;
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
}
|
||||
m_rows = nbRows;
|
||||
m_cols = nbCols;
|
||||
m_rows = rows;
|
||||
m_cols = cols;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data; }
|
||||
@@ -353,15 +420,19 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
|
||||
explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
|
||||
DenseStorage(Index size, Index, Index nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(nbCols)
|
||||
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
|
||||
DenseStorage(const DenseStorage& other)
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0);
|
||||
EIGEN_UNUSED_VARIABLE(rows);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
|
||||
, m_cols(other.m_cols)
|
||||
{
|
||||
internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);
|
||||
}
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
@@ -371,6 +442,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
return *this;
|
||||
}
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage(DenseStorage&& other)
|
||||
: m_data(std::move(other.m_data))
|
||||
, m_cols(std::move(other.m_cols))
|
||||
@@ -378,6 +450,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
other.m_data = nullptr;
|
||||
other.m_cols = 0;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage& operator=(DenseStorage&& other)
|
||||
{
|
||||
using std::swap;
|
||||
@@ -386,16 +459,16 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
|
||||
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
|
||||
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
|
||||
EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
|
||||
EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
|
||||
void conservativeResize(Index size, Index, Index nbCols)
|
||||
EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols)
|
||||
{
|
||||
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
|
||||
m_cols = nbCols;
|
||||
m_cols = cols;
|
||||
}
|
||||
EIGEN_STRONG_INLINE void resize(Index size, Index, Index nbCols)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols)
|
||||
{
|
||||
if(size != _Rows*m_cols)
|
||||
{
|
||||
@@ -406,7 +479,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
m_data = 0;
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
}
|
||||
m_cols = nbCols;
|
||||
m_cols = cols;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data; }
|
||||
@@ -420,15 +493,19 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
|
||||
explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
|
||||
DenseStorage(Index size, Index nbRows, Index) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows)
|
||||
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
|
||||
DenseStorage(const DenseStorage& other)
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols);
|
||||
EIGEN_UNUSED_VARIABLE(cols);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
|
||||
, m_rows(other.m_rows)
|
||||
{
|
||||
internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);
|
||||
}
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
@@ -438,6 +515,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
return *this;
|
||||
}
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage(DenseStorage&& other)
|
||||
: m_data(std::move(other.m_data))
|
||||
, m_rows(std::move(other.m_rows))
|
||||
@@ -445,6 +523,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
other.m_data = nullptr;
|
||||
other.m_rows = 0;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage& operator=(DenseStorage&& other)
|
||||
{
|
||||
using std::swap;
|
||||
@@ -453,16 +532,16 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
|
||||
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
|
||||
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
|
||||
EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
|
||||
EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
|
||||
void conservativeResize(Index size, Index nbRows, Index)
|
||||
void conservativeResize(Index size, Index rows, Index)
|
||||
{
|
||||
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
|
||||
m_rows = nbRows;
|
||||
m_rows = rows;
|
||||
}
|
||||
EIGEN_STRONG_INLINE void resize(Index size, Index nbRows, Index)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index)
|
||||
{
|
||||
if(size != m_rows*_Cols)
|
||||
{
|
||||
@@ -473,7 +552,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
m_data = 0;
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
}
|
||||
m_rows = nbRows;
|
||||
m_rows = rows;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data; }
|
||||
|
||||
@@ -37,7 +37,7 @@ template<typename MatrixType, int DiagIndex>
|
||||
struct traits<Diagonal<MatrixType,DiagIndex> >
|
||||
: traits<MatrixType>
|
||||
{
|
||||
typedef typename nested<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
|
||||
typedef typename MatrixType::StorageKind StorageKind;
|
||||
enum {
|
||||
@@ -170,7 +170,7 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
|
||||
EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
|
||||
// trigger a compile time error is someone try to call packet
|
||||
// trigger a compile-time error if someone try to call packet
|
||||
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
|
||||
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index,Index) const;
|
||||
};
|
||||
|
||||
@@ -99,7 +99,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
|
||||
template<typename Derived>
|
||||
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
|
||||
{
|
||||
using std::sqrt;
|
||||
EIGEN_USING_STD_MATH(sqrt)
|
||||
return sqrt(squaredNorm());
|
||||
}
|
||||
|
||||
@@ -141,7 +141,7 @@ struct lpNorm_selector
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
using std::pow;
|
||||
EIGEN_USING_STD_MATH(pow)
|
||||
return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
|
||||
}
|
||||
};
|
||||
@@ -224,13 +224,13 @@ bool MatrixBase<Derived>::isOrthogonal
|
||||
template<typename Derived>
|
||||
bool MatrixBase<Derived>::isUnitary(const RealScalar& prec) const
|
||||
{
|
||||
typename Derived::Nested nested(derived());
|
||||
typename internal::nested_eval<Derived,1>::type self(derived());
|
||||
for(Index i = 0; i < cols(); ++i)
|
||||
{
|
||||
if(!internal::isApprox(nested.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
|
||||
if(!internal::isApprox(self.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
|
||||
return false;
|
||||
for(Index j = 0; j < i; ++j)
|
||||
if(!internal::isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast<Scalar>(1), prec))
|
||||
if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast<Scalar>(1), prec))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
@@ -1,140 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_FLAGGED_H
|
||||
#define EIGEN_FLAGGED_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class Flagged
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression with modified flags
|
||||
*
|
||||
* \param ExpressionType the type of the object of which we are modifying the flags
|
||||
* \param Added the flags added to the expression
|
||||
* \param Removed the flags removed from the expression (has priority over Added).
|
||||
*
|
||||
* This class represents an expression whose flags have been modified.
|
||||
* It is the return type of MatrixBase::flagged()
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::flagged()
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename ExpressionType, unsigned int Added, unsigned int Removed>
|
||||
struct traits<Flagged<ExpressionType, Added, Removed> > : traits<ExpressionType>
|
||||
{
|
||||
enum { Flags = (ExpressionType::Flags | Added) & ~Removed };
|
||||
};
|
||||
}
|
||||
|
||||
template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged
|
||||
: public MatrixBase<Flagged<ExpressionType, Added, Removed> >
|
||||
{
|
||||
public:
|
||||
|
||||
typedef MatrixBase<Flagged> Base;
|
||||
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Flagged)
|
||||
typedef typename internal::conditional<internal::must_nest_by_value<ExpressionType>::ret,
|
||||
ExpressionType, const ExpressionType&>::type ExpressionTypeNested;
|
||||
typedef typename ExpressionType::InnerIterator InnerIterator;
|
||||
|
||||
explicit inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_matrix.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_matrix.innerStride(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return m_matrix.coeff(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_matrix.coeff(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index row, Index col) const
|
||||
{
|
||||
return m_matrix.const_cast_derived().coeffRef(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_matrix.const_cast_derived().coeffRef(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
return m_matrix.const_cast_derived().coeffRef(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_matrix.const_cast_derived().coeffRef(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar packet(Index row, Index col) const
|
||||
{
|
||||
return m_matrix.template packet<LoadMode>(row, col);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index row, Index col, const PacketScalar& x)
|
||||
{
|
||||
m_matrix.const_cast_derived().template writePacket<LoadMode>(row, col, x);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar packet(Index index) const
|
||||
{
|
||||
return m_matrix.template packet<LoadMode>(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index index, const PacketScalar& x)
|
||||
{
|
||||
m_matrix.const_cast_derived().template writePacket<LoadMode>(index, x);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC const ExpressionType& _expression() const { return m_matrix; }
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC typename ExpressionType::PlainObject solveTriangular(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC void solveTriangularInPlace(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
protected:
|
||||
ExpressionTypeNested m_matrix;
|
||||
};
|
||||
|
||||
/** \returns an expression of *this with added and removed flags
|
||||
*
|
||||
* This is mostly for internal use.
|
||||
*
|
||||
* \sa class Flagged
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<unsigned int Added,unsigned int Removed>
|
||||
inline const Flagged<Derived, Added, Removed>
|
||||
DenseBase<Derived>::flagged() const
|
||||
{
|
||||
return Flagged<Derived, Added, Removed>(derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_FLAGGED_H
|
||||
@@ -183,7 +183,7 @@ struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
|
||||
template<typename Scalar,int Size,int MaxSize>
|
||||
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
|
||||
{
|
||||
#if EIGEN_ALIGN_STATICALLY
|
||||
#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
|
||||
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
|
||||
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
|
||||
#else
|
||||
@@ -196,7 +196,7 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
|
||||
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
|
||||
EIGEN_STRONG_INLINE Scalar* data() {
|
||||
return ForceAlignment
|
||||
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES)
|
||||
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
|
||||
: m_data.array;
|
||||
}
|
||||
#endif
|
||||
@@ -249,8 +249,8 @@ template<> struct gemv_dense_sense_selector<OnTheRight,ColMajor,true>
|
||||
|
||||
gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
|
||||
|
||||
bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
|
||||
bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
||||
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
|
||||
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
||||
|
||||
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
|
||||
|
||||
|
||||
@@ -49,6 +49,7 @@ struct default_packet_traits
|
||||
HasMul = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 1,
|
||||
HasArg = 0,
|
||||
HasAbs2 = 1,
|
||||
HasMin = 1,
|
||||
HasMax = 1,
|
||||
@@ -58,8 +59,10 @@ struct default_packet_traits
|
||||
|
||||
HasDiv = 0,
|
||||
HasSqrt = 0,
|
||||
HasRsqrt = 0,
|
||||
HasExp = 0,
|
||||
HasLog = 0,
|
||||
HasLog10 = 0,
|
||||
HasPow = 0,
|
||||
|
||||
HasSin = 0,
|
||||
@@ -67,7 +70,14 @@ struct default_packet_traits
|
||||
HasTan = 0,
|
||||
HasASin = 0,
|
||||
HasACos = 0,
|
||||
HasATan = 0
|
||||
HasATan = 0,
|
||||
HasSinh = 0,
|
||||
HasCosh = 0,
|
||||
HasTanh = 0,
|
||||
|
||||
HasRound = 0,
|
||||
HasFloor = 0,
|
||||
HasCeil = 0
|
||||
};
|
||||
};
|
||||
|
||||
@@ -97,6 +107,28 @@ template<typename T> struct packet_traits : default_packet_traits
|
||||
|
||||
template<typename T> struct packet_traits<const T> : packet_traits<T> { };
|
||||
|
||||
template <typename Src, typename Tgt> struct type_casting_traits {
|
||||
enum {
|
||||
VectorizedCast = 0,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
|
||||
template <typename SrcPacket, typename TgtPacket>
|
||||
EIGEN_DEVICE_FUNC inline TgtPacket
|
||||
pcast(const SrcPacket& a) {
|
||||
return static_cast<TgtPacket>(a);
|
||||
}
|
||||
template <typename SrcPacket, typename TgtPacket>
|
||||
EIGEN_DEVICE_FUNC inline TgtPacket
|
||||
pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
|
||||
return static_cast<TgtPacket>(a);
|
||||
}
|
||||
|
||||
|
||||
/** \internal \returns a + b (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
padd(const Packet& a,
|
||||
@@ -140,6 +172,10 @@ pmax(const Packet& a,
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pabs(const Packet& a) { using std::abs; return abs(a); }
|
||||
|
||||
/** \internal \returns the phase angle of \a a */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
parg(const Packet& a) { using numext::arg; return arg(a); }
|
||||
|
||||
/** \internal \returns the bitwise and of \a a and \a b */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pand(const Packet& a, const Packet& b) { return a & b; }
|
||||
@@ -225,8 +261,8 @@ inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
|
||||
}
|
||||
|
||||
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
|
||||
template<typename Scalar> inline typename packet_traits<Scalar>::type
|
||||
plset(const Scalar& a) { return a; }
|
||||
template<typename Packet> inline Packet
|
||||
plset(const typename unpacket_traits<Packet>::type& a) { return a; }
|
||||
|
||||
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
|
||||
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
|
||||
@@ -245,7 +281,15 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
|
||||
/** \internal tries to do cache prefetching of \a addr */
|
||||
template<typename Scalar> inline void prefetch(const Scalar* addr)
|
||||
{
|
||||
#if !EIGEN_COMP_MSVC
|
||||
#ifdef __CUDA_ARCH__
|
||||
#if defined(__LP64__)
|
||||
// 64-bit pointer operand constraint for inlined asm
|
||||
asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
|
||||
#else
|
||||
// 32-bit pointer operand constraint for inlined asm
|
||||
asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
|
||||
#endif
|
||||
#elif !EIGEN_COMP_MSVC
|
||||
__builtin_prefetch(addr);
|
||||
#endif
|
||||
}
|
||||
@@ -287,6 +331,21 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Pack
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
template<size_t offset, typename Packet>
|
||||
struct protate_impl
|
||||
{
|
||||
// Empty so attempts to use this unimplemented path will fail to compile.
|
||||
// Only specializations of this template should be used.
|
||||
};
|
||||
|
||||
/** \internal \returns a packet with the coefficients rotated to the right in little-endian convention,
|
||||
* by the given offset, e.g. for offset == 1:
|
||||
* (packet[3], packet[2], packet[1], packet[0]) becomes (packet[0], packet[3], packet[2], packet[1])
|
||||
*/
|
||||
template<size_t offset, typename Packet> EIGEN_DEVICE_FUNC inline Packet protate(const Packet& a)
|
||||
{
|
||||
return offset ? protate_impl<offset, Packet>::run(a) : a;
|
||||
}
|
||||
|
||||
/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
|
||||
@@ -321,10 +380,22 @@ Packet pasin(const Packet& a) { using std::asin; return asin(a); }
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pacos(const Packet& a) { using std::acos; return acos(a); }
|
||||
|
||||
/** \internal \returns the atan of \a a (coeff-wise) */
|
||||
/** \internal \returns the arc tangent of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet patan(const Packet& a) { using std::atan; return atan(a); }
|
||||
|
||||
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
|
||||
|
||||
/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
|
||||
|
||||
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
|
||||
|
||||
/** \internal \returns the exp of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pexp(const Packet& a) { using std::exp; return exp(a); }
|
||||
@@ -333,10 +404,32 @@ Packet pexp(const Packet& a) { using std::exp; return exp(a); }
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet plog(const Packet& a) { using std::log; return log(a); }
|
||||
|
||||
/** \internal \returns the log10 of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet plog10(const Packet& a) { using std::log10; return log10(a); }
|
||||
|
||||
/** \internal \returns the square-root of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
|
||||
|
||||
/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet prsqrt(const Packet& a) {
|
||||
return pdiv(pset1<Packet>(1), psqrt(a));
|
||||
}
|
||||
|
||||
/** \internal \returns the rounded value of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pround(const Packet& a) { using numext::round; return round(a); }
|
||||
|
||||
/** \internal \returns the floor of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
|
||||
|
||||
/** \internal \returns the ceil of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
|
||||
|
||||
/***************************************************************************
|
||||
* The following functions might not have to be overwritten for vectorized types
|
||||
***************************************************************************/
|
||||
@@ -357,22 +450,22 @@ pmadd(const Packet& a,
|
||||
{ return padd(pmul(a, b),c); }
|
||||
|
||||
/** \internal \returns a packet version of \a *from.
|
||||
* If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
|
||||
template<typename Packet, int LoadMode>
|
||||
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
|
||||
template<typename Packet, int Alignment>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
|
||||
{
|
||||
if(LoadMode == Aligned)
|
||||
if(Alignment >= unpacket_traits<Packet>::alignment)
|
||||
return pload<Packet>(from);
|
||||
else
|
||||
return ploadu<Packet>(from);
|
||||
}
|
||||
|
||||
/** \internal copy the packet \a from to \a *to.
|
||||
* If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
|
||||
template<typename Scalar, typename Packet, int LoadMode>
|
||||
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
|
||||
template<typename Scalar, typename Packet, int Alignment>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
|
||||
{
|
||||
if(LoadMode == Aligned)
|
||||
if(Alignment >= unpacket_traits<Packet>::alignment)
|
||||
pstore(to, from);
|
||||
else
|
||||
pstoreu(to, from);
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR) \
|
||||
template<typename Derived> \
|
||||
inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
|
||||
NAME(const Eigen::ArrayBase<Derived>& x) { \
|
||||
(NAME)(const Eigen::ArrayBase<Derived>& x) { \
|
||||
return Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived>(x.derived()); \
|
||||
}
|
||||
|
||||
@@ -34,22 +34,36 @@
|
||||
} \
|
||||
};
|
||||
|
||||
|
||||
namespace Eigen
|
||||
{
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op)
|
||||
|
||||
template<typename Derived>
|
||||
inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar>, const Derived>
|
||||
@@ -57,16 +71,46 @@ namespace Eigen
|
||||
return x.derived().pow(exponent);
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const Derived, const Derived>
|
||||
pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<Derived>& exponents)
|
||||
/** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents.
|
||||
*
|
||||
* This function computes the coefficient-wise power.
|
||||
*
|
||||
* Example: \include Cwise_array_power_array.cpp
|
||||
* Output: \verbinclude Cwise_array_power_array.out
|
||||
*
|
||||
* \sa ArrayBase::pow()
|
||||
*/
|
||||
template<typename Derived,typename ExponentDerived>
|
||||
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>
|
||||
pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<ExponentDerived>& exponents)
|
||||
{
|
||||
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const Derived, const Derived>(
|
||||
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>(
|
||||
x.derived(),
|
||||
exponents.derived()
|
||||
);
|
||||
}
|
||||
|
||||
/** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents.
|
||||
*
|
||||
* This function computes the coefficient-wise power between a scalar and an array of exponents.
|
||||
* Beaware that the scalar type of the input scalar \a x and the exponents \a exponents must be the same.
|
||||
*
|
||||
* Example: \include Cwise_scalar_power_array.cpp
|
||||
* Output: \verbinclude Cwise_scalar_power_array.out
|
||||
*
|
||||
* \sa ArrayBase::pow()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const typename Derived::ConstantReturnType, const Derived>
|
||||
pow(const typename Derived::Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
|
||||
{
|
||||
typename Derived::ConstantReturnType constant_x(exponents.rows(), exponents.cols(), x);
|
||||
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const typename Derived::ConstantReturnType, const Derived>(
|
||||
constant_x,
|
||||
exponents.derived()
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Component-wise division of a scalar by array elements.
|
||||
**/
|
||||
|
||||
@@ -47,7 +47,7 @@ class Inverse : public InverseImpl<XprType,typename internal::traits<XprType>::S
|
||||
public:
|
||||
typedef typename XprType::StorageIndex StorageIndex;
|
||||
typedef typename XprType::PlainObject PlainObject;
|
||||
typedef typename internal::nested<XprType>::type XprTypeNested;
|
||||
typedef typename internal::ref_selector<XprType>::type XprTypeNested;
|
||||
typedef typename internal::remove_all<XprTypeNested>::type XprTypeNestedCleaned;
|
||||
|
||||
explicit Inverse(const XprType &xpr)
|
||||
@@ -100,14 +100,11 @@ namespace internal {
|
||||
*/
|
||||
template<typename ArgType>
|
||||
struct unary_evaluator<Inverse<ArgType> >
|
||||
: public evaluator<typename Inverse<ArgType>::PlainObject>::type
|
||||
: public evaluator<typename Inverse<ArgType>::PlainObject>
|
||||
{
|
||||
typedef Inverse<ArgType> InverseType;
|
||||
typedef typename InverseType::PlainObject PlainObject;
|
||||
typedef typename evaluator<PlainObject>::type Base;
|
||||
|
||||
typedef evaluator<InverseType> type;
|
||||
typedef evaluator<InverseType> nestedType;
|
||||
typedef evaluator<PlainObject> Base;
|
||||
|
||||
enum { Flags = Base::Flags | EvalBeforeNestingBit };
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ namespace Eigen {
|
||||
* \brief A matrix or vector expression mapping an existing array of data.
|
||||
*
|
||||
* \tparam PlainObjectType the equivalent matrix type of the mapped data
|
||||
* \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned.
|
||||
* \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
|
||||
* The default is \c #Unaligned.
|
||||
* \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout
|
||||
* of an ordinary, contiguous array. This can be overridden by specifying strides.
|
||||
@@ -77,7 +77,7 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
|
||||
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
|
||||
? int(PlainObjectType::OuterStrideAtCompileTime)
|
||||
: int(StrideType::OuterStrideAtCompileTime),
|
||||
IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
|
||||
Alignment = int(MapOptions)&int(AlignedMask),
|
||||
Flags0 = TraitsBase::Flags & (~NestByRefBit),
|
||||
Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
|
||||
};
|
||||
@@ -117,11 +117,11 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
|
||||
/** Constructor in the fixed-size case.
|
||||
*
|
||||
* \param dataPtr pointer to the array to map
|
||||
* \param a_stride optional Stride object, passing the strides.
|
||||
* \param stride optional Stride object, passing the strides.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType())
|
||||
: Base(cast_to_pointer_type(dataPtr)), m_stride(a_stride)
|
||||
explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType())
|
||||
: Base(cast_to_pointer_type(dataPtr)), m_stride(stride)
|
||||
{
|
||||
PlainObjectType::Base::_check_template_params();
|
||||
}
|
||||
@@ -129,12 +129,12 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
|
||||
/** Constructor in the dynamic-size vector case.
|
||||
*
|
||||
* \param dataPtr pointer to the array to map
|
||||
* \param a_size the size of the vector expression
|
||||
* \param a_stride optional Stride object, passing the strides.
|
||||
* \param size the size of the vector expression
|
||||
* \param stride optional Stride object, passing the strides.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Map(PointerArgType dataPtr, Index a_size, const StrideType& a_stride = StrideType())
|
||||
: Base(cast_to_pointer_type(dataPtr), a_size), m_stride(a_stride)
|
||||
inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType())
|
||||
: Base(cast_to_pointer_type(dataPtr), size), m_stride(stride)
|
||||
{
|
||||
PlainObjectType::Base::_check_template_params();
|
||||
}
|
||||
@@ -142,13 +142,13 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
|
||||
/** Constructor in the dynamic-size matrix case.
|
||||
*
|
||||
* \param dataPtr pointer to the array to map
|
||||
* \param nbRows the number of rows of the matrix expression
|
||||
* \param nbCols the number of columns of the matrix expression
|
||||
* \param a_stride optional Stride object, passing the strides.
|
||||
* \param rows the number of rows of the matrix expression
|
||||
* \param cols the number of columns of the matrix expression
|
||||
* \param stride optional Stride object, passing the strides.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Map(PointerArgType dataPtr, Index nbRows, Index nbCols, const StrideType& a_stride = StrideType())
|
||||
: Base(cast_to_pointer_type(dataPtr), nbRows, nbCols), m_stride(a_stride)
|
||||
inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType())
|
||||
: Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride)
|
||||
{
|
||||
PlainObjectType::Base::_check_template_params();
|
||||
}
|
||||
|
||||
@@ -146,12 +146,12 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols)
|
||||
: m_data(dataPtr), m_rows(nbRows), m_cols(nbCols)
|
||||
inline MapBase(PointerType dataPtr, Index rows, Index cols)
|
||||
: m_data(dataPtr), m_rows(rows), m_cols(cols)
|
||||
{
|
||||
eigen_assert( (dataPtr == 0)
|
||||
|| ( nbRows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == nbRows)
|
||||
&& nbCols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols)));
|
||||
|| ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
|
||||
&& cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
|
||||
checkSanity();
|
||||
}
|
||||
|
||||
@@ -160,7 +160,9 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void checkSanity() const
|
||||
{
|
||||
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned");
|
||||
#if EIGEN_MAX_ALIGN_BYTES>0
|
||||
eigen_assert(((size_t(m_data) % EIGEN_PLAIN_ENUM_MAX(1,internal::traits<Derived>::Alignment)) == 0) && "data is not aligned");
|
||||
#endif
|
||||
}
|
||||
|
||||
PointerType m_data;
|
||||
@@ -234,7 +236,7 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
|
||||
EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
|
||||
EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) : Base(dataPtr, nbRows, nbCols) {}
|
||||
EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const MapBase& other)
|
||||
|
||||
@@ -10,6 +10,9 @@
|
||||
#ifndef EIGEN_MATHFUNCTIONS_H
|
||||
#define EIGEN_MATHFUNCTIONS_H
|
||||
|
||||
// source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html
|
||||
#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
// On WINCE, std::abs is defined for int only, so let's defined our own overloads:
|
||||
@@ -276,7 +279,7 @@ struct norm1_default_impl
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const Scalar& x)
|
||||
{
|
||||
using std::abs;
|
||||
EIGEN_USING_STD_MATH(abs);
|
||||
return abs(real(x)) + abs(imag(x));
|
||||
}
|
||||
};
|
||||
@@ -287,7 +290,7 @@ struct norm1_default_impl<Scalar, false>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Scalar run(const Scalar& x)
|
||||
{
|
||||
using std::abs;
|
||||
EIGEN_USING_STD_MATH(abs);
|
||||
return abs(x);
|
||||
}
|
||||
};
|
||||
@@ -313,8 +316,8 @@ struct hypot_impl
|
||||
{
|
||||
EIGEN_USING_STD_MATH(max);
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
using std::abs;
|
||||
using std::sqrt;
|
||||
EIGEN_USING_STD_MATH(abs);
|
||||
EIGEN_USING_STD_MATH(sqrt);
|
||||
RealScalar _x = abs(x);
|
||||
RealScalar _y = abs(y);
|
||||
Scalar p, qp;
|
||||
@@ -328,6 +331,7 @@ struct hypot_impl
|
||||
p = _y;
|
||||
qp = _x / p;
|
||||
}
|
||||
if(p==RealScalar(0)) return RealScalar(0);
|
||||
return p * sqrt(RealScalar(1) + qp*qp);
|
||||
}
|
||||
};
|
||||
@@ -345,6 +349,7 @@ struct hypot_retval
|
||||
template<typename OldType, typename NewType>
|
||||
struct cast_impl
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline NewType run(const OldType& x)
|
||||
{
|
||||
return static_cast<NewType>(x);
|
||||
@@ -354,35 +359,119 @@ struct cast_impl
|
||||
// here, for once, we're plainly returning NewType: we don't want cast to do weird things.
|
||||
|
||||
template<typename OldType, typename NewType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline NewType cast(const OldType& x)
|
||||
{
|
||||
return cast_impl<OldType, NewType>::run(x);
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
* Implementation of logp1 *
|
||||
* Implementation of round *
|
||||
****************************************************************************/
|
||||
|
||||
#if EIGEN_HAS_CXX11_MATH
|
||||
template<typename Scalar>
|
||||
struct round_impl {
|
||||
static inline Scalar run(const Scalar& x)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
|
||||
using std::round;
|
||||
return round(x);
|
||||
}
|
||||
};
|
||||
#else
|
||||
template<typename Scalar>
|
||||
struct round_impl
|
||||
{
|
||||
static inline Scalar run(const Scalar& x)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
|
||||
EIGEN_USING_STD_MATH(floor);
|
||||
EIGEN_USING_STD_MATH(ceil);
|
||||
return (x > Scalar(0)) ? floor(x + Scalar(0.5)) : ceil(x - Scalar(0.5));
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
template<typename Scalar>
|
||||
struct round_retval
|
||||
{
|
||||
typedef Scalar type;
|
||||
};
|
||||
|
||||
/****************************************************************************
|
||||
* Implementation of arg *
|
||||
****************************************************************************/
|
||||
|
||||
#if EIGEN_HAS_CXX11_MATH
|
||||
template<typename Scalar>
|
||||
struct arg_impl {
|
||||
static inline Scalar run(const Scalar& x)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(arg);
|
||||
return arg(x);
|
||||
}
|
||||
};
|
||||
#else
|
||||
template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
|
||||
struct arg_default_impl
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const Scalar& x)
|
||||
{
|
||||
return (x < Scalar(0)) ? Scalar(EIGEN_PI) : Scalar(0); }
|
||||
};
|
||||
|
||||
template<typename Scalar>
|
||||
struct arg_default_impl<Scalar,true>
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const Scalar& x)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(arg);
|
||||
return arg(x);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar> struct arg_impl : arg_default_impl<Scalar> {};
|
||||
#endif
|
||||
|
||||
template<typename Scalar>
|
||||
struct arg_retval
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real type;
|
||||
};
|
||||
|
||||
/****************************************************************************
|
||||
* Implementation of log1p *
|
||||
****************************************************************************/
|
||||
template<typename Scalar, bool isComplex = NumTraits<Scalar>::IsComplex >
|
||||
struct log1p_impl
|
||||
{
|
||||
static inline Scalar run(const Scalar& x)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
|
||||
// Let's be conservative and enable the default C++11 implementation only if we are sure it exists
|
||||
#if (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \
|
||||
&& (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC)
|
||||
using std::log1p;
|
||||
return log1p(x);
|
||||
#else
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
using std::log;
|
||||
Scalar x1p = RealScalar(1) + x;
|
||||
return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
|
||||
#endif
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_USING_STD_MATH(log);
|
||||
Scalar x1p = RealScalar(1) + x;
|
||||
return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
|
||||
}
|
||||
};
|
||||
|
||||
#if EIGEN_HAS_CXX11_MATH
|
||||
template<typename Scalar>
|
||||
struct log1p_impl<Scalar, false> {
|
||||
static inline Scalar run(const Scalar& x)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
|
||||
using std::log1p;
|
||||
return log1p(x);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
template<typename Scalar>
|
||||
struct log1p_retval
|
||||
{
|
||||
@@ -399,7 +488,7 @@ struct pow_default_impl
|
||||
typedef Scalar retval;
|
||||
static inline Scalar run(const Scalar& x, const Scalar& y)
|
||||
{
|
||||
using std::pow;
|
||||
EIGEN_USING_STD_MATH(pow);
|
||||
return pow(x, y);
|
||||
}
|
||||
};
|
||||
@@ -467,48 +556,48 @@ struct random_default_impl<Scalar, false, false>
|
||||
};
|
||||
|
||||
enum {
|
||||
floor_log2_terminate,
|
||||
floor_log2_move_up,
|
||||
floor_log2_move_down,
|
||||
floor_log2_bogus
|
||||
meta_floor_log2_terminate,
|
||||
meta_floor_log2_move_up,
|
||||
meta_floor_log2_move_down,
|
||||
meta_floor_log2_bogus
|
||||
};
|
||||
|
||||
template<unsigned int n, int lower, int upper> struct floor_log2_selector
|
||||
template<unsigned int n, int lower, int upper> struct meta_floor_log2_selector
|
||||
{
|
||||
enum { middle = (lower + upper) / 2,
|
||||
value = (upper <= lower + 1) ? int(floor_log2_terminate)
|
||||
: (n < (1 << middle)) ? int(floor_log2_move_down)
|
||||
: (n==0) ? int(floor_log2_bogus)
|
||||
: int(floor_log2_move_up)
|
||||
value = (upper <= lower + 1) ? int(meta_floor_log2_terminate)
|
||||
: (n < (1 << middle)) ? int(meta_floor_log2_move_down)
|
||||
: (n==0) ? int(meta_floor_log2_bogus)
|
||||
: int(meta_floor_log2_move_up)
|
||||
};
|
||||
};
|
||||
|
||||
template<unsigned int n,
|
||||
int lower = 0,
|
||||
int upper = sizeof(unsigned int) * CHAR_BIT - 1,
|
||||
int selector = floor_log2_selector<n, lower, upper>::value>
|
||||
struct floor_log2 {};
|
||||
int selector = meta_floor_log2_selector<n, lower, upper>::value>
|
||||
struct meta_floor_log2 {};
|
||||
|
||||
template<unsigned int n, int lower, int upper>
|
||||
struct floor_log2<n, lower, upper, floor_log2_move_down>
|
||||
struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down>
|
||||
{
|
||||
enum { value = floor_log2<n, lower, floor_log2_selector<n, lower, upper>::middle>::value };
|
||||
enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value };
|
||||
};
|
||||
|
||||
template<unsigned int n, int lower, int upper>
|
||||
struct floor_log2<n, lower, upper, floor_log2_move_up>
|
||||
struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up>
|
||||
{
|
||||
enum { value = floor_log2<n, floor_log2_selector<n, lower, upper>::middle, upper>::value };
|
||||
enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value };
|
||||
};
|
||||
|
||||
template<unsigned int n, int lower, int upper>
|
||||
struct floor_log2<n, lower, upper, floor_log2_terminate>
|
||||
struct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate>
|
||||
{
|
||||
enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower };
|
||||
};
|
||||
|
||||
template<unsigned int n, int lower, int upper>
|
||||
struct floor_log2<n, lower, upper, floor_log2_bogus>
|
||||
struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
|
||||
{
|
||||
// no value, error at compile time
|
||||
};
|
||||
@@ -516,11 +605,24 @@ struct floor_log2<n, lower, upper, floor_log2_bogus>
|
||||
template<typename Scalar>
|
||||
struct random_default_impl<Scalar, false, true>
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::NonInteger NonInteger;
|
||||
|
||||
static inline Scalar run(const Scalar& x, const Scalar& y)
|
||||
{
|
||||
return x + Scalar((NonInteger(y)-x+1) * std::rand() / (RAND_MAX + NonInteger(1)));
|
||||
{
|
||||
using std::max;
|
||||
using std::min;
|
||||
typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
|
||||
if(y<x)
|
||||
return x;
|
||||
std::size_t range = ScalarX(y)-ScalarX(x);
|
||||
std::size_t offset = 0;
|
||||
// rejection sampling
|
||||
std::size_t divisor = (range+RAND_MAX-1)/(range+1);
|
||||
std::size_t multiplier = (range+RAND_MAX-1)/std::size_t(RAND_MAX);
|
||||
|
||||
do {
|
||||
offset = ( (std::size_t(std::rand()) * multiplier) / divisor );
|
||||
} while (offset > range);
|
||||
|
||||
return Scalar(ScalarX(x) + offset);
|
||||
}
|
||||
|
||||
static inline Scalar run()
|
||||
@@ -528,7 +630,7 @@ struct random_default_impl<Scalar, false, true>
|
||||
#ifdef EIGEN_MAKING_DOCS
|
||||
return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
|
||||
#else
|
||||
enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value,
|
||||
enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
|
||||
scalar_bits = sizeof(Scalar) * CHAR_BIT,
|
||||
shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),
|
||||
offset = NumTraits<Scalar>::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0
|
||||
@@ -568,14 +670,15 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()
|
||||
} // end namespace internal
|
||||
|
||||
/****************************************************************************
|
||||
* Generic math function *
|
||||
* Generic math functions *
|
||||
****************************************************************************/
|
||||
|
||||
namespace numext {
|
||||
|
||||
|
||||
#ifndef __CUDA_ARCH__
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline T mini(const T& x, const T& y)
|
||||
EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
return min EIGEN_NOT_A_MACRO (x,y);
|
||||
@@ -583,11 +686,38 @@ inline T mini(const T& x, const T& y)
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline T maxi(const T& x, const T& y)
|
||||
EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(max);
|
||||
return max EIGEN_NOT_A_MACRO (x,y);
|
||||
}
|
||||
#else
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
|
||||
{
|
||||
return y < x ? y : x;
|
||||
}
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y)
|
||||
{
|
||||
return fmin(x, y);
|
||||
}
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
|
||||
{
|
||||
return x < y ? y : x;
|
||||
}
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y)
|
||||
{
|
||||
return fmax(x, y);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -617,6 +747,13 @@ inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x)
|
||||
return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x)
|
||||
@@ -673,22 +810,81 @@ inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y)
|
||||
return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y);
|
||||
}
|
||||
|
||||
// std::isfinite is non standard, so let's define our own version,
|
||||
// even though it is not very efficient.
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
bool (isfinite)(const T& x)
|
||||
{
|
||||
return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
|
||||
#if EIGEN_HAS_CXX11_MATH
|
||||
using std::isfinite;
|
||||
return isfinite EIGEN_NOT_A_MACRO (x);
|
||||
#else
|
||||
return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
bool (isnan)(const T& x)
|
||||
{
|
||||
#if EIGEN_HAS_CXX11_MATH
|
||||
using std::isnan;
|
||||
return isnan EIGEN_NOT_A_MACRO (x);
|
||||
#else
|
||||
return x != x;
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
bool (isinf)(const T& x)
|
||||
{
|
||||
#if EIGEN_HAS_CXX11_MATH
|
||||
using std::isinf;
|
||||
return isinf EIGEN_NOT_A_MACRO (x);
|
||||
#else
|
||||
return x>NumTraits<T>::highest() || x<NumTraits<T>::lowest();
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool (isfinite)(const std::complex<T>& x)
|
||||
{
|
||||
using std::real;
|
||||
using std::imag;
|
||||
return isfinite(real(x)) && isfinite(imag(x));
|
||||
return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool (isnan)(const std::complex<T>& x)
|
||||
{
|
||||
return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool (isinf)(const std::complex<T>& x)
|
||||
{
|
||||
return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x));
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
T (floor)(const T& x)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(floor);
|
||||
return floor(x);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
T (ceil)(const T& x)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(ceil);
|
||||
return ceil(x);
|
||||
}
|
||||
|
||||
// Log base 2 for 32 bits positive integers.
|
||||
@@ -726,14 +922,14 @@ struct scalar_fuzzy_default_impl<Scalar, false, false>
|
||||
template<typename OtherScalar> EIGEN_DEVICE_FUNC
|
||||
static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
|
||||
{
|
||||
using std::abs;
|
||||
EIGEN_USING_STD_MATH(abs);
|
||||
return abs(x) <= abs(y) * prec;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
using std::abs;
|
||||
EIGEN_USING_STD_MATH(abs);
|
||||
return abs(x - y) <= (min)(abs(x), abs(y)) * prec;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
|
||||
@@ -24,13 +24,13 @@ namespace Eigen {
|
||||
* The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note").
|
||||
*
|
||||
* The first three template parameters are required:
|
||||
* \tparam _Scalar \anchor matrix_tparam_scalar Numeric type, e.g. float, double, int or std::complex<float>.
|
||||
* User defined sclar types are supported as well (see \ref user_defined_scalars "here").
|
||||
* \tparam _Scalar Numeric type, e.g. float, double, int or std::complex<float>.
|
||||
* User defined scalar types are supported as well (see \ref user_defined_scalars "here").
|
||||
* \tparam _Rows Number of rows, or \b Dynamic
|
||||
* \tparam _Cols Number of columns, or \b Dynamic
|
||||
*
|
||||
* The remaining template parameters are optional -- in most cases you don't have to worry about them.
|
||||
* \tparam _Options \anchor matrix_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either
|
||||
* \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of either
|
||||
* \b #AutoAlign or \b #DontAlign.
|
||||
* The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required
|
||||
* for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size.
|
||||
@@ -97,6 +97,40 @@ namespace Eigen {
|
||||
* are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.</dd>
|
||||
* </dl>
|
||||
*
|
||||
* <i><b>ABI and storage layout</b></i>
|
||||
*
|
||||
* The table below summarizes the ABI of some possible Matrix instances which is fixed thorough the lifetime of Eigen 3.
|
||||
* <table class="manual">
|
||||
* <tr><th>Matrix type</th><th>Equivalent C structure</th></tr>
|
||||
* <tr><td>\code Matrix<T,Dynamic,Dynamic> \endcode</td><td>\code
|
||||
* struct {
|
||||
* T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0
|
||||
* Eigen::Index rows, cols;
|
||||
* };
|
||||
* \endcode</td></tr>
|
||||
* <tr class="alt"><td>\code
|
||||
* Matrix<T,Dynamic,1>
|
||||
* Matrix<T,1,Dynamic> \endcode</td><td>\code
|
||||
* struct {
|
||||
* T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0
|
||||
* Eigen::Index size;
|
||||
* };
|
||||
* \endcode</td></tr>
|
||||
* <tr><td>\code Matrix<T,Rows,Cols> \endcode</td><td>\code
|
||||
* struct {
|
||||
* T data[Rows*Cols]; // with (size_t(data)%A(Rows*Cols*sizeof(T)))==0
|
||||
* };
|
||||
* \endcode</td></tr>
|
||||
* <tr class="alt"><td>\code Matrix<T,Dynamic,Dynamic,0,MaxRows,MaxCols> \endcode</td><td>\code
|
||||
* struct {
|
||||
* T data[MaxRows*MaxCols]; // with (size_t(data)%A(MaxRows*MaxCols*sizeof(T)))==0
|
||||
* Eigen::Index rows, cols;
|
||||
* };
|
||||
* \endcode</td></tr>
|
||||
* </table>
|
||||
* Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined to the largest possible power-of-two
|
||||
* smaller to EIGEN_MAX_STATIC_ALIGN_BYTES.
|
||||
*
|
||||
* \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
|
||||
* \ref TopicStorageOrders
|
||||
*/
|
||||
@@ -105,6 +139,20 @@ namespace internal {
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
{
|
||||
private:
|
||||
enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
|
||||
typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
|
||||
enum {
|
||||
row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
|
||||
is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
|
||||
max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
|
||||
default_alignment = compute_default_alignment<_Scalar,max_size>::value,
|
||||
actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
|
||||
required_alignment = unpacket_traits<PacketScalar>::alignment,
|
||||
packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
|
||||
};
|
||||
|
||||
public:
|
||||
typedef _Scalar Scalar;
|
||||
typedef Dense StorageKind;
|
||||
typedef Eigen::Index StorageIndex;
|
||||
@@ -115,11 +163,13 @@ struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
MaxRowsAtCompileTime = _MaxRows,
|
||||
MaxColsAtCompileTime = _MaxCols,
|
||||
Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
|
||||
// FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
|
||||
EvaluatorFlags = compute_matrix_evaluator_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
|
||||
Options = _Options,
|
||||
InnerStrideAtCompileTime = 1,
|
||||
OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime
|
||||
OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
|
||||
|
||||
// FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
|
||||
EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
|
||||
Alignment = actual_alignment
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -170,7 +220,7 @@ class Matrix
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix& operator=(const MatrixBase<OtherDerived>& other)
|
||||
EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
return Base::_set(other);
|
||||
}
|
||||
@@ -219,6 +269,7 @@ class Matrix
|
||||
{ Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
|
||||
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
Matrix(Matrix&& other)
|
||||
: Base(std::move(other))
|
||||
{
|
||||
@@ -226,6 +277,7 @@ class Matrix
|
||||
if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
|
||||
Base::_set_noalias(other);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
Matrix& operator=(Matrix&& other)
|
||||
{
|
||||
other.swap(*this);
|
||||
@@ -264,8 +316,8 @@ class Matrix
|
||||
*
|
||||
* \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance,
|
||||
* calling Matrix<double,1,1>(1) will call the initialization constructor: Matrix(const Scalar&).
|
||||
* For fixed-size \c 1x1 matrices it is thefore recommended to use the default
|
||||
* constructor Matrix() instead, especilly when using one of the non standard
|
||||
* For fixed-size \c 1x1 matrices it is therefore recommended to use the default
|
||||
* constructor Matrix() instead, especially when using one of the non standard
|
||||
* \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
|
||||
*/
|
||||
EIGEN_STRONG_INLINE explicit Matrix(Index dim);
|
||||
@@ -279,8 +331,8 @@ class Matrix
|
||||
*
|
||||
* \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance,
|
||||
* calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y).
|
||||
* For fixed-size \c 1x2 or \c 2x1 vectors it is thefore recommended to use the default
|
||||
* constructor Matrix() instead, especilly when using one of the non standard
|
||||
* For fixed-size \c 1x2 or \c 2x1 vectors it is therefore recommended to use the default
|
||||
* constructor Matrix() instead, especially when using one of the non standard
|
||||
* \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -313,37 +365,10 @@ class Matrix
|
||||
}
|
||||
|
||||
|
||||
/** \brief Constructor copying the value of the expression \a other */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const MatrixBase<OtherDerived>& other)
|
||||
: Base(other.rows() * other.cols(), other.rows(), other.cols())
|
||||
{
|
||||
// This test resides here, to bring the error messages closer to the user. Normally, these checks
|
||||
// are performed deeply within the library, thus causing long and scary error traces.
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
|
||||
Base::_check_template_params();
|
||||
Base::_set_noalias(other);
|
||||
}
|
||||
/** \brief Copy constructor */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const Matrix& other)
|
||||
: Base(other.rows() * other.cols(), other.rows(), other.cols())
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::_set_noalias(other);
|
||||
}
|
||||
/** \brief Copy constructor with in-place evaluation */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const ReturnByValue<OtherDerived>& other)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::resize(other.rows(), other.cols());
|
||||
other.evalTo(*this);
|
||||
}
|
||||
EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other)
|
||||
{ }
|
||||
|
||||
/** \brief Copy constructor for generic expressions.
|
||||
* \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
|
||||
@@ -351,14 +376,8 @@ class Matrix
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
|
||||
: Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::_resize_to_match(other);
|
||||
// FIXME/CHECK: isn't *this = other.derived() more efficient. it allows to
|
||||
// go for pure _set() implementations, right?
|
||||
*this = other;
|
||||
}
|
||||
: Base(other.derived())
|
||||
{ }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
|
||||
|
||||
@@ -81,6 +81,7 @@ template<typename Derived> class MatrixBase
|
||||
using Base::operator*=;
|
||||
using Base::operator/=;
|
||||
using Base::operator*;
|
||||
using Base::operator/;
|
||||
|
||||
typedef typename Base::CoeffReturnType CoeffReturnType;
|
||||
typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
|
||||
@@ -101,23 +102,11 @@ template<typename Derived> class MatrixBase
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index diagonalSize() const { return (std::min)(rows(),cols()); }
|
||||
|
||||
/** \brief The plain matrix type corresponding to this expression.
|
||||
*
|
||||
* This is not necessarily exactly the return type of eval(). In the case of plain matrices,
|
||||
* the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
|
||||
* that the return type of eval() is either PlainObject or const PlainObject&.
|
||||
*/
|
||||
typedef Matrix<typename internal::traits<Derived>::Scalar,
|
||||
internal::traits<Derived>::RowsAtCompileTime,
|
||||
internal::traits<Derived>::ColsAtCompileTime,
|
||||
AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
|
||||
internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
internal::traits<Derived>::MaxColsAtCompileTime
|
||||
> PlainObject;
|
||||
typedef typename Base::PlainObject PlainObject;
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \internal Represents a matrix with all coefficients equal to one another*/
|
||||
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
|
||||
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
|
||||
/** \internal the return type of MatrixBase::adjoint() */
|
||||
typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
|
||||
CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
|
||||
@@ -126,7 +115,7 @@ template<typename Derived> class MatrixBase
|
||||
/** \internal Return type of eigenvalues() */
|
||||
typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType;
|
||||
/** \internal the return type of identity */
|
||||
typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,Derived> IdentityReturnType;
|
||||
typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,PlainObject> IdentityReturnType;
|
||||
/** \internal the return type of unit vectors */
|
||||
typedef Block<const CwiseNullaryOp<internal::scalar_identity_op<Scalar>, SquareMatrixType>,
|
||||
internal::traits<Derived>::RowsAtCompileTime,
|
||||
@@ -164,12 +153,6 @@ template<typename Derived> class MatrixBase
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const ReturnByValue<OtherDerived>& other);
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& lazyAssign(const ProductBase<ProductDerived, Lhs,Rhs>& other);
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator+=(const MatrixBase<OtherDerived>& other);
|
||||
@@ -329,7 +312,7 @@ template<typename Derived> class MatrixBase
|
||||
template<bool Enable> inline const Derived& forceAlignedAccessIf() const { return derived(); }
|
||||
template<bool Enable> inline Derived& forceAlignedAccessIf() { return derived(); }
|
||||
|
||||
Scalar trace() const;
|
||||
EIGEN_DEVICE_FUNC Scalar trace() const;
|
||||
|
||||
template<int p> EIGEN_DEVICE_FUNC RealScalar lpNorm() const;
|
||||
|
||||
@@ -412,7 +395,8 @@ template<typename Derived> class MatrixBase
|
||||
|
||||
ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
|
||||
// put this as separate enum value to work around possible GCC 4.3 bug (?)
|
||||
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1?Vertical:Horizontal };
|
||||
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
|
||||
: ColsAtCompileTime==1 ? Vertical : Horizontal };
|
||||
typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
|
||||
HomogeneousReturnType homogeneous() const;
|
||||
|
||||
|
||||
@@ -83,8 +83,25 @@ template<typename T> struct GenericNumTraits
|
||||
// make sure to override this for floating-point types
|
||||
return Real(0);
|
||||
}
|
||||
static inline T highest() { return (std::numeric_limits<T>::max)(); }
|
||||
static inline T lowest() { return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)()); }
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline T highest() {
|
||||
#if defined(__CUDA_ARCH__)
|
||||
return (internal::device::numeric_limits<T>::max)();
|
||||
#else
|
||||
return (std::numeric_limits<T>::max)();
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline T lowest() {
|
||||
#if defined(__CUDA_ARCH__)
|
||||
return IsInteger ? (internal::device::numeric_limits<T>::min)() : (-(internal::device::numeric_limits<T>::max)());
|
||||
#else
|
||||
return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)());
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T> struct NumTraits : GenericNumTraits<T>
|
||||
|
||||
@@ -42,10 +42,6 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false>
|
||||
struct permut_matrix_product_retval;
|
||||
template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false>
|
||||
struct permut_sparsematrix_product_retval;
|
||||
enum PermPermProduct_t {PermPermProduct};
|
||||
|
||||
} // end namespace internal
|
||||
@@ -353,7 +349,7 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
|
||||
* array's size.
|
||||
*/
|
||||
template<typename Other>
|
||||
explicit inline PermutationMatrix(const MatrixBase<Other>& a_indices) : m_indices(a_indices)
|
||||
explicit inline PermutationMatrix(const MatrixBase<Other>& indices) : m_indices(indices)
|
||||
{}
|
||||
|
||||
/** Convert the Transpositions \a tr to a permutation matrix */
|
||||
@@ -401,12 +397,12 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename Other>
|
||||
PermutationMatrix(const Transpose<PermutationBase<Other> >& other)
|
||||
: m_indices(other.nestedPermutation().size())
|
||||
: m_indices(other.nestedExpression().size())
|
||||
{
|
||||
eigen_internal_assert(m_indices.size() <= NumTraits<StorageIndex>::highest());
|
||||
StorageIndex end = StorageIndex(m_indices.size());
|
||||
for (StorageIndex i=0; i<end;++i)
|
||||
m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i;
|
||||
m_indices.coeffRef(other.nestedExpression().indices().coeff(i)) = i;
|
||||
}
|
||||
template<typename Lhs,typename Rhs>
|
||||
PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs)
|
||||
@@ -527,8 +523,8 @@ class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesTyp
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
#endif
|
||||
|
||||
inline PermutationWrapper(const IndicesType& a_indices)
|
||||
: m_indices(a_indices)
|
||||
inline PermutationWrapper(const IndicesType& indices)
|
||||
: m_indices(indices)
|
||||
{}
|
||||
|
||||
/** const version of indices(). */
|
||||
@@ -541,18 +537,15 @@ class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesTyp
|
||||
};
|
||||
|
||||
|
||||
// TODO: Do we need to define these operator* functions? Would it be better to have them inherited
|
||||
// from MatrixBase?
|
||||
|
||||
/** \returns the matrix with the permutation applied to the columns.
|
||||
*/
|
||||
template<typename MatrixDerived, typename PermutationDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<MatrixDerived, PermutationDerived, DefaultProduct>
|
||||
const Product<MatrixDerived, PermutationDerived, AliasFreeProduct>
|
||||
operator*(const MatrixBase<MatrixDerived> &matrix,
|
||||
const PermutationBase<PermutationDerived>& permutation)
|
||||
{
|
||||
return Product<MatrixDerived, PermutationDerived, DefaultProduct>
|
||||
return Product<MatrixDerived, PermutationDerived, AliasFreeProduct>
|
||||
(matrix.derived(), permutation.derived());
|
||||
}
|
||||
|
||||
@@ -560,89 +553,16 @@ operator*(const MatrixBase<MatrixDerived> &matrix,
|
||||
*/
|
||||
template<typename PermutationDerived, typename MatrixDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<PermutationDerived, MatrixDerived, DefaultProduct>
|
||||
const Product<PermutationDerived, MatrixDerived, AliasFreeProduct>
|
||||
operator*(const PermutationBase<PermutationDerived> &permutation,
|
||||
const MatrixBase<MatrixDerived>& matrix)
|
||||
{
|
||||
return Product<PermutationDerived, MatrixDerived, DefaultProduct>
|
||||
return Product<PermutationDerived, MatrixDerived, AliasFreeProduct>
|
||||
(permutation.derived(), matrix.derived());
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
|
||||
struct traits<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
|
||||
: traits<typename MatrixType::PlainObject>
|
||||
{
|
||||
typedef typename MatrixType::PlainObject ReturnType;
|
||||
};
|
||||
|
||||
template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
|
||||
struct permut_matrix_product_retval
|
||||
: public ReturnByValue<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
|
||||
{
|
||||
typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
|
||||
typedef typename MatrixType::StorageIndex StorageIndex;
|
||||
|
||||
permut_matrix_product_retval(const PermutationType& perm, const MatrixType& matrix)
|
||||
: m_permutation(perm), m_matrix(matrix)
|
||||
{}
|
||||
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
|
||||
template<typename Dest> inline void evalTo(Dest& dst) const
|
||||
{
|
||||
const Index n = Side==OnTheLeft ? rows() : cols();
|
||||
// FIXME we need an is_same for expression that is not sensitive to constness. For instance
|
||||
// is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
|
||||
if(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix))
|
||||
{
|
||||
// apply the permutation inplace
|
||||
Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
|
||||
mask.fill(false);
|
||||
Index r = 0;
|
||||
while(r < m_permutation.size())
|
||||
{
|
||||
// search for the next seed
|
||||
while(r<m_permutation.size() && mask[r]) r++;
|
||||
if(r>=m_permutation.size())
|
||||
break;
|
||||
// we got one, let's follow it until we are back to the seed
|
||||
Index k0 = r++;
|
||||
Index kPrev = k0;
|
||||
mask.coeffRef(k0) = true;
|
||||
for(Index k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k))
|
||||
{
|
||||
Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
|
||||
.swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
|
||||
(dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
|
||||
|
||||
mask.coeffRef(k) = true;
|
||||
kPrev = k;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(Index i = 0; i < n; ++i)
|
||||
{
|
||||
Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
|
||||
(dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i)
|
||||
|
||||
=
|
||||
|
||||
Block<const MatrixTypeNestedCleaned,Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime>
|
||||
(m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
const PermutationType& m_permutation;
|
||||
typename MatrixType::Nested m_matrix;
|
||||
};
|
||||
|
||||
/* Template partial specialization for transposed/inverse permutations */
|
||||
|
||||
template<typename Derived>
|
||||
@@ -700,22 +620,22 @@ class Transpose<PermutationBase<Derived> >
|
||||
/** \returns the matrix with the inverse permutation applied to the columns.
|
||||
*/
|
||||
template<typename OtherDerived> friend
|
||||
const Product<OtherDerived, Transpose, DefaultProduct>
|
||||
const Product<OtherDerived, Transpose, AliasFreeProduct>
|
||||
operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trPerm)
|
||||
{
|
||||
return Product<OtherDerived, Transpose, DefaultProduct>(matrix.derived(), trPerm.derived());
|
||||
return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trPerm.derived());
|
||||
}
|
||||
|
||||
/** \returns the matrix with the inverse permutation applied to the rows.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
const Product<Transpose, OtherDerived, DefaultProduct>
|
||||
const Product<Transpose, OtherDerived, AliasFreeProduct>
|
||||
operator*(const MatrixBase<OtherDerived>& matrix) const
|
||||
{
|
||||
return Product<Transpose, OtherDerived, DefaultProduct>(*this, matrix.derived());
|
||||
return Product<Transpose, OtherDerived, AliasFreeProduct>(*this, matrix.derived());
|
||||
}
|
||||
|
||||
const PermutationType& nestedPermutation() const { return m_permutation; }
|
||||
const PermutationType& nestedExpression() const { return m_permutation; }
|
||||
|
||||
protected:
|
||||
const PermutationType& m_permutation;
|
||||
@@ -728,32 +648,6 @@ const PermutationWrapper<const Derived> MatrixBase<Derived>::asPermutation() con
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
// TODO currently a permutation matrix expression has the form PermutationMatrix or PermutationWrapper
|
||||
// or their transpose; in the future shape should be defined by the expression traits
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
|
||||
struct evaluator_traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
|
||||
{
|
||||
typedef typename storage_kind_to_evaluator_kind<Dense>::Kind Kind;
|
||||
typedef PermutationShape Shape;
|
||||
static const int AssumeAliasing = 0;
|
||||
};
|
||||
|
||||
template<typename IndicesType>
|
||||
struct evaluator_traits<PermutationWrapper<IndicesType> >
|
||||
{
|
||||
typedef typename storage_kind_to_evaluator_kind<Dense>::Kind Kind;
|
||||
typedef PermutationShape Shape;
|
||||
static const int AssumeAliasing = 0;
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct evaluator_traits<Transpose<PermutationBase<Derived> > >
|
||||
{
|
||||
typedef typename storage_kind_to_evaluator_kind<Dense>::Kind Kind;
|
||||
typedef PermutationShape Shape;
|
||||
static const int AssumeAliasing = 0;
|
||||
};
|
||||
|
||||
template<> struct AssignmentKind<DenseShape,PermutationShape> { typedef EigenBase2EigenBase Kind; };
|
||||
|
||||
|
||||
@@ -69,8 +69,9 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
namespace internal {
|
||||
|
||||
// this is a warkaround to doxygen not being able to understand the inheritence logic
|
||||
// this is a workaround to doxygen not being able to understand the inheritance logic
|
||||
// when it is hidden by the dense_xpr_base helper struct.
|
||||
/** This class is just a workaround for Doxygen and it does not not actually exist. */
|
||||
template<typename Derived> struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase<Derived> {};
|
||||
/** This class is just a workaround for Doxygen and it does not not actually exist. */
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
@@ -96,6 +97,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
|
||||
typedef typename internal::traits<Derived>::StorageKind StorageKind;
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
|
||||
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
typedef Derived DenseType;
|
||||
@@ -114,20 +116,23 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
typedef Eigen::Map<Derived, Unaligned> MapType;
|
||||
friend class Eigen::Map<const Derived, Unaligned>;
|
||||
typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;
|
||||
friend class Eigen::Map<Derived, Aligned>;
|
||||
typedef Eigen::Map<Derived, Aligned> AlignedMapType;
|
||||
friend class Eigen::Map<const Derived, Aligned>;
|
||||
typedef const Eigen::Map<const Derived, Aligned> ConstAlignedMapType;
|
||||
#if EIGEN_MAX_ALIGN_BYTES>0
|
||||
// for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice.
|
||||
friend class Eigen::Map<Derived, AlignedMax>;
|
||||
friend class Eigen::Map<const Derived, AlignedMax>;
|
||||
#endif
|
||||
typedef Eigen::Map<Derived, AlignedMax> AlignedMapType;
|
||||
typedef const Eigen::Map<const Derived, AlignedMax> ConstAlignedMapType;
|
||||
template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };
|
||||
template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
|
||||
template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, Aligned, StrideType> type; };
|
||||
template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, Aligned, StrideType> type; };
|
||||
template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, AlignedMax, StrideType> type; };
|
||||
template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, AlignedMax, StrideType> type; };
|
||||
|
||||
protected:
|
||||
DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
|
||||
|
||||
public:
|
||||
enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::EvaluatorFlags & AlignedBit) != 0 };
|
||||
enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits<Derived>::Alignment>0) };
|
||||
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -244,22 +249,22 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void resize(Index nbRows, Index nbCols)
|
||||
EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
|
||||
{
|
||||
eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,nbRows==RowsAtCompileTime)
|
||||
&& EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,nbCols==ColsAtCompileTime)
|
||||
&& EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,nbRows<=MaxRowsAtCompileTime)
|
||||
&& EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,nbCols<=MaxColsAtCompileTime)
|
||||
&& nbRows>=0 && nbCols>=0 && "Invalid sizes when resizing a matrix or array.");
|
||||
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(nbRows, nbCols);
|
||||
eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,rows==RowsAtCompileTime)
|
||||
&& EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,cols==ColsAtCompileTime)
|
||||
&& EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,rows<=MaxRowsAtCompileTime)
|
||||
&& EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,cols<=MaxColsAtCompileTime)
|
||||
&& rows>=0 && cols>=0 && "Invalid sizes when resizing a matrix or array.");
|
||||
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(rows, cols);
|
||||
#ifdef EIGEN_INITIALIZE_COEFFS
|
||||
Index size = nbRows*nbCols;
|
||||
Index size = rows*cols;
|
||||
bool size_changed = size != this->size();
|
||||
m_storage.resize(size, nbRows, nbCols);
|
||||
m_storage.resize(size, rows, cols);
|
||||
if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
|
||||
#else
|
||||
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(nbRows, nbCols);
|
||||
m_storage.resize(nbRows*nbCols, nbRows, nbCols);
|
||||
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(rows, cols);
|
||||
m_storage.resize(rows*cols, rows, cols);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -300,9 +305,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* \sa resize(Index,Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void resize(NoChange_t, Index nbCols)
|
||||
inline void resize(NoChange_t, Index cols)
|
||||
{
|
||||
resize(rows(), nbCols);
|
||||
resize(rows(), cols);
|
||||
}
|
||||
|
||||
/** Resizes the matrix, changing only the number of rows. For the parameter of type NoChange_t, just pass the special value \c NoChange
|
||||
@@ -314,9 +319,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* \sa resize(Index,Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void resize(Index nbRows, NoChange_t)
|
||||
inline void resize(Index rows, NoChange_t)
|
||||
{
|
||||
resize(nbRows, cols());
|
||||
resize(rows, cols());
|
||||
}
|
||||
|
||||
/** Resizes \c *this to have the same dimensions as \a other.
|
||||
@@ -356,9 +361,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* appended to the matrix they will be uninitialized.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, Index nbCols)
|
||||
EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols)
|
||||
{
|
||||
internal::conservative_resize_like_impl<Derived>::run(*this, nbRows, nbCols);
|
||||
internal::conservative_resize_like_impl<Derived>::run(*this, rows, cols);
|
||||
}
|
||||
|
||||
/** Resizes the matrix to \a rows x \a cols while leaving old values untouched.
|
||||
@@ -369,10 +374,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* In case the matrix is growing, new rows will be uninitialized.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, NoChange_t)
|
||||
EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t)
|
||||
{
|
||||
// Note: see the comment in conservativeResize(Index,Index)
|
||||
conservativeResize(nbRows, cols());
|
||||
conservativeResize(rows, cols());
|
||||
}
|
||||
|
||||
/** Resizes the matrix to \a rows x \a cols while leaving old values untouched.
|
||||
@@ -383,10 +388,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* In case the matrix is growing, new columns will be uninitialized.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index nbCols)
|
||||
EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols)
|
||||
{
|
||||
// Note: see the comment in conservativeResize(Index,Index)
|
||||
conservativeResize(rows(), nbCols);
|
||||
conservativeResize(rows(), cols);
|
||||
}
|
||||
|
||||
/** Resizes the vector to \a size while retaining old values.
|
||||
@@ -479,9 +484,13 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Copy constructor */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE PlainObjectBase(Index a_size, Index nbRows, Index nbCols)
|
||||
: m_storage(a_size, nbRows, nbCols)
|
||||
EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other)
|
||||
: Base(), m_storage(other.m_storage) { }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)
|
||||
: m_storage(size, rows, cols)
|
||||
{
|
||||
// _check_template_params();
|
||||
// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
|
||||
@@ -498,15 +507,36 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
return this->derived();
|
||||
}
|
||||
|
||||
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
|
||||
/** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived> &other)
|
||||
: m_storage()
|
||||
{
|
||||
_check_template_params();
|
||||
resizeLike(other);
|
||||
_set_noalias(other);
|
||||
}
|
||||
|
||||
/** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
|
||||
: m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
|
||||
: m_storage()
|
||||
{
|
||||
_check_template_params();
|
||||
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(other.derived().rows(), other.derived().cols());
|
||||
Base::operator=(other.derived());
|
||||
resizeLike(other);
|
||||
*this = other.derived();
|
||||
}
|
||||
/** \brief Copy constructor with in-place evaluation */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue<OtherDerived>& other)
|
||||
{
|
||||
_check_template_params();
|
||||
// FIXME this does not automatically transpose vectors if necessary
|
||||
resize(other.rows(), other.cols());
|
||||
other.evalTo(this->derived());
|
||||
}
|
||||
|
||||
/** \name Map
|
||||
@@ -668,12 +698,12 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
|
||||
template<typename T0, typename T1>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init2(Index nbRows, Index nbCols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
|
||||
EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
|
||||
bool(NumTraits<T1>::IsInteger),
|
||||
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
|
||||
resize(nbRows,nbCols);
|
||||
resize(rows,cols);
|
||||
}
|
||||
|
||||
template<typename T0, typename T1>
|
||||
|
||||
@@ -25,7 +25,7 @@ template<typename Lhs, typename Rhs, int Option, typename StorageKind> class Pro
|
||||
* This class represents an expression of the product of two arbitrary matrices.
|
||||
*
|
||||
* The other template parameters are:
|
||||
* \tparam Option can be DefaultProduct or LazyProduct
|
||||
* \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct
|
||||
*
|
||||
*/
|
||||
|
||||
@@ -53,6 +53,18 @@ template<typename Lhs, typename Rhs, typename LhsShape>
|
||||
typedef typename Lhs::Scalar Scalar;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename RhsShape>
|
||||
struct product_result_scalar<Lhs, Rhs, TranspositionsShape, RhsShape>
|
||||
{
|
||||
typedef typename Rhs::Scalar Scalar;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename LhsShape>
|
||||
struct product_result_scalar<Lhs, Rhs, LhsShape, TranspositionsShape>
|
||||
{
|
||||
typedef typename Lhs::Scalar Scalar;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int Option>
|
||||
struct traits<Product<Lhs, Rhs, Option> >
|
||||
{
|
||||
@@ -80,10 +92,11 @@ struct traits<Product<Lhs, Rhs, Option> >
|
||||
InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime),
|
||||
|
||||
// The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator.
|
||||
Flags = ( (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1)
|
||||
|| ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit))
|
||||
|| ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) )
|
||||
? RowMajorBit : (MaxColsAtCompileTime==1 ? 0 : NoPreferredStorageOrderBit)
|
||||
Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit
|
||||
: (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
|
||||
: ( ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit))
|
||||
|| ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) ? RowMajorBit
|
||||
: NoPreferredStorageOrderBit
|
||||
};
|
||||
};
|
||||
|
||||
@@ -108,8 +121,8 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option,
|
||||
internal::product_type<Lhs,Rhs>::ret>::ret>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
|
||||
|
||||
typedef typename internal::nested<Lhs>::type LhsNested;
|
||||
typedef typename internal::nested<Rhs>::type RhsNested;
|
||||
typedef typename internal::ref_selector<Lhs>::type LhsNested;
|
||||
typedef typename internal::ref_selector<Rhs>::type RhsNested;
|
||||
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
|
||||
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
|
||||
|
||||
@@ -152,7 +165,7 @@ public:
|
||||
|
||||
operator const Scalar() const
|
||||
{
|
||||
return typename internal::evaluator<ProductXpr>::type(derived()).coeff(0,0);
|
||||
return internal::evaluator<ProductXpr>(derived()).coeff(0,0);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -190,7 +203,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
|
||||
EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
|
||||
eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
|
||||
|
||||
return typename internal::evaluator<Derived>::type(derived()).coeff(row,col);
|
||||
return internal::evaluator<Derived>(derived()).coeff(row,col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar coeff(Index i) const
|
||||
@@ -198,7 +211,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
|
||||
EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
|
||||
eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
|
||||
|
||||
return typename internal::evaluator<Derived>::type(derived()).coeff(i);
|
||||
return internal::evaluator<Derived>(derived()).coeff(i);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_PRODUCTBASE_H
|
||||
#define EIGEN_PRODUCTBASE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \internal
|
||||
* Overloaded to perform an efficient C = (A*B).lazy() */
|
||||
template<typename Derived>
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
Derived& MatrixBase<Derived>::lazyAssign(const ProductBase<ProductDerived, Lhs,Rhs>& other)
|
||||
{
|
||||
other.derived().evalTo(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PRODUCTBASE_H
|
||||
374
Eigen/src/Core/ProductEvaluators.h
Normal file → Executable file
374
Eigen/src/Core/ProductEvaluators.h
Normal file → Executable file
@@ -32,9 +32,6 @@ struct evaluator<Product<Lhs, Rhs, Options> >
|
||||
typedef Product<Lhs, Rhs, Options> XprType;
|
||||
typedef product_evaluator<XprType> Base;
|
||||
|
||||
typedef evaluator type;
|
||||
typedef evaluator nestedType;
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
|
||||
};
|
||||
|
||||
@@ -47,9 +44,6 @@ struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Produ
|
||||
typedef CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > XprType;
|
||||
typedef evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> > Base;
|
||||
|
||||
typedef evaluator type;
|
||||
typedef evaluator nestedType;
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
|
||||
: Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs())
|
||||
{}
|
||||
@@ -63,9 +57,6 @@ struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
|
||||
typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
|
||||
typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
|
||||
|
||||
typedef evaluator type;
|
||||
typedef evaluator nestedType;
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
|
||||
: Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
|
||||
Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
|
||||
@@ -90,18 +81,25 @@ struct evaluator_traits<Product<Lhs, Rhs, DefaultProduct> >
|
||||
enum { AssumeAliasing = 1 };
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct evaluator_traits<Product<Lhs, Rhs, AliasFreeProduct> >
|
||||
: evaluator_traits_base<Product<Lhs, Rhs, AliasFreeProduct> >
|
||||
{
|
||||
enum { AssumeAliasing = 0 };
|
||||
};
|
||||
|
||||
// This is the default evaluator implementation for products:
|
||||
// It creates a temporary and call generic_product_impl
|
||||
template<typename Lhs, typename Rhs, int ProductTag, typename LhsShape, typename RhsShape>
|
||||
struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, LhsShape, RhsShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar>
|
||||
: public evaluator<typename Product<Lhs, Rhs, DefaultProduct>::PlainObject>::type
|
||||
template<typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
|
||||
struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar,
|
||||
EnableIf<(Options==DefaultProduct || Options==AliasFreeProduct)> >
|
||||
: public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject>
|
||||
{
|
||||
typedef Product<Lhs, Rhs, DefaultProduct> XprType;
|
||||
typedef Product<Lhs, Rhs, Options> XprType;
|
||||
typedef typename XprType::PlainObject PlainObject;
|
||||
typedef typename evaluator<PlainObject>::type Base;
|
||||
typedef evaluator<PlainObject> Base;
|
||||
enum {
|
||||
Flags = Base::Flags | EvalBeforeNestingBit
|
||||
// CoeffReadCost = 0 // FIXME why is it needed? (this was already the case before the evaluators, see traits<ProductBase>)
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
|
||||
@@ -109,7 +107,8 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, LhsShape
|
||||
{
|
||||
::new (static_cast<Base*>(this)) Base(m_result);
|
||||
|
||||
// FIXME shall we handle nested_eval here?
|
||||
// FIXME shall we handle nested_eval here?,
|
||||
// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
|
||||
// typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
|
||||
// typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
|
||||
// typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
|
||||
@@ -128,10 +127,11 @@ protected:
|
||||
};
|
||||
|
||||
// Dense = Product
|
||||
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
|
||||
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar>, Dense2Dense,
|
||||
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
|
||||
{
|
||||
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
||||
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
|
||||
{
|
||||
// FIXME shall we handle nested_eval here?
|
||||
@@ -140,10 +140,11 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_
|
||||
};
|
||||
|
||||
// Dense += Product
|
||||
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar>, Dense2Dense, Scalar>
|
||||
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar>, Dense2Dense,
|
||||
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
|
||||
{
|
||||
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
||||
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &)
|
||||
{
|
||||
// FIXME shall we handle nested_eval here?
|
||||
@@ -152,10 +153,11 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_ass
|
||||
};
|
||||
|
||||
// Dense -= Product
|
||||
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar>, Dense2Dense, Scalar>
|
||||
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar>, Dense2Dense,
|
||||
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
|
||||
{
|
||||
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
||||
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &)
|
||||
{
|
||||
// FIXME shall we handle nested_eval here?
|
||||
@@ -210,7 +212,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
|
||||
template<typename Dst, typename Lhs, typename Rhs, typename Func>
|
||||
EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
|
||||
{
|
||||
typename evaluator<Rhs>::type rhsEval(rhs);
|
||||
evaluator<Rhs> rhsEval(rhs);
|
||||
// FIXME make sure lhs is sequentially stored
|
||||
// FIXME not very good if rhs is real and lhs complex while alpha is real too
|
||||
// FIXME we should probably build an evaluator for dst
|
||||
@@ -223,7 +225,7 @@ EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, cons
|
||||
template<typename Dst, typename Lhs, typename Rhs, typename Func>
|
||||
EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
|
||||
{
|
||||
typename evaluator<Lhs>::type lhsEval(lhs);
|
||||
evaluator<Lhs> lhsEval(lhs);
|
||||
// FIXME make sure rhs is sequentially stored
|
||||
// FIXME not very good if lhs is real and rhs complex while alpha is real too
|
||||
// FIXME we should probably build an evaluator for dst
|
||||
@@ -395,8 +397,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
||||
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
|
||||
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
|
||||
|
||||
typedef typename evaluator<LhsNestedCleaned>::type LhsEtorType;
|
||||
typedef typename evaluator<RhsNestedCleaned>::type RhsEtorType;
|
||||
typedef evaluator<LhsNestedCleaned> LhsEtorType;
|
||||
typedef evaluator<RhsNestedCleaned> RhsEtorType;
|
||||
|
||||
enum {
|
||||
RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
|
||||
@@ -409,7 +411,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
||||
|
||||
LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
|
||||
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
|
||||
CoeffReadCost = (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits<Scalar>::AddCost==Dynamic || NumTraits<Scalar>::MulCost==Dynamic) ? Dynamic
|
||||
CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
|
||||
: (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits<Scalar>::AddCost==Dynamic || NumTraits<Scalar>::MulCost==Dynamic) ? Dynamic
|
||||
: InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
|
||||
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
|
||||
|
||||
@@ -418,24 +421,22 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
||||
LhsFlags = LhsEtorType::Flags,
|
||||
RhsFlags = RhsEtorType::Flags,
|
||||
|
||||
LhsAlignment = LhsEtorType::Alignment,
|
||||
RhsAlignment = RhsEtorType::Alignment,
|
||||
|
||||
LhsIsAligned = int(LhsAlignment) >= int(unpacket_traits<PacketScalar>::alignment),
|
||||
RhsIsAligned = int(RhsAlignment) >= int(unpacket_traits<PacketScalar>::alignment),
|
||||
|
||||
LhsRowMajor = LhsFlags & RowMajorBit,
|
||||
RhsRowMajor = RhsFlags & RowMajorBit,
|
||||
|
||||
SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
|
||||
|
||||
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
|
||||
&& (ColsAtCompileTime == Dynamic
|
||||
|| ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
|
||||
&& (RhsFlags&AlignedBit)
|
||||
)
|
||||
),
|
||||
&& (ColsAtCompileTime == Dynamic || ( (ColsAtCompileTime % PacketSize) == 0 && RhsIsAligned ) ),
|
||||
|
||||
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
|
||||
&& (RowsAtCompileTime == Dynamic
|
||||
|| ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
|
||||
&& (LhsFlags&AlignedBit)
|
||||
)
|
||||
),
|
||||
&& (RowsAtCompileTime == Dynamic || ( (RowsAtCompileTime % PacketSize) == 0 && LhsIsAligned ) ),
|
||||
|
||||
EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
|
||||
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
|
||||
@@ -443,11 +444,13 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
||||
|
||||
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
|
||||
| (EvalToRowMajor ? RowMajorBit : 0)
|
||||
| (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0)
|
||||
| (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0)
|
||||
// TODO enable vectorization for mixed types
|
||||
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
|
||||
|
||||
Alignment = CanVectorizeLhs ? LhsAlignment
|
||||
: CanVectorizeRhs ? RhsAlignment
|
||||
: 0,
|
||||
|
||||
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
|
||||
* of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
|
||||
* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
|
||||
@@ -457,7 +460,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
||||
&& LhsRowMajor
|
||||
&& (!RhsRowMajor)
|
||||
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
|
||||
&& (LhsFlags & RhsFlags & AlignedBit)
|
||||
&& (LhsIsAligned && RhsIsAligned)
|
||||
&& (InnerSize % packet_traits<Scalar>::size == 0)
|
||||
};
|
||||
|
||||
@@ -479,13 +482,13 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
||||
return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
const PacketReturnType packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
const PacketType packet(Index row, Index col) const
|
||||
{
|
||||
PacketScalar res;
|
||||
PacketType res;
|
||||
typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
|
||||
Unroll ? InnerSize-1 : Dynamic,
|
||||
LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl;
|
||||
Unroll ? InnerSize : Dynamic,
|
||||
LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl;
|
||||
|
||||
PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
|
||||
return res;
|
||||
@@ -527,7 +530,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
||||
{
|
||||
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode,Packet>(UnrollingIndex-1, col), res);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -537,25 +540,43 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
||||
{
|
||||
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
||||
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
|
||||
res = pmadd(lhs.template packet<LoadMode,Packet>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode,Packet>(0, col));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pmul(lhs.template packet<LoadMode,Packet>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
|
||||
res = pset1<Packet>(0);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
|
||||
res = pset1<Packet>(0);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -564,10 +585,9 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
||||
{
|
||||
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
|
||||
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
|
||||
for(Index i = 1; i < innerDim; ++i)
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
|
||||
res = pset1<Packet>(0);
|
||||
for(Index i = 0; i < innerDim; ++i)
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -576,10 +596,9 @@ struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
||||
{
|
||||
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
|
||||
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
|
||||
for(Index i = 1; i < innerDim; ++i)
|
||||
res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
|
||||
res = pset1<Packet>(0);
|
||||
for(Index i = 0; i < innerDim; ++i)
|
||||
res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -663,7 +682,6 @@ struct diagonal_product_evaluator_base
|
||||
: evaluator_base<Derived>
|
||||
{
|
||||
typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
|
||||
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
|
||||
public:
|
||||
enum {
|
||||
CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
|
||||
@@ -678,8 +696,8 @@ public:
|
||||
//_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
|
||||
_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
|
||||
_LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
|
||||
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit
|
||||
//(int(MatrixFlags)&int(DiagFlags)&AlignedBit),
|
||||
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
|
||||
Alignment = evaluator<MatrixType>::Alignment
|
||||
};
|
||||
|
||||
diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
|
||||
@@ -693,26 +711,26 @@ public:
|
||||
}
|
||||
|
||||
protected:
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const
|
||||
template<int LoadMode,typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
|
||||
{
|
||||
return internal::pmul(m_matImpl.template packet<LoadMode>(row, col),
|
||||
internal::pset1<PacketScalar>(m_diagImpl.coeff(id)));
|
||||
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
|
||||
internal::pset1<PacketType>(m_diagImpl.coeff(id)));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const
|
||||
template<int LoadMode,typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
|
||||
{
|
||||
enum {
|
||||
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
|
||||
DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagFlags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned)
|
||||
DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
|
||||
};
|
||||
return internal::pmul(m_matImpl.template packet<LoadMode>(row, col),
|
||||
m_diagImpl.template packet<DiagonalPacketLoadMode>(id));
|
||||
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
|
||||
m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
|
||||
}
|
||||
|
||||
typename evaluator<DiagonalType>::nestedType m_diagImpl;
|
||||
typename evaluator<MatrixType>::nestedType m_matImpl;
|
||||
evaluator<DiagonalType> m_diagImpl;
|
||||
evaluator<MatrixType> m_matImpl;
|
||||
};
|
||||
|
||||
// diagonal * dense
|
||||
@@ -724,9 +742,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
|
||||
using Base::m_diagImpl;
|
||||
using Base::m_matImpl;
|
||||
using Base::coeff;
|
||||
using Base::packet_impl;
|
||||
typedef typename Base::Scalar Scalar;
|
||||
typedef typename Base::PacketScalar PacketScalar;
|
||||
|
||||
typedef Product<Lhs, Rhs, ProductKind> XprType;
|
||||
typedef typename XprType::PlainObject PlainObject;
|
||||
@@ -746,18 +762,19 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
|
||||
}
|
||||
|
||||
#ifndef __CUDACC__
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
|
||||
template<int LoadMode,typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
|
||||
{
|
||||
// NVCC complains about template keyword, so we disable this function in CUDA mode
|
||||
return this->template packet_impl<LoadMode>(row,col, row,
|
||||
// FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
|
||||
// See also similar calls below.
|
||||
return this->template packet_impl<LoadMode,PacketType>(row,col, row,
|
||||
typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const
|
||||
template<int LoadMode,typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index idx) const
|
||||
{
|
||||
return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
|
||||
return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
@@ -771,9 +788,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
|
||||
using Base::m_diagImpl;
|
||||
using Base::m_matImpl;
|
||||
using Base::coeff;
|
||||
using Base::packet_impl;
|
||||
typedef typename Base::Scalar Scalar;
|
||||
typedef typename Base::PacketScalar PacketScalar;
|
||||
|
||||
typedef Product<Lhs, Rhs, ProductKind> XprType;
|
||||
typedef typename XprType::PlainObject PlainObject;
|
||||
@@ -791,17 +806,17 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
|
||||
}
|
||||
|
||||
#ifndef __CUDACC__
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
|
||||
template<int LoadMode,typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return this->template packet_impl<LoadMode>(row,col, col,
|
||||
return this->template packet_impl<LoadMode,PacketType>(row,col, col,
|
||||
typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const
|
||||
template<int LoadMode,typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index idx) const
|
||||
{
|
||||
return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
|
||||
return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
@@ -809,48 +824,187 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
|
||||
/***************************************************************************
|
||||
* Products with permutation matrices
|
||||
***************************************************************************/
|
||||
|
||||
template<typename Lhs, typename Rhs, int ProductTag>
|
||||
struct generic_product_impl<Lhs, Rhs, PermutationShape, DenseShape, ProductTag>
|
||||
|
||||
/** \internal
|
||||
* \class permutation_matrix_product
|
||||
* Internal helper class implementing the product between a permutation matrix and a matrix.
|
||||
* This class is specialized for DenseShape below and for SparseShape in SparseCore/SparsePermutation.h
|
||||
*/
|
||||
template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
|
||||
struct permutation_matrix_product;
|
||||
|
||||
template<typename ExpressionType, int Side, bool Transposed>
|
||||
struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
|
||||
{
|
||||
typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
|
||||
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
|
||||
|
||||
template<typename Dest, typename PermutationType>
|
||||
static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
|
||||
{
|
||||
MatrixType mat(xpr);
|
||||
const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
|
||||
// FIXME we need an is_same for expression that is not sensitive to constness. For instance
|
||||
// is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
|
||||
//if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat))
|
||||
if(is_same_dense(dst, mat))
|
||||
{
|
||||
// apply the permutation inplace
|
||||
Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(perm.size());
|
||||
mask.fill(false);
|
||||
Index r = 0;
|
||||
while(r < perm.size())
|
||||
{
|
||||
// search for the next seed
|
||||
while(r<perm.size() && mask[r]) r++;
|
||||
if(r>=perm.size())
|
||||
break;
|
||||
// we got one, let's follow it until we are back to the seed
|
||||
Index k0 = r++;
|
||||
Index kPrev = k0;
|
||||
mask.coeffRef(k0) = true;
|
||||
for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k))
|
||||
{
|
||||
Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
|
||||
.swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
|
||||
(dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
|
||||
|
||||
mask.coeffRef(k) = true;
|
||||
kPrev = k;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(Index i = 0; i < n; ++i)
|
||||
{
|
||||
Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
|
||||
(dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i)
|
||||
|
||||
=
|
||||
|
||||
Block<const MatrixTypeCleaned,Side==OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixTypeCleaned::ColsAtCompileTime>
|
||||
(mat, ((Side==OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
||||
struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
|
||||
{
|
||||
template<typename Dest>
|
||||
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
permut_matrix_product_retval<Lhs, Rhs, OnTheLeft, false> pmpr(lhs, rhs);
|
||||
pmpr.evalTo(dst);
|
||||
permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int ProductTag>
|
||||
struct generic_product_impl<Lhs, Rhs, DenseShape, PermutationShape, ProductTag>
|
||||
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
||||
struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
|
||||
{
|
||||
template<typename Dest>
|
||||
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
permut_matrix_product_retval<Rhs, Lhs, OnTheRight, false> pmpr(rhs, lhs);
|
||||
pmpr.evalTo(dst);
|
||||
permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int ProductTag>
|
||||
struct generic_product_impl<Transpose<Lhs>, Rhs, PermutationShape, DenseShape, ProductTag>
|
||||
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
||||
struct generic_product_impl<Transpose<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
|
||||
{
|
||||
template<typename Dest>
|
||||
static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
|
||||
{
|
||||
permut_matrix_product_retval<Lhs, Rhs, OnTheLeft, true> pmpr(lhs.nestedPermutation(), rhs);
|
||||
pmpr.evalTo(dst);
|
||||
permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int ProductTag>
|
||||
struct generic_product_impl<Lhs, Transpose<Rhs>, DenseShape, PermutationShape, ProductTag>
|
||||
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
||||
struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, PermutationShape, ProductTag>
|
||||
{
|
||||
template<typename Dest>
|
||||
static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
|
||||
{
|
||||
permut_matrix_product_retval<Rhs, Lhs, OnTheRight, true> pmpr(rhs.nestedPermutation(), lhs);
|
||||
pmpr.evalTo(dst);
|
||||
permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
* Products with transpositions matrices
|
||||
***************************************************************************/
|
||||
|
||||
// FIXME could we unify Transpositions and Permutation into a single "shape"??
|
||||
|
||||
/** \internal
|
||||
* \class transposition_matrix_product
|
||||
* Internal helper class implementing the product between a permutation matrix and a matrix.
|
||||
*/
|
||||
template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
|
||||
struct transposition_matrix_product
|
||||
{
|
||||
typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
|
||||
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
|
||||
|
||||
template<typename Dest, typename TranspositionType>
|
||||
static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
|
||||
{
|
||||
MatrixType mat(xpr);
|
||||
typedef typename TranspositionType::StorageIndex StorageIndex;
|
||||
const Index size = tr.size();
|
||||
StorageIndex j = 0;
|
||||
|
||||
if(!(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat)))
|
||||
dst = mat;
|
||||
|
||||
for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
|
||||
if(Index(j=tr.coeff(k))!=k)
|
||||
{
|
||||
if(Side==OnTheLeft) dst.row(k).swap(dst.row(j));
|
||||
else if(Side==OnTheRight) dst.col(k).swap(dst.col(j));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
||||
struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
|
||||
{
|
||||
template<typename Dest>
|
||||
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
||||
struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
|
||||
{
|
||||
template<typename Dest>
|
||||
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
||||
struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
|
||||
{
|
||||
template<typename Dest>
|
||||
static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
|
||||
{
|
||||
transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
||||
struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
|
||||
{
|
||||
template<typename Dest>
|
||||
static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
|
||||
{
|
||||
transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -53,7 +53,7 @@ struct functor_traits<scalar_random_op<Scalar> >
|
||||
* \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
|
||||
inline const typename DenseBase<Derived>::RandomReturnType
|
||||
DenseBase<Derived>::Random(Index rows, Index cols)
|
||||
{
|
||||
return NullaryExpr(rows, cols, internal::scalar_random_op<Scalar>());
|
||||
@@ -84,7 +84,7 @@ DenseBase<Derived>::Random(Index rows, Index cols)
|
||||
* \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
|
||||
inline const typename DenseBase<Derived>::RandomReturnType
|
||||
DenseBase<Derived>::Random(Index size)
|
||||
{
|
||||
return NullaryExpr(size, internal::scalar_random_op<Scalar>());
|
||||
@@ -110,7 +110,7 @@ DenseBase<Derived>::Random(Index size)
|
||||
* \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
|
||||
inline const typename DenseBase<Derived>::RandomReturnType
|
||||
DenseBase<Derived>::Random()
|
||||
{
|
||||
return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op<Scalar>());
|
||||
@@ -162,8 +162,8 @@ PlainObjectBase<Derived>::setRandom(Index newSize)
|
||||
*
|
||||
* \not_reentrant
|
||||
*
|
||||
* \param nbRows the new number of rows
|
||||
* \param nbCols the new number of columns
|
||||
* \param rows the new number of rows
|
||||
* \param cols the new number of columns
|
||||
*
|
||||
* Example: \include Matrix_setRandom_int_int.cpp
|
||||
* Output: \verbinclude Matrix_setRandom_int_int.out
|
||||
@@ -172,9 +172,9 @@ PlainObjectBase<Derived>::setRandom(Index newSize)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setRandom(Index nbRows, Index nbCols)
|
||||
PlainObjectBase<Derived>::setRandom(Index rows, Index cols)
|
||||
{
|
||||
resize(nbRows, nbCols);
|
||||
resize(rows, cols);
|
||||
return setRandom();
|
||||
}
|
||||
|
||||
|
||||
@@ -165,7 +165,7 @@ struct redux_vec_unroller<Func, Derived, Start, 1>
|
||||
index = Start * packet_traits<typename Derived::Scalar>::size,
|
||||
outer = index / int(Derived::InnerSizeAtCompileTime),
|
||||
inner = index % int(Derived::InnerSizeAtCompileTime),
|
||||
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
|
||||
alignment = Derived::Alignment
|
||||
};
|
||||
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
@@ -173,7 +173,7 @@ struct redux_vec_unroller<Func, Derived, Start, 1>
|
||||
|
||||
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
|
||||
{
|
||||
return mat.template packetByOuterInner<alignment>(outer, inner);
|
||||
return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -222,11 +222,12 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
|
||||
const Index size = mat.size();
|
||||
|
||||
const Index packetSize = packet_traits<Scalar>::size;
|
||||
const Index alignedStart = internal::first_aligned(mat);
|
||||
const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
|
||||
enum {
|
||||
alignment = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) || bool(Derived::Flags & AlignedBit)
|
||||
? Aligned : Unaligned
|
||||
alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
|
||||
alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
|
||||
};
|
||||
const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
|
||||
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
|
||||
const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
|
||||
const Index alignedEnd2 = alignedStart + alignedSize2;
|
||||
@@ -234,19 +235,19 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
|
||||
Scalar res;
|
||||
if(alignedSize)
|
||||
{
|
||||
PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
|
||||
PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
|
||||
if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
|
||||
{
|
||||
PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
|
||||
PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
|
||||
for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
|
||||
{
|
||||
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
|
||||
packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
|
||||
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
|
||||
packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
|
||||
}
|
||||
|
||||
packet_res0 = func.packetOp(packet_res0,packet_res1);
|
||||
if(alignedEnd>alignedEnd2)
|
||||
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
|
||||
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
|
||||
}
|
||||
res = func.predux(packet_res0);
|
||||
|
||||
@@ -272,7 +273,7 @@ template<typename Func, typename Derived>
|
||||
struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename packet_traits<Scalar>::type PacketType;
|
||||
|
||||
EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
|
||||
{
|
||||
@@ -286,10 +287,10 @@ struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
|
||||
Scalar res;
|
||||
if(packetedInnerSize)
|
||||
{
|
||||
PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
|
||||
PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
|
||||
for(Index j=0; j<outerSize; ++j)
|
||||
for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
|
||||
packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i));
|
||||
packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));
|
||||
|
||||
res = func.predux(packet_res);
|
||||
for(Index j=0; j<outerSize; ++j)
|
||||
@@ -352,7 +353,8 @@ public:
|
||||
IsRowMajor = XprType::IsRowMajor,
|
||||
SizeAtCompileTime = XprType::SizeAtCompileTime,
|
||||
InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
|
||||
CoeffReadCost = evaluator<XprType>::CoeffReadCost
|
||||
CoeffReadCost = evaluator<XprType>::CoeffReadCost,
|
||||
Alignment = evaluator<XprType>::Alignment
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
|
||||
@@ -369,24 +371,26 @@ public:
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{ return m_evaluator.coeff(index); }
|
||||
|
||||
template<int LoadMode>
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketReturnType packet(Index row, Index col) const
|
||||
{ return m_evaluator.template packet<LoadMode>(row, col); }
|
||||
{ return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
|
||||
|
||||
template<int LoadMode>
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketReturnType packet(Index index) const
|
||||
{ return m_evaluator.template packet<LoadMode>(index); }
|
||||
{ return m_evaluator.template packet<LoadMode,PacketType>(index); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
|
||||
{ return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
||||
|
||||
template<int LoadMode>
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketReturnType packetByOuterInner(Index outer, Index inner) const
|
||||
{ return m_evaluator.template packet<LoadMode>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
||||
{ return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
||||
|
||||
const XprType & nestedExpression() const { return m_xpr; }
|
||||
|
||||
protected:
|
||||
typename internal::evaluator<XprType>::nestedType m_evaluator;
|
||||
internal::evaluator<XprType> m_evaluator;
|
||||
const XprType &m_xpr;
|
||||
};
|
||||
|
||||
@@ -406,7 +410,7 @@ protected:
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename Func>
|
||||
EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
|
||||
typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::redux(const Func& func) const
|
||||
{
|
||||
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
||||
|
||||
@@ -18,7 +18,7 @@ namespace Eigen {
|
||||
* \brief A matrix or vector expression mapping an existing expression
|
||||
*
|
||||
* \tparam PlainObjectType the equivalent matrix type of the mapped data
|
||||
* \tparam Options specifies whether the pointer is \c #Aligned, or \c #Unaligned.
|
||||
* \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
|
||||
* The default is \c #Unaligned.
|
||||
* \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
|
||||
* but accepts a variable outer stride (leading dimension).
|
||||
@@ -48,8 +48,9 @@ namespace Eigen {
|
||||
* VectorXf a;
|
||||
* foo1(a.head()); // OK
|
||||
* foo1(A.col()); // OK
|
||||
* foo1(A.row()); // compilation error because here innerstride!=1
|
||||
* foo2(A.row()); // The row is copied into a contiguous temporary
|
||||
* foo1(A.row()); // Compilation error because here innerstride!=1
|
||||
* foo2(A.row()); // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object
|
||||
* foo2(A.row().transpose()); // The row is copied into a contiguous temporary
|
||||
* foo2(2*a); // The expression is evaluated into a temporary
|
||||
* foo2(A.col().segment(2,4)); // No temporary
|
||||
* \endcode
|
||||
@@ -91,7 +92,8 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
|
||||
typedef _StrideType StrideType;
|
||||
enum {
|
||||
Options = _Options,
|
||||
Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit
|
||||
Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit,
|
||||
Alignment = traits<Map<_PlainObjectType, _Options, _StrideType> >::Alignment
|
||||
};
|
||||
|
||||
template<typename Derived> struct match {
|
||||
@@ -103,8 +105,9 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
|
||||
|| (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1),
|
||||
OuterStrideMatch = Derived::IsVectorAtCompileTime
|
||||
|| int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
|
||||
AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits<Derived>::Flags&AlignedBit)==AlignedBit),
|
||||
MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch
|
||||
AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (int(evaluator<Derived>::Alignment) >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment
|
||||
ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
|
||||
MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
|
||||
};
|
||||
typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;
|
||||
};
|
||||
@@ -183,9 +186,11 @@ protected:
|
||||
template<typename PlainObjectType, int Options, typename StrideType> class Ref
|
||||
: public RefBase<Ref<PlainObjectType, Options, StrideType> >
|
||||
{
|
||||
private:
|
||||
typedef internal::traits<Ref> Traits;
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr);
|
||||
EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr,
|
||||
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0);
|
||||
public:
|
||||
|
||||
typedef RefBase<Ref> Base;
|
||||
@@ -194,13 +199,15 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr)
|
||||
EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr,
|
||||
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
|
||||
Base::construct(expr.derived());
|
||||
}
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr)
|
||||
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
|
||||
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
|
||||
#else
|
||||
template<typename Derived>
|
||||
inline Ref(DenseBase<Derived>& expr)
|
||||
@@ -227,7 +234,8 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Ref)
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr)
|
||||
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
|
||||
typename internal::enable_if<bool(Traits::template match<Derived>::ScalarTypeMatch),Derived>::type* = 0)
|
||||
{
|
||||
// std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n";
|
||||
// std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n";
|
||||
|
||||
@@ -35,10 +35,7 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename traits<MatrixType>::StorageKind StorageKind;
|
||||
typedef typename traits<MatrixType>::XprKind XprKind;
|
||||
enum {
|
||||
Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor
|
||||
};
|
||||
typedef typename nested<MatrixType,Factor>::type MatrixTypeNested;
|
||||
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
|
||||
enum {
|
||||
RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic
|
||||
@@ -72,8 +69,9 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
|
||||
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
|
||||
|
||||
template<typename OriginalMatrixType>
|
||||
inline explicit Replicate(const OriginalMatrixType& a_matrix)
|
||||
: m_matrix(a_matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline explicit Replicate(const OriginalMatrixType& matrix)
|
||||
: m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
|
||||
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
|
||||
@@ -81,41 +79,20 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
|
||||
}
|
||||
|
||||
template<typename OriginalMatrixType>
|
||||
inline Replicate(const OriginalMatrixType& a_matrix, Index rowFactor, Index colFactor)
|
||||
: m_matrix(a_matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor)
|
||||
: m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
|
||||
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }
|
||||
|
||||
inline Scalar coeff(Index rowId, Index colId) const
|
||||
{
|
||||
// try to avoid using modulo; this is a pure optimization strategy
|
||||
const Index actual_row = internal::traits<MatrixType>::RowsAtCompileTime==1 ? 0
|
||||
: RowFactor==1 ? rowId
|
||||
: rowId%m_matrix.rows();
|
||||
const Index actual_col = internal::traits<MatrixType>::ColsAtCompileTime==1 ? 0
|
||||
: ColFactor==1 ? colId
|
||||
: colId%m_matrix.cols();
|
||||
|
||||
return m_matrix.coeff(actual_row, actual_col);
|
||||
}
|
||||
template<int LoadMode>
|
||||
inline PacketScalar packet(Index rowId, Index colId) const
|
||||
{
|
||||
const Index actual_row = internal::traits<MatrixType>::RowsAtCompileTime==1 ? 0
|
||||
: RowFactor==1 ? rowId
|
||||
: rowId%m_matrix.rows();
|
||||
const Index actual_col = internal::traits<MatrixType>::ColsAtCompileTime==1 ? 0
|
||||
: ColFactor==1 ? colId
|
||||
: colId%m_matrix.cols();
|
||||
|
||||
return m_matrix.template packet<LoadMode>(actual_row, actual_col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const _MatrixTypeNested& nestedExpression() const
|
||||
{
|
||||
return m_matrix;
|
||||
@@ -137,27 +114,12 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<int RowFactor, int ColFactor>
|
||||
inline const Replicate<Derived,RowFactor,ColFactor>
|
||||
const Replicate<Derived,RowFactor,ColFactor>
|
||||
DenseBase<Derived>::replicate() const
|
||||
{
|
||||
return Replicate<Derived,RowFactor,ColFactor>(derived());
|
||||
}
|
||||
|
||||
/**
|
||||
* \return an expression of the replication of \c *this
|
||||
*
|
||||
* Example: \include MatrixBase_replicate_int_int.cpp
|
||||
* Output: \verbinclude MatrixBase_replicate_int_int.out
|
||||
*
|
||||
* \sa VectorwiseOp::replicate(), DenseBase::replicate<int,int>(), class Replicate
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const Replicate<Derived,Dynamic,Dynamic>
|
||||
DenseBase<Derived>::replicate(Index rowFactor,Index colFactor) const
|
||||
{
|
||||
return Replicate<Derived,Dynamic,Dynamic>(derived(),rowFactor,colFactor);
|
||||
}
|
||||
|
||||
/**
|
||||
* \return an expression of the replication of each column (or row) of \c *this
|
||||
*
|
||||
|
||||
@@ -94,15 +94,12 @@ namespace internal {
|
||||
|
||||
template<typename Derived>
|
||||
struct evaluator<ReturnByValue<Derived> >
|
||||
: public evaluator<typename internal::traits<Derived>::ReturnType>::type
|
||||
: public evaluator<typename internal::traits<Derived>::ReturnType>
|
||||
{
|
||||
typedef ReturnByValue<Derived> XprType;
|
||||
typedef typename internal::traits<Derived>::ReturnType PlainObject;
|
||||
typedef typename evaluator<PlainObject>::type Base;
|
||||
typedef evaluator<PlainObject> Base;
|
||||
|
||||
typedef evaluator type;
|
||||
typedef evaluator nestedType;
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
|
||||
: m_result(xpr.rows(), xpr.cols())
|
||||
{
|
||||
|
||||
@@ -37,7 +37,7 @@ struct traits<Reverse<MatrixType, Direction> >
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename traits<MatrixType>::StorageKind StorageKind;
|
||||
typedef typename traits<MatrixType>::XprKind XprKind;
|
||||
typedef typename nested<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
|
||||
enum {
|
||||
RowsAtCompileTime = MatrixType::RowsAtCompileTime,
|
||||
@@ -48,14 +48,14 @@ struct traits<Reverse<MatrixType, Direction> >
|
||||
};
|
||||
};
|
||||
|
||||
template<typename PacketScalar, bool ReversePacket> struct reverse_packet_cond
|
||||
template<typename PacketType, bool ReversePacket> struct reverse_packet_cond
|
||||
{
|
||||
static inline PacketScalar run(const PacketScalar& x) { return preverse(x); }
|
||||
static inline PacketType run(const PacketType& x) { return preverse(x); }
|
||||
};
|
||||
|
||||
template<typename PacketScalar> struct reverse_packet_cond<PacketScalar,false>
|
||||
template<typename PacketType> struct reverse_packet_cond<PacketType,false>
|
||||
{
|
||||
static inline PacketScalar run(const PacketScalar& x) { return x; }
|
||||
static inline PacketType run(const PacketType& x) { return x; }
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
@@ -70,10 +70,6 @@ template<typename MatrixType, int Direction> class Reverse
|
||||
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
|
||||
using Base::IsRowMajor;
|
||||
|
||||
// next line is necessary because otherwise const version of operator()
|
||||
// is hidden by non-const version defined in this file
|
||||
using Base::operator();
|
||||
|
||||
protected:
|
||||
enum {
|
||||
PacketSize = internal::packet_traits<Scalar>::size,
|
||||
@@ -101,69 +97,6 @@ template<typename MatrixType, int Direction> class Reverse
|
||||
return -m_matrix.innerStride();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Scalar& operator()(Index row, Index col)
|
||||
{
|
||||
eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
|
||||
return coeffRef(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row,
|
||||
ReverseCol ? m_matrix.cols() - col - 1 : col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row,
|
||||
ReverseCol ? m_matrix.cols() - col - 1 : col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_matrix.coeff(m_matrix.size() - index - 1);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Scalar& operator()(Index index)
|
||||
{
|
||||
eigen_assert(index >= 0 && index < m_matrix.size());
|
||||
return coeffRef(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar packet(Index row, Index col) const
|
||||
{
|
||||
return reverse_packet::run(m_matrix.template packet<LoadMode>(
|
||||
ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
|
||||
ReverseCol ? m_matrix.cols() - col - OffsetCol : col));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index row, Index col, const PacketScalar& x)
|
||||
{
|
||||
m_matrix.const_cast_derived().template writePacket<LoadMode>(
|
||||
ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
|
||||
ReverseCol ? m_matrix.cols() - col - OffsetCol : col,
|
||||
reverse_packet::run(x));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar packet(Index index) const
|
||||
{
|
||||
return internal::preverse(m_matrix.template packet<LoadMode>( m_matrix.size() - index - PacketSize ));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index index, const PacketScalar& x)
|
||||
{
|
||||
m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC const typename internal::remove_all<typename MatrixType::Nested>::type&
|
||||
nestedExpression() const
|
||||
{
|
||||
@@ -187,30 +120,90 @@ DenseBase<Derived>::reverse()
|
||||
return ReverseReturnType(derived());
|
||||
}
|
||||
|
||||
/** This is the const version of reverse(). */
|
||||
template<typename Derived>
|
||||
inline const typename DenseBase<Derived>::ConstReverseReturnType
|
||||
DenseBase<Derived>::reverse() const
|
||||
{
|
||||
return ConstReverseReturnType(derived());
|
||||
}
|
||||
|
||||
//reverse const overload moved DenseBase.h due to a CUDA compiler bug
|
||||
|
||||
/** This is the "in place" version of reverse: it reverses \c *this.
|
||||
*
|
||||
* In most cases it is probably better to simply use the reversed expression
|
||||
* of a matrix. However, when reversing the matrix data itself is really needed,
|
||||
* then this "in-place" version is probably the right choice because it provides
|
||||
* the following additional features:
|
||||
* the following additional benefits:
|
||||
* - less error prone: doing the same operation with .reverse() requires special care:
|
||||
* \code m = m.reverse().eval(); \endcode
|
||||
* - this API allows to avoid creating a temporary (the current implementation creates a temporary, but that could be avoided using swap)
|
||||
* - this API enables reverse operations without the need for a temporary
|
||||
* - it allows future optimizations (cache friendliness, etc.)
|
||||
*
|
||||
* \sa reverse() */
|
||||
* \sa VectorwiseOp::reverseInPlace(), reverse() */
|
||||
template<typename Derived>
|
||||
inline void DenseBase<Derived>::reverseInPlace()
|
||||
{
|
||||
derived() = derived().reverse().eval();
|
||||
if(cols()>rows())
|
||||
{
|
||||
Index half = cols()/2;
|
||||
leftCols(half).swap(rightCols(half).reverse());
|
||||
if((cols()%2)==1)
|
||||
{
|
||||
Index half2 = rows()/2;
|
||||
col(half).head(half2).swap(col(half).tail(half2).reverse());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Index half = rows()/2;
|
||||
topRows(half).swap(bottomRows(half).reverse());
|
||||
if((rows()%2)==1)
|
||||
{
|
||||
Index half2 = cols()/2;
|
||||
row(half).head(half2).swap(row(half).tail(half2).reverse());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<int Direction>
|
||||
struct vectorwise_reverse_inplace_impl;
|
||||
|
||||
template<>
|
||||
struct vectorwise_reverse_inplace_impl<Vertical>
|
||||
{
|
||||
template<typename ExpressionType>
|
||||
static void run(ExpressionType &xpr)
|
||||
{
|
||||
Index half = xpr.rows()/2;
|
||||
xpr.topRows(half).swap(xpr.bottomRows(half).colwise().reverse());
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct vectorwise_reverse_inplace_impl<Horizontal>
|
||||
{
|
||||
template<typename ExpressionType>
|
||||
static void run(ExpressionType &xpr)
|
||||
{
|
||||
Index half = xpr.cols()/2;
|
||||
xpr.leftCols(half).swap(xpr.rightCols(half).rowwise().reverse());
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this.
|
||||
*
|
||||
* In most cases it is probably better to simply use the reversed expression
|
||||
* of a matrix. However, when reversing the matrix data itself is really needed,
|
||||
* then this "in-place" version is probably the right choice because it provides
|
||||
* the following additional benefits:
|
||||
* - less error prone: doing the same operation with .reverse() requires special care:
|
||||
* \code m = m.reverse().eval(); \endcode
|
||||
* - this API enables reverse operations without the need for a temporary
|
||||
*
|
||||
* \sa DenseBase::reverseInPlace(), reverse() */
|
||||
template<typename ExpressionType, int Direction>
|
||||
void VectorwiseOp<ExpressionType,Direction>::reverseInPlace()
|
||||
{
|
||||
internal::vectorwise_reverse_inplace_impl<Direction>::run(_expression().const_cast_derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -32,7 +32,7 @@ namespace internal {
|
||||
template<typename MatrixType, unsigned int UpLo>
|
||||
struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
|
||||
{
|
||||
typedef typename nested<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
|
||||
typedef MatrixType ExpressionType;
|
||||
typedef typename MatrixType::PlainObject FullMatrixType;
|
||||
|
||||
@@ -113,15 +113,12 @@ namespace internal {
|
||||
// Evaluator of Solve -> eval into a temporary
|
||||
template<typename Decomposition, typename RhsType>
|
||||
struct evaluator<Solve<Decomposition,RhsType> >
|
||||
: public evaluator<typename Solve<Decomposition,RhsType>::PlainObject>::type
|
||||
: public evaluator<typename Solve<Decomposition,RhsType>::PlainObject>
|
||||
{
|
||||
typedef Solve<Decomposition,RhsType> SolveType;
|
||||
typedef typename SolveType::PlainObject PlainObject;
|
||||
typedef typename evaluator<PlainObject>::type Base;
|
||||
typedef evaluator<PlainObject> Base;
|
||||
|
||||
typedef evaluator type;
|
||||
typedef evaluator nestedType;
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve)
|
||||
: m_result(solve.rows(), solve.cols())
|
||||
{
|
||||
|
||||
@@ -198,8 +198,8 @@ void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<Ot
|
||||
* diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this
|
||||
* is an upper (resp. lower) triangular matrix.
|
||||
*
|
||||
* Example: \include MatrixBase_marked.cpp
|
||||
* Output: \verbinclude MatrixBase_marked.out
|
||||
* Example: \include Triangular_solve.cpp
|
||||
* Output: \verbinclude Triangular_solve.out
|
||||
*
|
||||
* This function returns an expression of the inverse-multiply and can works in-place if it is assigned
|
||||
* to the same matrix or vector \a other.
|
||||
|
||||
@@ -157,19 +157,32 @@ inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
|
||||
MatrixBase<Derived>::stableNorm() const
|
||||
{
|
||||
using std::sqrt;
|
||||
using std::abs;
|
||||
const Index blockSize = 4096;
|
||||
RealScalar scale(0);
|
||||
RealScalar invScale(1);
|
||||
RealScalar ssq(0); // sum of square
|
||||
|
||||
typedef typename internal::nested_eval<Derived,2>::type DerivedCopy;
|
||||
typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean;
|
||||
DerivedCopy copy(derived());
|
||||
|
||||
enum {
|
||||
Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0
|
||||
CanAlign = (int(Flags)&DirectAccessBit) || (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME
|
||||
};
|
||||
typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
|
||||
typename DerivedCopyClean
|
||||
::ConstSegmentReturnType>::type SegmentWrapper;
|
||||
Index n = size();
|
||||
Index bi = internal::first_aligned(derived());
|
||||
|
||||
if(n==1)
|
||||
return abs(this->coeff(0));
|
||||
|
||||
Index bi = internal::first_default_aligned(copy);
|
||||
if (bi>0)
|
||||
internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale);
|
||||
internal::stable_norm_kernel(copy.head(bi), ssq, scale, invScale);
|
||||
for (; bi<n; bi+=blockSize)
|
||||
internal::stable_norm_kernel(this->segment(bi,numext::mini(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
|
||||
internal::stable_norm_kernel(SegmentWrapper(copy.segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale);
|
||||
return scale * sqrt(ssq);
|
||||
}
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@ class generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap
|
||||
{
|
||||
protected:
|
||||
typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn> Base;
|
||||
typedef typename DstEvaluatorTypeT::PacketScalar PacketScalar;
|
||||
using Base::m_dst;
|
||||
using Base::m_src;
|
||||
using Base::m_functor;
|
||||
@@ -35,25 +34,29 @@ public:
|
||||
: Base(dst, src, func, dstExpr)
|
||||
{}
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
void assignPacket(Index row, Index col)
|
||||
{
|
||||
m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(row,col), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(row,col));
|
||||
PacketType tmp = m_src.template packet<LoadMode,PacketType>(row,col);
|
||||
const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(row,col, m_dst.template packet<StoreMode,PacketType>(row,col));
|
||||
m_dst.template writePacket<StoreMode>(row,col,tmp);
|
||||
}
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
void assignPacket(Index index)
|
||||
{
|
||||
m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(index), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(index));
|
||||
PacketType tmp = m_src.template packet<LoadMode,PacketType>(index);
|
||||
const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(index, m_dst.template packet<StoreMode,PacketType>(index));
|
||||
m_dst.template writePacket<StoreMode>(index,tmp);
|
||||
}
|
||||
|
||||
// TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
|
||||
template<int StoreMode, int LoadMode>
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
void assignPacketByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
Index row = Base::rowIndexByOuterInner(outer, inner);
|
||||
Index col = Base::colIndexByOuterInner(outer, inner);
|
||||
assignPacket<StoreMode,LoadMode>(row, col);
|
||||
assignPacket<StoreMode,LoadMode,PacketType>(row, col);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ namespace internal {
|
||||
template<typename MatrixType>
|
||||
struct traits<Transpose<MatrixType> > : public traits<MatrixType>
|
||||
{
|
||||
typedef typename nested<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain;
|
||||
enum {
|
||||
RowsAtCompileTime = MatrixType::ColsAtCompileTime,
|
||||
@@ -60,7 +60,7 @@ template<typename MatrixType> class Transpose
|
||||
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline Transpose(MatrixType& a_matrix) : m_matrix(a_matrix) {}
|
||||
explicit inline Transpose(MatrixType& matrix) : m_matrix(matrix) {}
|
||||
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
|
||||
|
||||
@@ -233,7 +233,7 @@ struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x Packet
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;
|
||||
const Index PacketSize = internal::packet_traits<Scalar>::size;
|
||||
const Index Alignment = internal::evaluator<MatrixType>::Flags&AlignedBit ? Aligned : Unaligned;
|
||||
const Index Alignment = internal::evaluator<MatrixType>::Alignment;
|
||||
PacketBlock<Packet> A;
|
||||
for (Index i=0; i<PacketSize; ++i)
|
||||
A.packet[i] = m.template packetByOuterInner<Alignment>(i,0);
|
||||
@@ -317,14 +317,6 @@ inline void MatrixBase<Derived>::adjointInPlace()
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename BinOp,typename NestedXpr,typename Rhs>
|
||||
struct blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> >
|
||||
: blas_traits<NestedXpr>
|
||||
{
|
||||
typedef SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> XprType;
|
||||
static inline const XprType extract(const XprType& x) { return x; }
|
||||
};
|
||||
|
||||
template<bool DestIsTransposed, typename OtherDerived>
|
||||
struct check_transpose_aliasing_compile_time_selector
|
||||
{
|
||||
|
||||
@@ -41,10 +41,6 @@ namespace Eigen {
|
||||
* \sa class PermutationMatrix
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename TranspositionType, typename MatrixType, int Side, bool Transposed=false> struct transposition_matrix_product_retval;
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
class TranspositionsBase
|
||||
{
|
||||
@@ -66,7 +62,7 @@ class TranspositionsBase
|
||||
indices() = other.indices();
|
||||
return derived();
|
||||
}
|
||||
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** This is a special case of the templated operator=. Its purpose is to
|
||||
* prevent a default operator= from hiding the templated operator=.
|
||||
@@ -79,7 +75,11 @@ class TranspositionsBase
|
||||
#endif
|
||||
|
||||
/** \returns the number of transpositions */
|
||||
inline Index size() const { return indices().size(); }
|
||||
Index size() const { return indices().size(); }
|
||||
/** \returns the number of rows of the equivalent permutation matrix */
|
||||
Index rows() const { return indices().size(); }
|
||||
/** \returns the number of columns of the equivalent permutation matrix */
|
||||
Index cols() const { return indices().size(); }
|
||||
|
||||
/** Direct access to the underlying index vector */
|
||||
inline const StorageIndex& coeff(Index i) const { return indices().coeff(i); }
|
||||
@@ -147,9 +147,10 @@ class TranspositionsBase
|
||||
namespace internal {
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
|
||||
struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
|
||||
: traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
|
||||
{
|
||||
typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
|
||||
typedef _StorageIndex StorageIndex;
|
||||
typedef TranspositionsStorage StorageKind;
|
||||
};
|
||||
}
|
||||
|
||||
@@ -178,7 +179,7 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim
|
||||
|
||||
/** Generic constructor from expression of the transposition indices. */
|
||||
template<typename Other>
|
||||
explicit inline Transpositions(const MatrixBase<Other>& a_indices) : m_indices(a_indices)
|
||||
explicit inline Transpositions(const MatrixBase<Other>& indices) : m_indices(indices)
|
||||
{}
|
||||
|
||||
/** Copies the \a other transpositions into \c *this */
|
||||
@@ -218,9 +219,11 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim
|
||||
namespace internal {
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>
|
||||
struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,_PacketAccess> >
|
||||
: traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
|
||||
{
|
||||
typedef Map<const Matrix<_StorageIndex,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;
|
||||
typedef _StorageIndex StorageIndex;
|
||||
typedef TranspositionsStorage StorageKind;
|
||||
};
|
||||
}
|
||||
|
||||
@@ -275,9 +278,9 @@ class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,P
|
||||
namespace internal {
|
||||
template<typename _IndicesType>
|
||||
struct traits<TranspositionsWrapper<_IndicesType> >
|
||||
: traits<PermutationWrapper<_IndicesType> >
|
||||
{
|
||||
typedef typename _IndicesType::Scalar StorageIndex;
|
||||
typedef _IndicesType IndicesType;
|
||||
typedef TranspositionsStorage StorageKind;
|
||||
};
|
||||
}
|
||||
|
||||
@@ -292,8 +295,8 @@ class TranspositionsWrapper
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename IndicesType::Scalar StorageIndex;
|
||||
|
||||
explicit inline TranspositionsWrapper(IndicesType& a_indices)
|
||||
: m_indices(a_indices)
|
||||
explicit inline TranspositionsWrapper(IndicesType& indices)
|
||||
: m_indices(indices)
|
||||
{}
|
||||
|
||||
/** Copies the \a other transpositions into \c *this */
|
||||
@@ -325,80 +328,43 @@ class TranspositionsWrapper
|
||||
const typename IndicesType::Nested m_indices;
|
||||
};
|
||||
|
||||
|
||||
|
||||
/** \returns the \a matrix with the \a transpositions applied to the columns.
|
||||
*/
|
||||
template<typename Derived, typename TranspositionsDerived>
|
||||
inline const internal::transposition_matrix_product_retval<TranspositionsDerived, Derived, OnTheRight>
|
||||
operator*(const MatrixBase<Derived>& matrix,
|
||||
const TranspositionsBase<TranspositionsDerived> &transpositions)
|
||||
template<typename MatrixDerived, typename TranspositionsDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<MatrixDerived, TranspositionsDerived, AliasFreeProduct>
|
||||
operator*(const MatrixBase<MatrixDerived> &matrix,
|
||||
const TranspositionsBase<TranspositionsDerived>& transpositions)
|
||||
{
|
||||
return internal::transposition_matrix_product_retval
|
||||
<TranspositionsDerived, Derived, OnTheRight>
|
||||
(transpositions.derived(), matrix.derived());
|
||||
return Product<MatrixDerived, TranspositionsDerived, AliasFreeProduct>
|
||||
(matrix.derived(), transpositions.derived());
|
||||
}
|
||||
|
||||
/** \returns the \a matrix with the \a transpositions applied to the rows.
|
||||
*/
|
||||
template<typename Derived, typename TranspositionDerived>
|
||||
inline const internal::transposition_matrix_product_retval
|
||||
<TranspositionDerived, Derived, OnTheLeft>
|
||||
operator*(const TranspositionsBase<TranspositionDerived> &transpositions,
|
||||
const MatrixBase<Derived>& matrix)
|
||||
template<typename TranspositionsDerived, typename MatrixDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<TranspositionsDerived, MatrixDerived, AliasFreeProduct>
|
||||
operator*(const TranspositionsBase<TranspositionsDerived> &transpositions,
|
||||
const MatrixBase<MatrixDerived>& matrix)
|
||||
{
|
||||
return internal::transposition_matrix_product_retval
|
||||
<TranspositionDerived, Derived, OnTheLeft>
|
||||
(transpositions.derived(), matrix.derived());
|
||||
return Product<TranspositionsDerived, MatrixDerived, AliasFreeProduct>
|
||||
(transpositions.derived(), matrix.derived());
|
||||
}
|
||||
|
||||
// Template partial specialization for transposed/inverse transpositions
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename TranspositionType, typename MatrixType, int Side, bool Transposed>
|
||||
struct traits<transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> >
|
||||
{
|
||||
typedef typename MatrixType::PlainObject ReturnType;
|
||||
};
|
||||
|
||||
template<typename TranspositionType, typename MatrixType, int Side, bool Transposed>
|
||||
struct transposition_matrix_product_retval
|
||||
: public ReturnByValue<transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> >
|
||||
{
|
||||
typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
|
||||
typedef typename TranspositionType::StorageIndex StorageIndex;
|
||||
|
||||
transposition_matrix_product_retval(const TranspositionType& tr, const MatrixType& matrix)
|
||||
: m_transpositions(tr), m_matrix(matrix)
|
||||
{}
|
||||
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
|
||||
template<typename Dest> inline void evalTo(Dest& dst) const
|
||||
{
|
||||
const Index size = m_transpositions.size();
|
||||
StorageIndex j = 0;
|
||||
|
||||
if(!(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix)))
|
||||
dst = m_matrix;
|
||||
|
||||
for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
|
||||
if(Index(j=m_transpositions.coeff(k))!=k)
|
||||
{
|
||||
if(Side==OnTheLeft)
|
||||
dst.row(k).swap(dst.row(j));
|
||||
else if(Side==OnTheRight)
|
||||
dst.col(k).swap(dst.col(j));
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
const TranspositionType& m_transpositions;
|
||||
typename MatrixType::Nested m_matrix;
|
||||
};
|
||||
template<typename Derived>
|
||||
struct traits<Transpose<TranspositionsBase<Derived> > >
|
||||
: traits<Derived>
|
||||
{};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/* Template partial specialization for transposed/inverse transpositions */
|
||||
|
||||
template<typename TranspositionsDerived>
|
||||
class Transpose<TranspositionsBase<TranspositionsDerived> >
|
||||
{
|
||||
@@ -408,25 +374,29 @@ class Transpose<TranspositionsBase<TranspositionsDerived> >
|
||||
|
||||
explicit Transpose(const TranspositionType& t) : m_transpositions(t) {}
|
||||
|
||||
inline int size() const { return m_transpositions.size(); }
|
||||
Index size() const { return m_transpositions.size(); }
|
||||
Index rows() const { return m_transpositions.size(); }
|
||||
Index cols() const { return m_transpositions.size(); }
|
||||
|
||||
/** \returns the \a matrix with the inverse transpositions applied to the columns.
|
||||
*/
|
||||
template<typename Derived> friend
|
||||
inline const internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true>
|
||||
operator*(const MatrixBase<Derived>& matrix, const Transpose& trt)
|
||||
template<typename OtherDerived> friend
|
||||
const Product<OtherDerived, Transpose, AliasFreeProduct>
|
||||
operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trt)
|
||||
{
|
||||
return internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true>(trt.m_transpositions, matrix.derived());
|
||||
return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt.derived());
|
||||
}
|
||||
|
||||
/** \returns the \a matrix with the inverse transpositions applied to the rows.
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true>
|
||||
operator*(const MatrixBase<Derived>& matrix) const
|
||||
template<typename OtherDerived>
|
||||
const Product<Transpose, OtherDerived, AliasFreeProduct>
|
||||
operator*(const MatrixBase<OtherDerived>& matrix) const
|
||||
{
|
||||
return internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true>(m_transpositions, matrix.derived());
|
||||
return Product<Transpose, OtherDerived, AliasFreeProduct>(*this, matrix.derived());
|
||||
}
|
||||
|
||||
const TranspositionType& nestedExpression() const { return m_transpositions; }
|
||||
|
||||
protected:
|
||||
const TranspositionType& m_transpositions;
|
||||
|
||||
@@ -19,9 +19,7 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv
|
||||
|
||||
}
|
||||
|
||||
/** \internal
|
||||
*
|
||||
* \class TriangularBase
|
||||
/** \class TriangularBase
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for triangular part in a matrix
|
||||
@@ -38,10 +36,14 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
|
||||
MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
|
||||
|
||||
SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
|
||||
internal::traits<Derived>::ColsAtCompileTime>::ret)
|
||||
/**< This is equal to the number of coefficients, i.e. the number of
|
||||
internal::traits<Derived>::ColsAtCompileTime>::ret),
|
||||
/**< This is equal to the number of coefficients, i.e. the number of
|
||||
* rows times the number of columns, or to \a Dynamic if this is not
|
||||
* known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
|
||||
|
||||
MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
internal::traits<Derived>::MaxColsAtCompileTime>::ret)
|
||||
|
||||
};
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
typedef typename internal::traits<Derived>::StorageKind StorageKind;
|
||||
@@ -63,11 +65,11 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
|
||||
inline Index innerStride() const { return derived().innerStride(); }
|
||||
|
||||
// dummy resize function
|
||||
void resize(Index nbRows, Index nbCols)
|
||||
void resize(Index rows, Index cols)
|
||||
{
|
||||
EIGEN_UNUSED_VARIABLE(nbRows);
|
||||
EIGEN_UNUSED_VARIABLE(nbCols);
|
||||
eigen_assert(nbRows==rows() && nbCols==nbCols);
|
||||
EIGEN_UNUSED_VARIABLE(rows);
|
||||
EIGEN_UNUSED_VARIABLE(cols);
|
||||
eigen_assert(rows==this->rows() && cols==this->cols());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -148,17 +150,17 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
|
||||
/** \class TriangularView
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for triangular part in a matrix
|
||||
* \brief Expression of a triangular part in a matrix
|
||||
*
|
||||
* \param MatrixType the type of the object in which we are taking the triangular part
|
||||
* \param Mode the kind of triangular matrix expression to construct. Can be #Upper,
|
||||
* #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower.
|
||||
* This is in fact a bit field; it must have either #Upper or #Lower,
|
||||
* and additionnaly it may have #UnitDiag or #ZeroDiag or neither.
|
||||
* and additionally it may have #UnitDiag or #ZeroDiag or neither.
|
||||
*
|
||||
* This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular
|
||||
* matrices one should speak of "trapezoid" parts. This class is the return type
|
||||
* of MatrixBase::triangularView() and most of the time this is the only way it is used.
|
||||
* of MatrixBase::triangularView() and SparseMatrixBase::triangularView(), and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::triangularView()
|
||||
*/
|
||||
@@ -166,7 +168,7 @@ namespace internal {
|
||||
template<typename MatrixType, unsigned int _Mode>
|
||||
struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType>
|
||||
{
|
||||
typedef typename nested<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
|
||||
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
|
||||
typedef typename MatrixType::PlainObject FullMatrixType;
|
||||
@@ -306,6 +308,15 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
MatrixTypeNested m_matrix;
|
||||
};
|
||||
|
||||
/** \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for a triangular part in a \b dense matrix
|
||||
*
|
||||
* This class is an abstract base class of class TriangularView, and objects of type TriangularViewImpl cannot be instantiated.
|
||||
* It extends class TriangularView with additional methods which available for dense expressions only.
|
||||
*
|
||||
* \sa class TriangularView, MatrixBase::triangularView()
|
||||
*/
|
||||
template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_MatrixType,_Mode,Dense>
|
||||
: public TriangularBase<TriangularView<_MatrixType, _Mode> >
|
||||
{
|
||||
@@ -549,8 +560,8 @@ void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
|
||||
* The parameter \a Mode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
|
||||
* \c #Lower, \c #StrictlyLower, \c #UnitLower.
|
||||
*
|
||||
* Example: \include MatrixBase_extract.cpp
|
||||
* Output: \verbinclude MatrixBase_extract.out
|
||||
* Example: \include MatrixBase_triangularView.cpp
|
||||
* Output: \verbinclude MatrixBase_triangularView.out
|
||||
*
|
||||
* \sa class TriangularView
|
||||
*/
|
||||
@@ -653,7 +664,6 @@ struct unary_evaluator<TriangularView<MatrixType,Mode>, IndexBased>
|
||||
{
|
||||
typedef TriangularView<MatrixType,Mode> XprType;
|
||||
typedef evaluator<typename internal::remove_all<MatrixType>::type> Base;
|
||||
typedef evaluator<XprType> type;
|
||||
unary_evaluator(const XprType &xpr) : Base(xpr.nestedExpression()) {}
|
||||
};
|
||||
|
||||
@@ -723,8 +733,8 @@ EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, co
|
||||
{
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
|
||||
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
|
||||
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
|
||||
typedef evaluator<DstXprType> DstEvaluatorType;
|
||||
typedef evaluator<SrcXprType> SrcEvaluatorType;
|
||||
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
SrcEvaluatorType srcEvaluator(src);
|
||||
|
||||
@@ -41,7 +41,7 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
|
||||
typedef typename traits<MatrixType>::StorageKind StorageKind;
|
||||
typedef typename traits<MatrixType>::XprKind XprKind;
|
||||
typedef typename MatrixType::Scalar InputScalar;
|
||||
typedef typename nested<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
|
||||
enum {
|
||||
RowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::RowsAtCompileTime,
|
||||
@@ -65,13 +65,16 @@ class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<Matri
|
||||
typedef typename internal::traits<PartialReduxExpr>::MatrixTypeNested MatrixTypeNested;
|
||||
typedef typename internal::traits<PartialReduxExpr>::_MatrixTypeNested _MatrixTypeNested;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())
|
||||
: m_matrix(mat), m_functor(func) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Index rows() const { return (Direction==Vertical ? 1 : m_matrix.rows()); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
Index cols() const { return (Direction==Horizontal ? 1 : m_matrix.cols()); }
|
||||
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const
|
||||
{
|
||||
if (Direction==Vertical)
|
||||
return m_functor(m_matrix.col(j));
|
||||
@@ -79,7 +82,7 @@ class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<Matri
|
||||
return m_functor(m_matrix.row(i));
|
||||
}
|
||||
|
||||
const Scalar coeff(Index index) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
|
||||
{
|
||||
if (Direction==Vertical)
|
||||
return m_functor(m_matrix.col(index));
|
||||
@@ -100,7 +103,8 @@ class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<Matri
|
||||
template<typename Scalar, int Size> struct Cost \
|
||||
{ enum { value = COST }; }; \
|
||||
template<typename XprType> \
|
||||
EIGEN_STRONG_INLINE ResultType operator()(const XprType& mat) const \
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
||||
ResultType operator()(const XprType& mat) const \
|
||||
{ return mat.MEMBER(); } \
|
||||
}
|
||||
|
||||
@@ -124,13 +128,13 @@ EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost);
|
||||
template <typename BinaryOp, typename Scalar>
|
||||
struct member_redux {
|
||||
typedef typename result_of<
|
||||
BinaryOp(Scalar)
|
||||
BinaryOp(Scalar,Scalar)
|
||||
>::type result_type;
|
||||
template<typename _Scalar, int Size> struct Cost
|
||||
{ enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };
|
||||
explicit member_redux(const BinaryOp func) : m_functor(func) {}
|
||||
EIGEN_DEVICE_FUNC explicit member_redux(const BinaryOp func) : m_functor(func) {}
|
||||
template<typename Derived>
|
||||
inline result_type operator()(const DenseBase<Derived>& mat) const
|
||||
EIGEN_DEVICE_FUNC inline result_type operator()(const DenseBase<Derived>& mat) const
|
||||
{ return mat.redux(m_functor); }
|
||||
const BinaryOp m_functor;
|
||||
};
|
||||
@@ -160,8 +164,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
typedef typename ExpressionType::Scalar Scalar;
|
||||
typedef typename ExpressionType::RealScalar RealScalar;
|
||||
typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
|
||||
typedef typename internal::conditional<internal::must_nest_by_value<ExpressionType>::ret,
|
||||
ExpressionType, ExpressionType&>::type ExpressionTypeNested;
|
||||
typedef typename internal::ref_selector<ExpressionType>::non_const_type ExpressionTypeNested;
|
||||
typedef typename internal::remove_all<ExpressionTypeNested>::type ExpressionTypeNestedCleaned;
|
||||
|
||||
template<template<typename _Scalar> class Functor,
|
||||
@@ -182,17 +185,18 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
};
|
||||
|
||||
enum {
|
||||
IsVertical = (Direction==Vertical) ? 1 : 0,
|
||||
IsHorizontal = (Direction==Horizontal) ? 1 : 0
|
||||
isVertical = (Direction==Vertical) ? 1 : 0,
|
||||
isHorizontal = (Direction==Horizontal) ? 1 : 0
|
||||
};
|
||||
|
||||
protected:
|
||||
|
||||
/** \internal
|
||||
* \returns the i-th subvector according to the \c Direction */
|
||||
typedef typename internal::conditional<Direction==Vertical,
|
||||
typedef typename internal::conditional<isVertical,
|
||||
typename ExpressionType::ColXpr,
|
||||
typename ExpressionType::RowXpr>::type SubVector;
|
||||
EIGEN_DEVICE_FUNC
|
||||
SubVector subVector(Index i)
|
||||
{
|
||||
return SubVector(m_matrix.derived(),i);
|
||||
@@ -200,58 +204,62 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** \internal
|
||||
* \returns the number of subvectors in the direction \c Direction */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Index subVectors() const
|
||||
{ return Direction==Vertical?m_matrix.cols():m_matrix.rows(); }
|
||||
{ return isVertical?m_matrix.cols():m_matrix.rows(); }
|
||||
|
||||
template<typename OtherDerived> struct ExtendedType {
|
||||
typedef Replicate<OtherDerived,
|
||||
Direction==Vertical ? 1 : ExpressionType::RowsAtCompileTime,
|
||||
Direction==Horizontal ? 1 : ExpressionType::ColsAtCompileTime> Type;
|
||||
isVertical ? 1 : ExpressionType::RowsAtCompileTime,
|
||||
isHorizontal ? 1 : ExpressionType::ColsAtCompileTime> Type;
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* Replicates a vector to match the size of \c *this */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename ExtendedType<OtherDerived>::Type
|
||||
extendedTo(const DenseBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxColsAtCompileTime==1),
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxColsAtCompileTime==1),
|
||||
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxRowsAtCompileTime==1),
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxRowsAtCompileTime==1),
|
||||
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
|
||||
return typename ExtendedType<OtherDerived>::Type
|
||||
(other.derived(),
|
||||
Direction==Vertical ? 1 : m_matrix.rows(),
|
||||
Direction==Horizontal ? 1 : m_matrix.cols());
|
||||
isVertical ? 1 : m_matrix.rows(),
|
||||
isHorizontal ? 1 : m_matrix.cols());
|
||||
}
|
||||
|
||||
template<typename OtherDerived> struct OppositeExtendedType {
|
||||
typedef Replicate<OtherDerived,
|
||||
Direction==Horizontal ? 1 : ExpressionType::RowsAtCompileTime,
|
||||
Direction==Vertical ? 1 : ExpressionType::ColsAtCompileTime> Type;
|
||||
isHorizontal ? 1 : ExpressionType::RowsAtCompileTime,
|
||||
isVertical ? 1 : ExpressionType::ColsAtCompileTime> Type;
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* Replicates a vector in the opposite direction to match the size of \c *this */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename OppositeExtendedType<OtherDerived>::Type
|
||||
extendedToOpposite(const DenseBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxColsAtCompileTime==1),
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxColsAtCompileTime==1),
|
||||
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxRowsAtCompileTime==1),
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxRowsAtCompileTime==1),
|
||||
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
|
||||
return typename OppositeExtendedType<OtherDerived>::Type
|
||||
(other.derived(),
|
||||
Direction==Horizontal ? 1 : m_matrix.rows(),
|
||||
Direction==Vertical ? 1 : m_matrix.cols());
|
||||
isHorizontal ? 1 : m_matrix.rows(),
|
||||
isVertical ? 1 : m_matrix.cols());
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {}
|
||||
|
||||
/** \internal */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const ExpressionType& _expression() const { return m_matrix; }
|
||||
|
||||
/** \returns a row or column vector expression of \c *this reduxed by \a func
|
||||
@@ -262,6 +270,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* \sa class VectorwiseOp, DenseBase::colwise(), DenseBase::rowwise()
|
||||
*/
|
||||
template<typename BinaryOp>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename ReduxReturnType<BinaryOp>::Type
|
||||
redux(const BinaryOp& func = BinaryOp()) const
|
||||
{ return typename ReduxReturnType<BinaryOp>::Type(_expression(), internal::member_redux<BinaryOp,Scalar>(func)); }
|
||||
@@ -290,6 +299,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* Output: \verbinclude PartialRedux_minCoeff.out
|
||||
*
|
||||
* \sa DenseBase::minCoeff() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MinCoeffReturnType minCoeff() const
|
||||
{ return MinCoeffReturnType(_expression()); }
|
||||
|
||||
@@ -302,6 +312,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* Output: \verbinclude PartialRedux_maxCoeff.out
|
||||
*
|
||||
* \sa DenseBase::maxCoeff() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MaxCoeffReturnType maxCoeff() const
|
||||
{ return MaxCoeffReturnType(_expression()); }
|
||||
|
||||
@@ -313,6 +324,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* Output: \verbinclude PartialRedux_squaredNorm.out
|
||||
*
|
||||
* \sa DenseBase::squaredNorm() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const SquaredNormReturnType squaredNorm() const
|
||||
{ return SquaredNormReturnType(_expression()); }
|
||||
|
||||
@@ -324,6 +336,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* Output: \verbinclude PartialRedux_norm.out
|
||||
*
|
||||
* \sa DenseBase::norm() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const NormReturnType norm() const
|
||||
{ return NormReturnType(_expression()); }
|
||||
|
||||
@@ -334,6 +347,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* \sa DenseBase::blueNorm() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const BlueNormReturnType blueNorm() const
|
||||
{ return BlueNormReturnType(_expression()); }
|
||||
|
||||
@@ -344,6 +358,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* \sa DenseBase::stableNorm() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const StableNormReturnType stableNorm() const
|
||||
{ return StableNormReturnType(_expression()); }
|
||||
|
||||
@@ -354,6 +369,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* \sa DenseBase::hypotNorm() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const HypotNormReturnType hypotNorm() const
|
||||
{ return HypotNormReturnType(_expression()); }
|
||||
|
||||
@@ -364,6 +380,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* Output: \verbinclude PartialRedux_sum.out
|
||||
*
|
||||
* \sa DenseBase::sum() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const SumReturnType sum() const
|
||||
{ return SumReturnType(_expression()); }
|
||||
|
||||
@@ -371,6 +388,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* of each column (or row) of the referenced expression.
|
||||
*
|
||||
* \sa DenseBase::mean() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MeanReturnType mean() const
|
||||
{ return MeanReturnType(_expression()); }
|
||||
|
||||
@@ -379,6 +397,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* This expression can be assigned to a vector with entries of type \c bool.
|
||||
*
|
||||
* \sa DenseBase::all() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const AllReturnType all() const
|
||||
{ return AllReturnType(_expression()); }
|
||||
|
||||
@@ -387,6 +406,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* This expression can be assigned to a vector with entries of type \c bool.
|
||||
*
|
||||
* \sa DenseBase::any() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const AnyReturnType any() const
|
||||
{ return Any(_expression()); }
|
||||
|
||||
@@ -399,6 +419,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* Output: \verbinclude PartialRedux_count.out
|
||||
*
|
||||
* \sa DenseBase::count() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const CountReturnType count() const
|
||||
{ return CountReturnType(_expression()); }
|
||||
|
||||
@@ -409,6 +430,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* Output: \verbinclude PartialRedux_prod.out
|
||||
*
|
||||
* \sa DenseBase::prod() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const ProdReturnType prod() const
|
||||
{ return ProdReturnType(_expression()); }
|
||||
|
||||
@@ -420,10 +442,12 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* Output: \verbinclude Vectorwise_reverse.out
|
||||
*
|
||||
* \sa DenseBase::reverse() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const ReverseReturnType reverse() const
|
||||
{ return ReverseReturnType( _expression() ); }
|
||||
|
||||
typedef Replicate<ExpressionType,Direction==Vertical?Dynamic:1,Direction==Horizontal?Dynamic:1> ReplicateReturnType;
|
||||
typedef Replicate<ExpressionType,(isVertical?Dynamic:1),(isHorizontal?Dynamic:1)> ReplicateReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
const ReplicateReturnType replicate(Index factor) const;
|
||||
|
||||
/**
|
||||
@@ -435,17 +459,20 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* \sa VectorwiseOp::replicate(Index), DenseBase::replicate(), class Replicate
|
||||
*/
|
||||
// NOTE implemented here because of sunstudio's compilation errors
|
||||
template<int Factor> const Replicate<ExpressionType,(IsVertical?Factor:1),(IsHorizontal?Factor:1)>
|
||||
// isVertical*Factor+isHorizontal instead of (isVertical?Factor:1) to handle CUDA bug with ternary operator
|
||||
template<int Factor> const Replicate<ExpressionType,isVertical*Factor+isHorizontal,isHorizontal*Factor+isVertical>
|
||||
EIGEN_DEVICE_FUNC
|
||||
replicate(Index factor = Factor) const
|
||||
{
|
||||
return Replicate<ExpressionType,Direction==Vertical?Factor:1,Direction==Horizontal?Factor:1>
|
||||
(_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);
|
||||
return Replicate<ExpressionType,(isVertical?Factor:1),(isHorizontal?Factor:1)>
|
||||
(_expression(),isVertical?factor:1,isHorizontal?factor:1);
|
||||
}
|
||||
|
||||
/////////// Artithmetic operators ///////////
|
||||
|
||||
/** Copies the vector \a other to each subvector of \c *this */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
ExpressionType& operator=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
@@ -456,6 +483,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** Adds the vector \a other to each subvector of \c *this */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
ExpressionType& operator+=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
@@ -465,6 +493,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** Substracts the vector \a other to each subvector of \c *this */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
ExpressionType& operator-=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
@@ -474,6 +503,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** Multiples each subvector of \c *this by the vector \a other */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
ExpressionType& operator*=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
@@ -485,6 +515,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** Divides each subvector of \c *this by the vector \a other */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
ExpressionType& operator/=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
@@ -495,7 +526,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
}
|
||||
|
||||
/** Returns the expression of the sum of the vector \a other to each subvector of \c *this */
|
||||
template<typename OtherDerived> EIGEN_STRONG_INLINE
|
||||
template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
|
||||
CwiseBinaryOp<internal::scalar_sum_op<Scalar>,
|
||||
const ExpressionTypeNestedCleaned,
|
||||
const typename ExtendedType<OtherDerived>::Type>
|
||||
@@ -508,6 +539,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** Returns the expression of the difference between each subvector of \c *this and the vector \a other */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
CwiseBinaryOp<internal::scalar_difference_op<Scalar>,
|
||||
const ExpressionTypeNestedCleaned,
|
||||
const typename ExtendedType<OtherDerived>::Type>
|
||||
@@ -520,10 +552,11 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** Returns the expression where each subvector is the product of the vector \a other
|
||||
* by the corresponding subvector of \c *this */
|
||||
template<typename OtherDerived> EIGEN_STRONG_INLINE
|
||||
template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
|
||||
CwiseBinaryOp<internal::scalar_product_op<Scalar>,
|
||||
const ExpressionTypeNestedCleaned,
|
||||
const typename ExtendedType<OtherDerived>::Type>
|
||||
EIGEN_DEVICE_FUNC
|
||||
operator*(const DenseBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
@@ -535,6 +568,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
/** Returns the expression where each subvector is the quotient of the corresponding
|
||||
* subvector of \c *this by the vector \a other */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
|
||||
const ExpressionTypeNestedCleaned,
|
||||
const typename ExtendedType<OtherDerived>::Type>
|
||||
@@ -550,6 +584,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* The referenced matrix is \b not modified.
|
||||
* \sa MatrixBase::normalized(), normalize()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
|
||||
const ExpressionTypeNestedCleaned,
|
||||
const typename OppositeExtendedType<typename ReturnType<internal::member_norm,RealScalar>::Type>::Type>
|
||||
@@ -559,10 +594,12 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
/** Normalize in-place each row or columns of the referenced matrix.
|
||||
* \sa MatrixBase::normalize(), normalized()
|
||||
*/
|
||||
void normalize() {
|
||||
EIGEN_DEVICE_FUNC void normalize() {
|
||||
m_matrix = this->normalized();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void reverseInPlace();
|
||||
|
||||
/////////// Geometry module ///////////
|
||||
|
||||
typedef Homogeneous<ExpressionType,Direction> HomogeneousReturnType;
|
||||
@@ -570,6 +607,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
typedef typename ExpressionType::PlainObject CrossReturnType;
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
enum {
|
||||
@@ -600,19 +638,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
ExpressionTypeNested m_matrix;
|
||||
};
|
||||
|
||||
/** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
|
||||
*
|
||||
* Example: \include MatrixBase_colwise.cpp
|
||||
* Output: \verbinclude MatrixBase_colwise.out
|
||||
*
|
||||
* \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const typename DenseBase<Derived>::ConstColwiseReturnType
|
||||
DenseBase<Derived>::colwise() const
|
||||
{
|
||||
return ConstColwiseReturnType(derived());
|
||||
}
|
||||
//const colwise moved to DenseBase.h due to CUDA compiler bug
|
||||
|
||||
|
||||
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
|
||||
*
|
||||
@@ -625,19 +652,8 @@ DenseBase<Derived>::colwise()
|
||||
return ColwiseReturnType(derived());
|
||||
}
|
||||
|
||||
/** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
|
||||
*
|
||||
* Example: \include MatrixBase_rowwise.cpp
|
||||
* Output: \verbinclude MatrixBase_rowwise.out
|
||||
*
|
||||
* \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const typename DenseBase<Derived>::ConstRowwiseReturnType
|
||||
DenseBase<Derived>::rowwise() const
|
||||
{
|
||||
return ConstRowwiseReturnType(derived());
|
||||
}
|
||||
//const rowwise moved to DenseBase.h due to CUDA compiler bug
|
||||
|
||||
|
||||
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
|
||||
*
|
||||
|
||||
@@ -22,6 +22,7 @@ struct visitor_impl
|
||||
row = (UnrollCount-1) % Derived::RowsAtCompileTime
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(const Derived &mat, Visitor& visitor)
|
||||
{
|
||||
visitor_impl<Visitor, Derived, UnrollCount-1>::run(mat, visitor);
|
||||
@@ -32,6 +33,7 @@ struct visitor_impl
|
||||
template<typename Visitor, typename Derived>
|
||||
struct visitor_impl<Visitor, Derived, 1>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(const Derived &mat, Visitor& visitor)
|
||||
{
|
||||
return visitor.init(mat.coeff(0, 0), 0, 0);
|
||||
@@ -41,6 +43,7 @@ struct visitor_impl<Visitor, Derived, 1>
|
||||
template<typename Visitor, typename Derived>
|
||||
struct visitor_impl<Visitor, Derived, Dynamic>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(const Derived& mat, Visitor& visitor)
|
||||
{
|
||||
visitor.init(mat.coeff(0,0), 0, 0);
|
||||
@@ -57,6 +60,7 @@ template<typename XprType>
|
||||
class visitor_evaluator
|
||||
{
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit visitor_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
|
||||
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
@@ -67,15 +71,15 @@ public:
|
||||
CoeffReadCost = internal::evaluator<XprType>::CoeffReadCost
|
||||
};
|
||||
|
||||
Index rows() const { return m_xpr.rows(); }
|
||||
Index cols() const { return m_xpr.cols(); }
|
||||
Index size() const { return m_xpr.size(); }
|
||||
EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
|
||||
EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
|
||||
EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
|
||||
|
||||
CoeffReturnType coeff(Index row, Index col) const
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
{ return m_evaluator.coeff(row, col); }
|
||||
|
||||
protected:
|
||||
typename internal::evaluator<XprType>::nestedType m_evaluator;
|
||||
internal::evaluator<XprType> m_evaluator;
|
||||
const XprType &m_xpr;
|
||||
};
|
||||
} // end namespace internal
|
||||
@@ -99,6 +103,7 @@ protected:
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename Visitor>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void DenseBase<Derived>::visit(Visitor& visitor) const
|
||||
{
|
||||
typedef typename internal::visitor_evaluator<Derived> ThisEvaluator;
|
||||
@@ -125,6 +130,7 @@ struct coeff_visitor
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
Index row, col;
|
||||
Scalar res;
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void init(const Scalar& value, Index i, Index j)
|
||||
{
|
||||
res = value;
|
||||
@@ -142,6 +148,7 @@ template <typename Derived>
|
||||
struct min_coeff_visitor : coeff_visitor<Derived>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
void operator() (const Scalar& value, Index i, Index j)
|
||||
{
|
||||
if(value < this->res)
|
||||
@@ -168,7 +175,8 @@ struct functor_traits<min_coeff_visitor<Scalar> > {
|
||||
template <typename Derived>
|
||||
struct max_coeff_visitor : coeff_visitor<Derived>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
void operator() (const Scalar& value, Index i, Index j)
|
||||
{
|
||||
if(value > this->res)
|
||||
@@ -196,6 +204,7 @@ struct functor_traits<max_coeff_visitor<Scalar> > {
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename IndexType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
|
||||
{
|
||||
@@ -213,6 +222,7 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename IndexType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::minCoeff(IndexType* index) const
|
||||
{
|
||||
@@ -230,6 +240,7 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename IndexType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
|
||||
{
|
||||
@@ -247,6 +258,7 @@ DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename IndexType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::maxCoeff(IndexType* index) const
|
||||
{
|
||||
|
||||
@@ -45,7 +45,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4}; typedef Packet2cf half; };
|
||||
template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32}; typedef Packet2cf half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
|
||||
@@ -267,7 +267,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2}; typedef Packet1cd half; };
|
||||
template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32}; typedef Packet1cd half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
|
||||
|
||||
@@ -271,6 +271,86 @@ pexp<Packet8f>(const Packet8f& _x) {
|
||||
return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
|
||||
pexp<Packet4d>(const Packet4d& _x) {
|
||||
Packet4d x = _x;
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4d(1, 1.0);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(2, 2.0);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(half, 0.5);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4d(exp_hi, 709.437);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(exp_lo, -709.436139303);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_LOG2EF, 1.4426950408889634073599);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p0, 1.26177193074810590878e-4);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p1, 3.02994407707441961300e-2);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p2, 9.99999999999999999910e-1);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q0, 3.00198505138664455042e-6);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q1, 2.52448340349684104192e-3);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q2, 2.27265548208155028766e-1);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C1, 0.693145751953125);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
|
||||
|
||||
Packet4d tmp, fx;
|
||||
|
||||
// clamp x
|
||||
x = pmax(pmin(x, p4d_exp_hi), p4d_exp_lo);
|
||||
// Express exp(x) as exp(g + n*log(2)).
|
||||
fx = pmadd(p4d_cephes_LOG2EF, x, p4d_half);
|
||||
|
||||
// Get the integer modulus of log(2), i.e. the "n" described above.
|
||||
fx = _mm256_floor_pd(fx);
|
||||
|
||||
// Get the remainder modulo log(2), i.e. the "g" described above. Subtract
|
||||
// n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
|
||||
// digits right.
|
||||
tmp = pmul(fx, p4d_cephes_exp_C1);
|
||||
Packet4d z = pmul(fx, p4d_cephes_exp_C2);
|
||||
x = psub(x, tmp);
|
||||
x = psub(x, z);
|
||||
|
||||
Packet4d x2 = pmul(x, x);
|
||||
|
||||
// Evaluate the numerator polynomial of the rational interpolant.
|
||||
Packet4d px = p4d_cephes_exp_p0;
|
||||
px = pmadd(px, x2, p4d_cephes_exp_p1);
|
||||
px = pmadd(px, x2, p4d_cephes_exp_p2);
|
||||
px = pmul(px, x);
|
||||
|
||||
// Evaluate the denominator polynomial of the rational interpolant.
|
||||
Packet4d qx = p4d_cephes_exp_q0;
|
||||
qx = pmadd(qx, x2, p4d_cephes_exp_q1);
|
||||
qx = pmadd(qx, x2, p4d_cephes_exp_q2);
|
||||
qx = pmadd(qx, x2, p4d_cephes_exp_q3);
|
||||
|
||||
// I don't really get this bit, copied from the SSE2 routines, so...
|
||||
// TODO(gonnet): Figure out what is going on here, perhaps find a better
|
||||
// rational interpolant?
|
||||
x = _mm256_div_pd(px, psub(qx, px));
|
||||
x = pmadd(p4d_2, x, p4d_1);
|
||||
|
||||
// Build e=2^n by constructing the exponents in a 128-bit vector and
|
||||
// shifting them to where they belong in double-precision values.
|
||||
__m128i emm0 = _mm256_cvtpd_epi32(fx);
|
||||
emm0 = _mm_add_epi32(emm0, p4i_1023);
|
||||
emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0));
|
||||
__m128i lo = _mm_slli_epi64(emm0, 52);
|
||||
__m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52);
|
||||
__m256i e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0);
|
||||
e = _mm256_insertf128_si256(e, hi, 1);
|
||||
|
||||
// Construct the result 2^n * exp(g) = e * x. The max is used to catch
|
||||
// non-finite values in the input.
|
||||
return pmax(pmul(x, Packet4d(e)), _x);
|
||||
}
|
||||
|
||||
// Functions for sqrt.
|
||||
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
|
||||
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
|
||||
@@ -300,15 +380,59 @@ psqrt<Packet8f>(const Packet8f& _x) {
|
||||
return pmul(_x, x);
|
||||
}
|
||||
#else
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8f psqrt<Packet8f>(const Packet8f& x) {
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet8f psqrt<Packet8f>(const Packet8f& x) {
|
||||
return _mm256_sqrt_ps(x);
|
||||
}
|
||||
#endif
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4d psqrt<Packet4d>(const Packet4d& x) {
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4d psqrt<Packet4d>(const Packet4d& x) {
|
||||
return _mm256_sqrt_pd(x);
|
||||
}
|
||||
#if EIGEN_FAST_MATH
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
|
||||
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(nan, 0x7fc00000);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
|
||||
|
||||
Packet8f neg_half = pmul(_x, p8f_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
Packet8f le_zero_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_LT_OQ);
|
||||
Packet8f x = _mm256_andnot_ps(le_zero_mask, _mm256_rsqrt_ps(_x));
|
||||
|
||||
// Fill in NaNs and Infs for the negative/zero entries.
|
||||
Packet8f neg_mask = _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_LT_OQ);
|
||||
Packet8f zero_mask = _mm256_andnot_ps(neg_mask, le_zero_mask);
|
||||
Packet8f infs_and_nans = _mm256_or_ps(_mm256_and_ps(neg_mask, p8f_nan),
|
||||
_mm256_and_ps(zero_mask, p8f_inf));
|
||||
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8f_one_point_five));
|
||||
|
||||
// Insert NaNs and Infs in all the right places.
|
||||
return _mm256_or_ps(x, infs_and_nans);
|
||||
}
|
||||
|
||||
#else
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet8f prsqrt<Packet8f>(const Packet8f& x) {
|
||||
_EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
|
||||
return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(x));
|
||||
}
|
||||
#endif
|
||||
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4d prsqrt<Packet4d>(const Packet4d& x) {
|
||||
_EIGEN_DECLARE_CONST_Packet4d(one, 1.0);
|
||||
return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(x));
|
||||
}
|
||||
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
@@ -60,11 +60,12 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasDiv = 1,
|
||||
HasSin = 1,
|
||||
HasSin = EIGEN_FAST_MATH,
|
||||
HasCos = 0,
|
||||
HasLog = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
@@ -79,8 +80,9 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasDiv = 1,
|
||||
HasExp = 0,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
@@ -98,9 +100,9 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
};
|
||||
*/
|
||||
|
||||
template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; enum {size=8}; };
|
||||
template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4}; };
|
||||
template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8}; };
|
||||
template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; enum {size=8, alignment=Aligned32}; };
|
||||
template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4, alignment=Aligned32}; };
|
||||
template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32}; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
|
||||
@@ -109,8 +111,8 @@ template<> EIGEN_STRONG_INLINE Packet8i pset1<Packet8i>(const int& from) { re
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float* from) { return _mm256_broadcast_ss(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f plset<float>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d plset<double>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); }
|
||||
@@ -432,26 +434,30 @@ struct palign_impl<Offset,Packet8f>
|
||||
if (Offset==1)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 1);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
|
||||
first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0x88);
|
||||
Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
|
||||
Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
|
||||
first = _mm256_blend_ps(tmp1, tmp2, 0x88);
|
||||
}
|
||||
else if (Offset==2)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 3);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
|
||||
first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0xcc);
|
||||
Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
|
||||
Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
|
||||
first = _mm256_blend_ps(tmp1, tmp2, 0xcc);
|
||||
}
|
||||
else if (Offset==3)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 7);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
|
||||
first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0xee);
|
||||
Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
|
||||
Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
|
||||
first = _mm256_blend_ps(tmp1, tmp2, 0xee);
|
||||
}
|
||||
else if (Offset==4)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 15);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
|
||||
first = _mm256_permute_ps(_mm256_permute2f128_ps (tmp, tmp, 1), _MM_SHUFFLE(3,2,1,0));
|
||||
Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
|
||||
Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
|
||||
first = _mm256_permute_ps(tmp2, _MM_SHUFFLE(3,2,1,0));
|
||||
}
|
||||
else if (Offset==5)
|
||||
{
|
||||
|
||||
51
Eigen/src/Core/arch/AVX/TypeCasting.h
Normal file
51
Eigen/src/Core/arch/AVX/TypeCasting.h
Normal file
@@ -0,0 +1,51 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_TYPE_CASTING_AVX_H
|
||||
#define EIGEN_TYPE_CASTING_AVX_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// For now we use SSE to handle integers, so we can't use AVX instructions to cast
|
||||
// from int to float
|
||||
template <>
|
||||
struct type_casting_traits<float, int> {
|
||||
enum {
|
||||
VectorizedCast = 0,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<int, float> {
|
||||
enum {
|
||||
VectorizedCast = 0,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
|
||||
return _mm256_cvtps_epi32(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
|
||||
return _mm256_cvtepi32_ps(a);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_TYPE_CASTING_AVX_H
|
||||
@@ -53,7 +53,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
@@ -275,7 +275,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; };
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
|
||||
@@ -408,7 +408,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, con
|
||||
// TODO optimize it for AltiVec
|
||||
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
|
||||
Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
|
||||
return Packet1cd(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX32_REV))));
|
||||
return Packet1cd(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_REVERSE64))));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
|
||||
|
||||
290
Eigen/src/Core/arch/AltiVec/MathFunctions.h
Normal file
290
Eigen/src/Core/arch/AltiVec/MathFunctions.h
Normal file
@@ -0,0 +1,290 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2007 Julien Pommier
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/* The sin, cos, exp, and log functions of this file come from
|
||||
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
|
||||
#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f plog<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
Packet4f x = _x;
|
||||
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(23, 23);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
|
||||
|
||||
/* the smallest non denormalized float number */
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000); // -1.f/0.f
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan, 0xffffffff);
|
||||
|
||||
/* natural logarithm computed for 4 simultaneous float
|
||||
return NaN for x <= 0
|
||||
*/
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
|
||||
|
||||
|
||||
Packet4i emm0;
|
||||
|
||||
/* isvalid_mask is 0 if x < 0 or x is NaN. */
|
||||
Packet4ui isvalid_mask = reinterpret_cast<Packet4ui>(vec_cmpge(x, p4f_ZERO));
|
||||
Packet4ui iszero_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(x, p4f_ZERO));
|
||||
|
||||
x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
|
||||
emm0 = vec_sr(reinterpret_cast<Packet4i>(x),
|
||||
reinterpret_cast<Packet4ui>(p4i_23));
|
||||
|
||||
/* keep only the fractional part */
|
||||
x = pand(x, p4f_inv_mant_mask);
|
||||
x = por(x, p4f_half);
|
||||
|
||||
emm0 = psub(emm0, p4i_0x7f);
|
||||
Packet4f e = padd(vec_ctf(emm0, 0), p4f_1);
|
||||
|
||||
/* part2:
|
||||
if( x < SQRTHF ) {
|
||||
e -= 1;
|
||||
x = x + x - 1.0;
|
||||
} else { x = x - 1.0; }
|
||||
*/
|
||||
Packet4f mask = reinterpret_cast<Packet4f>(vec_cmplt(x, p4f_cephes_SQRTHF));
|
||||
Packet4f tmp = pand(x, mask);
|
||||
x = psub(x, p4f_1);
|
||||
e = psub(e, pand(p4f_1, mask));
|
||||
x = padd(x, tmp);
|
||||
|
||||
Packet4f x2 = pmul(x,x);
|
||||
Packet4f x3 = pmul(x2,x);
|
||||
|
||||
Packet4f y, y1, y2;
|
||||
y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
|
||||
y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
|
||||
y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
|
||||
y = pmadd(y , x, p4f_cephes_log_p2);
|
||||
y1 = pmadd(y1, x, p4f_cephes_log_p5);
|
||||
y2 = pmadd(y2, x, p4f_cephes_log_p8);
|
||||
y = pmadd(y, x3, y1);
|
||||
y = pmadd(y, x3, y2);
|
||||
y = pmul(y, x3);
|
||||
|
||||
y1 = pmul(e, p4f_cephes_log_q1);
|
||||
tmp = pmul(x2, p4f_half);
|
||||
y = padd(y, y1);
|
||||
x = psub(x, tmp);
|
||||
y2 = pmul(e, p4f_cephes_log_q2);
|
||||
x = padd(x, y);
|
||||
x = padd(x, y2);
|
||||
// negative arg will be NAN, 0 will be -INF
|
||||
x = vec_sel(x, p4f_minus_inf, iszero_mask);
|
||||
x = vec_sel(p4f_minus_nan, x, isvalid_mask);
|
||||
return x;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f pexp<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
Packet4f x = _x;
|
||||
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(23, 23);
|
||||
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
|
||||
|
||||
Packet4f tmp, fx;
|
||||
Packet4i emm0;
|
||||
|
||||
// clamp x
|
||||
x = vec_max(vec_min(x, p4f_exp_hi), p4f_exp_lo);
|
||||
|
||||
/* express exp(x) as exp(g + n*log(2)) */
|
||||
fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
|
||||
|
||||
fx = vec_floor(fx);
|
||||
|
||||
tmp = pmul(fx, p4f_cephes_exp_C1);
|
||||
Packet4f z = pmul(fx, p4f_cephes_exp_C2);
|
||||
x = psub(x, tmp);
|
||||
x = psub(x, z);
|
||||
|
||||
z = pmul(x,x);
|
||||
|
||||
Packet4f y = p4f_cephes_exp_p0;
|
||||
y = pmadd(y, x, p4f_cephes_exp_p1);
|
||||
y = pmadd(y, x, p4f_cephes_exp_p2);
|
||||
y = pmadd(y, x, p4f_cephes_exp_p3);
|
||||
y = pmadd(y, x, p4f_cephes_exp_p4);
|
||||
y = pmadd(y, x, p4f_cephes_exp_p5);
|
||||
y = pmadd(y, z, x);
|
||||
y = padd(y, p4f_1);
|
||||
|
||||
// build 2^n
|
||||
emm0 = vec_cts(fx, 0);
|
||||
emm0 = vec_add(emm0, p4i_0x7f);
|
||||
emm0 = vec_sl(emm0, reinterpret_cast<Packet4ui>(p4i_23));
|
||||
|
||||
// Altivec's max & min operators just drop silent NaNs. Check NaNs in
|
||||
// inputs and return them unmodified.
|
||||
Packet4ui isnumber_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(_x, _x));
|
||||
return vec_sel(_x, pmax(pmul(y, reinterpret_cast<Packet4f>(emm0)), _x),
|
||||
isnumber_mask);
|
||||
}
|
||||
|
||||
#ifdef __VSX__
|
||||
// VSX support varies between different compilers and even different
|
||||
// versions of the same compiler. For gcc version >= 4.9.3, we can use
|
||||
// vec_cts to efficiently convert Packet2d to Packet2l. Otherwise, use
|
||||
// a slow version that works with older compilers.
|
||||
static inline Packet2l ConvertToPacket2l(const Packet2d& x) {
|
||||
#if EIGEN_GNUC_AT_LEAST(5, 0) || \
|
||||
(EIGEN_GNUC_AT(4, 9) && __GNUC_PATCHLEVEL__ >= 3)
|
||||
return vec_cts(x, 0); // TODO: check clang version.
|
||||
#else
|
||||
double tmp[2];
|
||||
memcpy(tmp, &x, sizeof(tmp));
|
||||
Packet2l l = { static_cast<long long>(tmp[0]),
|
||||
static_cast<long long>(tmp[1]) };
|
||||
return l;
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
{
|
||||
Packet2d x = _x;
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
|
||||
Packet2d tmp, fx;
|
||||
Packet2l emm0;
|
||||
|
||||
// clamp x
|
||||
x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
|
||||
/* express exp(x) as exp(g + n*log(2)) */
|
||||
fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
|
||||
|
||||
fx = vec_floor(fx);
|
||||
|
||||
tmp = pmul(fx, p2d_cephes_exp_C1);
|
||||
Packet2d z = pmul(fx, p2d_cephes_exp_C2);
|
||||
x = psub(x, tmp);
|
||||
x = psub(x, z);
|
||||
|
||||
Packet2d x2 = pmul(x,x);
|
||||
|
||||
Packet2d px = p2d_cephes_exp_p0;
|
||||
px = pmadd(px, x2, p2d_cephes_exp_p1);
|
||||
px = pmadd(px, x2, p2d_cephes_exp_p2);
|
||||
px = pmul (px, x);
|
||||
|
||||
Packet2d qx = p2d_cephes_exp_q0;
|
||||
qx = pmadd(qx, x2, p2d_cephes_exp_q1);
|
||||
qx = pmadd(qx, x2, p2d_cephes_exp_q2);
|
||||
qx = pmadd(qx, x2, p2d_cephes_exp_q3);
|
||||
|
||||
x = pdiv(px,psub(qx,px));
|
||||
x = pmadd(p2d_2,x,p2d_1);
|
||||
|
||||
// build 2^n
|
||||
emm0 = ConvertToPacket2l(fx);
|
||||
|
||||
#ifdef __POWER8_VECTOR__
|
||||
static const Packet2l p2l_1023 = { 1023, 1023 };
|
||||
static const Packet2ul p2ul_52 = { 52, 52 };
|
||||
|
||||
emm0 = vec_add(emm0, p2l_1023);
|
||||
emm0 = vec_sl(emm0, p2ul_52);
|
||||
#else
|
||||
// Code is a bit complex for POWER7. There is actually a
|
||||
// vec_xxsldi intrinsic but it is not supported by some gcc versions.
|
||||
// So we shift (52-32) bits and do a word swap with zeros.
|
||||
_EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(20, 20); // 52 - 32
|
||||
|
||||
Packet4i emm04i = reinterpret_cast<Packet4i>(emm0);
|
||||
emm04i = vec_add(emm04i, p4i_1023);
|
||||
emm04i = vec_sl(emm04i, reinterpret_cast<Packet4ui>(p4i_20));
|
||||
static const Packet16uc perm = {
|
||||
0x14, 0x15, 0x16, 0x17, 0x00, 0x01, 0x02, 0x03,
|
||||
0x1c, 0x1d, 0x1e, 0x1f, 0x08, 0x09, 0x0a, 0x0b };
|
||||
#ifdef _BIG_ENDIAN
|
||||
emm0 = reinterpret_cast<Packet2l>(vec_perm(p4i_ZERO, emm04i, perm));
|
||||
#else
|
||||
emm0 = reinterpret_cast<Packet2l>(vec_perm(emm04i, p4i_ZERO, perm));
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
// Altivec's max & min operators just drop silent NaNs. Check NaNs in
|
||||
// inputs and return them unmodified.
|
||||
Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));
|
||||
return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),
|
||||
isnumber_mask);
|
||||
}
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MATH_FUNCTIONS_ALTIVEC_H
|
||||
@@ -59,6 +59,9 @@ typedef __vector unsigned char Packet16uc;
|
||||
#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
|
||||
Packet2l p2l_##NAME = pset1<Packet2l>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
|
||||
const Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(pset1<Packet4i>(X))
|
||||
|
||||
#define DST_CHAN 1
|
||||
#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
|
||||
|
||||
@@ -66,10 +69,12 @@ typedef __vector unsigned char Packet16uc;
|
||||
// These constants are endian-agnostic
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
|
||||
#ifndef __VSX__
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1}
|
||||
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}
|
||||
#endif
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}
|
||||
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}
|
||||
static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000}
|
||||
|
||||
static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
|
||||
@@ -130,8 +135,8 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasDiv = 1,
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
HasExp = 0,
|
||||
HasLog = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 0
|
||||
};
|
||||
};
|
||||
@@ -148,8 +153,8 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
};
|
||||
|
||||
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
|
||||
|
||||
inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v)
|
||||
{
|
||||
@@ -289,8 +294,8 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const
|
||||
to[3*stride] = ai[3];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return vec_add(pset1<Packet4f>(a), p4f_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return vec_add(pset1<Packet4i>(a), p4i_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return vec_add(pset1<Packet4f>(a), p4f_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return vec_add(pset1<Packet4i>(a), p4i_COUNTDOWN); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); }
|
||||
@@ -751,12 +756,12 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasDiv = 1,
|
||||
HasExp = 0,
|
||||
HasExp = 1,
|
||||
HasSqrt = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
|
||||
|
||||
|
||||
inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
|
||||
@@ -807,7 +812,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to,
|
||||
to[0*stride] = af[0];
|
||||
to[1*stride] = af[1];
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return vec_add(pset1<Packet2d>(a), p2d_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return vec_add(pset1<Packet2d>(a), p2d_COUNTDOWN); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_add(a,b); }
|
||||
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
ADD_SUBDIRECTORY(SSE)
|
||||
ADD_SUBDIRECTORY(AltiVec)
|
||||
ADD_SUBDIRECTORY(NEON)
|
||||
ADD_SUBDIRECTORY(AVX)
|
||||
ADD_SUBDIRECTORY(CUDA)
|
||||
ADD_SUBDIRECTORY(Default)
|
||||
ADD_SUBDIRECTORY(NEON)
|
||||
ADD_SUBDIRECTORY(SSE)
|
||||
|
||||
|
||||
|
||||
|
||||
6
Eigen/src/Core/arch/CUDA/CMakeLists.txt
Normal file
6
Eigen/src/Core/arch/CUDA/CMakeLists.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
FILE(GLOB Eigen_Core_arch_CUDA_SRCS "*.h")
|
||||
|
||||
INSTALL(FILES
|
||||
${Eigen_Core_arch_CUDA_SRCS}
|
||||
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/CUDA COMPONENT Devel
|
||||
)
|
||||
@@ -18,49 +18,49 @@ namespace internal {
|
||||
// introduce conflicts between these packet_traits definitions and the ones
|
||||
// we'll use on the host side (SSE, AVX, ...)
|
||||
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
|
||||
template<> EIGEN_STRONG_INLINE
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
float4 plog<float4>(const float4& a)
|
||||
{
|
||||
return make_float4(logf(a.x), logf(a.y), logf(a.z), logf(a.w));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
double2 plog<double2>(const double2& a)
|
||||
{
|
||||
return make_double2(log(a.x), log(a.y));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
float4 pexp<float4>(const float4& a)
|
||||
{
|
||||
return make_float4(expf(a.x), expf(a.y), expf(a.z), expf(a.w));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
double2 pexp<double2>(const double2& a)
|
||||
{
|
||||
return make_double2(exp(a.x), exp(a.y));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
float4 psqrt<float4>(const float4& a)
|
||||
{
|
||||
return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
double2 psqrt<double2>(const double2& a)
|
||||
{
|
||||
return make_double2(sqrt(a.x), sqrt(a.y));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
float4 prsqrt<float4>(const float4& a)
|
||||
{
|
||||
return make_float4(rsqrtf(a.x), rsqrtf(a.y), rsqrtf(a.z), rsqrtf(a.w));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
double2 prsqrt<double2>(const double2& a)
|
||||
{
|
||||
return make_double2(rsqrt(a.x), rsqrt(a.y));
|
||||
|
||||
@@ -65,8 +65,8 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
};
|
||||
|
||||
|
||||
template<> struct unpacket_traits<float4> { typedef float type; enum {size=4}; typedef float4 half; };
|
||||
template<> struct unpacket_traits<double2> { typedef double type; enum {size=2}; typedef double2 half; };
|
||||
template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16}; typedef float4 half; };
|
||||
template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16}; typedef double2 half; };
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) {
|
||||
return make_float4(from, from, from, from);
|
||||
@@ -76,10 +76,10 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const do
|
||||
}
|
||||
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float>(const float& a) {
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
|
||||
return make_float4(a, a+1, a+2, a+3);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double>(const double& a) {
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double2>(const double& a) {
|
||||
return make_double2(a, a+1);
|
||||
}
|
||||
|
||||
@@ -197,21 +197,21 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(cons
|
||||
}
|
||||
#endif
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, int stride) {
|
||||
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
|
||||
return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, int stride) {
|
||||
template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
|
||||
return make_double2(from[0*stride], from[1*stride]);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, int stride) {
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
|
||||
to[stride*0] = from.x;
|
||||
to[stride*1] = from.y;
|
||||
to[stride*2] = from.z;
|
||||
to[stride*3] = from.w;
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, int stride) {
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
|
||||
to[stride*0] = from.x;
|
||||
to[stride*1] = from.y;
|
||||
}
|
||||
@@ -245,14 +245,14 @@ template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a)
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
|
||||
return make_float4(fabs(a.x), fabs(a.y), fabs(a.z), fabs(a.w));
|
||||
return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
|
||||
return make_double2(abs(a.x), abs(a.y));
|
||||
return make_double2(fabs(a.x), fabs(a.y));
|
||||
}
|
||||
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<float4,4>& kernel) {
|
||||
double tmp = kernel.packet[0].y;
|
||||
kernel.packet[0].y = kernel.packet[1].x;
|
||||
@@ -279,7 +279,7 @@ ptranspose(PacketBlock<float4,4>& kernel) {
|
||||
kernel.packet[3].z = tmp;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<double2,2>& kernel) {
|
||||
double tmp = kernel.packet[0].y;
|
||||
kernel.packet[0].y = kernel.packet[1].x;
|
||||
|
||||
@@ -48,7 +48,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
@@ -114,7 +114,7 @@ template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<f
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
|
||||
{
|
||||
Packet4f res;
|
||||
Packet4f res = pset1<Packet4f>(0.f);
|
||||
res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
|
||||
res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
|
||||
res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
|
||||
@@ -272,7 +272,7 @@ ptranspose(PacketBlock<Packet2cf,2>& kernel) {
|
||||
}
|
||||
|
||||
//---------- double ----------
|
||||
#if EIGEN_ARCH_ARM64
|
||||
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
|
||||
|
||||
static uint64x2_t p2ul_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x0, 0x8000000000000000);
|
||||
|
||||
@@ -306,7 +306,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; };
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
|
||||
@@ -365,7 +365,7 @@ template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::c
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
|
||||
{
|
||||
Packet2d res;
|
||||
Packet2d res = pset1<Packet2d>(0.0);
|
||||
res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
|
||||
res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
|
||||
return Packet1cd(res);
|
||||
|
||||
@@ -76,12 +76,12 @@ typedef uint32x4_t Packet4ui;
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet4f type;
|
||||
typedef Packet2f half;
|
||||
typedef Packet4f half; // Packet2f intrinsics not implemented yet
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 4,
|
||||
HasHalfPacket=1,
|
||||
HasHalfPacket=0, // Packet2f intrinsics not implemented yet
|
||||
|
||||
HasDiv = 1,
|
||||
// FIXME check the Has*
|
||||
@@ -95,12 +95,12 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
template<> struct packet_traits<int> : default_packet_traits
|
||||
{
|
||||
typedef Packet4i type;
|
||||
typedef Packet2i half;
|
||||
typedef Packet4i half; // Packet2i intrinsics not implemented yet
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
HasHalfPacket=1
|
||||
HasHalfPacket=0 // Packet2i intrinsics not implemented yet
|
||||
// FIXME check the Has*
|
||||
};
|
||||
};
|
||||
@@ -114,18 +114,18 @@ EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q
|
||||
EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
|
||||
#endif
|
||||
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)
|
||||
{
|
||||
Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
|
||||
return vaddq_f32(pset1<Packet4f>(a), countdown);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a)
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a)
|
||||
{
|
||||
Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
|
||||
return vaddq_s32(pset1<Packet4i>(a), countdown);
|
||||
@@ -252,7 +252,7 @@ template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& f
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
|
||||
{
|
||||
Packet4f res;
|
||||
Packet4f res = pset1<Packet4f>(0.f);
|
||||
res = vsetq_lane_f32(from[0*stride], res, 0);
|
||||
res = vsetq_lane_f32(from[1*stride], res, 1);
|
||||
res = vsetq_lane_f32(from[2*stride], res, 2);
|
||||
@@ -261,7 +261,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const floa
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
|
||||
{
|
||||
Packet4i res;
|
||||
Packet4i res = pset1<Packet4i>(0);
|
||||
res = vsetq_lane_s32(from[0*stride], res, 0);
|
||||
res = vsetq_lane_s32(from[1*stride], res, 1);
|
||||
res = vsetq_lane_s32(from[2*stride], res, 2);
|
||||
@@ -309,6 +309,23 @@ template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
|
||||
a_hi = vget_high_s32(a_r64);
|
||||
return vcombine_s32(a_hi, a_lo);
|
||||
}
|
||||
|
||||
template<size_t offset>
|
||||
struct protate_impl<offset, Packet4f>
|
||||
{
|
||||
static Packet4f run(const Packet4f& a) {
|
||||
return vextq_f32(a, a, offset);
|
||||
}
|
||||
};
|
||||
|
||||
template<size_t offset>
|
||||
struct protate_impl<offset, Packet4i>
|
||||
{
|
||||
static Packet4i run(const Packet4i& a) {
|
||||
return vextq_s32(a, a, offset);
|
||||
}
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); }
|
||||
|
||||
@@ -501,7 +518,19 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) {
|
||||
}
|
||||
|
||||
//---------- double ----------
|
||||
#if EIGEN_ARCH_ARM64
|
||||
|
||||
// Clang 3.5 in the iOS toolchain has an ICE triggered by NEON intrisics for double.
|
||||
// Confirmed at least with __apple_build_version__ = 6000054.
|
||||
#ifdef __apple_build_version__
|
||||
// Let's hope that by the time __apple_build_version__ hits the 601* range, the bug will be fixed.
|
||||
// https://gist.github.com/yamaya/2924292 suggests that the 3 first digits are only updated with
|
||||
// major toolchain updates.
|
||||
#define EIGEN_APPLE_DOUBLE_NEON_BUG (__apple_build_version__ < 6010000)
|
||||
#else
|
||||
#define EIGEN_APPLE_DOUBLE_NEON_BUG 0
|
||||
#endif
|
||||
|
||||
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
|
||||
|
||||
#if (EIGEN_COMP_GNUC_STRICT && defined(__ANDROID__)) || defined(__apple_build_version__)
|
||||
// Bug 907: workaround missing declarations of the following two functions in the ADK
|
||||
@@ -524,12 +553,12 @@ typedef float64x1_t Packet1d;
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
{
|
||||
typedef Packet2d type;
|
||||
typedef Packet1d half;
|
||||
typedef Packet2d half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 2,
|
||||
HasHalfPacket=1,
|
||||
HasHalfPacket=0,
|
||||
|
||||
HasDiv = 1,
|
||||
// FIXME check the Has*
|
||||
@@ -541,11 +570,11 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return vdupq_n_f64(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a)
|
||||
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a)
|
||||
{
|
||||
Packet2d countdown = EIGEN_INIT_NEON_PACKET2(0, 1);
|
||||
return vaddq_f64(pset1<Packet2d>(a), countdown);
|
||||
@@ -608,7 +637,7 @@ template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d&
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
|
||||
{
|
||||
Packet2d res;
|
||||
Packet2d res = pset1<Packet2d>(0.0);
|
||||
res = vsetq_lane_f64(from[0*stride], res, 0);
|
||||
res = vsetq_lane_f64(from[1*stride], res, 1);
|
||||
return res;
|
||||
@@ -625,6 +654,14 @@ template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { retu
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); }
|
||||
|
||||
template<size_t offset>
|
||||
struct protate_impl<offset, Packet2d>
|
||||
{
|
||||
static Packet2d run(const Packet2d& a) {
|
||||
return vextq_f64(a, a, offset);
|
||||
}
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); }
|
||||
|
||||
#if EIGEN_COMP_CLANG && defined(__apple_build_version__)
|
||||
|
||||
@@ -50,7 +50,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
};
|
||||
#endif
|
||||
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
|
||||
@@ -297,7 +297,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
};
|
||||
#endif
|
||||
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; };
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
|
||||
@@ -474,7 +474,7 @@ ptranspose(PacketBlock<Packet2cf,2>& kernel) {
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
|
||||
__m128d result = pblend(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
|
||||
__m128d result = pblend<Packet2d>(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
|
||||
return Packet2cf(_mm_castpd_ps(result));
|
||||
}
|
||||
|
||||
|
||||
@@ -138,7 +138,6 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
fx = _mm_floor_ps(fx);
|
||||
#else
|
||||
tmp = _mm_setzero_ps();
|
||||
emm0 = _mm_cvttps_epi32(fx);
|
||||
tmp = _mm_cvtepi32_ps(emm0);
|
||||
/* if greater, substract 1 */
|
||||
@@ -207,7 +206,6 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
fx = _mm_floor_pd(fx);
|
||||
#else
|
||||
tmp = _mm_setzero_pd();
|
||||
emm0 = _mm_cvttpd_epi32(fx);
|
||||
tmp = _mm_cvtepi32_pd(emm0);
|
||||
/* if greater, substract 1 */
|
||||
@@ -464,11 +462,59 @@ Packet4f psqrt<Packet4f>(const Packet4f& _x)
|
||||
|
||||
#else
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
|
||||
template<>EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
|
||||
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
|
||||
|
||||
#if EIGEN_FAST_MATH
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000);
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(nan, 0x7fc00000);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000);
|
||||
|
||||
Packet4f neg_half = pmul(_x, p4f_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
Packet4f le_zero_mask = _mm_cmple_ps(_x, p4f_flt_min);
|
||||
Packet4f x = _mm_andnot_ps(le_zero_mask, _mm_rsqrt_ps(_x));
|
||||
|
||||
// Fill in NaNs and Infs for the negative/zero entries.
|
||||
Packet4f neg_mask = _mm_cmplt_ps(_x, _mm_setzero_ps());
|
||||
Packet4f zero_mask = _mm_andnot_ps(neg_mask, le_zero_mask);
|
||||
Packet4f infs_and_nans = _mm_or_ps(_mm_and_ps(neg_mask, p4f_nan),
|
||||
_mm_and_ps(zero_mask, p4f_inf));
|
||||
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p4f_one_point_five));
|
||||
|
||||
// Insert NaNs and Infs in all the right places.
|
||||
return _mm_or_ps(x, infs_and_nans);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f prsqrt<Packet4f>(const Packet4f& x) {
|
||||
// Unfortunately we can't use the much faster mm_rqsrt_ps since it only provides an approximation.
|
||||
return _mm_div_ps(pset1<Packet4f>(1.0f), _mm_sqrt_ps(x));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d prsqrt<Packet2d>(const Packet2d& x) {
|
||||
// Unfortunately we can't use the much faster mm_rqsrt_pd since it only provides an approximation.
|
||||
return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
@@ -28,13 +28,12 @@ namespace internal {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined EIGEN_VECTORIZE_AVX && EIGEN_COMP_GNUC_STRICT
|
||||
#if (defined EIGEN_VECTORIZE_AVX) && EIGEN_COMP_GNUC_STRICT && (__GXX_ABI_VERSION < 1004)
|
||||
// With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
|
||||
// have overloads for both types without linking error.
|
||||
// One solution is to increase ABI version using -fabi-version=4 (or greater).
|
||||
// To workaround this inconvenince, we rather wrap 128bit types into the following helper
|
||||
// Otherwise, we workaround this inconvenience by wrapping 128bit types into the following helper
|
||||
// structure:
|
||||
// TODO disable this wrapper if abi-versio>=4, but to detect that without asking the user to define a macro?
|
||||
template<typename T>
|
||||
struct eigen_packet_wrapper
|
||||
{
|
||||
@@ -109,6 +108,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasLog = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
@@ -125,6 +125,7 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasDiv = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
@@ -143,9 +144,9 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
|
||||
|
||||
#if EIGEN_COMP_MSVC==1500
|
||||
// Workaround MSVC 9 internal compiler error.
|
||||
@@ -171,11 +172,9 @@ template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
|
||||
#endif
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
|
||||
@@ -463,6 +462,29 @@ template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
|
||||
{ return _mm_shuffle_epi32(a,0x1B); }
|
||||
|
||||
template<size_t offset>
|
||||
struct protate_impl<offset, Packet4f>
|
||||
{
|
||||
static Packet4f run(const Packet4f& a) {
|
||||
return vec4f_swizzle1(a, offset, (offset + 1) % 4, (offset + 2) % 4, (offset + 3) % 4);
|
||||
}
|
||||
};
|
||||
|
||||
template<size_t offset>
|
||||
struct protate_impl<offset, Packet4i>
|
||||
{
|
||||
static Packet4i run(const Packet4i& a) {
|
||||
return vec4i_swizzle1(a, offset, (offset + 1) % 4, (offset + 2) % 4, (offset + 3) % 4);
|
||||
}
|
||||
};
|
||||
|
||||
template<size_t offset>
|
||||
struct protate_impl<offset, Packet2d>
|
||||
{
|
||||
static Packet2d run(const Packet2d& a) {
|
||||
return vec2d_swizzle1(a, offset, (offset + 1) % 2);
|
||||
}
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
|
||||
{
|
||||
|
||||
77
Eigen/src/Core/arch/SSE/TypeCasting.h
Normal file
77
Eigen/src/Core/arch/SSE/TypeCasting.h
Normal file
@@ -0,0 +1,77 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_TYPE_CASTING_SSE_H
|
||||
#define EIGEN_TYPE_CASTING_SSE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<float, int> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
|
||||
return _mm_cvttps_epi32(a);
|
||||
}
|
||||
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<int, float> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
|
||||
return _mm_cvtepi32_ps(a);
|
||||
}
|
||||
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<double, float> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 2,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
|
||||
return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
|
||||
}
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<float, double> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 2
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
|
||||
// Simply discard the second half of the input
|
||||
return _mm_cvtps_pd(a);
|
||||
}
|
||||
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_TYPE_CASTING_SSE_H
|
||||
@@ -150,14 +150,6 @@ template<typename Scalar> struct swap_assign_op {
|
||||
swap(a,const_cast<Scalar&>(b));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<int LhsAlignment, int RhsAlignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void swapPacket(Scalar* a, Scalar* b) const
|
||||
{
|
||||
Packet tmp = internal::ploadt<Packet,RhsAlignment>(b);
|
||||
internal::pstoret<Scalar,Packet,RhsAlignment>(b, internal::ploadt<Packet,LhsAlignment>(a));
|
||||
internal::pstoret<Scalar,Packet,LhsAlignment>(a, tmp);
|
||||
}
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<swap_assign_op<Scalar> > {
|
||||
|
||||
@@ -154,6 +154,48 @@ struct functor_traits<scalar_max_op<Scalar> > {
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functors for comparison of two scalars
|
||||
* \todo Implement packet-comparisons
|
||||
*/
|
||||
template<typename Scalar, ComparisonName cmp> struct scalar_cmp_op;
|
||||
|
||||
template<typename Scalar, ComparisonName cmp>
|
||||
struct functor_traits<scalar_cmp_op<Scalar, cmp> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
template<ComparisonName Cmp, typename Scalar>
|
||||
struct result_of<scalar_cmp_op<Scalar, Cmp>(Scalar,Scalar)> {
|
||||
typedef bool type;
|
||||
};
|
||||
|
||||
|
||||
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_EQ> {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;}
|
||||
};
|
||||
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LT> {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<b;}
|
||||
};
|
||||
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LE> {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;}
|
||||
};
|
||||
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_UNORD> {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);}
|
||||
};
|
||||
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_NEQ> {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;}
|
||||
};
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the hypot of two scalars
|
||||
*
|
||||
@@ -299,7 +341,6 @@ template<> struct functor_traits<scalar_boolean_or_op> {
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_multiple_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { }
|
||||
@@ -307,6 +348,7 @@ struct scalar_multiple_op {
|
||||
EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pmul(a, pset1<Packet>(m_other)); }
|
||||
typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
|
||||
@@ -337,11 +379,11 @@ struct functor_traits<scalar_multiple2_op<Scalar1,Scalar2> >
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_quotient1_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; }
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pdiv(a, pset1<Packet>(m_other)); }
|
||||
typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
|
||||
@@ -350,6 +392,18 @@ template<typename Scalar>
|
||||
struct functor_traits<scalar_quotient1_op<Scalar> >
|
||||
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
|
||||
|
||||
template<typename Scalar1, typename Scalar2>
|
||||
struct scalar_quotient2_op {
|
||||
typedef typename scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const scalar_quotient2_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const Scalar2& other) : m_other(other) { }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a / m_other; }
|
||||
typename add_const_on_value_type<typename NumTraits<Scalar2>::Nested>::type m_other;
|
||||
};
|
||||
template<typename Scalar1,typename Scalar2>
|
||||
struct functor_traits<scalar_quotient2_op<Scalar1,Scalar2> >
|
||||
{ enum { Cost = 2 * NumTraits<Scalar1>::MulCost, PacketAccess = false }; };
|
||||
|
||||
// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication
|
||||
// where the mixing of different types is handled by scalar_product_traits
|
||||
// In particular, real * complex<real> is allowed.
|
||||
@@ -367,11 +421,11 @@ template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<s
|
||||
/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */
|
||||
template<typename Scalar>
|
||||
struct scalar_add_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
EIGEN_DEVICE_FUNC inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_DEVICE_FUNC inline scalar_add_op(const Scalar& other) : m_other(other) { }
|
||||
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a + m_other; }
|
||||
template <typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::padd(a, pset1<Packet>(m_other)); }
|
||||
const Scalar m_other;
|
||||
@@ -386,10 +440,10 @@ struct functor_traits<scalar_add_op<Scalar> >
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_sub_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline scalar_sub_op(const scalar_sub_op& other) : m_other(other.m_other) { }
|
||||
inline scalar_sub_op(const Scalar& other) : m_other(other) { }
|
||||
inline Scalar operator() (const Scalar& a) const { return a - m_other; }
|
||||
template <typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::psub(a, pset1<Packet>(m_other)); }
|
||||
const Scalar m_other;
|
||||
@@ -404,10 +458,10 @@ struct functor_traits<scalar_sub_op<Scalar> >
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_rsub_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline scalar_rsub_op(const scalar_rsub_op& other) : m_other(other.m_other) { }
|
||||
inline scalar_rsub_op(const Scalar& other) : m_other(other) { }
|
||||
inline Scalar operator() (const Scalar& a) const { return m_other - a; }
|
||||
template <typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::psub(pset1<Packet>(m_other), a); }
|
||||
const Scalar m_other;
|
||||
|
||||
@@ -16,13 +16,12 @@ namespace internal {
|
||||
|
||||
template<typename Scalar>
|
||||
struct scalar_constant_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; }
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return internal::pset1<Packet>(m_other); }
|
||||
template<typename Index, typename PacketType>
|
||||
EIGEN_STRONG_INLINE const PacketType packetOp(Index, Index = 0) const { return internal::pset1<PacketType>(m_other); }
|
||||
const Scalar m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
@@ -39,7 +38,7 @@ template<typename Scalar>
|
||||
struct functor_traits<scalar_identity_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
|
||||
|
||||
template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
|
||||
template <typename Scalar, typename Packet, bool RandomAccess> struct linspaced_op_impl;
|
||||
|
||||
// linear access for packet ops:
|
||||
// 1) initialization
|
||||
@@ -49,15 +48,13 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
|
||||
//
|
||||
// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
|
||||
// in order to avoid the padd() in operator() ?
|
||||
template <typename Scalar>
|
||||
struct linspaced_op_impl<Scalar,false>
|
||||
template <typename Scalar, typename Packet>
|
||||
struct linspaced_op_impl<Scalar,Packet,false>
|
||||
{
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
|
||||
linspaced_op_impl(const Scalar& low, const Scalar& step) :
|
||||
m_low(low), m_step(step),
|
||||
m_packetStep(pset1<Packet>(packet_traits<Scalar>::size*step)),
|
||||
m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
|
||||
m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*step)),
|
||||
m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Packet>(-unpacket_traits<Packet>::size)))) {}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const
|
||||
@@ -78,14 +75,12 @@ struct linspaced_op_impl<Scalar,false>
|
||||
// random access for packet ops:
|
||||
// 1) each step
|
||||
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
|
||||
template <typename Scalar>
|
||||
struct linspaced_op_impl<Scalar,true>
|
||||
template <typename Scalar, typename Packet>
|
||||
struct linspaced_op_impl<Scalar,Packet,true>
|
||||
{
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
|
||||
linspaced_op_impl(const Scalar& low, const Scalar& step) :
|
||||
m_low(low), m_step(step),
|
||||
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Scalar>(0)) {}
|
||||
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
|
||||
@@ -106,12 +101,11 @@ struct linspaced_op_impl<Scalar,true>
|
||||
// Forward declaration (we default to random access which does not really give
|
||||
// us a speed gain when using packet access but it allows to use the functor in
|
||||
// nested expressions).
|
||||
template <typename Scalar, bool RandomAccess = true> struct linspaced_op;
|
||||
template <typename Scalar, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,RandomAccess> >
|
||||
template <typename Scalar, typename PacketType, bool RandomAccess = true> struct linspaced_op;
|
||||
template <typename Scalar, typename PacketType, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,PacketType,RandomAccess> >
|
||||
{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
|
||||
template <typename Scalar, bool RandomAccess> struct linspaced_op
|
||||
template <typename Scalar, typename PacketType, bool RandomAccess> struct linspaced_op
|
||||
{
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
linspaced_op(const Scalar& low, const Scalar& high, Index num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {}
|
||||
|
||||
template<typename Index>
|
||||
@@ -126,12 +120,12 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op
|
||||
return impl(col + row);
|
||||
}
|
||||
|
||||
template<typename Index>
|
||||
template<typename Index, typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); }
|
||||
|
||||
// We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
|
||||
// there row==0 and col is used for the actual iteration.
|
||||
template<typename Index>
|
||||
template<typename Index, typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
|
||||
{
|
||||
eigen_assert(col==0 || row==0);
|
||||
@@ -141,7 +135,8 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op
|
||||
// This proxy object handles the actual required temporaries, the different
|
||||
// implementations (random vs. sequential access) as well as the
|
||||
// correct piping to size 2/4 packet operations.
|
||||
const linspaced_op_impl<Scalar,RandomAccess> impl;
|
||||
// TODO find a way to make the packet type configurable
|
||||
const linspaced_op_impl<Scalar,PacketType,RandomAccess> impl;
|
||||
};
|
||||
|
||||
// all functors allow linear access, except scalar_identity_op. So we fix here a quick meta
|
||||
|
||||
@@ -72,6 +72,8 @@ template<typename T>
|
||||
struct functor_traits<std::not_equal_to<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
#if(__cplusplus < 201103L)
|
||||
// std::binder* are deprecated since c++11 and will be removed in c++17
|
||||
template<typename T>
|
||||
struct functor_traits<std::binder2nd<T> >
|
||||
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
@@ -79,6 +81,7 @@ struct functor_traits<std::binder2nd<T> >
|
||||
template<typename T>
|
||||
struct functor_traits<std::binder1st<T> >
|
||||
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::unary_negate<T> >
|
||||
|
||||
@@ -55,6 +55,34 @@ struct functor_traits<scalar_abs_op<Scalar> >
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the score of a scalar, to chose a pivot
|
||||
*
|
||||
* \sa class CwiseUnaryOp
|
||||
*/
|
||||
template<typename Scalar> struct scalar_score_coeff_op : scalar_abs_op<Scalar>
|
||||
{
|
||||
typedef void Score_is_abs;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_score_coeff_op<Scalar> > : functor_traits<scalar_abs_op<Scalar> > {};
|
||||
|
||||
/* Avoid recomputing abs when we know the score and they are the same. Not a true Eigen functor. */
|
||||
template<typename Scalar, typename=void> struct abs_knowing_score
|
||||
{
|
||||
EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
template<typename Score>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a, const Score&) const { using std::abs; return abs(a); }
|
||||
};
|
||||
template<typename Scalar> struct abs_knowing_score<Scalar, typename scalar_score_coeff_op<Scalar>::Score_is_abs>
|
||||
{
|
||||
EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
template<typename Scal>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scal&, const result_type& a) const { return a; }
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the squared absolute value of a scalar
|
||||
*
|
||||
@@ -94,6 +122,27 @@ struct functor_traits<scalar_conjugate_op<Scalar> >
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the phase angle of a complex
|
||||
*
|
||||
* \sa class CwiseUnaryOp, Cwise::arg
|
||||
*/
|
||||
template<typename Scalar> struct scalar_arg_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_arg_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using numext::arg; return arg(a); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::parg(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_arg_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::IsComplex ? 5 * NumTraits<Scalar>::MulCost : NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasArg
|
||||
};
|
||||
};
|
||||
/** \internal
|
||||
* \brief Template functor to cast a scalar to another type
|
||||
*
|
||||
@@ -182,7 +231,7 @@ struct functor_traits<scalar_imag_ref_op<Scalar> >
|
||||
template<typename Scalar> struct scalar_exp_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
template <typename Packet>
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
@@ -198,13 +247,28 @@ struct functor_traits<scalar_exp_op<Scalar> >
|
||||
template<typename Scalar> struct scalar_log_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
template <typename Packet>
|
||||
inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_log_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog }; };
|
||||
|
||||
/** \internal
|
||||
*
|
||||
* \brief Template functor to compute the base-10 logarithm of a scalar
|
||||
*
|
||||
* \sa class CwiseUnaryOp, Cwise::log10()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_log10_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_log10_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log10; return log10(a); }
|
||||
template <typename Packet>
|
||||
inline Packet packetOp(const Packet& a) const { return internal::plog10(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_log10_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog10 }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the square root of a scalar
|
||||
@@ -213,7 +277,7 @@ struct functor_traits<scalar_log_op<Scalar> >
|
||||
template<typename Scalar> struct scalar_sqrt_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
template <typename Packet>
|
||||
inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
@@ -224,6 +288,25 @@ struct functor_traits<scalar_sqrt_op<Scalar> >
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the reciprocal square root of a scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::rsqrt()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_rsqrt_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return Scalar(1)/sqrt(a); }
|
||||
template <typename Packet>
|
||||
inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); }
|
||||
};
|
||||
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_rsqrt_op<Scalar> >
|
||||
{ enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasRsqrt
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the cosine of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::cos()
|
||||
@@ -231,7 +314,7 @@ struct functor_traits<scalar_sqrt_op<Scalar> >
|
||||
template<typename Scalar> struct scalar_cos_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
|
||||
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
template <typename Packet>
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
@@ -250,7 +333,7 @@ struct functor_traits<scalar_cos_op<Scalar> >
|
||||
template<typename Scalar> struct scalar_sin_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
template <typename Packet>
|
||||
inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
@@ -270,7 +353,7 @@ struct functor_traits<scalar_sin_op<Scalar> >
|
||||
template<typename Scalar> struct scalar_tan_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
template <typename Packet>
|
||||
inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
@@ -289,7 +372,7 @@ struct functor_traits<scalar_tan_op<Scalar> >
|
||||
template<typename Scalar> struct scalar_acos_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
template <typename Packet>
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
@@ -308,7 +391,7 @@ struct functor_traits<scalar_acos_op<Scalar> >
|
||||
template<typename Scalar> struct scalar_asin_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
template <typename Packet>
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
@@ -320,7 +403,6 @@ struct functor_traits<scalar_asin_op<Scalar> >
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the atan of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::atan()
|
||||
@@ -328,7 +410,7 @@ struct functor_traits<scalar_asin_op<Scalar> >
|
||||
template<typename Scalar> struct scalar_atan_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op)
|
||||
inline const Scalar operator() (const Scalar& a) const { using std::atan; return atan(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
template <typename Packet>
|
||||
inline Packet packetOp(const Packet& a) const { return internal::patan(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
@@ -340,6 +422,63 @@ struct functor_traits<scalar_atan_op<Scalar> >
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the tanh of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::tanh()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_tanh_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op)
|
||||
inline const Scalar operator() (const Scalar& a) const { using std::tanh; return tanh(a); }
|
||||
template <typename Packet>
|
||||
inline Packet packetOp(const Packet& a) const { return internal::ptanh(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_tanh_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasTanh
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the sinh of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::sinh()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_sinh_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_sinh_op)
|
||||
inline const Scalar operator() (const Scalar& a) const { using std::sinh; return sinh(a); }
|
||||
template <typename Packet>
|
||||
inline Packet packetOp(const Packet& a) const { return internal::psinh(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_sinh_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasSinh
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the cosh of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::cosh()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_cosh_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cosh_op)
|
||||
inline const Scalar operator() (const Scalar& a) const { using std::cosh; return cosh(a); }
|
||||
template <typename Packet>
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pcosh(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_cosh_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasCosh
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the inverse of a scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::inverse()
|
||||
@@ -388,6 +527,134 @@ template<typename Scalar>
|
||||
struct functor_traits<scalar_cube_op<Scalar> >
|
||||
{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the rounded value of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::round()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_round_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_round_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::round(a); }
|
||||
template <typename Packet>
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pround(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_round_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasRound
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the floor of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::floor()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_floor_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_floor_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::floor(a); }
|
||||
template <typename Packet>
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pfloor(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_floor_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasFloor
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the ceil of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::ceil()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_ceil_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pceil(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_ceil_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasCeil
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute whether a scalar is NaN
|
||||
* \sa class CwiseUnaryOp, ArrayBase::isnan()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_isnan_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_isnan_op)
|
||||
typedef bool result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isnan)(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_isnan_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to check whether a scalar is +/-inf
|
||||
* \sa class CwiseUnaryOp, ArrayBase::isinf()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_isinf_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_isinf_op)
|
||||
typedef bool result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isinf)(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_isinf_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to check whether a scalar has a finite value
|
||||
* \sa class CwiseUnaryOp, ArrayBase::isfinite()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_isfinite_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_isfinite_op)
|
||||
typedef bool result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isfinite)(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_isfinite_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the logical not of a boolean
|
||||
*
|
||||
* \sa class CwiseUnaryOp, ArrayBase::operator!
|
||||
*/
|
||||
template<typename Scalar> struct scalar_boolean_not_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_not_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a) const { return !a; }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_boolean_not_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<bool>::AddCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
@@ -25,21 +25,31 @@ inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff
|
||||
return a<=0 ? b : a;
|
||||
}
|
||||
|
||||
#if EIGEN_ARCH_i386_OR_x86_64
|
||||
const std::ptrdiff_t defaultL1CacheSize = 32*1024;
|
||||
const std::ptrdiff_t defaultL2CacheSize = 256*1024;
|
||||
const std::ptrdiff_t defaultL3CacheSize = 2*1024*1024;
|
||||
#else
|
||||
const std::ptrdiff_t defaultL1CacheSize = 16*1024;
|
||||
const std::ptrdiff_t defaultL2CacheSize = 512*1024;
|
||||
const std::ptrdiff_t defaultL3CacheSize = 512*1024;
|
||||
#endif
|
||||
|
||||
/** \internal */
|
||||
inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3)
|
||||
{
|
||||
static bool m_cache_sizes_initialized = false;
|
||||
static std::ptrdiff_t m_l1CacheSize = 32*1024;
|
||||
static std::ptrdiff_t m_l2CacheSize = 256*1024;
|
||||
static std::ptrdiff_t m_l3CacheSize = 2*1024*1024;
|
||||
static std::ptrdiff_t m_l1CacheSize = 0;
|
||||
static std::ptrdiff_t m_l2CacheSize = 0;
|
||||
static std::ptrdiff_t m_l3CacheSize = 0;
|
||||
|
||||
if(!m_cache_sizes_initialized)
|
||||
{
|
||||
int l1CacheSize, l2CacheSize, l3CacheSize;
|
||||
queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize);
|
||||
m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, 8*1024);
|
||||
m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, 256*1024);
|
||||
m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, 8*1024*1024);
|
||||
m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize);
|
||||
m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize);
|
||||
m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize);
|
||||
m_cache_sizes_initialized = true;
|
||||
}
|
||||
|
||||
@@ -64,6 +74,211 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
|
||||
}
|
||||
}
|
||||
|
||||
/* Helper for computeProductBlockingSizes.
|
||||
*
|
||||
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
|
||||
* this function computes the blocking size parameters along the respective dimensions
|
||||
* for matrix products and related algorithms. The blocking sizes depends on various
|
||||
* parameters:
|
||||
* - the L1 and L2 cache sizes,
|
||||
* - the register level blocking sizes defined by gebp_traits,
|
||||
* - the number of scalars that fit into a packet (when vectorization is enabled).
|
||||
*
|
||||
* \sa setCpuCacheSizes */
|
||||
|
||||
template<typename LhsScalar, typename RhsScalar, int KcFactor>
|
||||
void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1)
|
||||
{
|
||||
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
|
||||
// Explanations:
|
||||
// Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and
|
||||
// kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed
|
||||
// per mr x kc horizontal small panels where mr is the blocking size along the m dimension
|
||||
// at the register level. This small horizontal panel has to stay within L1 cache.
|
||||
std::ptrdiff_t l1, l2, l3;
|
||||
manage_caching_sizes(GetAction, &l1, &l2, &l3);
|
||||
|
||||
if (num_threads > 1) {
|
||||
typedef typename Traits::ResScalar ResScalar;
|
||||
enum {
|
||||
kdiv = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
|
||||
ksub = Traits::mr * Traits::nr * sizeof(ResScalar),
|
||||
k_mask = -8,
|
||||
|
||||
mr = Traits::mr,
|
||||
mr_mask = -mr,
|
||||
|
||||
nr = Traits::nr,
|
||||
nr_mask = -nr
|
||||
};
|
||||
// Increasing k gives us more time to prefetch the content of the "C"
|
||||
// registers. However once the latency is hidden there is no point in
|
||||
// increasing the value of k, so we'll cap it at 320 (value determined
|
||||
// experimentally).
|
||||
const Index k_cache = (std::min<Index>)((l1-ksub)/kdiv, 320);
|
||||
if (k_cache < k) {
|
||||
k = k_cache & k_mask;
|
||||
eigen_internal_assert(k > 0);
|
||||
}
|
||||
|
||||
const Index n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k);
|
||||
const Index n_per_thread = numext::div_ceil(n, num_threads);
|
||||
if (n_cache <= n_per_thread) {
|
||||
// Don't exceed the capacity of the l2 cache.
|
||||
eigen_internal_assert(n_cache >= static_cast<Index>(nr));
|
||||
n = n_cache & nr_mask;
|
||||
eigen_internal_assert(n > 0);
|
||||
} else {
|
||||
n = (std::min<Index>)(n, (n_per_thread + nr - 1) & nr_mask);
|
||||
}
|
||||
|
||||
if (l3 > l2) {
|
||||
// l3 is shared between all cores, so we'll give each thread its own chunk of l3.
|
||||
const Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);
|
||||
const Index m_per_thread = numext::div_ceil(m, num_threads);
|
||||
if(m_cache < m_per_thread && m_cache >= static_cast<Index>(mr)) {
|
||||
m = m_cache & mr_mask;
|
||||
eigen_internal_assert(m > 0);
|
||||
} else {
|
||||
m = (std::min<Index>)(m, (m_per_thread + mr - 1) & mr_mask);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
// In unit tests we do not want to use extra large matrices,
|
||||
// so we reduce the cache size to check the blocking strategy is not flawed
|
||||
#ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
|
||||
l1 = 9*1024;
|
||||
l2 = 32*1024;
|
||||
l3 = 512*1024;
|
||||
#endif
|
||||
|
||||
// Early return for small problems because the computation below are time consuming for small problems.
|
||||
// Perhaps it would make more sense to consider k*n*m??
|
||||
// Note that for very tiny problem, this function should be bypassed anyway
|
||||
// because we use the coefficient-based implementation for them.
|
||||
if((std::max)(k,(std::max)(m,n))<48)
|
||||
return;
|
||||
|
||||
typedef typename Traits::ResScalar ResScalar;
|
||||
enum {
|
||||
k_peeling = 8,
|
||||
k_div = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
|
||||
k_sub = Traits::mr * Traits::nr * sizeof(ResScalar)
|
||||
};
|
||||
|
||||
// ---- 1st level of blocking on L1, yields kc ----
|
||||
|
||||
// Blocking on the third dimension (i.e., k) is chosen so that an horizontal panel
|
||||
// of size mr x kc of the lhs plus a vertical panel of kc x nr of the rhs both fits within L1 cache.
|
||||
// We also include a register-level block of the result (mx x nr).
|
||||
// (In an ideal world only the lhs panel would stay in L1)
|
||||
// Moreover, kc has to be a multiple of 8 to be compatible with loop peeling, leading to a maximum blocking size of:
|
||||
const Index max_kc = ((l1-k_sub)/k_div) & (~(k_peeling-1));
|
||||
const Index old_k = k;
|
||||
if(k>max_kc)
|
||||
{
|
||||
// We are really blocking on the third dimension:
|
||||
// -> reduce blocking size to make sure the last block is as large as possible
|
||||
// while keeping the same number of sweeps over the result.
|
||||
k = (k%max_kc)==0 ? max_kc
|
||||
: max_kc - k_peeling * ((max_kc-1-(k%max_kc))/(k_peeling*(k/max_kc+1)));
|
||||
|
||||
eigen_internal_assert(((old_k/k) == (old_k/max_kc)) && "the number of sweeps has to remain the same");
|
||||
}
|
||||
|
||||
// ---- 2nd level of blocking on max(L2,L3), yields nc ----
|
||||
|
||||
// TODO find a reliable way to get the actual amount of cache per core to use for 2nd level blocking, that is:
|
||||
// actual_l2 = max(l2, l3/nb_core_sharing_l3)
|
||||
// The number below is quite conservative: it is better to underestimate the cache size rather than overestimating it)
|
||||
// For instance, it corresponds to 6MB of L3 shared among 4 cores.
|
||||
#ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
|
||||
const Index actual_l2 = l3;
|
||||
#else
|
||||
const Index actual_l2 = 1572864; // == 1.5 MB
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// Here, nc is chosen such that a block of kc x nc of the rhs fit within half of L2.
|
||||
// The second half is implicitly reserved to access the result and lhs coefficients.
|
||||
// When k<max_kc, then nc can arbitrarily growth. In practice, it seems to be fruitful
|
||||
// to limit this growth: we bound nc to growth by a factor x1.5.
|
||||
// However, if the entire lhs block fit within L1, then we are not going to block on the rows at all,
|
||||
// and it becomes fruitful to keep the packed rhs blocks in L1 if there is enough remaining space.
|
||||
Index max_nc;
|
||||
const Index lhs_bytes = m * k * sizeof(LhsScalar);
|
||||
const Index remaining_l1 = l1- k_sub - lhs_bytes;
|
||||
if(remaining_l1 >= Index(Traits::nr*sizeof(RhsScalar))*k)
|
||||
{
|
||||
// L1 blocking
|
||||
max_nc = remaining_l1 / (k*sizeof(RhsScalar));
|
||||
}
|
||||
else
|
||||
{
|
||||
// L2 blocking
|
||||
max_nc = (3*actual_l2)/(2*2*max_kc*sizeof(RhsScalar));
|
||||
}
|
||||
// WARNING Below, we assume that Traits::nr is a power of two.
|
||||
Index nc = std::min<Index>(actual_l2/(2*k*sizeof(RhsScalar)), max_nc) & (~(Traits::nr-1));
|
||||
if(n>nc)
|
||||
{
|
||||
// We are really blocking over the columns:
|
||||
// -> reduce blocking size to make sure the last block is as large as possible
|
||||
// while keeping the same number of sweeps over the packed lhs.
|
||||
// Here we allow one more sweep if this gives us a perfect match, thus the commented "-1"
|
||||
n = (n%nc)==0 ? nc
|
||||
: (nc - Traits::nr * ((nc/*-1*/-(n%nc))/(Traits::nr*(n/nc+1))));
|
||||
}
|
||||
else if(old_k==k)
|
||||
{
|
||||
// So far, no blocking at all, i.e., kc==k, and nc==n.
|
||||
// In this case, let's perform a blocking over the rows such that the packed lhs data is kept in cache L1/L2
|
||||
// TODO: part of this blocking strategy is now implemented within the kernel itself, so the L1-based heuristic here should be obsolete.
|
||||
Index problem_size = k*n*sizeof(LhsScalar);
|
||||
Index actual_lm = actual_l2;
|
||||
Index max_mc = m;
|
||||
if(problem_size<=1024)
|
||||
{
|
||||
// problem is small enough to keep in L1
|
||||
// Let's choose m such that lhs's block fit in 1/3 of L1
|
||||
actual_lm = l1;
|
||||
}
|
||||
else if(l3!=0 && problem_size<=32768)
|
||||
{
|
||||
// we have both L2 and L3, and problem is small enough to be kept in L2
|
||||
// Let's choose m such that lhs's block fit in 1/3 of L2
|
||||
actual_lm = l2;
|
||||
max_mc = 576;
|
||||
}
|
||||
Index mc = (std::min<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc);
|
||||
if (mc > Traits::mr) mc -= mc % Traits::mr;
|
||||
else if (mc==0) return;
|
||||
m = (m%mc)==0 ? mc
|
||||
: (mc - Traits::mr * ((mc/*-1*/-(m%mc))/(Traits::mr*(m/mc+1))));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)
|
||||
{
|
||||
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
|
||||
if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
|
||||
k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
|
||||
m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
|
||||
n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
EIGEN_UNUSED_VARIABLE(k)
|
||||
EIGEN_UNUSED_VARIABLE(m)
|
||||
EIGEN_UNUSED_VARIABLE(n)
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
/** \brief Computes the blocking parameters for a m x k times k x n matrix product
|
||||
*
|
||||
* \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
|
||||
@@ -72,88 +287,34 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
|
||||
*
|
||||
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
|
||||
* this function computes the blocking size parameters along the respective dimensions
|
||||
* for matrix products and related algorithms. The blocking sizes depends on various
|
||||
* parameters:
|
||||
* - the L1 and L2 cache sizes,
|
||||
* - the register level blocking sizes defined by gebp_traits,
|
||||
* - the number of scalars that fit into a packet (when vectorization is enabled).
|
||||
* for matrix products and related algorithms.
|
||||
*
|
||||
* The blocking size parameters may be evaluated:
|
||||
* - either by a heuristic based on cache sizes;
|
||||
* - or using fixed prescribed values (for testing purposes).
|
||||
*
|
||||
* \sa setCpuCacheSizes */
|
||||
#define CEIL(a, b) ((a)+(b)-1)/(b)
|
||||
|
||||
template<typename LhsScalar, typename RhsScalar, int KcFactor, typename SizeType>
|
||||
void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, int num_threads)
|
||||
template<typename LhsScalar, typename RhsScalar, int KcFactor>
|
||||
void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
|
||||
{
|
||||
// Explanations:
|
||||
// Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
|
||||
// mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed
|
||||
// per kc x nr vertical small panels where nr is the blocking size along the n dimension
|
||||
// at the register level. For vectorization purpose, these small vertical panels are unpacked,
|
||||
// e.g., each coefficient is replicated to fit a packet. This small vertical panel has to
|
||||
// stay in L1 cache.
|
||||
std::ptrdiff_t l1, l2, l3;
|
||||
manage_caching_sizes(GetAction, &l1, &l2, &l3);
|
||||
|
||||
if (num_threads > 1) {
|
||||
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
typedef typename Traits::ResScalar ResScalar;
|
||||
enum {
|
||||
kdiv = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
|
||||
ksub = Traits::mr * Traits::nr * sizeof(ResScalar),
|
||||
k_mask = (0xffffffff/8)*8,
|
||||
|
||||
mr = Traits::mr,
|
||||
mr_mask = (0xffffffff/mr)*mr,
|
||||
|
||||
nr = Traits::nr,
|
||||
nr_mask = (0xffffffff/nr)*nr
|
||||
};
|
||||
SizeType k_cache = (l1-ksub)/kdiv;
|
||||
if (k_cache < k) {
|
||||
k = k_cache & k_mask;
|
||||
eigen_assert(k > 0);
|
||||
}
|
||||
|
||||
SizeType n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k);
|
||||
SizeType n_per_thread = CEIL(n, num_threads);
|
||||
if (n_cache <= n_per_thread) {
|
||||
// Don't exceed the capacity of the l2 cache.
|
||||
eigen_assert(n_cache >= static_cast<SizeType>(nr));
|
||||
n = n_cache & nr_mask;
|
||||
eigen_assert(n > 0);
|
||||
} else {
|
||||
n = (std::min<SizeType>)(n, (n_per_thread + nr - 1) & nr_mask);
|
||||
}
|
||||
|
||||
if (l3 > l2) {
|
||||
// l3 is shared between all cores, so we'll give each thread its own chunk of l3.
|
||||
SizeType m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);
|
||||
SizeType m_per_thread = CEIL(m, num_threads);
|
||||
if(m_cache < m_per_thread && m_cache >= static_cast<SizeType>(mr)) {
|
||||
m = m_cache & mr_mask;
|
||||
eigen_assert(m > 0);
|
||||
} else {
|
||||
m = (std::min<SizeType>)(m, (m_per_thread + mr - 1) & mr_mask);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
// In unit tests we do not want to use extra large matrices,
|
||||
// so we reduce the block size to check the blocking strategy is not flawed
|
||||
#ifndef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
|
||||
k = std::min<SizeType>(k,sizeof(LhsScalar)<=4 ? 360 : 240);
|
||||
n = std::min<SizeType>(n,3840/sizeof(RhsScalar));
|
||||
m = std::min<SizeType>(m,3840/sizeof(RhsScalar));
|
||||
#else
|
||||
k = std::min<SizeType>(k,24);
|
||||
n = std::min<SizeType>(n,384/sizeof(RhsScalar));
|
||||
m = std::min<SizeType>(m,384/sizeof(RhsScalar));
|
||||
#endif
|
||||
if (!useSpecificBlockingSizes(k, m, n)) {
|
||||
evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor>(k, m, n, num_threads);
|
||||
}
|
||||
|
||||
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
enum {
|
||||
kr = 8,
|
||||
mr = Traits::mr,
|
||||
nr = Traits::nr
|
||||
};
|
||||
if (k > kr) k -= k % kr;
|
||||
if (m > mr) m -= m % mr;
|
||||
if (n > nr) n -= n % nr;
|
||||
}
|
||||
|
||||
template<typename LhsScalar, typename RhsScalar, typename SizeType>
|
||||
inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, int num_threads)
|
||||
template<typename LhsScalar, typename RhsScalar>
|
||||
inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
|
||||
{
|
||||
computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n, num_threads);
|
||||
}
|
||||
@@ -220,11 +381,14 @@ public:
|
||||
nr = 4,
|
||||
|
||||
// register block size along the M direction (currently, this one cannot be modified)
|
||||
default_mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
|
||||
#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
|
||||
// we assume 16 registers
|
||||
mr = 3*LhsPacketSize,
|
||||
// See bug 992, if the scalar type is not vectorizable but that EIGEN_HAS_SINGLE_INSTRUCTION_MADD is defined,
|
||||
// then using 3*LhsPacketSize triggers non-implemented paths in syrk.
|
||||
mr = Vectorizable ? 3*LhsPacketSize : default_mr,
|
||||
#else
|
||||
mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
|
||||
mr = default_mr,
|
||||
#endif
|
||||
|
||||
LhsProgress = LhsPacketSize,
|
||||
@@ -698,6 +862,80 @@ protected:
|
||||
conj_helper<ResPacket,ResPacket,false,ConjRhs> cj;
|
||||
};
|
||||
|
||||
// helper for the rotating kernel below
|
||||
template <typename GebpKernel, bool UseRotatingKernel = GebpKernel::UseRotatingKernel>
|
||||
struct PossiblyRotatingKernelHelper
|
||||
{
|
||||
// default implementation, not rotating
|
||||
|
||||
typedef typename GebpKernel::Traits Traits;
|
||||
typedef typename Traits::RhsScalar RhsScalar;
|
||||
typedef typename Traits::RhsPacket RhsPacket;
|
||||
typedef typename Traits::AccPacket AccPacket;
|
||||
|
||||
const Traits& traits;
|
||||
PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {}
|
||||
|
||||
|
||||
template <size_t K, size_t Index>
|
||||
void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const
|
||||
{
|
||||
traits.loadRhs(from + (Index+4*K)*Traits::RhsProgress, to);
|
||||
}
|
||||
|
||||
void unrotateResult(AccPacket&,
|
||||
AccPacket&,
|
||||
AccPacket&,
|
||||
AccPacket&)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
// rotating implementation
|
||||
template <typename GebpKernel>
|
||||
struct PossiblyRotatingKernelHelper<GebpKernel, true>
|
||||
{
|
||||
typedef typename GebpKernel::Traits Traits;
|
||||
typedef typename Traits::RhsScalar RhsScalar;
|
||||
typedef typename Traits::RhsPacket RhsPacket;
|
||||
typedef typename Traits::AccPacket AccPacket;
|
||||
|
||||
const Traits& traits;
|
||||
PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {}
|
||||
|
||||
template <size_t K, size_t Index>
|
||||
void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const
|
||||
{
|
||||
if (Index == 0) {
|
||||
to = pload<RhsPacket>(from + 4*K*Traits::RhsProgress);
|
||||
} else {
|
||||
EIGEN_ASM_COMMENT("Do not reorder code, we're very tight on registers");
|
||||
to = protate<1>(to);
|
||||
}
|
||||
}
|
||||
|
||||
void unrotateResult(AccPacket& res0,
|
||||
AccPacket& res1,
|
||||
AccPacket& res2,
|
||||
AccPacket& res3)
|
||||
{
|
||||
PacketBlock<AccPacket> resblock;
|
||||
resblock.packet[0] = res0;
|
||||
resblock.packet[1] = res1;
|
||||
resblock.packet[2] = res2;
|
||||
resblock.packet[3] = res3;
|
||||
ptranspose(resblock);
|
||||
resblock.packet[3] = protate<1>(resblock.packet[3]);
|
||||
resblock.packet[2] = protate<2>(resblock.packet[2]);
|
||||
resblock.packet[1] = protate<3>(resblock.packet[1]);
|
||||
ptranspose(resblock);
|
||||
res0 = resblock.packet[0];
|
||||
res1 = resblock.packet[1];
|
||||
res2 = resblock.packet[2];
|
||||
res3 = resblock.packet[3];
|
||||
}
|
||||
};
|
||||
|
||||
/* optimized GEneral packed Block * packed Panel product kernel
|
||||
*
|
||||
* Mixing type logic: C += A * B
|
||||
@@ -731,6 +969,16 @@ struct gebp_kernel
|
||||
ResPacketSize = Traits::ResPacketSize
|
||||
};
|
||||
|
||||
|
||||
static const bool UseRotatingKernel =
|
||||
EIGEN_ARCH_ARM &&
|
||||
internal::is_same<LhsScalar, float>::value &&
|
||||
internal::is_same<RhsScalar, float>::value &&
|
||||
internal::is_same<ResScalar, float>::value &&
|
||||
Traits::LhsPacketSize == 4 &&
|
||||
Traits::RhsPacketSize == 4 &&
|
||||
Traits::ResPacketSize == 4;
|
||||
|
||||
EIGEN_DONT_INLINE
|
||||
void operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
|
||||
Index rows, Index depth, Index cols, ResScalar alpha,
|
||||
@@ -758,22 +1006,36 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
const Index peeled_kc = depth & ~(pk-1);
|
||||
const Index prefetch_res_offset = 32/sizeof(ResScalar);
|
||||
// const Index depth2 = depth & ~1;
|
||||
|
||||
|
||||
//---------- Process 3 * LhsProgress rows at once ----------
|
||||
// This corresponds to 3*LhsProgress x nr register blocks.
|
||||
// Usually, make sense only with FMA
|
||||
if(mr>=3*Traits::LhsProgress)
|
||||
{
|
||||
// loops on each largest micro horizontal panel of lhs (3*Traits::LhsProgress x depth)
|
||||
for(Index i=0; i<peeled_mc3; i+=3*Traits::LhsProgress)
|
||||
{
|
||||
PossiblyRotatingKernelHelper<gebp_kernel> possiblyRotatingKernelHelper(traits);
|
||||
|
||||
// Here, the general idea is to loop on each largest micro horizontal panel of the lhs (3*Traits::LhsProgress x depth)
|
||||
// and on each largest micro vertical panel of the rhs (depth * nr).
|
||||
// Blocking sizes, i.e., 'depth' has been computed so that the micro horizontal panel of the lhs fit in L1.
|
||||
// However, if depth is too small, we can extend the number of rows of these horizontal panels.
|
||||
// This actual number of rows is computed as follow:
|
||||
const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function.
|
||||
// The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size
|
||||
// suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess),
|
||||
// or because we are testing specific blocking sizes.
|
||||
const Index actual_panel_rows = (3*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) ));
|
||||
for(Index i1=0; i1<peeled_mc3; i1+=actual_panel_rows)
|
||||
{
|
||||
// loops on each largest micro vertical panel of rhs (depth * nr)
|
||||
const Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc3);
|
||||
for(Index j2=0; j2<packet_cols4; j2+=nr)
|
||||
{
|
||||
// We select a 3*Traits::LhsProgress x nr micro block of res which is entirely
|
||||
for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
|
||||
{
|
||||
|
||||
// We selected a 3*Traits::LhsProgress x nr micro block of res which is entirely
|
||||
// stored into 3 x nr registers.
|
||||
|
||||
const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*Traits::LhsProgress)];
|
||||
const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*LhsProgress)];
|
||||
prefetch(&blA[0]);
|
||||
|
||||
// gets res block as register
|
||||
@@ -798,50 +1060,50 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
|
||||
prefetch(&blB[0]);
|
||||
LhsPacket A0, A1;
|
||||
|
||||
|
||||
for(Index k=0; k<peeled_kc; k+=pk)
|
||||
{
|
||||
EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX4");
|
||||
RhsPacket B_0, T0;
|
||||
LhsPacket A2;
|
||||
|
||||
#define EIGEN_GEBGP_ONESTEP(K) \
|
||||
#define EIGEN_GEBP_ONESTEP(K) \
|
||||
do { \
|
||||
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
|
||||
EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
|
||||
internal::prefetch(blA+(3*K+16)*LhsProgress); \
|
||||
internal::prefetch(blB+(4*K+16)*RhsProgress); /* Bug 953 */ \
|
||||
if (EIGEN_ARCH_ARM) internal::prefetch(blB+(4*K+16)*RhsProgress); /* Bug 953 */ \
|
||||
traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
|
||||
traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
|
||||
traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
|
||||
traits.loadRhs(&blB[(0+4*K)*RhsProgress], B_0); \
|
||||
possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 0>(B_0, blB); \
|
||||
traits.madd(A0, B_0, C0, T0); \
|
||||
traits.madd(A1, B_0, C4, T0); \
|
||||
traits.madd(A2, B_0, C8, B_0); \
|
||||
traits.loadRhs(&blB[1+4*K*RhsProgress], B_0); \
|
||||
possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 1>(B_0, blB); \
|
||||
traits.madd(A0, B_0, C1, T0); \
|
||||
traits.madd(A1, B_0, C5, T0); \
|
||||
traits.madd(A2, B_0, C9, B_0); \
|
||||
traits.loadRhs(&blB[2+4*K*RhsProgress], B_0); \
|
||||
possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 2>(B_0, blB); \
|
||||
traits.madd(A0, B_0, C2, T0); \
|
||||
traits.madd(A1, B_0, C6, T0); \
|
||||
traits.madd(A2, B_0, C10, B_0); \
|
||||
traits.loadRhs(&blB[3+4*K*RhsProgress], B_0); \
|
||||
possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 3>(B_0, blB); \
|
||||
traits.madd(A0, B_0, C3 , T0); \
|
||||
traits.madd(A1, B_0, C7, T0); \
|
||||
traits.madd(A2, B_0, C11, B_0); \
|
||||
EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX4"); \
|
||||
} while(false)
|
||||
|
||||
internal::prefetch(blB + 4 * pk * sizeof(RhsScalar)); /* Bug 953 */
|
||||
EIGEN_GEBGP_ONESTEP(0);
|
||||
EIGEN_GEBGP_ONESTEP(1);
|
||||
EIGEN_GEBGP_ONESTEP(2);
|
||||
EIGEN_GEBGP_ONESTEP(3);
|
||||
EIGEN_GEBGP_ONESTEP(4);
|
||||
EIGEN_GEBGP_ONESTEP(5);
|
||||
EIGEN_GEBGP_ONESTEP(6);
|
||||
EIGEN_GEBGP_ONESTEP(7);
|
||||
|
||||
internal::prefetch(blB);
|
||||
EIGEN_GEBP_ONESTEP(0);
|
||||
EIGEN_GEBP_ONESTEP(1);
|
||||
EIGEN_GEBP_ONESTEP(2);
|
||||
EIGEN_GEBP_ONESTEP(3);
|
||||
EIGEN_GEBP_ONESTEP(4);
|
||||
EIGEN_GEBP_ONESTEP(5);
|
||||
EIGEN_GEBP_ONESTEP(6);
|
||||
EIGEN_GEBP_ONESTEP(7);
|
||||
|
||||
blB += pk*4*RhsProgress;
|
||||
blA += pk*3*Traits::LhsProgress;
|
||||
@@ -853,12 +1115,17 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
{
|
||||
RhsPacket B_0, T0;
|
||||
LhsPacket A2;
|
||||
EIGEN_GEBGP_ONESTEP(0);
|
||||
EIGEN_GEBP_ONESTEP(0);
|
||||
blB += 4*RhsProgress;
|
||||
blA += 3*Traits::LhsProgress;
|
||||
}
|
||||
#undef EIGEN_GEBGP_ONESTEP
|
||||
|
||||
|
||||
#undef EIGEN_GEBP_ONESTEP
|
||||
|
||||
possiblyRotatingKernelHelper.unrotateResult(C0, C1, C2, C3);
|
||||
possiblyRotatingKernelHelper.unrotateResult(C4, C5, C6, C7);
|
||||
possiblyRotatingKernelHelper.unrotateResult(C8, C9, C10, C11);
|
||||
|
||||
ResPacket R0, R1, R2;
|
||||
ResPacket alphav = pset1<ResPacket>(alpha);
|
||||
|
||||
@@ -900,12 +1167,15 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
traits.acc(C11, alphav, R2);
|
||||
r3.storePacket(0 * Traits::ResPacketSize, R0);
|
||||
r3.storePacket(1 * Traits::ResPacketSize, R1);
|
||||
r3.storePacket(2 * Traits::ResPacketSize, R2);
|
||||
r3.storePacket(2 * Traits::ResPacketSize, R2);
|
||||
}
|
||||
}
|
||||
|
||||
// Deal with remaining columns of the rhs
|
||||
for(Index j2=packet_cols4; j2<cols; j2++)
|
||||
{
|
||||
for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
|
||||
{
|
||||
// One column at a time
|
||||
const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*Traits::LhsProgress)];
|
||||
prefetch(&blA[0]);
|
||||
@@ -976,7 +1246,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
traits.acc(C8, alphav, R2);
|
||||
r0.storePacket(0 * Traits::ResPacketSize, R0);
|
||||
r0.storePacket(1 * Traits::ResPacketSize, R1);
|
||||
r0.storePacket(2 * Traits::ResPacketSize, R2);
|
||||
r0.storePacket(2 * Traits::ResPacketSize, R2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -984,13 +1255,21 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
//---------- Process 2 * LhsProgress rows at once ----------
|
||||
if(mr>=2*Traits::LhsProgress)
|
||||
{
|
||||
// loops on each largest micro horizontal panel of lhs (2*LhsProgress x depth)
|
||||
for(Index i=peeled_mc3; i<peeled_mc2; i+=2*LhsProgress)
|
||||
const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function.
|
||||
// The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size
|
||||
// suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess),
|
||||
// or because we are testing specific blocking sizes.
|
||||
Index actual_panel_rows = (2*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) ));
|
||||
|
||||
for(Index i1=peeled_mc3; i1<peeled_mc2; i1+=actual_panel_rows)
|
||||
{
|
||||
// loops on each largest micro vertical panel of rhs (depth * nr)
|
||||
Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc2);
|
||||
for(Index j2=0; j2<packet_cols4; j2+=nr)
|
||||
{
|
||||
// We select a 2*Traits::LhsProgress x nr micro block of res which is entirely
|
||||
for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
|
||||
{
|
||||
|
||||
// We selected a 2*Traits::LhsProgress x nr micro block of res which is entirely
|
||||
// stored into 2 x nr registers.
|
||||
|
||||
const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
|
||||
@@ -1094,11 +1373,14 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
r2.storePacket(1 * Traits::ResPacketSize, R1);
|
||||
r3.storePacket(0 * Traits::ResPacketSize, R2);
|
||||
r3.storePacket(1 * Traits::ResPacketSize, R3);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Deal with remaining columns of the rhs
|
||||
for(Index j2=packet_cols4; j2<cols; j2++)
|
||||
{
|
||||
for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
|
||||
{
|
||||
// One column at a time
|
||||
const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
|
||||
prefetch(&blA[0]);
|
||||
@@ -1165,6 +1447,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
traits.acc(C4, alphav, R1);
|
||||
r0.storePacket(0 * Traits::ResPacketSize, R0);
|
||||
r0.storePacket(1 * Traits::ResPacketSize, R1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1286,17 +1569,17 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
|
||||
for(Index k=0; k<peeled_kc; k+=pk)
|
||||
{
|
||||
EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX1");
|
||||
EIGEN_ASM_COMMENT("begin gebp micro kernel 1pX1");
|
||||
RhsPacket B_0;
|
||||
|
||||
#define EIGEN_GEBGP_ONESTEP(K) \
|
||||
do { \
|
||||
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX1"); \
|
||||
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX1"); \
|
||||
EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
|
||||
traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
|
||||
traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
|
||||
traits.madd(A0, B_0, C0, B_0); \
|
||||
EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX1"); \
|
||||
EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX1"); \
|
||||
} while(false);
|
||||
|
||||
EIGEN_GEBGP_ONESTEP(0);
|
||||
@@ -1311,7 +1594,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
blB += pk*RhsProgress;
|
||||
blA += pk*1*Traits::LhsProgress;
|
||||
|
||||
EIGEN_ASM_COMMENT("end gebp micro kernel 2pX1");
|
||||
EIGEN_ASM_COMMENT("end gebp micro kernel 1pX1");
|
||||
}
|
||||
|
||||
// process remaining peeled loop
|
||||
@@ -1783,19 +2066,19 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Co
|
||||
const LinearMapper dm3 = rhs.getLinearMapper(0, j2 + 3);
|
||||
|
||||
Index k=0;
|
||||
if((PacketSize%4)==0) // TODO enbale vectorized transposition for PacketSize==2 ??
|
||||
if((PacketSize%4)==0) // TODO enable vectorized transposition for PacketSize==2 ??
|
||||
{
|
||||
for(; k<peeled_k; k+=PacketSize) {
|
||||
PacketBlock<Packet,(PacketSize%4)==0?4:PacketSize> kernel;
|
||||
kernel.packet[0] = dm0.loadPacket(k);
|
||||
kernel.packet[1] = dm1.loadPacket(k);
|
||||
kernel.packet[2] = dm2.loadPacket(k);
|
||||
kernel.packet[3] = dm3.loadPacket(k);
|
||||
kernel.packet[1%PacketSize] = dm1.loadPacket(k);
|
||||
kernel.packet[2%PacketSize] = dm2.loadPacket(k);
|
||||
kernel.packet[3%PacketSize] = dm3.loadPacket(k);
|
||||
ptranspose(kernel);
|
||||
pstoreu(blockB+count+0*PacketSize, cj.pconj(kernel.packet[0]));
|
||||
pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel.packet[1]));
|
||||
pstoreu(blockB+count+2*PacketSize, cj.pconj(kernel.packet[2]));
|
||||
pstoreu(blockB+count+3*PacketSize, cj.pconj(kernel.packet[3]));
|
||||
pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel.packet[1%PacketSize]));
|
||||
pstoreu(blockB+count+2*PacketSize, cj.pconj(kernel.packet[2%PacketSize]));
|
||||
pstoreu(blockB+count+3*PacketSize, cj.pconj(kernel.packet[3%PacketSize]));
|
||||
count+=4*PacketSize;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -164,6 +164,8 @@ static void run(Index rows, Index cols, Index depth,
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
|
||||
|
||||
const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols;
|
||||
|
||||
// For each horizontal panel of the rhs, and corresponding panel of the lhs...
|
||||
for(Index i2=0; i2<rows; i2+=mc)
|
||||
@@ -188,7 +190,8 @@ static void run(Index rows, Index cols, Index depth,
|
||||
// We pack the rhs's block into a sequential chunk of memory (L2 caching)
|
||||
// Note that this block will be read a very high number of times, which is equal to the number of
|
||||
// micro horizontal panel of the large rhs's panel (e.g., rows/12 times).
|
||||
pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);
|
||||
if((!pack_rhs_once) || i2==0)
|
||||
pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);
|
||||
|
||||
// Everything is packed, we can now call the panel * block kernel:
|
||||
gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
|
||||
@@ -201,15 +204,10 @@ static void run(Index rows, Index cols, Index depth,
|
||||
};
|
||||
|
||||
/*********************************************************************************
|
||||
* Specialization of GeneralProduct<> for "large" GEMM, i.e.,
|
||||
* Specialization of generic_product_impl for "large" GEMM, i.e.,
|
||||
* implementation of the high level wrapper to general_matrix_matrix_product
|
||||
**********************************************************************************/
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct traits<GeneralProduct<Lhs,Rhs,GemmProduct> >
|
||||
: traits<ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs> >
|
||||
{};
|
||||
|
||||
template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
|
||||
struct gemm_functor
|
||||
{
|
||||
@@ -217,8 +215,9 @@ struct gemm_functor
|
||||
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
|
||||
{}
|
||||
|
||||
void initParallelSession() const
|
||||
void initParallelSession(Index num_threads) const
|
||||
{
|
||||
m_blocking.initParallel(m_lhs.rows(), m_rhs.cols(), m_lhs.cols(), num_threads);
|
||||
m_blocking.allocateA();
|
||||
}
|
||||
|
||||
@@ -276,7 +275,7 @@ class level3_blocking
|
||||
};
|
||||
|
||||
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
|
||||
class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true>
|
||||
class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true /* == FiniteAtCompileTime */>
|
||||
: public level3_blocking<
|
||||
typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
|
||||
typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
|
||||
@@ -294,19 +293,32 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
||||
SizeB = ActualCols * MaxDepth
|
||||
};
|
||||
|
||||
EIGEN_ALIGN_DEFAULT LhsScalar m_staticA[SizeA];
|
||||
EIGEN_ALIGN_DEFAULT RhsScalar m_staticB[SizeB];
|
||||
#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
|
||||
EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];
|
||||
EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];
|
||||
#else
|
||||
EIGEN_ALIGN_MAX char m_staticA[SizeA * sizeof(LhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
|
||||
EIGEN_ALIGN_MAX char m_staticB[SizeB * sizeof(RhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
|
||||
#endif
|
||||
|
||||
public:
|
||||
|
||||
gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, int /*num_threads*/, bool /*full_rows = false*/)
|
||||
gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, Index /*num_threads*/, bool /*full_rows = false*/)
|
||||
{
|
||||
this->m_mc = ActualRows;
|
||||
this->m_nc = ActualCols;
|
||||
this->m_kc = MaxDepth;
|
||||
#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
|
||||
this->m_blockA = m_staticA;
|
||||
this->m_blockB = m_staticB;
|
||||
#else
|
||||
this->m_blockA = reinterpret_cast<LhsScalar*>((std::size_t(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
|
||||
this->m_blockB = reinterpret_cast<RhsScalar*>((std::size_t(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
|
||||
#endif
|
||||
}
|
||||
|
||||
void initParallel(Index, Index, Index, Index)
|
||||
{}
|
||||
|
||||
inline void allocateA() {}
|
||||
inline void allocateB() {}
|
||||
@@ -331,7 +343,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
||||
|
||||
public:
|
||||
|
||||
gemm_blocking_space(Index rows, Index cols, Index depth, int num_threads, bool l3_blocking)
|
||||
gemm_blocking_space(Index rows, Index cols, Index depth, Index num_threads, bool l3_blocking)
|
||||
{
|
||||
this->m_mc = Transpose ? cols : rows;
|
||||
this->m_nc = Transpose ? rows : cols;
|
||||
@@ -351,6 +363,19 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
||||
m_sizeA = this->m_mc * this->m_kc;
|
||||
m_sizeB = this->m_kc * this->m_nc;
|
||||
}
|
||||
|
||||
void initParallel(Index rows, Index cols, Index depth, Index num_threads)
|
||||
{
|
||||
this->m_mc = Transpose ? cols : rows;
|
||||
this->m_nc = Transpose ? rows : cols;
|
||||
this->m_kc = depth;
|
||||
|
||||
eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
|
||||
Index m = this->m_mc;
|
||||
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
|
||||
m_sizeA = this->m_mc * this->m_kc;
|
||||
m_sizeB = this->m_kc * this->m_nc;
|
||||
}
|
||||
|
||||
void allocateA()
|
||||
{
|
||||
@@ -437,6 +462,8 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
|
||||
static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
|
||||
{
|
||||
eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
|
||||
if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0)
|
||||
return;
|
||||
|
||||
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
|
||||
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
|
||||
|
||||
@@ -125,7 +125,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,C
|
||||
|
||||
// How many coeffs of the result do we have to skip to be aligned.
|
||||
// Here we assume data are at least aligned on the base scalar type.
|
||||
Index alignedStart = internal::first_aligned(res,size);
|
||||
Index alignedStart = internal::first_default_aligned(res,size);
|
||||
Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
|
||||
const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
|
||||
|
||||
@@ -463,7 +463,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R
|
||||
Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
|
||||
for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
|
||||
{
|
||||
EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
|
||||
// FIXME: what is the purpose of this EIGEN_ALIGN_DEFAULT ??
|
||||
EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
|
||||
ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
|
||||
|
||||
// this helps the compiler generating good binary code
|
||||
@@ -572,7 +573,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R
|
||||
{
|
||||
for (Index i=start; i<end; ++i)
|
||||
{
|
||||
EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
|
||||
EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
|
||||
ResPacket ptmp0 = pset1<ResPacket>(tmp0);
|
||||
const LhsScalars lhs0 = lhs.getVectorMapper(i, 0);
|
||||
// process first unaligned result's coeffs
|
||||
|
||||
25
Eigen/src/Core/products/GeneralMatrixVector_MKL.h
Normal file → Executable file
25
Eigen/src/Core/products/GeneralMatrixVector_MKL.h
Normal file → Executable file
@@ -46,38 +46,37 @@ namespace internal {
|
||||
|
||||
// gemv specialization
|
||||
|
||||
template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
|
||||
struct general_matrix_vector_product_gemv :
|
||||
general_matrix_vector_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,ConjugateRhs,BuiltIn> {};
|
||||
template<typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
|
||||
struct general_matrix_vector_product_gemv;
|
||||
|
||||
#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
|
||||
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
|
||||
struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
|
||||
struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,Specialized> { \
|
||||
static void run( \
|
||||
Index rows, Index cols, \
|
||||
const Scalar* lhs, Index lhsStride, \
|
||||
const Scalar* rhs, Index rhsIncr, \
|
||||
const const_blas_data_mapper<Scalar,Index,ColMajor> &lhs, \
|
||||
const const_blas_data_mapper<Scalar,Index,RowMajor> &rhs, \
|
||||
Scalar* res, Index resIncr, Scalar alpha) \
|
||||
{ \
|
||||
if (ConjugateLhs) { \
|
||||
general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,BuiltIn>::run( \
|
||||
rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
|
||||
general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,BuiltIn>::run( \
|
||||
rows, cols, lhs, rhs, res, resIncr, alpha); \
|
||||
} else { \
|
||||
general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
|
||||
rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
|
||||
rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
|
||||
} \
|
||||
} \
|
||||
}; \
|
||||
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
|
||||
struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
|
||||
struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,RowMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ConjugateRhs,Specialized> { \
|
||||
static void run( \
|
||||
Index rows, Index cols, \
|
||||
const Scalar* lhs, Index lhsStride, \
|
||||
const Scalar* rhs, Index rhsIncr, \
|
||||
const const_blas_data_mapper<Scalar,Index,RowMajor> &lhs, \
|
||||
const const_blas_data_mapper<Scalar,Index,ColMajor> &rhs, \
|
||||
Scalar* res, Index resIncr, Scalar alpha) \
|
||||
{ \
|
||||
general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
|
||||
rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
|
||||
rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
|
||||
} \
|
||||
}; \
|
||||
|
||||
|
||||
@@ -120,25 +120,28 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
|
||||
return func(0,rows, 0,cols);
|
||||
|
||||
Eigen::initParallel();
|
||||
func.initParallelSession();
|
||||
func.initParallelSession(threads);
|
||||
|
||||
if(transpose)
|
||||
std::swap(rows,cols);
|
||||
|
||||
Index blockCols = (cols / threads) & ~Index(0x3);
|
||||
Index blockRows = (rows / threads);
|
||||
blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
|
||||
|
||||
|
||||
#pragma omp parallel num_threads(threads)
|
||||
{
|
||||
Index i = omp_get_thread_num();
|
||||
// Note that the actual number of threads might be lower than the number of request ones.
|
||||
Index actual_threads = omp_get_num_threads();
|
||||
|
||||
Index blockCols = (cols / actual_threads) & ~Index(0x3);
|
||||
Index blockRows = (rows / actual_threads);
|
||||
blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
|
||||
|
||||
Index r0 = i*blockRows;
|
||||
Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows;
|
||||
Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
|
||||
|
||||
Index c0 = i*blockCols;
|
||||
Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols;
|
||||
Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
|
||||
|
||||
info[i].lhs_start = r0;
|
||||
info[i].lhs_length = actualBlockRows;
|
||||
|
||||
@@ -94,7 +94,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
|
||||
|
||||
size_t starti = FirstTriangular ? 0 : j+2;
|
||||
size_t endi = FirstTriangular ? j : size;
|
||||
size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti);
|
||||
size_t alignedStart = (starti) + internal::first_default_aligned(&res[starti], endi-starti);
|
||||
size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
|
||||
|
||||
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
|
||||
|
||||
@@ -257,6 +257,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
Scalar* _res, Index resStride,
|
||||
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
||||
{
|
||||
const Index PacketBytes = packet_traits<Scalar>::size*sizeof(Scalar);
|
||||
// strip zeros
|
||||
Index diagSize = (std::min)(_cols,_depth);
|
||||
Index rows = _rows;
|
||||
@@ -274,7 +275,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
||||
|
||||
std::size_t sizeA = kc*mc;
|
||||
std::size_t sizeB = kc*cols+EIGEN_ALIGN_BYTES/sizeof(Scalar);
|
||||
std::size_t sizeB = kc*cols+EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar);
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
||||
@@ -311,7 +312,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
|
||||
|
||||
Scalar* geb = blockB+ts*ts;
|
||||
geb = geb + internal::first_aligned(geb,EIGEN_ALIGN_BYTES/sizeof(Scalar));
|
||||
geb = geb + internal::first_aligned<PacketBytes>(geb,PacketBytes/sizeof(Scalar));
|
||||
|
||||
pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 0 : k2), actual_kc, rs);
|
||||
|
||||
|
||||
4
Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
Normal file → Executable file
4
Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
Normal file → Executable file
@@ -122,7 +122,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
|
||||
Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
|
||||
MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
|
||||
MKL_INT aStride = aa_tmp.outerStride(); \
|
||||
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \
|
||||
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
|
||||
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
|
||||
rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \
|
||||
\
|
||||
@@ -236,7 +236,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
|
||||
Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
|
||||
MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
|
||||
MKL_INT aStride = aa_tmp.outerStride(); \
|
||||
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \
|
||||
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
|
||||
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
|
||||
rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \
|
||||
\
|
||||
|
||||
@@ -117,8 +117,9 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
|
||||
{
|
||||
// TODO write a small kernel handling this (can be shared with trsv)
|
||||
Index i = IsLower ? k2+k1+k : k2-k1-k-1;
|
||||
Index s = IsLower ? k2+k1 : i+1;
|
||||
Index rs = actualPanelWidth - k - 1; // remaining size
|
||||
Index s = TriStorageOrder==RowMajor ? (IsLower ? k2+k1 : i+1)
|
||||
: IsLower ? i+1 : i-rs;
|
||||
|
||||
Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
|
||||
for (Index j=j2; j<j2+actual_cols; ++j)
|
||||
@@ -135,7 +136,6 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
|
||||
}
|
||||
else
|
||||
{
|
||||
Index s = IsLower ? i+1 : i-rs;
|
||||
Scalar b = (other(i,j) *= a);
|
||||
Scalar* r = &other(s,j);
|
||||
const Scalar* l = &tri(s,i);
|
||||
|
||||
7
Eigen/src/Core/util/BlasUtil.h
Normal file → Executable file
7
Eigen/src/Core/util/BlasUtil.h
Normal file → Executable file
@@ -166,7 +166,7 @@ class BlasLinearMapper {
|
||||
return ploadt<HalfPacket, AlignmentType>(m_data + i);
|
||||
}
|
||||
|
||||
EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const {
|
||||
EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
|
||||
pstoret<Scalar, Packet, AlignmentType>(m_data + i, p);
|
||||
}
|
||||
|
||||
@@ -214,7 +214,7 @@ class blas_data_mapper {
|
||||
}
|
||||
|
||||
template<typename SubPacket>
|
||||
EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, SubPacket p) const {
|
||||
EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
|
||||
pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
|
||||
}
|
||||
|
||||
@@ -224,12 +224,13 @@ class blas_data_mapper {
|
||||
}
|
||||
|
||||
const Index stride() const { return m_stride; }
|
||||
const Scalar* data() const { return m_data; }
|
||||
|
||||
Index firstAligned(Index size) const {
|
||||
if (size_t(m_data)%sizeof(Scalar)) {
|
||||
return -1;
|
||||
}
|
||||
return internal::first_aligned(m_data, size);
|
||||
return internal::first_default_aligned(m_data, size);
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
@@ -140,7 +140,7 @@ const unsigned int LvalueBit = 0x20;
|
||||
*/
|
||||
const unsigned int DirectAccessBit = 0x40;
|
||||
|
||||
/** \ingroup flags
|
||||
/** \deprecated \ingroup flags
|
||||
*
|
||||
* means the first coefficient packet is guaranteed to be aligned.
|
||||
* An expression cannot has the AlignedBit without the PacketAccessBit flag.
|
||||
@@ -215,12 +215,31 @@ enum {
|
||||
};
|
||||
|
||||
/** \ingroup enums
|
||||
* Enum for indicating whether an object is aligned or not. */
|
||||
* Enum for indicating whether a buffer is aligned or not. */
|
||||
enum {
|
||||
/** Object is not correctly aligned for vectorization. */
|
||||
Unaligned=0,
|
||||
/** Object is aligned for vectorization. */
|
||||
Aligned=1
|
||||
Unaligned=0, /**< Data pointer has no specific alignment. */
|
||||
Aligned8=8, /**< Data pointer is aligned on a 8 bytes boundary. */
|
||||
Aligned16=16, /**< Data pointer is aligned on a 16 bytes boundary. */
|
||||
Aligned32=32, /**< Data pointer is aligned on a 32 bytes boundary. */
|
||||
Aligned64=64, /**< Data pointer is aligned on a 64 bytes boundary. */
|
||||
Aligned128=128, /**< Data pointer is aligned on a 128 bytes boundary. */
|
||||
AlignedMask=255,
|
||||
Aligned=16, /**< \deprecated Synonym for Aligned16. */
|
||||
#if EIGEN_MAX_ALIGN_BYTES==128
|
||||
AlignedMax = Aligned128
|
||||
#elif EIGEN_MAX_ALIGN_BYTES==64
|
||||
AlignedMax = Aligned64
|
||||
#elif EIGEN_MAX_ALIGN_BYTES==32
|
||||
AlignedMax = Aligned32
|
||||
#elif EIGEN_MAX_ALIGN_BYTES==16
|
||||
AlignedMax = Aligned16
|
||||
#elif EIGEN_MAX_ALIGN_BYTES==8
|
||||
AlignedMax = Aligned8
|
||||
#elif EIGEN_MAX_ALIGN_BYTES==0
|
||||
AlignedMax = Unaligned
|
||||
#else
|
||||
#error Invalid value for EIGEN_MAX_ALIGN_BYTES
|
||||
#endif
|
||||
};
|
||||
|
||||
/** \ingroup enums
|
||||
@@ -452,8 +471,8 @@ namespace Architecture
|
||||
}
|
||||
|
||||
/** \internal \ingroup enums
|
||||
* Enum used as template parameter in GeneralProduct. */
|
||||
enum { DefaultProduct=0, CoeffBasedProductMode, LazyCoeffBasedProductMode, LazyProduct, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
|
||||
* Enum used as template parameter in Product and product evalautors. */
|
||||
enum { DefaultProduct=0, LazyProduct, AliasFreeProduct, CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
|
||||
|
||||
/** \internal \ingroup enums
|
||||
* Enum used in experimental parallel implementation. */
|
||||
@@ -468,6 +487,9 @@ struct Sparse {};
|
||||
/** The type used to identify a permutation storage. */
|
||||
struct PermutationStorage {};
|
||||
|
||||
/** The type used to identify a permutation storage. */
|
||||
struct TranspositionsStorage {};
|
||||
|
||||
/** The type used to identify a matrix expression */
|
||||
struct MatrixXpr {};
|
||||
|
||||
@@ -482,6 +504,7 @@ struct BandShape { static std::string debugName() { return "BandSha
|
||||
struct TriangularShape { static std::string debugName() { return "TriangularShape"; } };
|
||||
struct SelfAdjointShape { static std::string debugName() { return "SelfAdjointShape"; } };
|
||||
struct PermutationShape { static std::string debugName() { return "PermutationShape"; } };
|
||||
struct TranspositionsShape { static std::string debugName() { return "TranspositionsShape"; } };
|
||||
struct SparseShape { static std::string debugName() { return "SparseShape"; } };
|
||||
|
||||
namespace internal {
|
||||
@@ -492,6 +515,16 @@ struct IndexBased {};
|
||||
// evaluator based on iterators to access coefficients.
|
||||
struct IteratorBased {};
|
||||
|
||||
/** \internal
|
||||
* Constants for comparison functors
|
||||
*/
|
||||
enum ComparisonName {
|
||||
cmp_EQ = 0,
|
||||
cmp_LT = 1,
|
||||
cmp_LE = 2,
|
||||
cmp_UNORD = 3,
|
||||
cmp_NEQ = 4
|
||||
};
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -91,8 +91,6 @@ template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
|
||||
template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
|
||||
template<typename ViewOp, typename MatrixType> class CwiseUnaryView;
|
||||
template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
|
||||
template<typename BinOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp; // TODO deprecated
|
||||
template<typename Derived, typename Lhs, typename Rhs> class ProductBase; // TODO deprecated
|
||||
template<typename Decomposition, typename Rhstype> class Solve;
|
||||
template<typename XprType> class Inverse;
|
||||
|
||||
@@ -102,9 +100,6 @@ namespace internal {
|
||||
|
||||
template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product;
|
||||
|
||||
template<typename Lhs, typename Rhs, int Mode> class GeneralProduct; // TODO deprecated
|
||||
template<typename Lhs, typename Rhs, int NestingFlags> class CoeffBasedProduct; // TODO deprecated
|
||||
|
||||
template<typename Derived> class DiagonalBase;
|
||||
template<typename _DiagonalVectorType> class DiagonalWrapper;
|
||||
template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeAtCompileTime> class DiagonalMatrix;
|
||||
@@ -152,6 +147,9 @@ template<typename _Scalar, int Rows=Dynamic, int Cols=Dynamic, int Supers=Dynami
|
||||
|
||||
namespace internal {
|
||||
template<typename Lhs, typename Rhs> struct product_type;
|
||||
|
||||
template<bool> struct EnableIf;
|
||||
|
||||
/** \internal
|
||||
* \class product_evaluator
|
||||
* Products need their own evaluator with more template arguments allowing for
|
||||
@@ -162,7 +160,8 @@ template< typename T,
|
||||
typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape,
|
||||
typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape,
|
||||
typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
|
||||
typename RhsScalar = typename traits<typename T::Rhs>::Scalar
|
||||
typename RhsScalar = typename traits<typename T::Rhs>::Scalar,
|
||||
typename = EnableIf<true> // extra template parameter for SFINAE-based specialization
|
||||
> struct product_evaluator;
|
||||
}
|
||||
|
||||
@@ -189,6 +188,7 @@ template<typename Scalar> struct scalar_imag_op;
|
||||
template<typename Scalar> struct scalar_abs_op;
|
||||
template<typename Scalar> struct scalar_abs2_op;
|
||||
template<typename Scalar> struct scalar_sqrt_op;
|
||||
template<typename Scalar> struct scalar_rsqrt_op;
|
||||
template<typename Scalar> struct scalar_exp_op;
|
||||
template<typename Scalar> struct scalar_log_op;
|
||||
template<typename Scalar> struct scalar_cos_op;
|
||||
@@ -213,6 +213,7 @@ template<typename Scalar> struct scalar_identity_op;
|
||||
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
|
||||
template<typename LhsScalar,typename RhsScalar> struct scalar_multiple2_op;
|
||||
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_quotient_op;
|
||||
template<typename LhsScalar,typename RhsScalar> struct scalar_quotient2_op;
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
@@ -264,6 +265,7 @@ template<typename Derived> class QuaternionBase;
|
||||
template<typename Scalar> class Rotation2D;
|
||||
template<typename Scalar> class AngleAxis;
|
||||
template<typename Scalar,int Dim> class Translation;
|
||||
template<typename Scalar,int Dim> class AlignedBox;
|
||||
|
||||
template<typename Scalar, int Options = AutoAlign> class Quaternion;
|
||||
template<typename Scalar,int Dim,int Mode,int _Options=AutoAlign> class Transform;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
#define EIGEN_WORLD_VERSION 3
|
||||
#define EIGEN_MAJOR_VERSION 2
|
||||
#define EIGEN_MINOR_VERSION 90
|
||||
#define EIGEN_MINOR_VERSION 91
|
||||
|
||||
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
|
||||
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
|
||||
@@ -101,7 +101,7 @@
|
||||
|
||||
|
||||
/// \internal EIGEN_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.)
|
||||
#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_CLANG || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM )
|
||||
#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM )
|
||||
#define EIGEN_COMP_GNUC_STRICT 1
|
||||
#else
|
||||
#define EIGEN_COMP_GNUC_STRICT 0
|
||||
@@ -213,7 +213,8 @@
|
||||
#endif
|
||||
|
||||
/// \internal EIGEN_OS_ANDROID set to 1 if the OS is Android
|
||||
#if defined(__ANDROID__)
|
||||
// note: ANDROID is defined when using ndk_build, __ANDROID__ is defined when using a standalone toolchain.
|
||||
#if defined(__ANDROID__) || defined(ANDROID)
|
||||
#define EIGEN_OS_ANDROID 1
|
||||
#else
|
||||
#define EIGEN_OS_ANDROID 0
|
||||
@@ -282,6 +283,19 @@
|
||||
#define EIGEN_OS_WIN_STRICT 0
|
||||
#endif
|
||||
|
||||
/// \internal EIGEN_OS_SUN set to 1 if the OS is SUN
|
||||
#if (defined(sun) || defined(__sun)) && !(defined(__SVR4) || defined(__svr4__))
|
||||
#define EIGEN_OS_SUN 1
|
||||
#else
|
||||
#define EIGEN_OS_SUN 0
|
||||
#endif
|
||||
|
||||
/// \internal EIGEN_OS_SOLARIS set to 1 if the OS is Solaris
|
||||
#if (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__))
|
||||
#define EIGEN_OS_SOLARIS 1
|
||||
#else
|
||||
#define EIGEN_OS_SOLARIS 0
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@@ -292,65 +306,10 @@
|
||||
#define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
|
||||
#endif
|
||||
|
||||
// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
|
||||
// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
|
||||
// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
|
||||
// certain common platform (compiler+architecture combinations) to avoid these problems.
|
||||
// Only static alignment is really problematic (relies on nonstandard compiler extensions that don't
|
||||
// work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even
|
||||
// when we have to disable static alignment.
|
||||
#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
|
||||
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
|
||||
#else
|
||||
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
|
||||
#endif
|
||||
|
||||
// static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
|
||||
#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
|
||||
&& !EIGEN_GCC3_OR_OLDER \
|
||||
&& !EIGEN_COMP_SUNCC \
|
||||
&& !EIGEN_OS_QNX
|
||||
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
|
||||
#else
|
||||
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
|
||||
#endif
|
||||
|
||||
// Defined the boundary (in bytes) on which the data needs to be aligned. Note
|
||||
// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
|
||||
// aligned at all regardless of the value of this #define.
|
||||
#define EIGEN_ALIGN_BYTES 16
|
||||
|
||||
#ifdef EIGEN_DONT_ALIGN
|
||||
#ifndef EIGEN_DONT_ALIGN_STATICALLY
|
||||
#define EIGEN_DONT_ALIGN_STATICALLY
|
||||
#endif
|
||||
#define EIGEN_ALIGN 0
|
||||
#elif !defined(EIGEN_DONT_VECTORIZE)
|
||||
#if defined(__AVX__)
|
||||
#undef EIGEN_ALIGN_BYTES
|
||||
#define EIGEN_ALIGN_BYTES 32
|
||||
#endif
|
||||
#define EIGEN_ALIGN 1
|
||||
#else
|
||||
#define EIGEN_ALIGN 0
|
||||
#endif
|
||||
|
||||
|
||||
// This macro can be used to prevent from macro expansion, e.g.:
|
||||
// std::max EIGEN_NOT_A_MACRO(a,b)
|
||||
#define EIGEN_NOT_A_MACRO
|
||||
|
||||
// EIGEN_ALIGN_STATICALLY is the true test whether we want to align arrays on the stack or not. It takes into account both the user choice to explicitly disable
|
||||
// alignment (EIGEN_DONT_ALIGN_STATICALLY) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Henceforth, only EIGEN_ALIGN_STATICALLY should be used.
|
||||
#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT && !defined(EIGEN_DONT_ALIGN_STATICALLY)
|
||||
#define EIGEN_ALIGN_STATICALLY 1
|
||||
#else
|
||||
#define EIGEN_ALIGN_STATICALLY 0
|
||||
#ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
|
||||
#define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
|
||||
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor
|
||||
#else
|
||||
@@ -382,17 +341,44 @@
|
||||
#define EIGEN_HAVE_RVALUE_REFERENCES
|
||||
#endif
|
||||
|
||||
// Does the compiler support result_of?
|
||||
#if (__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L))
|
||||
#define EIGEN_HAS_STD_RESULT_OF 1
|
||||
#endif
|
||||
|
||||
// Does the compiler support variadic templates?
|
||||
#if __cplusplus > 199711L
|
||||
#define EIGEN_HAS_VARIADIC_TEMPLATES 1
|
||||
#endif
|
||||
|
||||
// Does the compiler support const expressions?
|
||||
#if (defined(__plusplus) && __cplusplus >= 201402L) || \
|
||||
#ifdef __CUDACC__
|
||||
// Const expressions are not supported regardless of what host compiler is used
|
||||
#elif (defined(__cplusplus) && __cplusplus >= 201402L) || \
|
||||
EIGEN_GNUC_AT_LEAST(4,9)
|
||||
#define EIGEN_HAS_CONSTEXPR 1
|
||||
#endif
|
||||
|
||||
// Does the compiler support C++11 math?
|
||||
// Let's be conservative and enable the default C++11 implementation only if we are sure it exists
|
||||
#ifndef EIGEN_HAS_CXX11_MATH
|
||||
#if (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \
|
||||
&& (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC)
|
||||
#define EIGEN_HAS_CXX11_MATH 1
|
||||
#else
|
||||
#define EIGEN_HAS_CXX11_MATH 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Does the compiler support proper C++11 containers?
|
||||
#ifndef EIGEN_HAS_CXX11_CONTAINERS
|
||||
#if ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG)) || EIGEN_COMP_MSVC >= 1900
|
||||
#define EIGEN_HAS_CXX11_CONTAINERS 1
|
||||
#else
|
||||
#define EIGEN_HAS_CXX11_CONTAINERS 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/** Allows to disable some optimizations which might affect the accuracy of the result.
|
||||
* Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them.
|
||||
* They currently include:
|
||||
@@ -550,6 +536,20 @@ namespace Eigen {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------------------
|
||||
// Static and dynamic alignment control
|
||||
//
|
||||
// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES
|
||||
// as the maximal boundary in bytes on which dynamically and statically allocated data may be alignment respectively.
|
||||
// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,
|
||||
// a default value is automatically computed based on architecture, compiler, and OS.
|
||||
//
|
||||
// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX}
|
||||
// to be used to declare statically aligned buffers.
|
||||
//------------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
|
||||
* However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
|
||||
* so that vectorization doesn't affect binary compatibility.
|
||||
@@ -570,22 +570,124 @@ namespace Eigen {
|
||||
#error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
|
||||
#endif
|
||||
|
||||
// If the user explicitly disable vectorization, then we also disable alignment
|
||||
#if defined(EIGEN_DONT_VECTORIZE)
|
||||
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
|
||||
#elif defined(__AVX__)
|
||||
// 32 bytes static alignmeent is preferred only if really required
|
||||
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
|
||||
#else
|
||||
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
|
||||
#endif
|
||||
|
||||
|
||||
// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense
|
||||
#define EIGEN_MIN_ALIGN_BYTES 16
|
||||
|
||||
// Defined the boundary (in bytes) on which the data needs to be aligned. Note
|
||||
// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
|
||||
// aligned at all regardless of the value of this #define.
|
||||
|
||||
#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
|
||||
#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
|
||||
#endif
|
||||
|
||||
// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprectated
|
||||
// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0
|
||||
#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
|
||||
#ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
|
||||
#undef EIGEN_MAX_STATIC_ALIGN_BYTES
|
||||
#endif
|
||||
#define EIGEN_MAX_STATIC_ALIGN_BYTES 0
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
|
||||
|
||||
// Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES
|
||||
|
||||
// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
|
||||
// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
|
||||
// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
|
||||
// certain common platform (compiler+architecture combinations) to avoid these problems.
|
||||
// Only static alignment is really problematic (relies on nonstandard compiler extensions that don't
|
||||
// work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even
|
||||
// when we have to disable static alignment.
|
||||
#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
|
||||
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
|
||||
#else
|
||||
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
|
||||
#endif
|
||||
|
||||
// static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
|
||||
#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
|
||||
&& !EIGEN_GCC3_OR_OLDER \
|
||||
&& !EIGEN_COMP_SUNCC \
|
||||
&& !EIGEN_OS_QNX
|
||||
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
|
||||
#else
|
||||
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
|
||||
#endif
|
||||
|
||||
#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
|
||||
#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
|
||||
#else
|
||||
#define EIGEN_MAX_STATIC_ALIGN_BYTES 0
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_ALIGN_BYTES
|
||||
#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
|
||||
#undef EIGEN_MAX_STATIC_ALIGN_BYTES
|
||||
#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
|
||||
#endif
|
||||
|
||||
#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
|
||||
#define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
|
||||
#endif
|
||||
|
||||
// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not.
|
||||
// It takes into account both the user choice to explicitly enable/disable alignment (by settting EIGEN_MAX_STATIC_ALIGN_BYTES)
|
||||
// and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT).
|
||||
// Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used.
|
||||
|
||||
|
||||
// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY
|
||||
#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
|
||||
#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
|
||||
#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
|
||||
#define EIGEN_ALIGN_DEFAULT EIGEN_ALIGN_TO_BOUNDARY(EIGEN_ALIGN_BYTES)
|
||||
|
||||
#if EIGEN_ALIGN_STATICALLY
|
||||
#define EIGEN_USER_ALIGN_TO_BOUNDARY(n) EIGEN_ALIGN_TO_BOUNDARY(n)
|
||||
#define EIGEN_USER_ALIGN16 EIGEN_ALIGN16
|
||||
#define EIGEN_USER_ALIGN32 EIGEN_ALIGN32
|
||||
#define EIGEN_USER_ALIGN_DEFAULT EIGEN_ALIGN_DEFAULT
|
||||
#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
|
||||
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
|
||||
#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
|
||||
#else
|
||||
#define EIGEN_USER_ALIGN_TO_BOUNDARY(n)
|
||||
#define EIGEN_USER_ALIGN16
|
||||
#define EIGEN_USER_ALIGN32
|
||||
#define EIGEN_USER_ALIGN_DEFAULT
|
||||
#define EIGEN_ALIGN_MAX
|
||||
#endif
|
||||
|
||||
|
||||
// Dynamic alignment control
|
||||
|
||||
#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
|
||||
#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_DONT_ALIGN
|
||||
#ifdef EIGEN_MAX_ALIGN_BYTES
|
||||
#undef EIGEN_MAX_ALIGN_BYTES
|
||||
#endif
|
||||
#define EIGEN_MAX_ALIGN_BYTES 0
|
||||
#elif !defined(EIGEN_MAX_ALIGN_BYTES)
|
||||
#define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
|
||||
#endif
|
||||
|
||||
#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
|
||||
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
|
||||
#else
|
||||
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
|
||||
#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
|
||||
#define EIGEN_RESTRICT
|
||||
#endif
|
||||
@@ -611,7 +713,7 @@ namespace Eigen {
|
||||
// just an empty macro !
|
||||
#define EIGEN_EMPTY
|
||||
|
||||
#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1900
|
||||
#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1900 // for older MSVC versions using the base operator is sufficient (cf Bug 1000)
|
||||
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
|
||||
using Base::operator =;
|
||||
#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
|
||||
@@ -630,6 +732,11 @@ namespace Eigen {
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Macro to manually inherit assignment operators.
|
||||
* This is necessary, because the implicitly defined assignment operator gets deleted when a custom operator= is defined.
|
||||
*/
|
||||
#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived)
|
||||
|
||||
/**
|
||||
@@ -646,7 +753,7 @@ namespace Eigen {
|
||||
typedef typename Eigen::internal::traits<Derived>::Scalar Scalar; /*!< \brief Numeric type, e.g. float, double, int or std::complex<float>. */ \
|
||||
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; /*!< \brief The underlying numeric type for composed scalar types. \details In cases where Scalar is e.g. std::complex<T>, T were corresponding to RealScalar. */ \
|
||||
typedef typename Base::CoeffReturnType CoeffReturnType; /*!< \brief The return type for coefficient access. \details Depending on whether the object allows direct coefficient access (e.g. for a MatrixXd), this type is either 'const Scalar&' or simply 'Scalar' for objects that do not allow direct coefficient access. */ \
|
||||
typedef typename Eigen::internal::nested<Derived>::type Nested; \
|
||||
typedef typename Eigen::internal::ref_selector<Derived>::type Nested; \
|
||||
typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \
|
||||
typedef typename Eigen::internal::traits<Derived>::StorageIndex StorageIndex; \
|
||||
enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \
|
||||
@@ -719,8 +826,13 @@ namespace Eigen {
|
||||
# define EIGEN_TRY try
|
||||
# define EIGEN_CATCH(X) catch (X)
|
||||
#else
|
||||
# define EIGEN_THROW_X(X) std::abort()
|
||||
# define EIGEN_THROW std::abort()
|
||||
# ifdef __CUDA_ARCH__
|
||||
# define EIGEN_THROW_X(X) asm("trap;") return {}
|
||||
# define EIGEN_THROW asm("trap;"); return {}
|
||||
# else
|
||||
# define EIGEN_THROW_X(X) std::abort()
|
||||
# define EIGEN_THROW std::abort()
|
||||
# endif
|
||||
# define EIGEN_TRY if (true)
|
||||
# define EIGEN_CATCH(X) else
|
||||
#endif
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
|
||||
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
|
||||
@@ -32,7 +32,7 @@
|
||||
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
|
||||
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
|
||||
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
|
||||
&& defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_ALIGN_BYTES == 16)
|
||||
&& defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
|
||||
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
|
||||
#else
|
||||
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
|
||||
@@ -42,14 +42,14 @@
|
||||
// See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
|
||||
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
|
||||
// See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
|
||||
#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_ALIGN_BYTES == 16)
|
||||
#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
|
||||
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
|
||||
#else
|
||||
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
|
||||
#endif
|
||||
|
||||
#if (EIGEN_OS_MAC && (EIGEN_ALIGN_BYTES == 16)) \
|
||||
|| (EIGEN_OS_WIN64 && (EIGEN_ALIGN_BYTES == 16)) \
|
||||
#if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \
|
||||
|| (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \
|
||||
|| EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \
|
||||
|| EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
|
||||
#define EIGEN_MALLOC_ALREADY_ALIGNED 1
|
||||
@@ -59,18 +59,20 @@
|
||||
|
||||
#endif
|
||||
|
||||
// See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554)
|
||||
// It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first.
|
||||
// Currently, let's include it only on unix systems:
|
||||
#if EIGEN_OS_UNIX
|
||||
#include <unistd.h>
|
||||
#if (EIGEN_OS_QNX || (defined _GNU_SOURCE) || EIGEN_COMP_PGI || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
|
||||
#define EIGEN_HAS_POSIX_MEMALIGN 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_POSIX_MEMALIGN
|
||||
#define EIGEN_HAS_POSIX_MEMALIGN 0
|
||||
// See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554)
|
||||
// It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first.
|
||||
// Currently, let's include it only on unix systems:
|
||||
#if EIGEN_OS_UNIX && !(EIGEN_OS_SUN || EIGEN_OS_SOLARIS)
|
||||
#include <unistd.h>
|
||||
#if (EIGEN_OS_QNX || (defined _GNU_SOURCE) || EIGEN_COMP_PGI || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
|
||||
#define EIGEN_HAS_POSIX_MEMALIGN 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_POSIX_MEMALIGN
|
||||
#define EIGEN_HAS_POSIX_MEMALIGN 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined EIGEN_VECTORIZE_SSE || defined EIGEN_VECTORIZE_AVX
|
||||
@@ -105,9 +107,9 @@ inline void throw_std_bad_alloc()
|
||||
*/
|
||||
inline void* handmade_aligned_malloc(std::size_t size)
|
||||
{
|
||||
void *original = std::malloc(size+EIGEN_ALIGN_BYTES);
|
||||
void *original = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES);
|
||||
if (original == 0) return 0;
|
||||
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES);
|
||||
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
|
||||
*(reinterpret_cast<void**>(aligned) - 1) = original;
|
||||
return aligned;
|
||||
}
|
||||
@@ -128,9 +130,9 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t =
|
||||
if (ptr == 0) return handmade_aligned_malloc(size);
|
||||
void *original = *(reinterpret_cast<void**>(ptr) - 1);
|
||||
std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
|
||||
original = std::realloc(original,size+EIGEN_ALIGN_BYTES);
|
||||
original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES);
|
||||
if (original == 0) return 0;
|
||||
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES);
|
||||
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
|
||||
void *previous_aligned = static_cast<char *>(original)+previous_offset;
|
||||
if(aligned!=previous_aligned)
|
||||
std::memmove(aligned, previous_aligned, size);
|
||||
@@ -216,16 +218,16 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
|
||||
check_that_malloc_is_allowed();
|
||||
|
||||
void *result;
|
||||
#if !EIGEN_ALIGN
|
||||
#if EIGEN_DEFAULT_ALIGN_BYTES==0
|
||||
result = std::malloc(size);
|
||||
#elif EIGEN_MALLOC_ALREADY_ALIGNED
|
||||
result = std::malloc(size);
|
||||
#elif EIGEN_HAS_POSIX_MEMALIGN
|
||||
if(posix_memalign(&result, EIGEN_ALIGN_BYTES, size)) result = 0;
|
||||
if(posix_memalign(&result, EIGEN_DEFAULT_ALIGN_BYTES, size)) result = 0;
|
||||
#elif EIGEN_HAS_MM_MALLOC
|
||||
result = _mm_malloc(size, EIGEN_ALIGN_BYTES);
|
||||
result = _mm_malloc(size, EIGEN_DEFAULT_ALIGN_BYTES);
|
||||
#elif EIGEN_OS_WIN_STRICT
|
||||
result = _aligned_malloc(size, EIGEN_ALIGN_BYTES);
|
||||
result = _aligned_malloc(size, EIGEN_DEFAULT_ALIGN_BYTES);
|
||||
#else
|
||||
result = handmade_aligned_malloc(size);
|
||||
#endif
|
||||
@@ -239,7 +241,7 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
|
||||
/** \internal Frees memory allocated with aligned_malloc. */
|
||||
EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
|
||||
{
|
||||
#if !EIGEN_ALIGN
|
||||
#if EIGEN_DEFAULT_ALIGN_BYTES==0
|
||||
std::free(ptr);
|
||||
#elif EIGEN_MALLOC_ALREADY_ALIGNED
|
||||
std::free(ptr);
|
||||
@@ -264,7 +266,7 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
|
||||
EIGEN_UNUSED_VARIABLE(old_size);
|
||||
|
||||
void *result;
|
||||
#if !EIGEN_ALIGN
|
||||
#if EIGEN_DEFAULT_ALIGN_BYTES==0
|
||||
result = std::realloc(ptr,new_size);
|
||||
#elif EIGEN_MALLOC_ALREADY_ALIGNED
|
||||
result = std::realloc(ptr,new_size);
|
||||
@@ -275,12 +277,12 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
|
||||
// implements _mm_malloc/_mm_free based on the corresponding _aligned_
|
||||
// functions. This may not always be the case and we just try to be safe.
|
||||
#if EIGEN_OS_WIN_STRICT && defined(_mm_free)
|
||||
result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES);
|
||||
result = _aligned_realloc(ptr,new_size,EIGEN_DEFAULT_ALIGN_BYTES);
|
||||
#else
|
||||
result = generic_aligned_realloc(ptr,new_size,old_size);
|
||||
#endif
|
||||
#elif EIGEN_OS_WIN_STRICT
|
||||
result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES);
|
||||
result = _aligned_realloc(ptr,new_size,EIGEN_DEFAULT_ALIGN_BYTES);
|
||||
#else
|
||||
result = handmade_aligned_realloc(ptr,new_size,old_size);
|
||||
#endif
|
||||
@@ -504,47 +506,57 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_align
|
||||
|
||||
/****************************************************************************/
|
||||
|
||||
/** \internal Returns the index of the first element of the array that is well aligned for vectorization.
|
||||
/** \internal Returns the index of the first element of the array that is well aligned with respect to the requested \a Alignment.
|
||||
*
|
||||
* \tparam Alignment requested alignment in Bytes.
|
||||
* \param array the address of the start of the array
|
||||
* \param size the size of the array
|
||||
*
|
||||
* \note If no element of the array is well aligned, the size of the array is returned. Typically,
|
||||
* for example with SSE, "well aligned" means 16-byte-aligned. If vectorization is disabled or if the
|
||||
* \note If no element of the array is well aligned or the requested alignment is not a multiple of a scalar,
|
||||
* the size of the array is returned. For example with SSE, the requested alignment is typically 16-bytes. If
|
||||
* packet size for the given scalar type is 1, then everything is considered well-aligned.
|
||||
*
|
||||
* \note If the scalar type is vectorizable, we rely on the following assumptions: sizeof(Scalar) is a
|
||||
* power of 2, the packet size in bytes is also a power of 2, and is a multiple of sizeof(Scalar). On the
|
||||
* other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for
|
||||
* \note Otherwise, if the Alignment is larger that the scalar size, we rely on the assumptions that sizeof(Scalar) is a
|
||||
* power of 2. On the other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for
|
||||
* example with Scalar=double on certain 32-bit platforms, see bug #79.
|
||||
*
|
||||
* There is also the variant first_aligned(const MatrixBase&) defined in DenseCoeffsBase.h.
|
||||
* \sa first_default_aligned()
|
||||
*/
|
||||
template<typename Scalar, typename Index>
|
||||
template<int Alignment, typename Scalar, typename Index>
|
||||
inline Index first_aligned(const Scalar* array, Index size)
|
||||
{
|
||||
static const Index PacketSize = packet_traits<Scalar>::size;
|
||||
static const Index PacketAlignedMask = PacketSize-1;
|
||||
static const Index ScalarSize = sizeof(Scalar);
|
||||
static const Index AlignmentSize = Alignment / ScalarSize;
|
||||
static const Index AlignmentMask = AlignmentSize-1;
|
||||
|
||||
if(PacketSize==1)
|
||||
if(AlignmentSize<=1)
|
||||
{
|
||||
// Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements
|
||||
// of the array have the same alignment.
|
||||
// Either the requested alignment if smaller than a scalar, or it exactly match a 1 scalar
|
||||
// so that all elements of the array have the same alignment.
|
||||
return 0;
|
||||
}
|
||||
else if(size_t(array) & (sizeof(Scalar)-1))
|
||||
else if( (std::size_t(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0)
|
||||
{
|
||||
// There is vectorization for this scalar type, but the array is not aligned to the size of a single scalar.
|
||||
// The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size.
|
||||
// Consequently, no element of the array is well aligned.
|
||||
return size;
|
||||
}
|
||||
else
|
||||
{
|
||||
return std::min<Index>( (PacketSize - (Index((size_t(array)/sizeof(Scalar))) & PacketAlignedMask))
|
||||
& PacketAlignedMask, size);
|
||||
return std::min<Index>( (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask, size);
|
||||
}
|
||||
}
|
||||
|
||||
/** \internal Returns the index of the first element of the array that is well aligned with respect the largest packet requirement.
|
||||
* \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */
|
||||
template<typename Scalar, typename Index>
|
||||
inline Index first_default_aligned(const Scalar* array, Index size)
|
||||
{
|
||||
typedef typename packet_traits<Scalar>::type DefaultPacketType;
|
||||
return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
|
||||
}
|
||||
|
||||
/** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size
|
||||
*/
|
||||
template<typename Index>
|
||||
@@ -557,18 +569,18 @@ inline Index first_multiple(Index size, Index base)
|
||||
// use memcpy on trivial types, i.e., on types that does not require an initialization ctor.
|
||||
template<typename T, bool UseMemcpy> struct smart_copy_helper;
|
||||
|
||||
template<typename T> void smart_copy(const T* start, const T* end, T* target)
|
||||
template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target)
|
||||
{
|
||||
smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
|
||||
}
|
||||
|
||||
template<typename T> struct smart_copy_helper<T,true> {
|
||||
static inline void run(const T* start, const T* end, T* target)
|
||||
EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
|
||||
{ memcpy(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); }
|
||||
};
|
||||
|
||||
template<typename T> struct smart_copy_helper<T,false> {
|
||||
static inline void run(const T* start, const T* end, T* target)
|
||||
EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
|
||||
{ std::copy(start, end, target); }
|
||||
};
|
||||
|
||||
@@ -687,9 +699,14 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
|
||||
* The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token.
|
||||
*/
|
||||
#ifdef EIGEN_ALLOCA
|
||||
// We always manually re-align the result of EIGEN_ALLOCA.
|
||||
// If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
|
||||
#define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+EIGEN_ALIGN_BYTES-1)) + EIGEN_ALIGN_BYTES-1) & ~(size_t(EIGEN_ALIGN_BYTES-1)))
|
||||
|
||||
#if EIGEN_DEFAULT_ALIGN_BYTES>0
|
||||
// We always manually re-align the result of EIGEN_ALLOCA.
|
||||
// If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
|
||||
#define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<std::size_t>(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))
|
||||
#else
|
||||
#define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
|
||||
#endif
|
||||
|
||||
#define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
|
||||
Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
|
||||
@@ -713,7 +730,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
|
||||
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] ***
|
||||
*****************************************************************************/
|
||||
|
||||
#if EIGEN_ALIGN
|
||||
#if EIGEN_MAX_ALIGN_BYTES!=0
|
||||
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
|
||||
void* operator new(size_t size, const std::nothrow_t&) throw() { \
|
||||
EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
|
||||
@@ -749,7 +766,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
|
||||
|
||||
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
|
||||
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
|
||||
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_ALIGN_BYTES==0)))
|
||||
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0)))
|
||||
|
||||
/****************************************************************************/
|
||||
|
||||
|
||||
@@ -117,6 +117,10 @@ template<typename T> struct enable_if<true,T>
|
||||
{ typedef T type; };
|
||||
|
||||
#if defined(__CUDA_ARCH__)
|
||||
#if !defined(__FLT_EPSILON__)
|
||||
#define __FLT_EPSILON__ FLT_EPSILON
|
||||
#define __DBL_EPSILON__ DBL_EPSILON
|
||||
#endif
|
||||
|
||||
namespace device {
|
||||
|
||||
@@ -124,16 +128,53 @@ template<typename T> struct numeric_limits
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static T epsilon() { return 0; }
|
||||
static T (max)() { assert(false && "Highest not supported for this type"); }
|
||||
static T (min)() { assert(false && "Lowest not supported for this type"); }
|
||||
};
|
||||
template<> struct numeric_limits<float>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static float epsilon() { return __FLT_EPSILON__; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static float (max)() { return CUDART_MAX_NORMAL_F; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static float (min)() { return __FLT_EPSILON__; }
|
||||
};
|
||||
template<> struct numeric_limits<double>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static double epsilon() { return __DBL_EPSILON__; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static double (max)() { return CUDART_INF; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static double (min)() { return __DBL_EPSILON__; }
|
||||
};
|
||||
template<> struct numeric_limits<int>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static int epsilon() { return 0; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static int (max)() { return INT_MAX; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static int (min)() { return INT_MIN; }
|
||||
};
|
||||
template<> struct numeric_limits<long>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static long epsilon() { return 0; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static long (max)() { return LONG_MAX; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static long (min)() { return LONG_MIN; }
|
||||
};
|
||||
template<> struct numeric_limits<long long>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static long long epsilon() { return 0; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static long long (max)() { return LLONG_MAX; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static long long (min)() { return LLONG_MIN; }
|
||||
};
|
||||
|
||||
}
|
||||
@@ -145,11 +186,11 @@ template<> struct numeric_limits<double>
|
||||
*/
|
||||
class noncopyable
|
||||
{
|
||||
noncopyable(const noncopyable&);
|
||||
const noncopyable& operator=(const noncopyable&);
|
||||
EIGEN_DEVICE_FUNC noncopyable(const noncopyable&);
|
||||
EIGEN_DEVICE_FUNC const noncopyable& operator=(const noncopyable&);
|
||||
protected:
|
||||
noncopyable() {}
|
||||
~noncopyable() {}
|
||||
EIGEN_DEVICE_FUNC noncopyable() {}
|
||||
EIGEN_DEVICE_FUNC ~noncopyable() {}
|
||||
};
|
||||
|
||||
|
||||
@@ -160,7 +201,13 @@ protected:
|
||||
* upcoming next STL generation (using a templated result member).
|
||||
* If none of these members is provided, then the type of the first argument is returned. FIXME, that behavior is a pretty bad hack.
|
||||
*/
|
||||
template<typename T> struct result_of {};
|
||||
#ifdef EIGEN_HAS_STD_RESULT_OF
|
||||
template<typename T> struct result_of {
|
||||
typedef typename std::result_of<T>::type type1;
|
||||
typedef typename remove_all<type1>::type type;
|
||||
};
|
||||
#else
|
||||
template<typename T> struct result_of { };
|
||||
|
||||
struct has_none {int a[1];};
|
||||
struct has_std_result_type {int a[2];};
|
||||
@@ -178,10 +225,10 @@ struct unary_result_of_select<Func, ArgType, sizeof(has_tr1_result)> {typedef ty
|
||||
template<typename Func, typename ArgType>
|
||||
struct result_of<Func(ArgType)> {
|
||||
template<typename T>
|
||||
static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
|
||||
static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
|
||||
template<typename T>
|
||||
static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType)>::type const * = 0);
|
||||
static has_none testFunctor(...);
|
||||
static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType)>::type const * = 0);
|
||||
static has_none testFunctor(...);
|
||||
|
||||
// note that the following indirection is needed for gcc-3.3
|
||||
enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
|
||||
@@ -202,15 +249,16 @@ struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_tr1_result)>
|
||||
template<typename Func, typename ArgType0, typename ArgType1>
|
||||
struct result_of<Func(ArgType0,ArgType1)> {
|
||||
template<typename T>
|
||||
static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
|
||||
static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
|
||||
template<typename T>
|
||||
static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1)>::type const * = 0);
|
||||
static has_none testFunctor(...);
|
||||
static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1)>::type const * = 0);
|
||||
static has_none testFunctor(...);
|
||||
|
||||
// note that the following indirection is needed for gcc-3.3
|
||||
enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
|
||||
typedef typename binary_result_of_select<Func, ArgType0, ArgType1, FunctorType>::type type;
|
||||
};
|
||||
#endif
|
||||
|
||||
/** \internal In short, it computes int(sqrt(\a Y)) with \a Y an integer.
|
||||
* Usage example: \code meta_sqrt<1023>::ret \endcode
|
||||
@@ -284,6 +332,14 @@ template<typename T> EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b =
|
||||
template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); }
|
||||
#endif
|
||||
|
||||
// Integer division with rounding up.
|
||||
// T is assumed to be an integer type with a>=0, and b>0
|
||||
template<typename T>
|
||||
T div_ceil(const T &a, const T &b)
|
||||
{
|
||||
return (a+b-1) / b;
|
||||
}
|
||||
|
||||
} // end namespace numext
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -119,7 +119,72 @@ template<typename T> struct unpacket_traits
|
||||
{
|
||||
typedef T type;
|
||||
typedef T half;
|
||||
enum {size=1};
|
||||
enum
|
||||
{
|
||||
size = 1,
|
||||
alignment = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<int Size, typename PacketType,
|
||||
bool Stop = Size==Dynamic || (Size%unpacket_traits<PacketType>::size)==0 || is_same<PacketType,typename unpacket_traits<PacketType>::half>::value>
|
||||
struct find_best_packet_helper;
|
||||
|
||||
template< int Size, typename PacketType>
|
||||
struct find_best_packet_helper<Size,PacketType,true>
|
||||
{
|
||||
typedef PacketType type;
|
||||
};
|
||||
|
||||
template<int Size, typename PacketType>
|
||||
struct find_best_packet_helper<Size,PacketType,false>
|
||||
{
|
||||
typedef typename find_best_packet_helper<Size,typename unpacket_traits<PacketType>::half>::type type;
|
||||
};
|
||||
|
||||
template<typename T, int Size>
|
||||
struct find_best_packet
|
||||
{
|
||||
typedef typename find_best_packet_helper<Size,typename packet_traits<T>::type>::type type;
|
||||
};
|
||||
|
||||
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
|
||||
template<int ArrayBytes, int AlignmentBytes,
|
||||
bool Match = bool((ArrayBytes%AlignmentBytes)==0),
|
||||
bool TryHalf = bool(AlignmentBytes>EIGEN_MIN_ALIGN_BYTES) >
|
||||
struct compute_default_alignment_helper
|
||||
{
|
||||
enum { value = 0 };
|
||||
};
|
||||
|
||||
template<int ArrayBytes, int AlignmentBytes, bool TryHalf>
|
||||
struct compute_default_alignment_helper<ArrayBytes, AlignmentBytes, true, TryHalf> // Match
|
||||
{
|
||||
enum { value = AlignmentBytes };
|
||||
};
|
||||
|
||||
template<int ArrayBytes, int AlignmentBytes>
|
||||
struct compute_default_alignment_helper<ArrayBytes, AlignmentBytes, false, true> // Try-half
|
||||
{
|
||||
// current packet too large, try with an half-packet
|
||||
enum { value = compute_default_alignment_helper<ArrayBytes, AlignmentBytes/2>::value };
|
||||
};
|
||||
#else
|
||||
// If static alignment is disabled, no need to bother.
|
||||
// This also avoids a division by zero in "bool Match = bool((ArrayBytes%AlignmentBytes)==0)"
|
||||
template<int ArrayBytes, int AlignmentBytes>
|
||||
struct compute_default_alignment_helper
|
||||
{
|
||||
enum { value = 0 };
|
||||
};
|
||||
#endif
|
||||
|
||||
template<typename T, int Size> struct compute_default_alignment {
|
||||
enum { value = compute_default_alignment_helper<Size*sizeof(T),EIGEN_MAX_STATIC_ALIGN_BYTES>::value };
|
||||
};
|
||||
|
||||
template<typename T> struct compute_default_alignment<T,Dynamic> {
|
||||
enum { value = EIGEN_MAX_ALIGN_BYTES };
|
||||
};
|
||||
|
||||
template<typename _Scalar, int _Rows, int _Cols,
|
||||
@@ -153,40 +218,6 @@ class compute_matrix_flags
|
||||
enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit };
|
||||
};
|
||||
|
||||
template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
|
||||
class compute_matrix_evaluator_flags
|
||||
{
|
||||
enum {
|
||||
row_major_bit = Options&RowMajor ? RowMajorBit : 0,
|
||||
is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,
|
||||
|
||||
aligned_bit =
|
||||
(
|
||||
((Options&DontAlign)==0)
|
||||
&& (
|
||||
#if EIGEN_ALIGN_STATICALLY
|
||||
((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0))
|
||||
#else
|
||||
0
|
||||
#endif
|
||||
|
||||
||
|
||||
|
||||
#if EIGEN_ALIGN
|
||||
is_dynamic_size_storage
|
||||
#else
|
||||
0
|
||||
#endif
|
||||
|
||||
)
|
||||
) ? AlignedBit : 0,
|
||||
packet_access_bit = packet_traits<Scalar>::Vectorizable && aligned_bit ? PacketAccessBit : 0
|
||||
};
|
||||
|
||||
public:
|
||||
enum { ret = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit | aligned_bit };
|
||||
};
|
||||
|
||||
template<int _Rows, int _Cols> struct size_at_compile_time
|
||||
{
|
||||
enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };
|
||||
@@ -309,9 +340,6 @@ template<typename T> struct plain_matrix_type_row_major
|
||||
> type;
|
||||
};
|
||||
|
||||
// we should be able to get rid of this one too
|
||||
template<typename T> struct must_nest_by_value { enum { ret = false }; };
|
||||
|
||||
/** \internal The reference selector for template expressions. The idea is that we don't
|
||||
* need to use references for expressions since they are light weight proxy
|
||||
* objects which should generate no copying overhead. */
|
||||
@@ -323,6 +351,12 @@ struct ref_selector
|
||||
T const&,
|
||||
const T
|
||||
>::type type;
|
||||
|
||||
typedef typename conditional<
|
||||
bool(traits<T>::Flags & NestByRefBit),
|
||||
T &,
|
||||
T
|
||||
>::type non_const_type;
|
||||
};
|
||||
|
||||
/** \internal Adds the const qualifier on the value-type of T2 if and only if T1 is a const type */
|
||||
@@ -337,13 +371,6 @@ struct transfer_constness
|
||||
};
|
||||
|
||||
|
||||
// When using evaluators, we never evaluate when assembling the expression!!
|
||||
// TODO: get rid of this nested class since it's just an alias for ref_selector.
|
||||
template<typename T, int n=1, typename PlainObject = void> struct nested
|
||||
{
|
||||
typedef typename ref_selector<T>::type type;
|
||||
};
|
||||
|
||||
// However, we still need a mechanism to detect whether an expression which is evaluated multiple time
|
||||
// has to be evaluated into a temporary.
|
||||
// That's the purpose of this new nested_eval helper:
|
||||
@@ -428,7 +455,9 @@ struct special_scalar_op_base : public DenseCoeffsBase<Derived>
|
||||
{
|
||||
// dummy operator* so that the
|
||||
// "using special_scalar_op_base::operator*" compiles
|
||||
void operator*() const;
|
||||
struct dummy {};
|
||||
void operator*(dummy) const;
|
||||
void operator/(dummy) const;
|
||||
};
|
||||
|
||||
template<typename Derived,typename Scalar,typename OtherScalar>
|
||||
@@ -452,6 +481,16 @@ struct special_scalar_op_base<Derived,Scalar,OtherScalar,true> : public DenseCo
|
||||
#endif
|
||||
return static_cast<const special_scalar_op_base&>(matrix).operator*(scalar);
|
||||
}
|
||||
|
||||
const CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, Derived>
|
||||
operator/(const OtherScalar& scalar) const
|
||||
{
|
||||
#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
|
||||
EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
|
||||
#endif
|
||||
return CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, Derived>
|
||||
(*static_cast<const Derived*>(this), scalar_quotient2_op<Scalar,OtherScalar>(scalar));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename XprType, typename CastType> struct cast_return_type
|
||||
@@ -603,6 +642,18 @@ template<typename T, int S> struct is_diagonal<DiagonalMatrix<T,S> >
|
||||
template<typename S1, typename S2> struct glue_shapes;
|
||||
template<> struct glue_shapes<DenseShape,TriangularShape> { typedef TriangularShape type; };
|
||||
|
||||
template<typename T1, typename T2>
|
||||
bool is_same_dense(const T1 &mat1, const T2 &mat2, typename enable_if<has_direct_access<T1>::ret&&has_direct_access<T2>::ret, T1>::type * = 0)
|
||||
{
|
||||
return (mat1.data()==mat2.data()) && (mat1.innerStride()==mat2.innerStride()) && (mat1.outerStride()==mat2.outerStride());
|
||||
}
|
||||
|
||||
template<typename T1, typename T2>
|
||||
bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_access<T1>::ret&&has_direct_access<T2>::ret), T1>::type * = 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor
|
||||
|
||||
@@ -234,6 +234,12 @@ template<typename _MatrixType> class ComplexEigenSolver
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
EigenvectorType m_eivec;
|
||||
EigenvalueType m_eivalues;
|
||||
ComplexSchur<MatrixType> m_schur;
|
||||
@@ -251,6 +257,8 @@ template<typename MatrixType>
|
||||
ComplexEigenSolver<MatrixType>&
|
||||
ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
// this code is inspired from Jampack
|
||||
eigen_assert(matrix.cols() == matrix.rows());
|
||||
|
||||
|
||||
@@ -299,6 +299,13 @@ template<typename _MatrixType> class EigenSolver
|
||||
void doComputeEigenvectors();
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
|
||||
}
|
||||
|
||||
MatrixType m_eivec;
|
||||
EigenvalueType m_eivalues;
|
||||
bool m_isInitialized;
|
||||
@@ -366,6 +373,8 @@ template<typename MatrixType>
|
||||
EigenSolver<MatrixType>&
|
||||
EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
using std::sqrt;
|
||||
using std::abs;
|
||||
using numext::isfinite;
|
||||
@@ -390,7 +399,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect
|
||||
if (i == matrix.cols() - 1 || m_matT.coeff(i+1, i) == Scalar(0))
|
||||
{
|
||||
m_eivalues.coeffRef(i) = m_matT.coeff(i, i);
|
||||
if(!isfinite(m_eivalues.coeffRef(i)))
|
||||
if(!(isfinite)(m_eivalues.coeffRef(i)))
|
||||
{
|
||||
m_isInitialized = true;
|
||||
m_eigenvectorsOk = false;
|
||||
@@ -408,7 +417,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect
|
||||
{
|
||||
Scalar t0 = m_matT.coeff(i+1, i);
|
||||
Scalar t1 = m_matT.coeff(i, i+1);
|
||||
Scalar maxval = numext::maxi(abs(p),numext::maxi(abs(t0),abs(t1)));
|
||||
Scalar maxval = numext::maxi<Scalar>(abs(p),numext::maxi<Scalar>(abs(t0),abs(t1)));
|
||||
t0 /= maxval;
|
||||
t1 /= maxval;
|
||||
Scalar p0 = p/maxval;
|
||||
@@ -417,7 +426,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect
|
||||
|
||||
m_eivalues.coeffRef(i) = ComplexScalar(m_matT.coeff(i+1, i+1) + p, z);
|
||||
m_eivalues.coeffRef(i+1) = ComplexScalar(m_matT.coeff(i+1, i+1) + p, -z);
|
||||
if(!(isfinite(m_eivalues.coeffRef(i)) && isfinite(m_eivalues.coeffRef(i+1))))
|
||||
if(!((isfinite)(m_eivalues.coeffRef(i)) && (isfinite)(m_eivalues.coeffRef(i+1))))
|
||||
{
|
||||
m_isInitialized = true;
|
||||
m_eigenvectorsOk = false;
|
||||
@@ -475,7 +484,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
|
||||
}
|
||||
|
||||
// Backsubstitute to find vectors of upper triangular form
|
||||
if (norm == 0.0)
|
||||
if (norm == Scalar(0))
|
||||
{
|
||||
return;
|
||||
}
|
||||
@@ -497,7 +506,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
|
||||
Scalar w = m_matT.coeff(i,i) - p;
|
||||
Scalar r = m_matT.row(i).segment(l,n-l+1).dot(m_matT.col(n).segment(l, n-l+1));
|
||||
|
||||
if (m_eivalues.coeff(i).imag() < 0.0)
|
||||
if (m_eivalues.coeff(i).imag() < Scalar(0))
|
||||
{
|
||||
lastw = w;
|
||||
lastr = r;
|
||||
@@ -505,9 +514,9 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
|
||||
else
|
||||
{
|
||||
l = i;
|
||||
if (m_eivalues.coeff(i).imag() == 0.0)
|
||||
if (m_eivalues.coeff(i).imag() == Scalar(0))
|
||||
{
|
||||
if (w != 0.0)
|
||||
if (w != Scalar(0))
|
||||
m_matT.coeffRef(i,n) = -r / w;
|
||||
else
|
||||
m_matT.coeffRef(i,n) = -r / (eps * norm);
|
||||
@@ -545,19 +554,19 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
|
||||
}
|
||||
else
|
||||
{
|
||||
std::complex<Scalar> cc = cdiv<Scalar>(0.0,-m_matT.coeff(n-1,n),m_matT.coeff(n-1,n-1)-p,q);
|
||||
std::complex<Scalar> cc = cdiv<Scalar>(Scalar(0),-m_matT.coeff(n-1,n),m_matT.coeff(n-1,n-1)-p,q);
|
||||
m_matT.coeffRef(n-1,n-1) = numext::real(cc);
|
||||
m_matT.coeffRef(n-1,n) = numext::imag(cc);
|
||||
}
|
||||
m_matT.coeffRef(n,n-1) = 0.0;
|
||||
m_matT.coeffRef(n,n) = 1.0;
|
||||
m_matT.coeffRef(n,n-1) = Scalar(0);
|
||||
m_matT.coeffRef(n,n) = Scalar(1);
|
||||
for (Index i = n-2; i >= 0; i--)
|
||||
{
|
||||
Scalar ra = m_matT.row(i).segment(l, n-l+1).dot(m_matT.col(n-1).segment(l, n-l+1));
|
||||
Scalar sa = m_matT.row(i).segment(l, n-l+1).dot(m_matT.col(n).segment(l, n-l+1));
|
||||
Scalar w = m_matT.coeff(i,i) - p;
|
||||
|
||||
if (m_eivalues.coeff(i).imag() < 0.0)
|
||||
if (m_eivalues.coeff(i).imag() < Scalar(0))
|
||||
{
|
||||
lastw = w;
|
||||
lastra = ra;
|
||||
@@ -579,7 +588,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
|
||||
Scalar y = m_matT.coeff(i+1,i);
|
||||
Scalar vr = (m_eivalues.coeff(i).real() - p) * (m_eivalues.coeff(i).real() - p) + m_eivalues.coeff(i).imag() * m_eivalues.coeff(i).imag() - q * q;
|
||||
Scalar vi = (m_eivalues.coeff(i).real() - p) * Scalar(2) * q;
|
||||
if ((vr == 0.0) && (vi == 0.0))
|
||||
if ((vr == Scalar(0)) && (vi == Scalar(0)))
|
||||
vr = eps * norm * (abs(w) + abs(q) + abs(x) + abs(y) + abs(lastw));
|
||||
|
||||
std::complex<Scalar> cc = cdiv(x*lastra-lastw*ra+q*sa,x*lastsa-lastw*sa-q*ra,vr,vi);
|
||||
@@ -599,7 +608,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
|
||||
}
|
||||
|
||||
// Overflow control
|
||||
Scalar t = numext::maxi(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n)));
|
||||
Scalar t = numext::maxi<Scalar>(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n)));
|
||||
if ((eps * t) * t > Scalar(1))
|
||||
m_matT.block(i, n-1, size-i, 2) /= t;
|
||||
|
||||
|
||||
@@ -263,6 +263,13 @@ template<typename _MatrixType> class GeneralizedEigenSolver
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
|
||||
}
|
||||
|
||||
MatrixType m_eivec;
|
||||
ComplexVectorType m_alphas;
|
||||
VectorType m_betas;
|
||||
@@ -290,6 +297,8 @@ template<typename MatrixType>
|
||||
GeneralizedEigenSolver<MatrixType>&
|
||||
GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixType& B, bool computeEigenvectors)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
using std::sqrt;
|
||||
using std::abs;
|
||||
eigen_assert(A.cols() == A.rows() && B.cols() == A.rows() && B.cols() == B.rows());
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user