mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Compare commits
601 Commits
3.2.0
...
before-eva
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9452eb38f8 | ||
|
|
e6d55c081b | ||
|
|
57f71a5552 | ||
|
|
5e890d3ad7 | ||
|
|
2d90484450 | ||
|
|
84a7ead059 | ||
|
|
d6236d3b26 | ||
|
|
921a645481 | ||
|
|
4b678b96eb | ||
|
|
51b3f558bb | ||
|
|
6162672dc5 | ||
|
|
e54898f53e | ||
|
|
fafc829424 | ||
|
|
abb33258ce | ||
|
|
25bceefb4e | ||
|
|
60314beb38 | ||
|
|
280661e67d | ||
|
|
ff9bfc45f7 | ||
|
|
42e27d41a2 | ||
|
|
a44a343f03 | ||
|
|
18fbe7e7d4 | ||
|
|
3eb5253ca1 | ||
|
|
305aa1f9c5 | ||
|
|
eb39296028 | ||
|
|
b121eecf60 | ||
|
|
0369db12af | ||
|
|
01f3ca3e8d | ||
|
|
aec3d90ca6 | ||
|
|
460662cbcc | ||
|
|
1ed9e2d004 | ||
|
|
e49e84d979 | ||
|
|
c3e4080474 | ||
|
|
0ba490cf80 | ||
|
|
25a3e65a68 | ||
|
|
be3477e206 | ||
|
|
2e50289ba3 | ||
|
|
b49ef99617 | ||
|
|
9c0aa81fbf | ||
|
|
eeadc06e83 | ||
|
|
4403800e11 | ||
|
|
9062f74d13 | ||
|
|
6a3423da80 | ||
|
|
50085d2c28 | ||
|
|
e51da9c3a8 | ||
|
|
953ec08089 | ||
|
|
3e59163a24 | ||
|
|
c53f88297c | ||
|
|
6f58a41097 | ||
|
|
db76193bc7 | ||
|
|
d79516660c | ||
|
|
ba694ce8cf | ||
|
|
5f3d542b8a | ||
|
|
a0a87410d0 | ||
|
|
2c625ec9ba | ||
|
|
d1e9f39a9a | ||
|
|
7f15f27a9e | ||
|
|
922694a2d1 | ||
|
|
a8283e0ed2 | ||
|
|
529e6cb552 | ||
|
|
58687aa5e6 | ||
|
|
339f14b8d1 | ||
|
|
d4cc1bdc7f | ||
|
|
8e19027130 | ||
|
|
ef4a86d6b8 | ||
|
|
68eafc10b1 | ||
|
|
1cb71a8782 | ||
|
|
ac1bb3e5b3 | ||
|
|
da62eb22e4 | ||
|
|
77af4cc3c9 | ||
|
|
5e72151ca5 | ||
|
|
2cd38a6634 | ||
|
|
84ad8ce7e3 | ||
|
|
40b74411e4 | ||
|
|
14c8793a70 | ||
|
|
424c3ad266 | ||
|
|
a53f2b0e43 | ||
|
|
cd0b433540 | ||
|
|
338d2ec42b | ||
|
|
0a945687b7 | ||
|
|
a0d1aac6c5 | ||
|
|
4f440b8123 | ||
|
|
a20e2462bf | ||
|
|
c0f76ce2cf | ||
|
|
df604e4f49 | ||
|
|
5f1eedd655 | ||
|
|
b1169ce40c | ||
|
|
d1460d9278 | ||
|
|
cf7cf7b490 | ||
|
|
f27f55bee3 | ||
|
|
e955725ff1 | ||
|
|
23bb592a2d | ||
|
|
75d19bb087 | ||
|
|
62f948c56a | ||
|
|
da1e356306 | ||
|
|
54fbbe7b4e | ||
|
|
1967e7f2f3 | ||
|
|
77d57cd681 | ||
|
|
e3557e8dd2 | ||
|
|
5214466b7a | ||
|
|
5c4733f6e4 | ||
|
|
b47ef1431f | ||
|
|
e25e674852 | ||
|
|
4b6b76463a | ||
|
|
904509fbb6 | ||
|
|
0dfb73d46a | ||
|
|
7fa83e7374 | ||
|
|
1eefa5a841 | ||
|
|
e4b6979334 | ||
|
|
b9ee880f07 | ||
|
|
2bf58316ee | ||
|
|
85777fc131 | ||
|
|
4860da2de1 | ||
|
|
7a915f6846 | ||
|
|
1a817d3b70 | ||
|
|
8ee38d2db6 | ||
|
|
f365380496 | ||
|
|
3a9f9faada | ||
|
|
998455a570 | ||
|
|
0a8e4712d1 | ||
|
|
bf334b8ae5 | ||
|
|
324e7e8fc9 | ||
|
|
75e574275c | ||
|
|
08c615f1e4 | ||
|
|
d73ee84d37 | ||
|
|
f9496d341f | ||
|
|
e745a450de | ||
|
|
e86adc87e9 | ||
|
|
b59f045c07 | ||
|
|
755be9016a | ||
|
|
957c2c291b | ||
|
|
15c2c083e8 | ||
|
|
3117036b80 | ||
|
|
1c3843bf86 | ||
|
|
0ddde223e9 | ||
|
|
5dbbe6b400 | ||
|
|
963d338922 | ||
|
|
98ef44fe55 | ||
|
|
1fdef63d1f | ||
|
|
de150b1e14 | ||
|
|
55453c51e8 | ||
|
|
eb49100de9 | ||
|
|
afb1a8c124 | ||
|
|
c06ec0f464 | ||
|
|
95ecd582a3 | ||
|
|
b0979b8598 | ||
|
|
1ee4e2db15 | ||
|
|
abc1ca0af1 | ||
|
|
c331ce6b8e | ||
|
|
ed37c44765 | ||
|
|
96cb58fa3b | ||
|
|
ea99433523 | ||
|
|
cee62018fc | ||
|
|
58cfac9a12 | ||
|
|
45515779d3 | ||
|
|
0f1e321dd4 | ||
|
|
789674809f | ||
|
|
eb56461ac2 | ||
|
|
e3ab46b8c9 | ||
|
|
c794099e69 | ||
|
|
aa524604b7 | ||
|
|
c55c5763fe | ||
|
|
eda79321be | ||
|
|
df92649379 | ||
|
|
0f94607947 | ||
|
|
edebb15275 | ||
|
|
9aa3dc4e21 | ||
|
|
881aab14b4 | ||
|
|
5811e68dd1 | ||
|
|
9217de8bf2 | ||
|
|
84cb1d72b8 | ||
|
|
56de8d3816 | ||
|
|
b4beba72a2 | ||
|
|
b5e3d76aa5 | ||
|
|
0b7f95a03f | ||
|
|
d67aa1549b | ||
|
|
07986189b7 | ||
|
|
2fb64578aa | ||
|
|
ecf1f1d589 | ||
|
|
e7ef26fa44 | ||
|
|
450d0c3de0 | ||
|
|
f9d2f3903e | ||
|
|
6f64b0b487 | ||
|
|
c20e3641de | ||
|
|
2dbfd83424 | ||
|
|
7388fdf560 | ||
|
|
c9788d55b9 | ||
|
|
ae4d9434e2 | ||
|
|
4def7b1fa5 | ||
|
|
c79bd4b64b | ||
|
|
cf7eaed38d | ||
|
|
3d8d0f6269 | ||
|
|
b0e19db1cf | ||
|
|
8d85ce88e1 | ||
|
|
4eb92e5647 | ||
|
|
ccb4dec719 | ||
|
|
82b09fcb91 | ||
|
|
ecbd67a15a | ||
|
|
934ce93886 | ||
|
|
5c5231ab71 | ||
|
|
2606abed53 | ||
|
|
a7d20038df | ||
|
|
3454b4e5f1 | ||
|
|
94684721bd | ||
|
|
1388f4f9fd | ||
|
|
6d665d446b | ||
|
|
2c3c95990d | ||
|
|
6d6df90c9a | ||
|
|
c354bd47f7 | ||
|
|
9777a5ca60 | ||
|
|
9746396d1b | ||
|
|
e5d0cb54a5 | ||
|
|
1dd015fea6 | ||
|
|
45a4aad572 | ||
|
|
e1d461352e | ||
|
|
11fbdcbc38 | ||
|
|
0fa8290366 | ||
|
|
d936ddc3d1 | ||
|
|
de8336a9bc | ||
|
|
d5a795f673 | ||
|
|
e0dbb68c2f | ||
|
|
20c840be15 | ||
|
|
1afd50e0f3 | ||
|
|
3c66bb136b | ||
|
|
7098e6d976 | ||
|
|
feaf7c7e6d | ||
|
|
d567e3b893 | ||
|
|
148acf8e4f | ||
|
|
0587db8bf5 | ||
|
|
7903d3f27b | ||
|
|
a803ff18a9 | ||
|
|
91288e9bf9 | ||
|
|
fbd5eac7cf | ||
|
|
1b333c89c9 | ||
|
|
a1fcf599fa | ||
|
|
a91a7a1964 | ||
|
|
3b2321e3ab | ||
|
|
b446ff037e | ||
|
|
5afcb4965c | ||
|
|
096af59799 | ||
|
|
8044b00a7f | ||
|
|
aecc78325a | ||
|
|
1cb8de1250 | ||
|
|
56c4851323 | ||
|
|
ceae5b4145 | ||
|
|
ec65e6648c | ||
|
|
d992634fbc | ||
|
|
5e8622477b | ||
|
|
1221dd90aa | ||
|
|
93870d95b7 | ||
|
|
f603823ef3 | ||
|
|
8d0441052e | ||
|
|
82c8163067 | ||
|
|
1c0728043a | ||
|
|
e497a27ddc | ||
|
|
ad59ade116 | ||
|
|
39bfbd43f0 | ||
|
|
10aa14592a | ||
|
|
8d2bb2c20d | ||
|
|
c94fde118a | ||
|
|
51e85c936d | ||
|
|
8a94cb3edd | ||
|
|
7f3162f707 | ||
|
|
ee86679096 | ||
|
|
58fe2fc2b2 | ||
|
|
729363114f | ||
|
|
1697d7a179 | ||
|
|
3e1fe8e416 | ||
|
|
b776458ccb | ||
|
|
c4902a3d01 | ||
|
|
7d73c7f18b | ||
|
|
6f123d209e | ||
|
|
052aedd394 | ||
|
|
fb03b56647 | ||
|
|
6a81594771 | ||
|
|
9ce0d78513 | ||
|
|
a419cea4a0 | ||
|
|
ba3457cab2 | ||
|
|
14bc4b9704 | ||
|
|
e45a6bed45 | ||
|
|
cc73164aa8 | ||
|
|
f0a4c9d5ab | ||
|
|
8be011e776 | ||
|
|
a078f442a3 | ||
|
|
cf1a7bfbe1 | ||
|
|
bc401eb6fa | ||
|
|
b286a1e75c | ||
|
|
6bf3cc2732 | ||
|
|
7ae9b0805d | ||
|
|
08f7b3221d | ||
|
|
72707a8664 | ||
|
|
8a0845ebd7 | ||
|
|
3e42b775ea | ||
|
|
dead9085c0 | ||
|
|
4fe56a0e02 | ||
|
|
35a2c9cde7 | ||
|
|
bb4b67cf39 | ||
|
|
2db792852f | ||
|
|
847d801a4c | ||
|
|
2379ccffcb | ||
|
|
88aa18df64 | ||
|
|
bbc0ada12a | ||
|
|
1b3d7fc04c | ||
|
|
804ef2350d | ||
|
|
a6b130c63c | ||
|
|
8eac97138a | ||
|
|
1dd176b0b0 | ||
|
|
131027ee0a | ||
|
|
db7d49efbb | ||
|
|
9fdc6258cf | ||
|
|
21fecd5252 | ||
|
|
6fecb6f1b6 | ||
|
|
3e439889e0 | ||
|
|
5960befc20 | ||
|
|
cbd7e98174 | ||
|
|
7ed9441ea4 | ||
|
|
37a1d736bf | ||
|
|
b14a4628af | ||
|
|
3573a10712 | ||
|
|
bd6eca059d | ||
|
|
ed461ba9bc | ||
|
|
3bb57e21a8 | ||
|
|
4b6b3f310f | ||
|
|
cd606bbc94 | ||
|
|
0508af4287 | ||
|
|
3283d98d13 | ||
|
|
0d3f496233 | ||
|
|
6df3bee687 | ||
|
|
97965dde9b | ||
|
|
0b1430ae10 | ||
|
|
c0e08e9e4b | ||
|
|
0715d49908 | ||
|
|
3291580630 | ||
|
|
14422decc2 | ||
|
|
7ea6ef8969 | ||
|
|
31c63ef0b4 | ||
|
|
e170e7070b | ||
|
|
c1921ad3e2 | ||
|
|
c4f08cfc05 | ||
|
|
9e71ecbeec | ||
|
|
ff8d81762d | ||
|
|
6c527bd811 | ||
|
|
e722f36ffa | ||
|
|
febfc7b9b4 | ||
|
|
64a85800bd | ||
|
|
a7621809fe | ||
|
|
6cf938df53 | ||
|
|
afcfb560a2 | ||
|
|
40c42d9788 | ||
|
|
deab937d45 | ||
|
|
66f1c56aab | ||
|
|
aa0db35185 | ||
|
|
4c08385b74 | ||
|
|
4ccff2d028 | ||
|
|
3a4616d6e3 | ||
|
|
92190a1caf | ||
|
|
ac409f51f1 | ||
|
|
a6a57748dd | ||
|
|
c8c81c1e74 | ||
|
|
60cd361ebe | ||
|
|
c8bfbf4a7e | ||
|
|
01fd880424 | ||
|
|
e3fb190edf | ||
|
|
cfd3d6ce9c | ||
|
|
c39a3fa7a1 | ||
|
|
2a564695f0 | ||
|
|
a58325ac2f | ||
|
|
4cd4be97a7 | ||
|
|
548216b7ca | ||
|
|
e15cb9f4f8 | ||
|
|
bbf373bbe9 | ||
|
|
1200bd2ef0 | ||
|
|
8a49dd5626 | ||
|
|
ce99b502ce | ||
|
|
033ee7f6d9 | ||
|
|
ba2f79e680 | ||
|
|
72461be962 | ||
|
|
33ca9b4ee6 | ||
|
|
ce41b72eb8 | ||
|
|
d5cc083782 | ||
|
|
7313f32efa | ||
|
|
04e1e38eed | ||
|
|
47679c50ae | ||
|
|
76d2ca27e5 | ||
|
|
41e89c73c7 | ||
|
|
ac69d8769f | ||
|
|
6b6071866b | ||
|
|
276801b25a | ||
|
|
d61345f366 | ||
|
|
49034d1570 | ||
|
|
c550a0e634 | ||
|
|
28b2abdbea | ||
|
|
654eab3bd6 | ||
|
|
5d1291a4de | ||
|
|
8b4dd78d57 | ||
|
|
f6bac196d5 | ||
|
|
46dd1bb1be | ||
|
|
6b471f205e | ||
|
|
03a956925a | ||
|
|
f97b3cd024 | ||
|
|
5e28c41549 | ||
|
|
e59b38abef | ||
|
|
8f2d068e84 | ||
|
|
5c2d1b4710 | ||
|
|
b93520b1a5 | ||
|
|
cb8da751a0 | ||
|
|
fe0b44e876 | ||
|
|
ae83f5ede9 | ||
|
|
76c230a84d | ||
|
|
57327cc2d5 | ||
|
|
5887e82729 | ||
|
|
af9851d1d7 | ||
|
|
8fe609311d | ||
|
|
8edc964734 | ||
|
|
a37bdfc955 | ||
|
|
03de5c2410 | ||
|
|
4f572e4c14 | ||
|
|
87aee5fda1 | ||
|
|
1bb1a57ef7 | ||
|
|
7c9cdd6030 | ||
|
|
a236e15048 | ||
|
|
ad1dc50b57 | ||
|
|
019dcfc21d | ||
|
|
19521c83b8 | ||
|
|
bbd49d194a | ||
|
|
8f496cd3a3 | ||
|
|
6dc0e59b1e | ||
|
|
2702788da7 | ||
|
|
58c0a6f0fd | ||
|
|
5974685866 | ||
|
|
7fae9b358d | ||
|
|
e14f529dac | ||
|
|
fe2f437642 | ||
|
|
90b5d303db | ||
|
|
9b863c1830 | ||
|
|
11fbbc51fa | ||
|
|
d3e84b747a | ||
|
|
2e606394b1 | ||
|
|
285112fc55 | ||
|
|
9f3f42d66a | ||
|
|
a0e8577b49 | ||
|
|
a6bff116f9 | ||
|
|
36052c4911 | ||
|
|
3d2a3bc755 | ||
|
|
ad9dc05663 | ||
|
|
ff075def5c | ||
|
|
4d7dfafbe7 | ||
|
|
3390db099a | ||
|
|
c6da881849 | ||
|
|
b61facb08b | ||
|
|
4a42843513 | ||
|
|
b433fb2857 | ||
|
|
2c0303c89e | ||
|
|
0bce534c8f | ||
|
|
6bef527f9d | ||
|
|
2d186da58a | ||
|
|
3736e00ae7 | ||
|
|
40f1548b32 | ||
|
|
446320b226 | ||
|
|
54e576c88a | ||
|
|
fe19f972e1 | ||
|
|
00dc45d0f9 | ||
|
|
4bb1c48f25 | ||
|
|
bd21c82a94 | ||
|
|
1b4623e713 | ||
|
|
a1f056cf2a | ||
|
|
4612a1cd87 | ||
|
|
07417bd03f | ||
|
|
7fa007e8bf | ||
|
|
ed78a76161 | ||
|
|
eda2f8948a | ||
|
|
16cbd3d72d | ||
|
|
86daf2f75c | ||
|
|
69c057ccb1 | ||
|
|
94a7a1ec00 | ||
|
|
5864e3fbd5 | ||
|
|
d2c4f4ab21 | ||
|
|
41321e4366 | ||
|
|
e6462c2ce3 | ||
|
|
1472f4bc61 | ||
|
|
a147500dee | ||
|
|
d1c48f1606 | ||
|
|
1b8394f71f | ||
|
|
4ecfdc4716 | ||
|
|
d9381598bc | ||
|
|
403be74861 | ||
|
|
1c61e28b32 | ||
|
|
c06e373beb | ||
|
|
7bca2910c7 | ||
|
|
2cf513e973 | ||
|
|
150c9fe536 | ||
|
|
e4ffb7729a | ||
|
|
d908ccc01c | ||
|
|
2b15e00106 | ||
|
|
127d7f2071 | ||
|
|
c47010e3d2 | ||
|
|
d4dd6aaed2 | ||
|
|
d3635b08da | ||
|
|
ebd6a7a46c | ||
|
|
e0dbc2913a | ||
|
|
1d89554f1b | ||
|
|
ace2ed7b87 | ||
|
|
956251b738 | ||
|
|
6f5f488a80 | ||
|
|
916d29e58f | ||
|
|
bffdc491b3 | ||
|
|
6719e56b5b | ||
|
|
c13e9bbabf | ||
|
|
e4acd6e2fd | ||
|
|
8a89ba9275 | ||
|
|
097a105603 | ||
|
|
306ce33e1c | ||
|
|
616f9cc593 | ||
|
|
2f0faf117e | ||
|
|
8710440951 | ||
|
|
8fdffdd573 | ||
|
|
3444f06f68 | ||
|
|
8f4d93a4b7 | ||
|
|
51b361b3bb | ||
|
|
7c99b38b7c | ||
|
|
b72a686830 | ||
|
|
e3058dd88b | ||
|
|
8ea7413a64 | ||
|
|
e90229a429 | ||
|
|
cf884a9815 | ||
|
|
ddf7753631 | ||
|
|
222eedf5f3 | ||
|
|
d0e543be26 | ||
|
|
8e6d0cba5f | ||
|
|
32d46dd9b8 | ||
|
|
e41bc6cbbf | ||
|
|
55b57fcba6 | ||
|
|
39491e3b75 | ||
|
|
68168e9eae | ||
|
|
6126ad801f | ||
|
|
43df1e707c | ||
|
|
b1f4601bf9 | ||
|
|
acb82c7f16 | ||
|
|
9ef3645cc7 | ||
|
|
5f11db695b | ||
|
|
2437215221 | ||
|
|
dd27b5c4a8 | ||
|
|
70131120ab | ||
|
|
6d86cd7224 | ||
|
|
75dab1ce5e | ||
|
|
e43934d60f | ||
|
|
75edc7cc8b | ||
|
|
5897695e8a | ||
|
|
a3a55357db | ||
|
|
084dc63b4c | ||
|
|
463343fb37 | ||
|
|
5879937f58 | ||
|
|
01190b3544 | ||
|
|
c00f688c64 | ||
|
|
51573da3a4 | ||
|
|
1191949e87 | ||
|
|
3d94ed9fa0 | ||
|
|
ede27f5780 | ||
|
|
dda869051d | ||
|
|
2320073e41 | ||
|
|
c587e63631 | ||
|
|
660b905e12 | ||
|
|
4f0bd557a4 | ||
|
|
736fe99fbf | ||
|
|
6fab4012a3 | ||
|
|
2f593ee67c | ||
|
|
20e535e142 | ||
|
|
4b780553e0 | ||
|
|
9be658f701 | ||
|
|
b8f0364a1c | ||
|
|
cbe92de2b5 | ||
|
|
eeb744dc8d | ||
|
|
d5501d3a90 | ||
|
|
3c423ccfe2 | ||
|
|
738d75d3eb | ||
|
|
a992fa74eb | ||
|
|
4466875d54 | ||
|
|
5c95892b83 | ||
|
|
639d03d900 | ||
|
|
d204bb57d0 | ||
|
|
c52cbd9de9 | ||
|
|
159a3bed9e | ||
|
|
25544dbec3 | ||
|
|
04bd1e3fc0 | ||
|
|
00e30a5fc4 | ||
|
|
55ec3cc6d5 | ||
|
|
9e2b4eeac0 | ||
|
|
c273a6c37c | ||
|
|
04a9ad6e10 | ||
|
|
4e26057f66 | ||
|
|
cce68d4e91 | ||
|
|
75b3391e3f | ||
|
|
231d4a6fda | ||
|
|
64054ee396 | ||
|
|
12e69ec896 | ||
|
|
8556ca3de5 | ||
|
|
9cd2d14005 | ||
|
|
4e2e615a7c | ||
|
|
12439e1249 | ||
|
|
d93c1c113b | ||
|
|
968f7591f8 | ||
|
|
5adcc6c7b4 |
3
.hgeol
3
.hgeol
@@ -1,6 +1,9 @@
|
||||
[patterns]
|
||||
*.sh = LF
|
||||
*.MINPACK = CRLF
|
||||
scripts/*.in = LF
|
||||
debug/msvc/*.dat = CRLF
|
||||
debug/msvc/*.natvis = CRLF
|
||||
unsupported/test/mpreal/*.* = CRLF
|
||||
** = native
|
||||
|
||||
|
||||
@@ -108,7 +108,8 @@ endif()
|
||||
set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320")
|
||||
|
||||
macro(ei_add_cxx_compiler_flag FLAG)
|
||||
string(REGEX REPLACE "-" "" SFLAG ${FLAG})
|
||||
string(REGEX REPLACE "-" "" SFLAG1 ${FLAG})
|
||||
string(REGEX REPLACE "\\+" "p" SFLAG ${SFLAG1})
|
||||
check_cxx_compiler_flag(${FLAG} COMPILER_SUPPORT_${SFLAG})
|
||||
if(COMPILER_SUPPORT_${SFLAG})
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
|
||||
@@ -118,7 +119,7 @@ endmacro(ei_add_cxx_compiler_flag)
|
||||
if(NOT MSVC)
|
||||
# We assume that other compilers are partly compatible with GNUCC
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
|
||||
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "-g3")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "-g0 -O2")
|
||||
|
||||
@@ -142,6 +143,9 @@ if(NOT MSVC)
|
||||
ei_add_cxx_compiler_flag("-Wpointer-arith")
|
||||
ei_add_cxx_compiler_flag("-Wwrite-strings")
|
||||
ei_add_cxx_compiler_flag("-Wformat-security")
|
||||
ei_add_cxx_compiler_flag("-Wshorten-64-to-32")
|
||||
ei_add_cxx_compiler_flag("-Wenum-conversion")
|
||||
ei_add_cxx_compiler_flag("-Wc++11-extensions")
|
||||
|
||||
ei_add_cxx_compiler_flag("-Wno-psabi")
|
||||
ei_add_cxx_compiler_flag("-Wno-variadic-macros")
|
||||
@@ -153,6 +157,7 @@ if(NOT MSVC)
|
||||
ei_add_cxx_compiler_flag("-wd981") # disable ICC's "operands are evaluated in unspecified order" remark
|
||||
ei_add_cxx_compiler_flag("-wd2304") # disbale ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor
|
||||
|
||||
|
||||
# The -ansi flag must be added last, otherwise it is also used as a linker flag by check_cxx_compiler_flag making it fails
|
||||
# Moreover we should not set both -strict-ansi and -ansi
|
||||
check_cxx_compiler_flag("-strict-ansi" COMPILER_SUPPORT_STRICTANSI)
|
||||
@@ -196,6 +201,18 @@ if(NOT MSVC)
|
||||
message(STATUS "Enabling SSE4.2 in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_AVX "Enable/Disable AVX in tests/examples" OFF)
|
||||
if(EIGEN_TEST_AVX)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
|
||||
message(STATUS "Enabling AVX in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_FMA "Enable/Disable FMA in tests/examples" OFF)
|
||||
if(EIGEN_TEST_FMA)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma")
|
||||
message(STATUS "Enabling FMA in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_ALTIVEC "Enable/Disable AltiVec in tests/examples" OFF)
|
||||
if(EIGEN_TEST_ALTIVEC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec")
|
||||
@@ -204,7 +221,7 @@ if(NOT MSVC)
|
||||
|
||||
option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
|
||||
if(EIGEN_TEST_NEON)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a"8)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a8")
|
||||
message(STATUS "Enabling NEON in tests/examples")
|
||||
endif()
|
||||
|
||||
@@ -284,6 +301,12 @@ if(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT)
|
||||
message(STATUS "Disabling alignment in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_NO_EXCEPTIONS "Disables C++ exceptions" OFF)
|
||||
if(EIGEN_TEST_NO_EXCEPTIONS)
|
||||
ei_add_cxx_compiler_flag("-fno-exceptions")
|
||||
message(STATUS "Disabling exceptions in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_C++0x "Enables all C++0x features." OFF)
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
|
||||
@@ -418,3 +441,31 @@ else()
|
||||
endif()
|
||||
|
||||
message(STATUS "")
|
||||
|
||||
set ( EIGEN_CONFIG_CMAKE_PATH
|
||||
lib${LIB_SUFFIX}/cmake/eigen3
|
||||
CACHE PATH "The directory where the CMake files are installed"
|
||||
)
|
||||
if ( NOT IS_ABSOLUTE EIGEN_CONFIG_CMAKE_PATH )
|
||||
set ( EIGEN_CONFIG_CMAKE_PATH ${CMAKE_INSTALL_PREFIX}/${EIGEN_CONFIG_CMAKE_PATH} )
|
||||
endif ()
|
||||
|
||||
set ( EIGEN_USE_FILE ${EIGEN_CONFIG_CMAKE_PATH}/UseEigen3.cmake )
|
||||
set ( EIGEN_VERSION_STRING ${EIGEN_VERSION_NUMBER} )
|
||||
set ( EIGEN_VERSION_MAJOR ${EIGEN_WORLD_VERSION} )
|
||||
set ( EIGEN_VERSION_MINOR ${EIGEN_MAJOR_VERSION} )
|
||||
set ( EIGEN_VERSION_PATCH ${EIGEN_MINOR_VERSION} )
|
||||
set ( EIGEN_DEFINITIONS "")
|
||||
set ( EIGEN_INCLUDE_DIR ${INCLUDE_INSTALL_DIR} )
|
||||
set ( EIGEN_INCLUDE_DIRS ${EIGEN_INCLUDE_DIR} )
|
||||
set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} )
|
||||
|
||||
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
@ONLY ESCAPE_QUOTES
|
||||
)
|
||||
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
DESTINATION ${EIGEN_CONFIG_CMAKE_PATH}
|
||||
)
|
||||
|
||||
11
Eigen/Array
11
Eigen/Array
@@ -1,11 +0,0 @@
|
||||
#ifndef EIGEN_ARRAY_MODULE_H
|
||||
#define EIGEN_ARRAY_MODULE_H
|
||||
|
||||
// include Core first to handle Eigen2 support macros
|
||||
#include "Core"
|
||||
|
||||
#ifndef EIGEN2_SUPPORT
|
||||
#error The Eigen/Array header does no longer exist in Eigen3. All that functionality has moved to Eigen/Core.
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_ARRAY_MODULE_H
|
||||
@@ -10,9 +10,11 @@
|
||||
*
|
||||
*
|
||||
* This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices.
|
||||
* Those decompositions are accessible via the following MatrixBase methods:
|
||||
* - MatrixBase::llt(),
|
||||
* Those decompositions are also accessible via the following methods:
|
||||
* - MatrixBase::llt()
|
||||
* - MatrixBase::ldlt()
|
||||
* - SelfAdjointView::llt()
|
||||
* - SelfAdjointView::ldlt()
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/Cholesky>
|
||||
|
||||
125
Eigen/Core
125
Eigen/Core
@@ -14,6 +14,42 @@
|
||||
// first thing Eigen does: stop the compiler from committing suicide
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
// Handle NVCC/CUDA
|
||||
#ifdef __CUDACC__
|
||||
// Do not try asserts on CUDA!
|
||||
#ifndef EIGEN_NO_DEBUG
|
||||
#define EIGEN_NO_DEBUG
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_INTERNAL_DEBUGGING
|
||||
#undef EIGEN_INTERNAL_DEBUGGING
|
||||
#endif
|
||||
|
||||
// Do not try to vectorize on CUDA!
|
||||
#define EIGEN_DONT_VECTORIZE
|
||||
|
||||
// All functions callable from CUDA code must be qualified with __device__
|
||||
#define EIGEN_DEVICE_FUNC __host__ __device__
|
||||
|
||||
#else
|
||||
#define EIGEN_DEVICE_FUNC
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__CUDA_ARCH__)
|
||||
#define EIGEN_USING_STD_MATH(FUNC) using ::FUNC;
|
||||
#else
|
||||
#define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
|
||||
#endif
|
||||
|
||||
#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS)
|
||||
#define EIGEN_EXCEPTIONS
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_EXCEPTIONS
|
||||
#include <new>
|
||||
#endif
|
||||
|
||||
// then include this file where all our macros are defined. It's really important to do it first because
|
||||
// it's where we do all the alignment settings (platform detection and honoring the user's will if he
|
||||
// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
|
||||
@@ -82,7 +118,16 @@
|
||||
#ifdef __SSE4_2__
|
||||
#define EIGEN_VECTORIZE_SSE4_2
|
||||
#endif
|
||||
|
||||
#ifdef __AVX__
|
||||
#define EIGEN_VECTORIZE_AVX
|
||||
#define EIGEN_VECTORIZE_SSE3
|
||||
#define EIGEN_VECTORIZE_SSSE3
|
||||
#define EIGEN_VECTORIZE_SSE4_1
|
||||
#define EIGEN_VECTORIZE_SSE4_2
|
||||
#endif
|
||||
#ifdef __FMA__
|
||||
#define EIGEN_VECTORIZE_FMA
|
||||
#endif
|
||||
// include files
|
||||
|
||||
// This extern "C" works around a MINGW-w64 compilation issue
|
||||
@@ -95,7 +140,7 @@
|
||||
extern "C" {
|
||||
// In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
|
||||
// Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus:
|
||||
#ifdef __INTEL_COMPILER
|
||||
#if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1110
|
||||
#include <immintrin.h>
|
||||
#else
|
||||
#include <emmintrin.h>
|
||||
@@ -112,6 +157,9 @@
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_2
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
#ifdef EIGEN_VECTORIZE_AVX
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#endif
|
||||
} // end extern "C"
|
||||
#elif defined __ALTIVEC__
|
||||
@@ -165,23 +213,17 @@
|
||||
#endif
|
||||
|
||||
// required for __cpuid, needs to be included after cmath
|
||||
#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64))
|
||||
#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64)) && (!defined(_WIN32_WCE))
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#if defined(_CPPUNWIND) || defined(__EXCEPTIONS)
|
||||
#define EIGEN_EXCEPTIONS
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_EXCEPTIONS
|
||||
#include <new>
|
||||
#endif
|
||||
|
||||
/** \brief Namespace containing all symbols from the %Eigen library. */
|
||||
namespace Eigen {
|
||||
|
||||
inline static const char *SimdInstructionSetsInUse(void) {
|
||||
#if defined(EIGEN_VECTORIZE_SSE4_2)
|
||||
#if defined(EIGEN_VECTORIZE_AVX)
|
||||
return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
|
||||
#elif defined(EIGEN_VECTORIZE_SSE4_2)
|
||||
return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
|
||||
#elif defined(EIGEN_VECTORIZE_SSE4_1)
|
||||
return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
|
||||
@@ -202,34 +244,9 @@ inline static const char *SimdInstructionSetsInUse(void) {
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#define STAGE10_FULL_EIGEN2_API 10
|
||||
#define STAGE20_RESOLVE_API_CONFLICTS 20
|
||||
#define STAGE30_FULL_EIGEN3_API 30
|
||||
#define STAGE40_FULL_EIGEN3_STRICTNESS 40
|
||||
#define STAGE99_NO_EIGEN2_SUPPORT 99
|
||||
|
||||
#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS
|
||||
#define EIGEN2_SUPPORT
|
||||
#define EIGEN2_SUPPORT_STAGE STAGE40_FULL_EIGEN3_STRICTNESS
|
||||
#elif defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API
|
||||
#define EIGEN2_SUPPORT
|
||||
#define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API
|
||||
#elif defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS
|
||||
#define EIGEN2_SUPPORT
|
||||
#define EIGEN2_SUPPORT_STAGE STAGE20_RESOLVE_API_CONFLICTS
|
||||
#elif defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API
|
||||
#define EIGEN2_SUPPORT
|
||||
#define EIGEN2_SUPPORT_STAGE STAGE10_FULL_EIGEN2_API
|
||||
#elif defined EIGEN2_SUPPORT
|
||||
// default to stage 3, that's what it's always meant
|
||||
#define EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API
|
||||
#define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API
|
||||
#else
|
||||
#define EIGEN2_SUPPORT_STAGE STAGE99_NO_EIGEN2_SUPPORT
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
#undef minor
|
||||
#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT
|
||||
// This will generate an error message:
|
||||
#error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information
|
||||
#endif
|
||||
|
||||
// we use size_t frequently and we'll never remember to prepend it with std:: everytime just to
|
||||
@@ -259,7 +276,13 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/MathFunctions.h"
|
||||
#include "src/Core/GenericPacketMath.h"
|
||||
|
||||
#if defined EIGEN_VECTORIZE_SSE
|
||||
#if defined EIGEN_VECTORIZE_AVX
|
||||
// Use AVX for floats and doubles, SSE for integers
|
||||
#include "src/Core/arch/SSE/PacketMath.h"
|
||||
#include "src/Core/arch/SSE/Complex.h"
|
||||
#include "src/Core/arch/AVX/PacketMath.h"
|
||||
#include "src/Core/arch/AVX/Complex.h"
|
||||
#elif defined EIGEN_VECTORIZE_SSE
|
||||
#include "src/Core/arch/SSE/PacketMath.h"
|
||||
#include "src/Core/arch/SSE/MathFunctions.h"
|
||||
#include "src/Core/arch/SSE/Complex.h"
|
||||
@@ -273,17 +296,30 @@ using std::ptrdiff_t;
|
||||
|
||||
#include "src/Core/arch/Default/Settings.h"
|
||||
|
||||
#include "src/Core/Functors.h"
|
||||
#include "src/Core/functors/BinaryFunctors.h"
|
||||
#include "src/Core/functors/UnaryFunctors.h"
|
||||
#include "src/Core/functors/NullaryFunctors.h"
|
||||
#include "src/Core/functors/StlFunctors.h"
|
||||
|
||||
#include "src/Core/DenseCoeffsBase.h"
|
||||
#include "src/Core/DenseBase.h"
|
||||
#include "src/Core/MatrixBase.h"
|
||||
#include "src/Core/EigenBase.h"
|
||||
|
||||
#ifdef EIGEN_ENABLE_EVALUATORS
|
||||
#include "src/Core/functors/AssignmentFunctors.h"
|
||||
#include "src/Core/Product.h"
|
||||
#include "src/Core/CoreEvaluators.h"
|
||||
#include "src/Core/AssignEvaluator.h"
|
||||
#include "src/Core/ProductEvaluators.h"
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874
|
||||
// at least confirmed with Doxygen 1.5.5 and 1.5.6
|
||||
#include "src/Core/Assign.h"
|
||||
#endif
|
||||
|
||||
#include "src/Core/ArrayBase.h"
|
||||
#include "src/Core/util/BlasUtil.h"
|
||||
#include "src/Core/DenseStorage.h"
|
||||
#include "src/Core/NestByValue.h"
|
||||
@@ -347,7 +383,6 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/Random.h"
|
||||
#include "src/Core/Replicate.h"
|
||||
#include "src/Core/Reverse.h"
|
||||
#include "src/Core/ArrayBase.h"
|
||||
#include "src/Core/ArrayWrapper.h"
|
||||
|
||||
#ifdef EIGEN_USE_BLAS
|
||||
@@ -369,8 +404,4 @@ using std::ptrdiff_t;
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
#include "Eigen2Support"
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_CORE_H
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
#include "Dense"
|
||||
//#include "Sparse"
|
||||
#include "Sparse"
|
||||
|
||||
@@ -1,82 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN2SUPPORT_H
|
||||
#define EIGEN2SUPPORT_H
|
||||
|
||||
#if (!defined(EIGEN2_SUPPORT)) || (!defined(EIGEN_CORE_H))
|
||||
#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header
|
||||
#endif
|
||||
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
/** \ingroup Support_modules
|
||||
* \defgroup Eigen2Support_Module Eigen2 support module
|
||||
* This module provides a couple of deprecated functions improving the compatibility with Eigen2.
|
||||
*
|
||||
* To use it, define EIGEN2_SUPPORT before including any Eigen header
|
||||
* \code
|
||||
* #define EIGEN2_SUPPORT
|
||||
* \endcode
|
||||
*
|
||||
*/
|
||||
|
||||
#include "src/Eigen2Support/Macros.h"
|
||||
#include "src/Eigen2Support/Memory.h"
|
||||
#include "src/Eigen2Support/Meta.h"
|
||||
#include "src/Eigen2Support/Lazy.h"
|
||||
#include "src/Eigen2Support/Cwise.h"
|
||||
#include "src/Eigen2Support/CwiseOperators.h"
|
||||
#include "src/Eigen2Support/TriangularSolver.h"
|
||||
#include "src/Eigen2Support/Block.h"
|
||||
#include "src/Eigen2Support/VectorBlock.h"
|
||||
#include "src/Eigen2Support/Minor.h"
|
||||
#include "src/Eigen2Support/MathFunctions.h"
|
||||
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
// Eigen2 used to include iostream
|
||||
#include<iostream>
|
||||
|
||||
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
|
||||
using Eigen::Matrix##SizeSuffix##TypeSuffix; \
|
||||
using Eigen::Vector##SizeSuffix##TypeSuffix; \
|
||||
using Eigen::RowVector##SizeSuffix##TypeSuffix;
|
||||
|
||||
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
|
||||
|
||||
#define EIGEN_USING_MATRIX_TYPEDEFS \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd)
|
||||
|
||||
#define USING_PART_OF_NAMESPACE_EIGEN \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS \
|
||||
using Eigen::Matrix; \
|
||||
using Eigen::MatrixBase; \
|
||||
using Eigen::ei_random; \
|
||||
using Eigen::ei_real; \
|
||||
using Eigen::ei_imag; \
|
||||
using Eigen::ei_conj; \
|
||||
using Eigen::ei_abs; \
|
||||
using Eigen::ei_abs2; \
|
||||
using Eigen::ei_sqrt; \
|
||||
using Eigen::ei_exp; \
|
||||
using Eigen::ei_log; \
|
||||
using Eigen::ei_sin; \
|
||||
using Eigen::ei_cos;
|
||||
|
||||
#endif // EIGEN2SUPPORT_H
|
||||
@@ -33,27 +33,23 @@
|
||||
#include "src/Geometry/OrthoMethods.h"
|
||||
#include "src/Geometry/EulerAngles.h"
|
||||
|
||||
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
|
||||
#include "src/Geometry/Homogeneous.h"
|
||||
#include "src/Geometry/RotationBase.h"
|
||||
#include "src/Geometry/Rotation2D.h"
|
||||
#include "src/Geometry/Quaternion.h"
|
||||
#include "src/Geometry/AngleAxis.h"
|
||||
#include "src/Geometry/Transform.h"
|
||||
#include "src/Geometry/Translation.h"
|
||||
#include "src/Geometry/Scaling.h"
|
||||
#include "src/Geometry/Hyperplane.h"
|
||||
#include "src/Geometry/ParametrizedLine.h"
|
||||
#include "src/Geometry/AlignedBox.h"
|
||||
#include "src/Geometry/Umeyama.h"
|
||||
#include "src/Geometry/Homogeneous.h"
|
||||
#include "src/Geometry/RotationBase.h"
|
||||
#include "src/Geometry/Rotation2D.h"
|
||||
#include "src/Geometry/Quaternion.h"
|
||||
#include "src/Geometry/AngleAxis.h"
|
||||
#include "src/Geometry/Transform.h"
|
||||
#include "src/Geometry/Translation.h"
|
||||
#include "src/Geometry/Scaling.h"
|
||||
#include "src/Geometry/Hyperplane.h"
|
||||
#include "src/Geometry/ParametrizedLine.h"
|
||||
#include "src/Geometry/AlignedBox.h"
|
||||
#include "src/Geometry/Umeyama.h"
|
||||
|
||||
#if defined EIGEN_VECTORIZE_SSE
|
||||
#include "src/Geometry/arch/Geometry_SSE.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
#include "src/Eigen2Support/Geometry/All.h"
|
||||
// Use the SSE optimized version whenever possible. At the moment the
|
||||
// SSE version doesn't compile when AVX is enabled
|
||||
#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX
|
||||
#include "src/Geometry/arch/Geometry_SSE.h"
|
||||
#endif
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
8
Eigen/LU
8
Eigen/LU
@@ -27,14 +27,12 @@
|
||||
#include "src/LU/Determinant.h"
|
||||
#include "src/LU/Inverse.h"
|
||||
|
||||
#if defined EIGEN_VECTORIZE_SSE
|
||||
// Use the SSE optimized version whenever possible. At the moment the
|
||||
// SSE version doesn't compile when AVX is enabled
|
||||
#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX
|
||||
#include "src/LU/arch/Inverse_SSE.h"
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
#include "src/Eigen2Support/LU.h"
|
||||
#endif
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#endif // EIGEN_LU_MODULE_H
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
#ifndef EIGEN_REGRESSION_MODULE_H
|
||||
#define EIGEN_REGRESSION_MODULE_H
|
||||
|
||||
#ifndef EIGEN2_SUPPORT
|
||||
#error LeastSquares is only available in Eigen2 support mode (define EIGEN2_SUPPORT)
|
||||
#endif
|
||||
|
||||
// exclude from normal eigen3-only documentation
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
|
||||
#include "Core"
|
||||
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
#include "Eigenvalues"
|
||||
#include "Geometry"
|
||||
|
||||
/** \defgroup LeastSquares_Module LeastSquares module
|
||||
* This module provides linear regression and related features.
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/LeastSquares>
|
||||
* \endcode
|
||||
*/
|
||||
|
||||
#include "src/Eigen2Support/LeastSquares.h"
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#endif // EIGEN2_SUPPORT
|
||||
|
||||
#endif // EIGEN_REGRESSION_MODULE_H
|
||||
12
Eigen/QR
12
Eigen/QR
@@ -15,7 +15,9 @@
|
||||
*
|
||||
* This module provides various QR decompositions
|
||||
* This module also provides some MatrixBase methods, including:
|
||||
* - MatrixBase::qr(),
|
||||
* - MatrixBase::householderQr()
|
||||
* - MatrixBase::colPivHouseholderQr()
|
||||
* - MatrixBase::fullPivHouseholderQr()
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/QR>
|
||||
@@ -31,15 +33,7 @@
|
||||
#include "src/QR/ColPivHouseholderQR_MKL.h"
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
#include "src/Eigen2Support/QR.h"
|
||||
#endif
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
#include "Eigenvalues"
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_QR_MODULE_H
|
||||
/* vim: set filetype=cpp et sw=2 ts=2 ai: */
|
||||
|
||||
@@ -21,16 +21,13 @@
|
||||
*/
|
||||
|
||||
#include "src/misc/Solve.h"
|
||||
#include "src/SVD/SVDBase.h"
|
||||
#include "src/SVD/JacobiSVD.h"
|
||||
#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
|
||||
#include "src/SVD/JacobiSVD_MKL.h"
|
||||
#endif
|
||||
#include "src/SVD/UpperBidiagonalization.h"
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
#include "src/Eigen2Support/SVD.h"
|
||||
#endif
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#endif // EIGEN_SVD_MODULE_H
|
||||
|
||||
@@ -16,7 +16,10 @@
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename MatrixType, int UpLo> struct LDLT_Traits;
|
||||
template<typename MatrixType, int UpLo> struct LDLT_Traits;
|
||||
|
||||
// PositiveSemiDef means positive semi-definite and non-zero; same for NegativeSemiDef
|
||||
enum SignMatrix { PositiveSemiDef, NegativeSemiDef, ZeroSign, Indefinite };
|
||||
}
|
||||
|
||||
/** \ingroup Cholesky_Module
|
||||
@@ -40,7 +43,7 @@ template<typename MatrixType, int UpLo> struct LDLT_Traits;
|
||||
* Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky
|
||||
* decomposition to determine whether a system of equations has a solution.
|
||||
*
|
||||
* \sa MatrixBase::ldlt(), class LLT
|
||||
* \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo> class LDLT
|
||||
{
|
||||
@@ -69,7 +72,12 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
* The default constructor is useful in cases in which the user intends to
|
||||
* perform decompositions via LDLT::compute(const MatrixType&).
|
||||
*/
|
||||
LDLT() : m_matrix(), m_transpositions(), m_isInitialized(false) {}
|
||||
LDLT()
|
||||
: m_matrix(),
|
||||
m_transpositions(),
|
||||
m_sign(internal::ZeroSign),
|
||||
m_isInitialized(false)
|
||||
{}
|
||||
|
||||
/** \brief Default Constructor with memory preallocation
|
||||
*
|
||||
@@ -81,6 +89,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
: m_matrix(size, size),
|
||||
m_transpositions(size),
|
||||
m_temporary(size),
|
||||
m_sign(internal::ZeroSign),
|
||||
m_isInitialized(false)
|
||||
{}
|
||||
|
||||
@@ -93,6 +102,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
: m_matrix(matrix.rows(), matrix.cols()),
|
||||
m_transpositions(matrix.rows()),
|
||||
m_temporary(matrix.rows()),
|
||||
m_sign(internal::ZeroSign),
|
||||
m_isInitialized(false)
|
||||
{
|
||||
compute(matrix);
|
||||
@@ -139,21 +149,14 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
inline bool isPositive() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
return m_sign == 1;
|
||||
return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign;
|
||||
}
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
inline bool isPositiveDefinite() const
|
||||
{
|
||||
return isPositive();
|
||||
}
|
||||
#endif
|
||||
|
||||
/** \returns true if the matrix is negative (semidefinite) */
|
||||
inline bool isNegative(void) const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
return m_sign == -1;
|
||||
return m_sign == internal::NegativeSemiDef || m_sign == internal::ZeroSign;
|
||||
}
|
||||
|
||||
/** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A.
|
||||
@@ -169,7 +172,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
* least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function
|
||||
* computes the least-square solution of \f$ A x = b \f$ is \f$ A \f$ is singular.
|
||||
*
|
||||
* \sa MatrixBase::ldlt()
|
||||
* \sa MatrixBase::ldlt(), SelfAdjointView::ldlt()
|
||||
*/
|
||||
template<typename Rhs>
|
||||
inline const internal::solve_retval<LDLT, Rhs>
|
||||
@@ -181,15 +184,6 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
return internal::solve_retval<LDLT, Rhs>(*this, b.derived());
|
||||
}
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<typename OtherDerived, typename ResultType>
|
||||
bool solve(const MatrixBase<OtherDerived>& b, ResultType *result) const
|
||||
{
|
||||
*result = this->solve(b);
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename Derived>
|
||||
bool solveInPlace(MatrixBase<Derived> &bAndX) const;
|
||||
|
||||
@@ -235,7 +229,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
MatrixType m_matrix;
|
||||
TranspositionType m_transpositions;
|
||||
TmpMatrixType m_temporary;
|
||||
int m_sign;
|
||||
internal::SignMatrix m_sign;
|
||||
bool m_isInitialized;
|
||||
};
|
||||
|
||||
@@ -246,49 +240,33 @@ template<int UpLo> struct ldlt_inplace;
|
||||
template<> struct ldlt_inplace<Lower>
|
||||
{
|
||||
template<typename MatrixType, typename TranspositionType, typename Workspace>
|
||||
static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0)
|
||||
static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign)
|
||||
{
|
||||
using std::abs;
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
typedef typename MatrixType::Index Index;
|
||||
typedef typename TranspositionType::StorageIndexType IndexType;
|
||||
eigen_assert(mat.rows()==mat.cols());
|
||||
const Index size = mat.rows();
|
||||
|
||||
if (size <= 1)
|
||||
{
|
||||
transpositions.setIdentity();
|
||||
if(sign)
|
||||
*sign = numext::real(mat.coeff(0,0))>0 ? 1:-1;
|
||||
if (numext::real(mat.coeff(0,0)) > 0) sign = PositiveSemiDef;
|
||||
else if (numext::real(mat.coeff(0,0)) < 0) sign = NegativeSemiDef;
|
||||
else sign = ZeroSign;
|
||||
return true;
|
||||
}
|
||||
|
||||
RealScalar cutoff(0), biggest_in_corner;
|
||||
|
||||
for (Index k = 0; k < size; ++k)
|
||||
{
|
||||
// Find largest diagonal element
|
||||
Index index_of_biggest_in_corner;
|
||||
biggest_in_corner = mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner);
|
||||
mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner);
|
||||
index_of_biggest_in_corner += k;
|
||||
|
||||
if(k == 0)
|
||||
{
|
||||
// The biggest overall is the point of reference to which further diagonals
|
||||
// are compared; if any diagonal is negligible compared
|
||||
// to the largest overall, the algorithm bails.
|
||||
cutoff = abs(NumTraits<Scalar>::epsilon() * biggest_in_corner);
|
||||
}
|
||||
|
||||
// Finish early if the matrix is not full rank.
|
||||
if(biggest_in_corner < cutoff)
|
||||
{
|
||||
for(Index i = k; i < size; i++) transpositions.coeffRef(i) = i;
|
||||
if(sign) *sign = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
transpositions.coeffRef(k) = index_of_biggest_in_corner;
|
||||
transpositions.coeffRef(k) = IndexType(index_of_biggest_in_corner);
|
||||
if(k != index_of_biggest_in_corner)
|
||||
{
|
||||
// apply the transposition while taking care to consider only
|
||||
@@ -297,7 +275,7 @@ template<> struct ldlt_inplace<Lower>
|
||||
mat.row(k).head(k).swap(mat.row(index_of_biggest_in_corner).head(k));
|
||||
mat.col(k).tail(s).swap(mat.col(index_of_biggest_in_corner).tail(s));
|
||||
std::swap(mat.coeffRef(k,k),mat.coeffRef(index_of_biggest_in_corner,index_of_biggest_in_corner));
|
||||
for(int i=k+1;i<index_of_biggest_in_corner;++i)
|
||||
for(Index i=k+1;i<index_of_biggest_in_corner;++i)
|
||||
{
|
||||
Scalar tmp = mat.coeffRef(i,k);
|
||||
mat.coeffRef(i,k) = numext::conj(mat.coeffRef(index_of_biggest_in_corner,i));
|
||||
@@ -318,22 +296,27 @@ template<> struct ldlt_inplace<Lower>
|
||||
|
||||
if(k>0)
|
||||
{
|
||||
temp.head(k) = mat.diagonal().head(k).asDiagonal() * A10.adjoint();
|
||||
temp.head(k) = mat.diagonal().real().head(k).asDiagonal() * A10.adjoint();
|
||||
mat.coeffRef(k,k) -= (A10 * temp.head(k)).value();
|
||||
if(rs>0)
|
||||
A21.noalias() -= A20 * temp.head(k);
|
||||
}
|
||||
if((rs>0) && (abs(mat.coeffRef(k,k)) > cutoff))
|
||||
A21 /= mat.coeffRef(k,k);
|
||||
|
||||
if(sign)
|
||||
{
|
||||
// LDLT is not guaranteed to work for indefinite matrices, but let's try to get the sign right
|
||||
int newSign = numext::real(mat.diagonal().coeff(index_of_biggest_in_corner)) > 0;
|
||||
if(k == 0)
|
||||
*sign = newSign;
|
||||
else if(*sign != newSign)
|
||||
*sign = 0;
|
||||
// In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot
|
||||
// was smaller than the cutoff value. However, soince LDLT is not rank-revealing
|
||||
// we should only make sure we do not introduce INF or NaN values.
|
||||
// LAPACK also uses 0 as the cutoff value.
|
||||
RealScalar realAkk = numext::real(mat.coeffRef(k,k));
|
||||
if((rs>0) && (abs(realAkk) > RealScalar(0)))
|
||||
A21 /= realAkk;
|
||||
|
||||
if (sign == PositiveSemiDef) {
|
||||
if (realAkk < 0) sign = Indefinite;
|
||||
} else if (sign == NegativeSemiDef) {
|
||||
if (realAkk > 0) sign = Indefinite;
|
||||
} else if (sign == ZeroSign) {
|
||||
if (realAkk > 0) sign = PositiveSemiDef;
|
||||
else if (realAkk < 0) sign = NegativeSemiDef;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -399,7 +382,7 @@ template<> struct ldlt_inplace<Lower>
|
||||
template<> struct ldlt_inplace<Upper>
|
||||
{
|
||||
template<typename MatrixType, typename TranspositionType, typename Workspace>
|
||||
static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0)
|
||||
static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign)
|
||||
{
|
||||
Transpose<MatrixType> matt(mat);
|
||||
return ldlt_inplace<Lower>::unblocked(matt, transpositions, temp, sign);
|
||||
@@ -445,7 +428,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
|
||||
m_isInitialized = false;
|
||||
m_temporary.resize(size);
|
||||
|
||||
internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, &m_sign);
|
||||
internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign);
|
||||
|
||||
m_isInitialized = true;
|
||||
return *this;
|
||||
@@ -460,6 +443,7 @@ template<typename MatrixType, int _UpLo>
|
||||
template<typename Derived>
|
||||
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename NumTraits<typename MatrixType::Scalar>::Real& sigma)
|
||||
{
|
||||
typedef typename TranspositionType::StorageIndexType IndexType;
|
||||
const Index size = w.rows();
|
||||
if (m_isInitialized)
|
||||
{
|
||||
@@ -471,9 +455,9 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Deri
|
||||
m_matrix.setZero();
|
||||
m_transpositions.resize(size);
|
||||
for (Index i = 0; i < size; i++)
|
||||
m_transpositions.coeffRef(i) = i;
|
||||
m_transpositions.coeffRef(i) = IndexType(i);
|
||||
m_temporary.resize(size);
|
||||
m_sign = sigma>=0 ? 1 : -1;
|
||||
m_sign = sigma>=0 ? internal::PositiveSemiDef : internal::NegativeSemiDef;
|
||||
m_isInitialized = true;
|
||||
}
|
||||
|
||||
@@ -502,18 +486,22 @@ struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
|
||||
// dst = D^-1 (L^-1 P b)
|
||||
// more precisely, use pseudo-inverse of D (see bug 241)
|
||||
using std::abs;
|
||||
using std::max;
|
||||
EIGEN_USING_STD_MATH(max);
|
||||
typedef typename LDLTType::MatrixType MatrixType;
|
||||
typedef typename LDLTType::Scalar Scalar;
|
||||
typedef typename LDLTType::RealScalar RealScalar;
|
||||
const Diagonal<const MatrixType> vectorD = dec().vectorD();
|
||||
RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() * NumTraits<Scalar>::epsilon(),
|
||||
RealScalar(1) / NumTraits<RealScalar>::highest()); // motivated by LAPACK's xGELSS
|
||||
const typename Diagonal<const MatrixType>::RealReturnType vectorD(dec().vectorD());
|
||||
// In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon
|
||||
// as motivated by LAPACK's xGELSS:
|
||||
// RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() *NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
|
||||
// However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
|
||||
// diagonal element is not well justified and to numerical issues in some cases.
|
||||
// Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
|
||||
RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
|
||||
for (Index i = 0; i < vectorD.size(); ++i) {
|
||||
if(abs(vectorD(i)) > tolerance)
|
||||
dst.row(i) /= vectorD(i);
|
||||
dst.row(i) /= vectorD(i);
|
||||
else
|
||||
dst.row(i).setZero();
|
||||
dst.row(i).setZero();
|
||||
}
|
||||
|
||||
// dst = L^-T (D^-1 L^-1 P b)
|
||||
@@ -566,7 +554,7 @@ MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
|
||||
// L^* P
|
||||
res = matrixU() * res;
|
||||
// D(L^*P)
|
||||
res = vectorD().asDiagonal() * res;
|
||||
res = vectorD().real().asDiagonal() * res;
|
||||
// L(DL^*P)
|
||||
res = matrixL() * res;
|
||||
// P^T (LDL^*P)
|
||||
@@ -575,8 +563,10 @@ MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
|
||||
return res;
|
||||
}
|
||||
|
||||
#ifndef __CUDACC__
|
||||
/** \cholesky_module
|
||||
* \returns the Cholesky decomposition with full pivoting without square root of \c *this
|
||||
* \sa MatrixBase::ldlt()
|
||||
*/
|
||||
template<typename MatrixType, unsigned int UpLo>
|
||||
inline const LDLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>
|
||||
@@ -587,6 +577,7 @@ SelfAdjointView<MatrixType, UpLo>::ldlt() const
|
||||
|
||||
/** \cholesky_module
|
||||
* \returns the Cholesky decomposition with full pivoting without square root of \c *this
|
||||
* \sa SelfAdjointView::ldlt()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const LDLT<typename MatrixBase<Derived>::PlainObject>
|
||||
@@ -594,6 +585,7 @@ MatrixBase<Derived>::ldlt() const
|
||||
{
|
||||
return LDLT<PlainObject>(derived());
|
||||
}
|
||||
#endif // __CUDACC__
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
|
||||
* Example: \include LLT_example.cpp
|
||||
* Output: \verbinclude LLT_example.out
|
||||
*
|
||||
* \sa MatrixBase::llt(), class LDLT
|
||||
* \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
|
||||
*/
|
||||
/* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
|
||||
* Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
|
||||
@@ -115,7 +115,7 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
* Example: \include LLT_solve.cpp
|
||||
* Output: \verbinclude LLT_solve.out
|
||||
*
|
||||
* \sa solveInPlace(), MatrixBase::llt()
|
||||
* \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt()
|
||||
*/
|
||||
template<typename Rhs>
|
||||
inline const internal::solve_retval<LLT, Rhs>
|
||||
@@ -127,17 +127,6 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
return internal::solve_retval<LLT, Rhs>(*this, b.derived());
|
||||
}
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<typename OtherDerived, typename ResultType>
|
||||
bool solve(const MatrixBase<OtherDerived>& b, ResultType *result) const
|
||||
{
|
||||
*result = this->solve(b);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isPositiveDefinite() const { return true; }
|
||||
#endif
|
||||
|
||||
template<typename Derived>
|
||||
void solveInPlace(MatrixBase<Derived> &bAndX) const;
|
||||
|
||||
@@ -465,8 +454,10 @@ MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
|
||||
return matrixL() * matrixL().adjoint().toDenseMatrix();
|
||||
}
|
||||
|
||||
#ifndef __CUDACC__
|
||||
/** \cholesky_module
|
||||
* \returns the LLT decomposition of \c *this
|
||||
* \sa SelfAdjointView::llt()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const LLT<typename MatrixBase<Derived>::PlainObject>
|
||||
@@ -477,6 +468,7 @@ MatrixBase<Derived>::llt() const
|
||||
|
||||
/** \cholesky_module
|
||||
* \returns the LLT decomposition of \c *this
|
||||
* \sa SelfAdjointView::llt()
|
||||
*/
|
||||
template<typename MatrixType, unsigned int UpLo>
|
||||
inline const LLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>
|
||||
@@ -484,7 +476,8 @@ SelfAdjointView<MatrixType, UpLo>::llt() const
|
||||
{
|
||||
return LLT<PlainObject,UpLo>(m_matrix);
|
||||
}
|
||||
|
||||
#endif // __CUDACC__
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_LLT_H
|
||||
|
||||
@@ -58,10 +58,12 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
|
||||
res.p = mat.outerIndexPtr();
|
||||
res.i = mat.innerIndexPtr();
|
||||
res.x = mat.valuePtr();
|
||||
res.z = 0;
|
||||
res.sorted = 1;
|
||||
if(mat.isCompressed())
|
||||
{
|
||||
res.packed = 1;
|
||||
res.nz = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -170,6 +172,7 @@ class CholmodBase : internal::noncopyable
|
||||
CholmodBase()
|
||||
: m_cholmodFactor(0), m_info(Success), m_isInitialized(false)
|
||||
{
|
||||
m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
|
||||
cholmod_start(&m_cholmod);
|
||||
}
|
||||
|
||||
@@ -241,7 +244,7 @@ class CholmodBase : internal::noncopyable
|
||||
return internal::sparse_solve_retval<CholmodBase, Rhs>(*this, b.derived());
|
||||
}
|
||||
|
||||
/** Performs a symbolic decomposition on the sparcity of \a matrix.
|
||||
/** Performs a symbolic decomposition on the sparsity pattern of \a matrix.
|
||||
*
|
||||
* This function is particularly useful when solving for several problems having the same structure.
|
||||
*
|
||||
@@ -265,7 +268,7 @@ class CholmodBase : internal::noncopyable
|
||||
|
||||
/** Performs a numeric decomposition of \a matrix
|
||||
*
|
||||
* The given matrix must has the same sparcity than the matrix on which the symbolic decomposition has been performed.
|
||||
* The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed.
|
||||
*
|
||||
* \sa analyzePattern()
|
||||
*/
|
||||
@@ -302,7 +305,7 @@ class CholmodBase : internal::noncopyable
|
||||
{
|
||||
this->m_info = NumericalIssue;
|
||||
}
|
||||
// TODO optimize this copy by swapping when possible (be carreful with alignment, etc.)
|
||||
// TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
|
||||
dest = Matrix<Scalar,Dest::RowsAtCompileTime,Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),b.rows(),b.cols());
|
||||
cholmod_free_dense(&x_cd, &m_cholmod);
|
||||
}
|
||||
@@ -323,7 +326,7 @@ class CholmodBase : internal::noncopyable
|
||||
{
|
||||
this->m_info = NumericalIssue;
|
||||
}
|
||||
// TODO optimize this copy by swapping when possible (be carreful with alignment, etc.)
|
||||
// TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
|
||||
dest = viewAsEigen<DestScalar,DestOptions,DestIndex>(*x_cs);
|
||||
cholmod_free_sparse(&x_cs, &m_cholmod);
|
||||
}
|
||||
@@ -365,8 +368,8 @@ class CholmodBase : internal::noncopyable
|
||||
*
|
||||
* This class allows to solve for A.X = B sparse linear problems via a simplicial LL^T Cholesky factorization
|
||||
* using the Cholmod library.
|
||||
* This simplicial variant is equivalent to Eigen's built-in SimplicialLLT class. Thefore, it has little practical interest.
|
||||
* The sparse matrix A must be selfajoint and positive definite. The vectors or matrices
|
||||
* This simplicial variant is equivalent to Eigen's built-in SimplicialLLT class. Therefore, it has little practical interest.
|
||||
* The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
|
||||
* X and B can be either dense or sparse.
|
||||
*
|
||||
* \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
|
||||
@@ -412,8 +415,8 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
|
||||
*
|
||||
* This class allows to solve for A.X = B sparse linear problems via a simplicial LDL^T Cholesky factorization
|
||||
* using the Cholmod library.
|
||||
* This simplicial variant is equivalent to Eigen's built-in SimplicialLDLT class. Thefore, it has little practical interest.
|
||||
* The sparse matrix A must be selfajoint and positive definite. The vectors or matrices
|
||||
* This simplicial variant is equivalent to Eigen's built-in SimplicialLDLT class. Therefore, it has little practical interest.
|
||||
* The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
|
||||
* X and B can be either dense or sparse.
|
||||
*
|
||||
* \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
|
||||
@@ -458,7 +461,7 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
|
||||
* This class allows to solve for A.X = B sparse linear problems via a supernodal LL^T Cholesky factorization
|
||||
* using the Cholmod library.
|
||||
* This supernodal variant performs best on dense enough problems, e.g., 3D FEM, or very high order 2D FEM.
|
||||
* The sparse matrix A must be selfajoint and positive definite. The vectors or matrices
|
||||
* The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
|
||||
* X and B can be either dense or sparse.
|
||||
*
|
||||
* \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
|
||||
@@ -501,7 +504,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
|
||||
* \brief A general Cholesky factorization and solver based on Cholmod
|
||||
*
|
||||
* This class allows to solve for A.X = B sparse linear problems via a LL^T or LDL^T Cholesky factorization
|
||||
* using the Cholmod library. The sparse matrix A must be selfajoint and positive definite. The vectors or matrices
|
||||
* using the Cholmod library. The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
|
||||
* X and B can be either dense or sparse.
|
||||
*
|
||||
* This variant permits to change the underlying Cholesky method at runtime.
|
||||
|
||||
@@ -69,6 +69,7 @@ class Array
|
||||
* the usage of 'using'. This should be done only for operator=.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array& operator=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
return Base::operator=(other);
|
||||
@@ -84,6 +85,7 @@ class Array
|
||||
* remain row-vectors and vectors remain vectors.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array& operator=(const ArrayBase<OtherDerived>& other)
|
||||
{
|
||||
return Base::_set(other);
|
||||
@@ -92,6 +94,7 @@ class Array
|
||||
/** This is a special case of the templated operator=. Its purpose is to
|
||||
* prevent a default operator= from hiding the templated operator=.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array& operator=(const Array& other)
|
||||
{
|
||||
return Base::_set(other);
|
||||
@@ -107,6 +110,7 @@ class Array
|
||||
*
|
||||
* \sa resize(Index,Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array() : Base()
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -116,6 +120,7 @@ class Array
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
// FIXME is it still needed ??
|
||||
/** \internal */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Array(internal::constructor_without_unaligned_array_assert)
|
||||
: Base(internal::constructor_without_unaligned_array_assert())
|
||||
{
|
||||
@@ -124,41 +129,63 @@ class Array
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Constructs a vector or row-vector with given dimension. \only_for_vectors
|
||||
*
|
||||
* Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
|
||||
* it is redundant to pass the dimension here, so it makes more sense to use the default
|
||||
* constructor Matrix() instead.
|
||||
*/
|
||||
EIGEN_STRONG_INLINE explicit Array(Index dim)
|
||||
: Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim)
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
Array(Array&& other)
|
||||
: Base(std::move(other))
|
||||
{
|
||||
Base::_check_template_params();
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Array)
|
||||
eigen_assert(dim >= 0);
|
||||
eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
|
||||
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
|
||||
if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
|
||||
Base::_set_noalias(other);
|
||||
}
|
||||
Array& operator=(Array&& other)
|
||||
{
|
||||
other.swap(*this);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE explicit Array(const T& x)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::template _init1<T>(x);
|
||||
}
|
||||
|
||||
template<typename T0, typename T1>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
this->template _init2<T0,T1>(val0, val1);
|
||||
}
|
||||
#else
|
||||
/** constructs an uninitialized matrix with \a rows rows and \a cols columns.
|
||||
/** \brief Constructs a fixed-sized array initialized with coefficients starting at \a data */
|
||||
EIGEN_DEVICE_FUNC explicit Array(const Scalar *data);
|
||||
/** Constructs a vector or row-vector with given dimension. \only_for_vectors
|
||||
*
|
||||
* This is useful for dynamic-size matrices. For fixed-size matrices,
|
||||
* Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
|
||||
* it is redundant to pass the dimension here, so it makes more sense to use the default
|
||||
* constructor Array() instead.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE explicit Array(Index dim);
|
||||
/** constructs an initialized 1x1 Array with the given coefficient */
|
||||
Array(const Scalar& value);
|
||||
/** constructs an uninitialized array with \a rows rows and \a cols columns.
|
||||
*
|
||||
* This is useful for dynamic-size arrays. For fixed-size arrays,
|
||||
* it is redundant to pass these parameters, so one should use the default constructor
|
||||
* Matrix() instead. */
|
||||
* Array() instead. */
|
||||
Array(Index rows, Index cols);
|
||||
/** constructs an initialized 2D vector with given coefficients */
|
||||
Array(const Scalar& val0, const Scalar& val1);
|
||||
#endif
|
||||
|
||||
/** constructs an initialized 3D vector with given coefficients */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -168,6 +195,7 @@ class Array
|
||||
m_storage.data()[2] = val2;
|
||||
}
|
||||
/** constructs an initialized 4D vector with given coefficients */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -178,10 +206,9 @@ class Array
|
||||
m_storage.data()[3] = val3;
|
||||
}
|
||||
|
||||
explicit Array(const Scalar *data);
|
||||
|
||||
/** Constructor copying the value of the expression \a other */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const ArrayBase<OtherDerived>& other)
|
||||
: Base(other.rows() * other.cols(), other.rows(), other.cols())
|
||||
{
|
||||
@@ -189,6 +216,7 @@ class Array
|
||||
Base::_set_noalias(other);
|
||||
}
|
||||
/** Copy constructor */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const Array& other)
|
||||
: Base(other.rows() * other.cols(), other.rows(), other.cols())
|
||||
{
|
||||
@@ -197,6 +225,7 @@ class Array
|
||||
}
|
||||
/** Copy constructor with in-place evaluation */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const ReturnByValue<OtherDerived>& other)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -206,11 +235,12 @@ class Array
|
||||
|
||||
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other)
|
||||
: Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::resize(other.rows(), other.cols());
|
||||
Base::_resize_to_match(other);
|
||||
*this = other;
|
||||
}
|
||||
|
||||
@@ -221,8 +251,8 @@ class Array
|
||||
void swap(ArrayBase<OtherDerived> const & other)
|
||||
{ this->_swap(other.derived()); }
|
||||
|
||||
inline Index innerStride() const { return 1; }
|
||||
inline Index outerStride() const { return this->innerSize(); }
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
|
||||
|
||||
#ifdef EIGEN_ARRAY_PLUGIN
|
||||
#include EIGEN_ARRAY_PLUGIN
|
||||
|
||||
@@ -118,40 +118,50 @@ template<typename Derived> class ArrayBase
|
||||
/** Special case of the template operator=, in order to prevent the compiler
|
||||
* from generating a default operator= (issue hit with g++ 4.1)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const ArrayBase& other)
|
||||
{
|
||||
return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
|
||||
}
|
||||
|
||||
Derived& operator+=(const Scalar& scalar)
|
||||
{ return *this = derived() + scalar; }
|
||||
Derived& operator-=(const Scalar& scalar)
|
||||
{ return *this = derived() - scalar; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator+=(const Scalar& scalar);
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator-=(const Scalar& scalar);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator+=(const ArrayBase<OtherDerived>& other);
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator-=(const ArrayBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator*=(const ArrayBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator/=(const ArrayBase<OtherDerived>& other);
|
||||
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC
|
||||
ArrayBase<Derived>& array() { return *this; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
const ArrayBase<Derived>& array() const { return *this; }
|
||||
|
||||
/** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array
|
||||
* \sa MatrixBase::array() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
MatrixWrapper<Derived> matrix() { return derived(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MatrixWrapper<const Derived> matrix() const { return derived(); }
|
||||
|
||||
// template<typename Dest>
|
||||
// inline void evalTo(Dest& dst) const { dst = matrix(); }
|
||||
|
||||
protected:
|
||||
EIGEN_DEVICE_FUNC
|
||||
ArrayBase() : Base() {}
|
||||
|
||||
private:
|
||||
|
||||
@@ -29,6 +29,11 @@ struct traits<ArrayWrapper<ExpressionType> >
|
||||
: public traits<typename remove_all<typename ExpressionType::Nested>::type >
|
||||
{
|
||||
typedef ArrayXpr XprKind;
|
||||
// Let's remove NestByRefBit
|
||||
enum {
|
||||
Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
|
||||
Flags = Flags0 & ~NestByRefBit
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
@@ -48,41 +53,54 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
|
||||
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
|
||||
|
||||
inline ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_expression.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return m_expression.cols(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const { return m_expression.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return m_expression.data(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.coeff(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_expression.coeff(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(index);
|
||||
@@ -113,9 +131,11 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
}
|
||||
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void evalTo(Dest& dst) const { dst = m_expression; }
|
||||
|
||||
const typename internal::remove_all<NestedExpressionType>::type&
|
||||
EIGEN_DEVICE_FUNC
|
||||
nestedExpression() const
|
||||
{
|
||||
return m_expression;
|
||||
@@ -123,9 +143,11 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index) */
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index,Index)*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }
|
||||
|
||||
protected:
|
||||
@@ -149,6 +171,11 @@ struct traits<MatrixWrapper<ExpressionType> >
|
||||
: public traits<typename remove_all<typename ExpressionType::Nested>::type >
|
||||
{
|
||||
typedef MatrixXpr XprKind;
|
||||
// Let's remove NestByRefBit
|
||||
enum {
|
||||
Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
|
||||
Flags = Flags0 & ~NestByRefBit
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
@@ -168,41 +195,54 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
|
||||
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_expression.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return m_expression.cols(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const { return m_expression.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return m_expression.data(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.coeff(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.derived().coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_expression.coeff(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(index);
|
||||
@@ -232,6 +272,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<NestedExpressionType>::type&
|
||||
nestedExpression() const
|
||||
{
|
||||
@@ -240,9 +281,11 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index) */
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index,Index)*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }
|
||||
|
||||
protected:
|
||||
|
||||
@@ -105,6 +105,8 @@ public:
|
||||
EIGEN_DEBUG_VAR(DstIsAligned)
|
||||
EIGEN_DEBUG_VAR(SrcIsAligned)
|
||||
EIGEN_DEBUG_VAR(JointAlignment)
|
||||
EIGEN_DEBUG_VAR(Derived::SizeAtCompileTime)
|
||||
EIGEN_DEBUG_VAR(OtherDerived::CoeffReadCost)
|
||||
EIGEN_DEBUG_VAR(InnerSize)
|
||||
EIGEN_DEBUG_VAR(InnerMaxSize)
|
||||
EIGEN_DEBUG_VAR(PacketSize)
|
||||
@@ -139,6 +141,7 @@ struct assign_DefaultTraversal_CompleteUnrolling
|
||||
inner = Index % Derived1::InnerSizeAtCompileTime
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
dst.copyCoeffByOuterInner(outer, inner, src);
|
||||
@@ -149,12 +152,14 @@ struct assign_DefaultTraversal_CompleteUnrolling
|
||||
template<typename Derived1, typename Derived2, int Stop>
|
||||
struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, int Index, int Stop>
|
||||
struct assign_DefaultTraversal_InnerUnrolling
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
|
||||
{
|
||||
dst.copyCoeffByOuterInner(outer, Index, src);
|
||||
@@ -165,6 +170,7 @@ struct assign_DefaultTraversal_InnerUnrolling
|
||||
template<typename Derived1, typename Derived2, int Stop>
|
||||
struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
|
||||
};
|
||||
|
||||
@@ -175,6 +181,7 @@ struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
template<typename Derived1, typename Derived2, int Index, int Stop>
|
||||
struct assign_LinearTraversal_CompleteUnrolling
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
dst.copyCoeff(Index, src);
|
||||
@@ -185,6 +192,7 @@ struct assign_LinearTraversal_CompleteUnrolling
|
||||
template<typename Derived1, typename Derived2, int Stop>
|
||||
struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
@@ -249,6 +257,7 @@ struct assign_impl;
|
||||
template<typename Derived1, typename Derived2, int Unrolling, int Version>
|
||||
struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &, const Derived2 &) { }
|
||||
};
|
||||
|
||||
@@ -256,6 +265,7 @@ template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const Index innerSize = dst.innerSize();
|
||||
@@ -269,6 +279,7 @@ struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
|
||||
template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
|
||||
@@ -280,6 +291,7 @@ template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const Index outerSize = dst.outerSize();
|
||||
@@ -297,6 +309,7 @@ template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const Index size = dst.size();
|
||||
@@ -308,6 +321,7 @@ struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
|
||||
template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
|
||||
@@ -492,12 +506,25 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
|
||||
EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
|
||||
#ifdef EIGEN_TEST_EVALUATORS
|
||||
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
internal::copy_using_evaluator_traits<Derived, OtherDerived>::debug();
|
||||
#endif
|
||||
eigen_assert(rows() == other.rows() && cols() == other.cols());
|
||||
internal::call_dense_assignment_loop(derived(),other.derived());
|
||||
|
||||
#else // EIGEN_TEST_EVALUATORS
|
||||
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
internal::assign_traits<Derived, OtherDerived>::debug();
|
||||
#endif
|
||||
eigen_assert(rows() == other.rows() && cols() == other.cols());
|
||||
internal::assign_impl<Derived, OtherDerived, int(SameType) ? int(internal::assign_traits<Derived, OtherDerived>::Traversal)
|
||||
: int(InvalidTraversal)>::run(derived(),other.derived());
|
||||
: int(InvalidTraversal)>::run(derived(),other.derived());
|
||||
|
||||
#endif // EIGEN_TEST_EVALUATORS
|
||||
|
||||
#ifndef EIGEN_NO_DEBUG
|
||||
checkTransposeAliasing(other.derived());
|
||||
#endif
|
||||
@@ -517,22 +544,28 @@ struct assign_selector;
|
||||
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct assign_selector<Derived,OtherDerived,false,false> {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
|
||||
template<typename ActualDerived, typename ActualOtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { other.evalTo(dst); return dst; }
|
||||
};
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct assign_selector<Derived,OtherDerived,true,false> {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
|
||||
};
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct assign_selector<Derived,OtherDerived,false,true> {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
|
||||
template<typename ActualDerived, typename ActualOtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { Transpose<ActualDerived> dstTrans(dst); other.evalTo(dstTrans); return dst; }
|
||||
};
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct assign_selector<Derived,OtherDerived,true,true> {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
|
||||
};
|
||||
|
||||
@@ -540,18 +573,21 @@ struct assign_selector<Derived,OtherDerived,true,true> {
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
|
||||
{
|
||||
return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
|
||||
{
|
||||
return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
|
||||
@@ -559,6 +595,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& ot
|
||||
|
||||
template<typename Derived>
|
||||
template <typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
|
||||
@@ -566,6 +603,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<Othe
|
||||
|
||||
template<typename Derived>
|
||||
template <typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)
|
||||
{
|
||||
return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());
|
||||
@@ -573,6 +611,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<Othe
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
|
||||
{
|
||||
return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());
|
||||
|
||||
842
Eigen/src/Core/AssignEvaluator.h
Normal file
842
Eigen/src/Core/AssignEvaluator.h
Normal file
@@ -0,0 +1,842 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2011-2013 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_ASSIGN_EVALUATOR_H
|
||||
#define EIGEN_ASSIGN_EVALUATOR_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
// This implementation is based on Assign.h
|
||||
|
||||
namespace internal {
|
||||
|
||||
/***************************************************************************
|
||||
* Part 1 : the logic deciding a strategy for traversal and unrolling *
|
||||
***************************************************************************/
|
||||
|
||||
// copy_using_evaluator_traits is based on assign_traits
|
||||
|
||||
template <typename Derived, typename OtherDerived>
|
||||
struct copy_using_evaluator_traits
|
||||
{
|
||||
public:
|
||||
enum {
|
||||
DstIsAligned = Derived::Flags & AlignedBit,
|
||||
DstHasDirectAccess = Derived::Flags & DirectAccessBit,
|
||||
SrcIsAligned = OtherDerived::Flags & AlignedBit,
|
||||
JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned,
|
||||
SrcEvalBeforeAssign = (evaluator_traits<OtherDerived>::HasEvalTo == 1)
|
||||
};
|
||||
|
||||
private:
|
||||
enum {
|
||||
InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
|
||||
: int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
|
||||
: int(Derived::RowsAtCompileTime),
|
||||
InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
|
||||
: int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
|
||||
: int(Derived::MaxRowsAtCompileTime),
|
||||
MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
|
||||
PacketSize = packet_traits<typename Derived::Scalar>::size
|
||||
};
|
||||
|
||||
enum {
|
||||
StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
|
||||
MightVectorize = StorageOrdersAgree
|
||||
&& (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
|
||||
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
|
||||
&& int(DstIsAligned) && int(SrcIsAligned),
|
||||
MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
|
||||
MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
|
||||
&& (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
|
||||
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
||||
so it's only good for large enough sizes. */
|
||||
MaySliceVectorize = MightVectorize && DstHasDirectAccess
|
||||
&& (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
|
||||
/* slice vectorization can be slow, so we only want it if the slices are big, which is
|
||||
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
|
||||
in a fixed-size matrix */
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
Traversal = int(SrcEvalBeforeAssign) ? int(AllAtOnceTraversal)
|
||||
: int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
|
||||
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
|
||||
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
|
||||
: int(MayLinearize) ? int(LinearTraversal)
|
||||
: int(DefaultTraversal),
|
||||
Vectorized = int(Traversal) == InnerVectorizedTraversal
|
||||
|| int(Traversal) == LinearVectorizedTraversal
|
||||
|| int(Traversal) == SliceVectorizedTraversal
|
||||
};
|
||||
|
||||
private:
|
||||
enum {
|
||||
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
|
||||
MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic
|
||||
&& int(OtherDerived::CoeffReadCost) != Dynamic
|
||||
&& int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
|
||||
MayUnrollInner = int(InnerSize) != Dynamic
|
||||
&& int(OtherDerived::CoeffReadCost) != Dynamic
|
||||
&& int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
|
||||
? (
|
||||
int(MayUnrollCompletely) ? int(CompleteUnrolling)
|
||||
: int(MayUnrollInner) ? int(InnerUnrolling)
|
||||
: int(NoUnrolling)
|
||||
)
|
||||
: int(Traversal) == int(LinearVectorizedTraversal)
|
||||
? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling)
|
||||
: int(NoUnrolling) )
|
||||
: int(Traversal) == int(LinearTraversal)
|
||||
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
|
||||
: int(NoUnrolling) )
|
||||
: int(NoUnrolling)
|
||||
};
|
||||
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
static void debug()
|
||||
{
|
||||
EIGEN_DEBUG_VAR(DstIsAligned)
|
||||
EIGEN_DEBUG_VAR(SrcIsAligned)
|
||||
EIGEN_DEBUG_VAR(JointAlignment)
|
||||
EIGEN_DEBUG_VAR(InnerSize)
|
||||
EIGEN_DEBUG_VAR(InnerMaxSize)
|
||||
EIGEN_DEBUG_VAR(PacketSize)
|
||||
EIGEN_DEBUG_VAR(StorageOrdersAgree)
|
||||
EIGEN_DEBUG_VAR(MightVectorize)
|
||||
EIGEN_DEBUG_VAR(MayLinearize)
|
||||
EIGEN_DEBUG_VAR(MayInnerVectorize)
|
||||
EIGEN_DEBUG_VAR(MayLinearVectorize)
|
||||
EIGEN_DEBUG_VAR(MaySliceVectorize)
|
||||
EIGEN_DEBUG_VAR(Traversal)
|
||||
EIGEN_DEBUG_VAR(UnrollingLimit)
|
||||
EIGEN_DEBUG_VAR(MayUnrollCompletely)
|
||||
EIGEN_DEBUG_VAR(MayUnrollInner)
|
||||
EIGEN_DEBUG_VAR(Unrolling)
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Part 2 : meta-unrollers
|
||||
***************************************************************************/
|
||||
|
||||
/************************
|
||||
*** Default traversal ***
|
||||
************************/
|
||||
|
||||
template<typename Kernel, int Index, int Stop>
|
||||
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
|
||||
typedef typename DstEvaluatorType::XprType DstXprType;
|
||||
|
||||
enum {
|
||||
outer = Index / DstXprType::InnerSizeAtCompileTime,
|
||||
inner = Index % DstXprType::InnerSizeAtCompileTime
|
||||
};
|
||||
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
kernel.assignCoeffByOuterInner(outer, inner);
|
||||
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel, int Stop>
|
||||
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel&) { }
|
||||
};
|
||||
|
||||
template<typename Kernel, int Index, int Stop>
|
||||
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel, int outer)
|
||||
{
|
||||
kernel.assignCoeffByOuterInner(outer, Index);
|
||||
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index+1, Stop>::run(kernel, outer);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel, int Stop>
|
||||
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel&, int) { }
|
||||
};
|
||||
|
||||
/***********************
|
||||
*** Linear traversal ***
|
||||
***********************/
|
||||
|
||||
template<typename Kernel, int Index, int Stop>
|
||||
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel& kernel)
|
||||
{
|
||||
kernel.assignCoeff(Index);
|
||||
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel, int Stop>
|
||||
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel&) { }
|
||||
};
|
||||
|
||||
/**************************
|
||||
*** Inner vectorization ***
|
||||
**************************/
|
||||
|
||||
template<typename Kernel, int Index, int Stop>
|
||||
struct copy_using_evaluator_innervec_CompleteUnrolling
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
|
||||
typedef typename DstEvaluatorType::XprType DstXprType;
|
||||
|
||||
enum {
|
||||
outer = Index / DstXprType::InnerSizeAtCompileTime,
|
||||
inner = Index % DstXprType::InnerSizeAtCompileTime,
|
||||
JointAlignment = Kernel::AssignmentTraits::JointAlignment
|
||||
};
|
||||
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
kernel.template assignPacketByOuterInner<Aligned, JointAlignment>(outer, inner);
|
||||
enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
|
||||
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel, int Stop>
|
||||
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel&) { }
|
||||
};
|
||||
|
||||
template<typename Kernel, int Index, int Stop>
|
||||
struct copy_using_evaluator_innervec_InnerUnrolling
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel, int outer)
|
||||
{
|
||||
kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, Index);
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
|
||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel, int Stop>
|
||||
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &, int) { }
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Part 3 : implementation of all cases
|
||||
***************************************************************************/
|
||||
|
||||
// dense_assignment_loop is based on assign_impl
|
||||
|
||||
template<typename Kernel,
|
||||
int Traversal = Kernel::AssignmentTraits::Traversal,
|
||||
int Unrolling = Kernel::AssignmentTraits::Unrolling>
|
||||
struct dense_assignment_loop;
|
||||
|
||||
/************************
|
||||
*** Default traversal ***
|
||||
************************/
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
|
||||
{
|
||||
static void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::Index Index;
|
||||
|
||||
for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
|
||||
for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
|
||||
kernel.assignCoeffByOuterInner(outer, inner);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
|
||||
{
|
||||
typedef typename Kernel::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
|
||||
const Index outerSize = kernel.outerSize();
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
|
||||
}
|
||||
};
|
||||
|
||||
/***************************
|
||||
*** Linear vectorization ***
|
||||
***************************/
|
||||
|
||||
|
||||
// The goal of unaligned_dense_assignment_loop is simply to factorize the handling
|
||||
// of the non vectorizable beginning and ending parts
|
||||
|
||||
template <bool IsAligned = false>
|
||||
struct unaligned_dense_assignment_loop
|
||||
{
|
||||
// if IsAligned = true, then do nothing
|
||||
template <typename Kernel>
|
||||
static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index, typename Kernel::Index) {}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct unaligned_dense_assignment_loop<false>
|
||||
{
|
||||
// MSVC must not inline this functions. If it does, it fails to optimize the
|
||||
// packet access path.
|
||||
// FIXME check which version exhibits this issue
|
||||
#ifdef _MSC_VER
|
||||
template <typename Kernel>
|
||||
static EIGEN_DONT_INLINE void run(Kernel &kernel,
|
||||
typename Kernel::Index start,
|
||||
typename Kernel::Index end)
|
||||
#else
|
||||
template <typename Kernel>
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel,
|
||||
typename Kernel::Index start,
|
||||
typename Kernel::Index end)
|
||||
#endif
|
||||
{
|
||||
for (typename Kernel::Index index = start; index < end; ++index)
|
||||
kernel.assignCoeff(index);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::Index Index;
|
||||
|
||||
const Index size = kernel.size();
|
||||
typedef packet_traits<typename Kernel::Scalar> PacketTraits;
|
||||
enum {
|
||||
packetSize = PacketTraits::size,
|
||||
dstIsAligned = int(Kernel::AssignmentTraits::DstIsAligned),
|
||||
dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned,
|
||||
srcAlignment = Kernel::AssignmentTraits::JointAlignment
|
||||
};
|
||||
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&kernel.dstEvaluator().coeffRef(0), size);
|
||||
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
|
||||
|
||||
unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
|
||||
|
||||
for(Index index = alignedStart; index < alignedEnd; index += packetSize)
|
||||
kernel.template assignPacket<dstAlignment, srcAlignment>(index);
|
||||
|
||||
unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
|
||||
{
|
||||
typedef typename Kernel::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
|
||||
enum { size = DstXprType::SizeAtCompileTime,
|
||||
packetSize = packet_traits<typename Kernel::Scalar>::size,
|
||||
alignedSize = (size/packetSize)*packetSize };
|
||||
|
||||
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
|
||||
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
/**************************
|
||||
*** Inner vectorization ***
|
||||
**************************/
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
static inline void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::Index Index;
|
||||
|
||||
const Index innerSize = kernel.innerSize();
|
||||
const Index outerSize = kernel.outerSize();
|
||||
const Index packetSize = packet_traits<typename Kernel::Scalar>::size;
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
for(Index inner = 0; inner < innerSize; inner+=packetSize)
|
||||
kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, inner);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
|
||||
{
|
||||
typedef typename Kernel::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
const Index outerSize = kernel.outerSize();
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
|
||||
}
|
||||
};
|
||||
|
||||
/***********************
|
||||
*** Linear traversal ***
|
||||
***********************/
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
|
||||
{
|
||||
static inline void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::Index Index;
|
||||
const Index size = kernel.size();
|
||||
for(Index i = 0; i < size; ++i)
|
||||
kernel.assignCoeff(i);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
/**************************
|
||||
*** Slice vectorization ***
|
||||
***************************/
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
static inline void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::Index Index;
|
||||
typedef packet_traits<typename Kernel::Scalar> PacketTraits;
|
||||
enum {
|
||||
packetSize = PacketTraits::size,
|
||||
alignable = PacketTraits::AlignedOnScalar,
|
||||
dstAlignment = alignable ? Aligned : int(Kernel::AssignmentTraits::DstIsAligned)
|
||||
};
|
||||
const Index packetAlignedMask = packetSize - 1;
|
||||
const Index innerSize = kernel.innerSize();
|
||||
const Index outerSize = kernel.outerSize();
|
||||
const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
|
||||
Index alignedStart = ((!alignable) || Kernel::AssignmentTraits::DstIsAligned) ? 0
|
||||
: internal::first_aligned(&kernel.dstEvaluator().coeffRef(0,0), innerSize);
|
||||
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
{
|
||||
const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
|
||||
// do the non-vectorizable part of the assignment
|
||||
for(Index inner = 0; inner<alignedStart ; ++inner)
|
||||
kernel.assignCoeffByOuterInner(outer, inner);
|
||||
|
||||
// do the vectorizable part of the assignment
|
||||
for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
|
||||
kernel.template assignPacketByOuterInner<dstAlignment, Unaligned>(outer, inner);
|
||||
|
||||
// do the non-vectorizable part of the assignment
|
||||
for(Index inner = alignedEnd; inner<innerSize ; ++inner)
|
||||
kernel.assignCoeffByOuterInner(outer, inner);
|
||||
|
||||
alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/****************************
|
||||
*** All-at-once traversal ***
|
||||
****************************/
|
||||
|
||||
// TODO: this 'AllAtOnceTraversal' should be dropped or caught earlier (Gael)
|
||||
// Indeed, what to do with the kernel's functor??
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, AllAtOnceTraversal, NoUnrolling>
|
||||
{
|
||||
static inline void run(Kernel & kernel)
|
||||
{
|
||||
// Evaluate rhs in temporary to prevent aliasing problems in a = a * a;
|
||||
// TODO: Do not pass the xpr object to evalTo() (Jitse)
|
||||
kernel.srcEvaluator().evalTo(kernel.dstEvaluator(), kernel.dstExpression());
|
||||
}
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Part 4 : Generic Assignment routine
|
||||
***************************************************************************/
|
||||
|
||||
// This class generalize the assignment of a coefficient (or packet) from one dense evaluator
|
||||
// to another dense writable evaluator.
|
||||
// It is parametrized by the two evaluators, and the actual assignment functor.
|
||||
// This abstraction level permits to keep the evaluation loops as simple and as generic as possible.
|
||||
// One can customize the assignment using this generic dense_assignment_kernel with different
|
||||
// functors, or by completely overloading it, by-passing a functor.
|
||||
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
|
||||
class generic_dense_assignment_kernel
|
||||
{
|
||||
protected:
|
||||
typedef typename DstEvaluatorTypeT::XprType DstXprType;
|
||||
typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
|
||||
public:
|
||||
|
||||
typedef DstEvaluatorTypeT DstEvaluatorType;
|
||||
typedef SrcEvaluatorTypeT SrcEvaluatorType;
|
||||
typedef typename DstEvaluatorType::Scalar Scalar;
|
||||
typedef typename DstEvaluatorType::Index Index;
|
||||
typedef copy_using_evaluator_traits<DstXprType, SrcXprType> AssignmentTraits;
|
||||
|
||||
|
||||
generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
|
||||
: m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
|
||||
{}
|
||||
|
||||
Index size() const { return m_dstExpr.size(); }
|
||||
Index innerSize() const { return m_dstExpr.innerSize(); }
|
||||
Index outerSize() const { return m_dstExpr.outerSize(); }
|
||||
Index outerStride() const { return m_dstExpr.outerStride(); }
|
||||
|
||||
// TODO get rid of this one:
|
||||
DstXprType& dstExpression() const { return m_dstExpr; }
|
||||
|
||||
DstEvaluatorType& dstEvaluator() { return m_dst; }
|
||||
const SrcEvaluatorType& srcEvaluator() const { return m_src; }
|
||||
|
||||
void assignCoeff(Index row, Index col)
|
||||
{
|
||||
m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
|
||||
}
|
||||
|
||||
void assignCoeff(Index index)
|
||||
{
|
||||
m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
|
||||
}
|
||||
|
||||
void assignCoeffByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
Index row = rowIndexByOuterInner(outer, inner);
|
||||
Index col = colIndexByOuterInner(outer, inner);
|
||||
assignCoeff(row, col);
|
||||
}
|
||||
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
void assignPacket(Index row, Index col)
|
||||
{
|
||||
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode>(row,col));
|
||||
}
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
void assignPacket(Index index)
|
||||
{
|
||||
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode>(index));
|
||||
}
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
void assignPacketByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
Index row = rowIndexByOuterInner(outer, inner);
|
||||
Index col = colIndexByOuterInner(outer, inner);
|
||||
assignPacket<StoreMode,LoadMode>(row, col);
|
||||
}
|
||||
|
||||
static Index rowIndexByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
typedef typename DstEvaluatorType::ExpressionTraits Traits;
|
||||
return int(Traits::RowsAtCompileTime) == 1 ? 0
|
||||
: int(Traits::ColsAtCompileTime) == 1 ? inner
|
||||
: int(Traits::Flags)&RowMajorBit ? outer
|
||||
: inner;
|
||||
}
|
||||
|
||||
static Index colIndexByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
typedef typename DstEvaluatorType::ExpressionTraits Traits;
|
||||
return int(Traits::ColsAtCompileTime) == 1 ? 0
|
||||
: int(Traits::RowsAtCompileTime) == 1 ? inner
|
||||
: int(Traits::Flags)&RowMajorBit ? inner
|
||||
: outer;
|
||||
}
|
||||
|
||||
protected:
|
||||
DstEvaluatorType& m_dst;
|
||||
const SrcEvaluatorType& m_src;
|
||||
const Functor &m_functor;
|
||||
// TODO find a way to avoid the needs of the original expression
|
||||
DstXprType& m_dstExpr;
|
||||
};
|
||||
|
||||
template<typename DstXprType, typename SrcXprType, typename Functor>
|
||||
void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
|
||||
{
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
// TODO these traits should be computed from information provided by the evaluators
|
||||
internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
|
||||
#endif
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
|
||||
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
|
||||
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
|
||||
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
SrcEvaluatorType srcEvaluator(src);
|
||||
|
||||
typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
|
||||
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
|
||||
|
||||
dense_assignment_loop<Kernel>::run(kernel);
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
|
||||
{
|
||||
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>());
|
||||
}
|
||||
|
||||
/***************************************************************************
|
||||
* Part 5 : Entry points
|
||||
***************************************************************************/
|
||||
|
||||
// Based on DenseBase::LazyAssign()
|
||||
// The following functions are just for testing and they are meant to be moved to operator= and the likes.
|
||||
|
||||
template<typename DstXprType, template <typename> class StorageBase, typename SrcXprType>
|
||||
EIGEN_STRONG_INLINE
|
||||
const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst,
|
||||
const EigenBase<SrcXprType>& src)
|
||||
{
|
||||
return noalias_copy_using_evaluator(dst.expression(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
|
||||
}
|
||||
|
||||
template<typename XprType, int AssumeAliasing = evaluator_traits<XprType>::AssumeAliasing>
|
||||
struct AddEvalIfAssumingAliasing;
|
||||
|
||||
template<typename XprType>
|
||||
struct AddEvalIfAssumingAliasing<XprType, 0>
|
||||
{
|
||||
static const XprType& run(const XprType& xpr)
|
||||
{
|
||||
return xpr;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename XprType>
|
||||
struct AddEvalIfAssumingAliasing<XprType, 1>
|
||||
{
|
||||
static const EvalToTemp<XprType> run(const XprType& xpr)
|
||||
{
|
||||
return EvalToTemp<XprType>(xpr);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename DstXprType, typename SrcXprType, typename Functor>
|
||||
EIGEN_STRONG_INLINE
|
||||
const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
|
||||
{
|
||||
return noalias_copy_using_evaluator(dst.const_cast_derived(),
|
||||
AddEvalIfAssumingAliasing<SrcXprType>::run(src.derived()),
|
||||
func
|
||||
);
|
||||
}
|
||||
|
||||
// this mimics operator=
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
EIGEN_STRONG_INLINE
|
||||
const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src)
|
||||
{
|
||||
return copy_using_evaluator(dst.const_cast_derived(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType, typename Functor>
|
||||
EIGEN_STRONG_INLINE
|
||||
const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
|
||||
{
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
|
||||
#endif
|
||||
#ifdef EIGEN_NO_AUTOMATIC_RESIZING
|
||||
eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size())
|
||||
: (dst.rows() == src.rows() && dst.cols() == src.cols())))
|
||||
&& "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
|
||||
#else
|
||||
dst.const_cast_derived().resizeLike(src.derived());
|
||||
#endif
|
||||
call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func);
|
||||
return dst.derived();
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType, typename Functor>
|
||||
EIGEN_STRONG_INLINE
|
||||
const DstXprType& noalias_copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
|
||||
{
|
||||
call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func);
|
||||
return dst.derived();
|
||||
}
|
||||
|
||||
// Based on DenseBase::swap()
|
||||
// TODO: Check whether we need to do something special for swapping two
|
||||
// Arrays or Matrices. (Jitse)
|
||||
|
||||
// Overload default assignPacket behavior for swapping them
|
||||
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT>
|
||||
class swap_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar> >
|
||||
{
|
||||
typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar> > Base;
|
||||
typedef typename DstEvaluatorTypeT::PacketScalar PacketScalar;
|
||||
using Base::m_dst;
|
||||
using Base::m_src;
|
||||
using Base::m_functor;
|
||||
|
||||
public:
|
||||
typedef typename Base::Scalar Scalar;
|
||||
typedef typename Base::Index Index;
|
||||
typedef typename Base::DstXprType DstXprType;
|
||||
|
||||
swap_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, DstXprType& dstExpr)
|
||||
: Base(dst, src, swap_assign_op<Scalar>(), dstExpr)
|
||||
{}
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
void assignPacket(Index row, Index col)
|
||||
{
|
||||
m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(row,col), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(row,col));
|
||||
}
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
void assignPacket(Index index)
|
||||
{
|
||||
m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(index), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(index));
|
||||
}
|
||||
|
||||
// TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
|
||||
template<int StoreMode, int LoadMode>
|
||||
void assignPacketByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
Index row = Base::rowIndexByOuterInner(outer, inner);
|
||||
Index col = Base::colIndexByOuterInner(outer, inner);
|
||||
assignPacket<StoreMode,LoadMode>(row, col);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src)
|
||||
{
|
||||
// TODO there is too much redundancy with call_dense_assignment_loop
|
||||
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
|
||||
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
|
||||
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
|
||||
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
SrcEvaluatorType srcEvaluator(src);
|
||||
|
||||
typedef swap_kernel<DstEvaluatorType,SrcEvaluatorType> Kernel;
|
||||
Kernel kernel(dstEvaluator, srcEvaluator, dst.const_cast_derived());
|
||||
|
||||
dense_assignment_loop<Kernel>::run(kernel);
|
||||
}
|
||||
|
||||
// Based on MatrixBase::operator+= (in CwiseBinaryOp.h)
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
void add_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
|
||||
{
|
||||
typedef typename DstXprType::Scalar Scalar;
|
||||
copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>());
|
||||
}
|
||||
|
||||
// Based on ArrayBase::operator+=
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
void add_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
|
||||
{
|
||||
typedef typename DstXprType::Scalar Scalar;
|
||||
copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>());
|
||||
}
|
||||
|
||||
// TODO: Add add_assign_using_evaluator for EigenBase ? (Jitse)
|
||||
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
void subtract_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
|
||||
{
|
||||
typedef typename DstXprType::Scalar Scalar;
|
||||
copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>());
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
void subtract_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
|
||||
{
|
||||
typedef typename DstXprType::Scalar Scalar;
|
||||
copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>());
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
void multiply_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
|
||||
{
|
||||
typedef typename DstXprType::Scalar Scalar;
|
||||
copy_using_evaluator(dst.derived(), src.derived(), mul_assign_op<Scalar>());
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
void divide_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
|
||||
{
|
||||
typedef typename DstXprType::Scalar Scalar;
|
||||
copy_using_evaluator(dst.derived(), src.derived(), div_assign_op<Scalar>());
|
||||
}
|
||||
|
||||
|
||||
} // namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_ASSIGN_EVALUATOR_H
|
||||
@@ -202,6 +202,7 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(atan, Atan)
|
||||
//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln)
|
||||
|
||||
@@ -21,6 +21,9 @@ namespace Eigen {
|
||||
* \param XprType the type of the expression in which we are taking a block
|
||||
* \param BlockRows the number of rows of the block we are taking at compile time (optional)
|
||||
* \param BlockCols the number of columns of the block we are taking at compile time (optional)
|
||||
* \param InnerPanel is true, if the block maps to a set of rows of a row major matrix or
|
||||
* to set of columns of a column major matrix (optional). The parameter allows to determine
|
||||
* at compile time whether aligned access is possible on the block expression.
|
||||
*
|
||||
* This class represents an expression of either a fixed-size or dynamic-size block. It is the return
|
||||
* type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
|
||||
@@ -80,8 +83,8 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
|
||||
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
|
||||
&& (InnerStrideAtCompileTime == 1)
|
||||
? PacketAccessBit : 0,
|
||||
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0,
|
||||
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
|
||||
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0,
|
||||
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (traits<XprType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
|
||||
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
|
||||
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
|
||||
Flags0 = traits<XprType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
|
||||
@@ -111,6 +114,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
|
||||
|
||||
/** Column or Row constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Block(XprType& xpr, Index i) : Impl(xpr,i)
|
||||
{
|
||||
eigen_assert( (i>=0) && (
|
||||
@@ -120,6 +124,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
|
||||
|
||||
/** Fixed-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Block(XprType& xpr, Index a_startRow, Index a_startCol)
|
||||
: Impl(xpr, a_startRow, a_startCol)
|
||||
{
|
||||
@@ -130,6 +135,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
|
||||
|
||||
/** Dynamic-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Block(XprType& xpr,
|
||||
Index a_startRow, Index a_startCol,
|
||||
Index blockRows, Index blockCols)
|
||||
@@ -153,8 +159,9 @@ class BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, Dense>
|
||||
public:
|
||||
typedef Impl Base;
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
|
||||
inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
|
||||
inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol) : Impl(xpr, a_startRow, a_startCol) {}
|
||||
EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
|
||||
EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol) : Impl(xpr, a_startRow, a_startCol) {}
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol, Index blockRows, Index blockCols)
|
||||
: Impl(xpr, a_startRow, a_startCol, blockRows, blockCols) {}
|
||||
};
|
||||
@@ -176,6 +183,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
|
||||
/** Column or Row constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr, Index i)
|
||||
: m_xpr(xpr),
|
||||
// It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime,
|
||||
@@ -190,6 +198,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
|
||||
/** Fixed-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr, Index a_startRow, Index a_startCol)
|
||||
: m_xpr(xpr), m_startRow(a_startRow), m_startCol(a_startCol),
|
||||
m_blockRows(BlockRows), m_blockCols(BlockCols)
|
||||
@@ -197,6 +206,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
|
||||
/** Dynamic-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr,
|
||||
Index a_startRow, Index a_startCol,
|
||||
Index blockRows, Index blockCols)
|
||||
@@ -204,9 +214,10 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
m_blockRows(blockRows), m_blockCols(blockCols)
|
||||
{}
|
||||
|
||||
inline Index rows() const { return m_blockRows.value(); }
|
||||
inline Index cols() const { return m_blockCols.value(); }
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(XprType)
|
||||
@@ -214,17 +225,20 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return m_xpr.derived()
|
||||
.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(XprType)
|
||||
@@ -233,6 +247,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_xpr.const_cast_derived()
|
||||
@@ -240,6 +255,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_xpr
|
||||
@@ -279,21 +295,24 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \sa MapBase::data() */
|
||||
inline const Scalar* data() const;
|
||||
inline Index innerStride() const;
|
||||
inline Index outerStride() const;
|
||||
EIGEN_DEVICE_FUNC inline const Scalar* data() const;
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const;
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const;
|
||||
#endif
|
||||
|
||||
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
|
||||
{
|
||||
return m_xpr;
|
||||
}
|
||||
|
||||
Index startRow() const
|
||||
EIGEN_DEVICE_FUNC
|
||||
Index startRow() const
|
||||
{
|
||||
return m_startRow.value();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Index startCol() const
|
||||
{
|
||||
return m_startCol.value();
|
||||
@@ -322,6 +341,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
|
||||
/** Column or Row constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr, Index i)
|
||||
: Base(internal::const_cast_ptr(&xpr.coeffRef(
|
||||
(BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0,
|
||||
@@ -335,6 +355,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
|
||||
/** Fixed-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
|
||||
: Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol))), m_xpr(xpr)
|
||||
{
|
||||
@@ -343,6 +364,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
|
||||
/** Dynamic-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr,
|
||||
Index startRow, Index startCol,
|
||||
Index blockRows, Index blockCols)
|
||||
@@ -352,12 +374,14 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
init();
|
||||
}
|
||||
|
||||
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
|
||||
{
|
||||
return m_xpr;
|
||||
}
|
||||
|
||||
/** \sa MapBase::innerStride() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const
|
||||
{
|
||||
return internal::traits<BlockType>::HasSameStorageOrderAsXprType
|
||||
@@ -366,6 +390,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
}
|
||||
|
||||
/** \sa MapBase::outerStride() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const
|
||||
{
|
||||
return m_outerStride;
|
||||
@@ -379,6 +404,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \internal used by allowAligned() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
|
||||
: Base(data, blockRows, blockCols), m_xpr(xpr)
|
||||
{
|
||||
@@ -387,6 +413,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
#endif
|
||||
|
||||
protected:
|
||||
EIGEN_DEVICE_FUNC
|
||||
void init()
|
||||
{
|
||||
m_outerStride = internal::traits<BlockType>::HasSameStorageOrderAsXprType
|
||||
|
||||
@@ -29,9 +29,9 @@ struct all_unroller
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct all_unroller<Derived, 1>
|
||||
struct all_unroller<Derived, 0>
|
||||
{
|
||||
static inline bool run(const Derived &mat) { return mat.coeff(0, 0); }
|
||||
static inline bool run(const Derived &/*mat*/) { return true; }
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
@@ -55,9 +55,9 @@ struct any_unroller
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct any_unroller<Derived, 1>
|
||||
struct any_unroller<Derived, 0>
|
||||
{
|
||||
static inline bool run(const Derived &mat) { return mat.coeff(0, 0); }
|
||||
static inline bool run(const Derived & /*mat*/) { return false; }
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
|
||||
@@ -8,3 +8,4 @@ INSTALL(FILES
|
||||
ADD_SUBDIRECTORY(products)
|
||||
ADD_SUBDIRECTORY(util)
|
||||
ADD_SUBDIRECTORY(arch)
|
||||
ADD_SUBDIRECTORY(functors)
|
||||
|
||||
@@ -30,6 +30,7 @@ struct CommaInitializer
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::Index Index;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CommaInitializer(XprType& xpr, const Scalar& s)
|
||||
: m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1)
|
||||
{
|
||||
@@ -37,13 +38,27 @@ struct CommaInitializer
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CommaInitializer(XprType& xpr, const DenseBase<OtherDerived>& other)
|
||||
: m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows())
|
||||
{
|
||||
m_xpr.block(0, 0, other.rows(), other.cols()) = other;
|
||||
}
|
||||
|
||||
/* Copy/Move constructor which transfers ownership. This is crucial in
|
||||
* absence of return value optimization to avoid assertions during destruction. */
|
||||
// FIXME in C++11 mode this could be replaced by a proper RValue constructor
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CommaInitializer(const CommaInitializer& o)
|
||||
: m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) {
|
||||
// Mark original object as finished. In absence of R-value references we need to const_cast:
|
||||
const_cast<CommaInitializer&>(o).m_row = m_xpr.rows();
|
||||
const_cast<CommaInitializer&>(o).m_col = m_xpr.cols();
|
||||
const_cast<CommaInitializer&>(o).m_currentBlockRows = 0;
|
||||
}
|
||||
|
||||
/* inserts a scalar value in the target matrix */
|
||||
EIGEN_DEVICE_FUNC
|
||||
CommaInitializer& operator,(const Scalar& s)
|
||||
{
|
||||
if (m_col==m_xpr.cols())
|
||||
@@ -63,6 +78,7 @@ struct CommaInitializer
|
||||
|
||||
/* inserts a matrix expression in the target matrix */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
CommaInitializer& operator,(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
if(other.cols()==0 || other.rows()==0)
|
||||
@@ -88,6 +104,7 @@ struct CommaInitializer
|
||||
return *this;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ~CommaInitializer()
|
||||
{
|
||||
eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows()
|
||||
@@ -102,9 +119,10 @@ struct CommaInitializer
|
||||
* quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished());
|
||||
* \endcode
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline XprType& finished() { return m_xpr; }
|
||||
|
||||
XprType& m_xpr; // target expression
|
||||
XprType& m_xpr; // target expression
|
||||
Index m_row; // current row id
|
||||
Index m_col; // current col id
|
||||
Index m_currentBlockRows; // current block height
|
||||
|
||||
1121
Eigen/src/Core/CoreEvaluators.h
Normal file
1121
Eigen/src/Core/CoreEvaluators.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -122,6 +122,7 @@ class CwiseBinaryOp : internal::no_assignment_operator,
|
||||
typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
|
||||
typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp())
|
||||
: m_lhs(aLhs), m_rhs(aRhs), m_functor(func)
|
||||
{
|
||||
@@ -131,6 +132,7 @@ class CwiseBinaryOp : internal::no_assignment_operator,
|
||||
eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rows() const {
|
||||
// return the fixed size type if available to enable compile time optimizations
|
||||
if (internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic)
|
||||
@@ -138,6 +140,7 @@ class CwiseBinaryOp : internal::no_assignment_operator,
|
||||
else
|
||||
return m_lhs.rows();
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index cols() const {
|
||||
// return the fixed size type if available to enable compile time optimizations
|
||||
if (internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic)
|
||||
@@ -147,10 +150,13 @@ class CwiseBinaryOp : internal::no_assignment_operator,
|
||||
}
|
||||
|
||||
/** \returns the left hand side nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const _LhsNested& lhs() const { return m_lhs; }
|
||||
/** \returns the right hand side nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const _RhsNested& rhs() const { return m_rhs; }
|
||||
/** \returns the functor representing the binary operation */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const BinaryOp& functor() const { return m_functor; }
|
||||
|
||||
protected:
|
||||
@@ -169,6 +175,7 @@ class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Dense>
|
||||
typedef typename internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE( Derived )
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return derived().functor()(derived().lhs().coeff(rowId, colId),
|
||||
@@ -182,6 +189,7 @@ class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Dense>
|
||||
derived().rhs().template packet<LoadMode>(rowId, colId));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
|
||||
{
|
||||
return derived().functor()(derived().lhs().coeff(index),
|
||||
@@ -227,3 +235,4 @@ MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_CWISE_BINARY_OP_H
|
||||
|
||||
|
||||
@@ -54,6 +54,7 @@ class CwiseNullaryOp : internal::no_assignment_operator,
|
||||
typedef typename internal::dense_xpr_base<CwiseNullaryOp>::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
CwiseNullaryOp(Index nbRows, Index nbCols, const NullaryOp& func = NullaryOp())
|
||||
: m_rows(nbRows), m_cols(nbCols), m_functor(func)
|
||||
{
|
||||
@@ -63,9 +64,12 @@ class CwiseNullaryOp : internal::no_assignment_operator,
|
||||
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_functor(rowId, colId);
|
||||
@@ -77,6 +81,7 @@ class CwiseNullaryOp : internal::no_assignment_operator,
|
||||
return m_functor.packetOp(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
|
||||
{
|
||||
return m_functor(index);
|
||||
@@ -89,6 +94,7 @@ class CwiseNullaryOp : internal::no_assignment_operator,
|
||||
}
|
||||
|
||||
/** \returns the functor representing the nullary operation */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const NullaryOp& functor() const { return m_functor; }
|
||||
|
||||
protected:
|
||||
@@ -132,6 +138,9 @@ DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f
|
||||
*
|
||||
* The template parameter \a CustomNullaryOp is the type of the functor.
|
||||
*
|
||||
* Here is an example with C++11 random generators: \include random_cpp11.cpp
|
||||
* Output: \verbinclude random_cpp11.out
|
||||
*
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
@@ -740,6 +749,7 @@ namespace internal {
|
||||
template<typename Derived, bool Big = (Derived::SizeAtCompileTime>=16)>
|
||||
struct setIdentity_impl
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& m)
|
||||
{
|
||||
return m = Derived::Identity(m.rows(), m.cols());
|
||||
@@ -750,6 +760,7 @@ template<typename Derived>
|
||||
struct setIdentity_impl<Derived, true>
|
||||
{
|
||||
typedef typename Derived::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& m)
|
||||
{
|
||||
m.setZero();
|
||||
|
||||
@@ -64,20 +64,26 @@ class CwiseUnaryOp : internal::no_assignment_operator,
|
||||
typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
|
||||
: m_xpr(xpr), m_functor(func) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); }
|
||||
|
||||
/** \returns the functor representing the unary operation */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const UnaryOp& functor() const { return m_functor; }
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
nestedExpression() const { return m_xpr; }
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::remove_all<typename XprType::Nested>::type&
|
||||
nestedExpression() { return m_xpr.const_cast_derived(); }
|
||||
|
||||
@@ -98,6 +104,7 @@ class CwiseUnaryOpImpl<UnaryOp,XprType,Dense>
|
||||
typedef typename internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return derived().functor()(derived().nestedExpression().coeff(rowId, colId));
|
||||
@@ -109,12 +116,14 @@ class CwiseUnaryOpImpl<UnaryOp,XprType,Dense>
|
||||
return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(rowId, colId));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
|
||||
{
|
||||
return derived().functor()(derived().nestedExpression().coeff(index));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
|
||||
{
|
||||
return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(index));
|
||||
|
||||
@@ -182,6 +182,7 @@ template<typename Derived> class DenseBase
|
||||
|
||||
/** \returns the number of nonzero coefficients which is in practice the number
|
||||
* of stored coefficients. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index nonZeros() const { return size(); }
|
||||
/** \returns true if either the number of rows or the number of columns is equal to 1.
|
||||
* In other words, this function returns
|
||||
@@ -193,6 +194,7 @@ template<typename Derived> class DenseBase
|
||||
* \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension
|
||||
* with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a
|
||||
* column-major matrix, and the number of rows for a row-major matrix. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Index outerSize() const
|
||||
{
|
||||
return IsVectorAtCompileTime ? 1
|
||||
@@ -204,6 +206,7 @@ template<typename Derived> class DenseBase
|
||||
* \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension
|
||||
* with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a
|
||||
* column-major matrix, and the number of columns for a row-major matrix. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Index innerSize() const
|
||||
{
|
||||
return IsVectorAtCompileTime ? this->size()
|
||||
@@ -214,6 +217,7 @@ template<typename Derived> class DenseBase
|
||||
* Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
|
||||
* nothing else.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index newSize)
|
||||
{
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(newSize);
|
||||
@@ -224,6 +228,7 @@ template<typename Derived> class DenseBase
|
||||
* Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
|
||||
* nothing else.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index nbRows, Index nbCols)
|
||||
{
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(nbRows);
|
||||
@@ -247,42 +252,54 @@ template<typename Derived> class DenseBase
|
||||
|
||||
/** Copies \a other into *this. \returns a reference to *this. */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const DenseBase<OtherDerived>& other);
|
||||
|
||||
/** Special case of the template operator=, in order to prevent the compiler
|
||||
* from generating a default operator= (issue hit with g++ 4.1)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const DenseBase& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const EigenBase<OtherDerived> &other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator+=(const EigenBase<OtherDerived> &other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator-=(const EigenBase<OtherDerived> &other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const ReturnByValue<OtherDerived>& func);
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** Copies \a other into *this without evaluating other. \returns a reference to *this. */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& lazyAssign(const DenseBase<OtherDerived>& other);
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
CommaInitializer<Derived> operator<< (const Scalar& s);
|
||||
|
||||
template<unsigned int Added,unsigned int Removed>
|
||||
const Flagged<Derived, Added, Removed> flagged() const;
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Eigen::Transpose<Derived> transpose();
|
||||
typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
|
||||
typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
ConstTransposeReturnType transpose() const;
|
||||
EIGEN_DEVICE_FUNC
|
||||
void transposeInPlace();
|
||||
#ifndef EIGEN_NO_DEBUG
|
||||
protected:
|
||||
@@ -292,65 +309,68 @@ template<typename Derived> class DenseBase
|
||||
#endif
|
||||
|
||||
|
||||
static const ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType
|
||||
Constant(Index rows, Index cols, const Scalar& value);
|
||||
static const ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType
|
||||
Constant(Index size, const Scalar& value);
|
||||
static const ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType
|
||||
Constant(const Scalar& value);
|
||||
|
||||
static const SequentialLinSpacedReturnType
|
||||
EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
|
||||
LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high);
|
||||
static const RandomAccessLinSpacedReturnType
|
||||
EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
|
||||
LinSpaced(Index size, const Scalar& low, const Scalar& high);
|
||||
static const SequentialLinSpacedReturnType
|
||||
EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
|
||||
LinSpaced(Sequential_t, const Scalar& low, const Scalar& high);
|
||||
static const RandomAccessLinSpacedReturnType
|
||||
EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
|
||||
LinSpaced(const Scalar& low, const Scalar& high);
|
||||
|
||||
template<typename CustomNullaryOp>
|
||||
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
|
||||
static const CwiseNullaryOp<CustomNullaryOp, Derived>
|
||||
NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
|
||||
template<typename CustomNullaryOp>
|
||||
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
|
||||
static const CwiseNullaryOp<CustomNullaryOp, Derived>
|
||||
NullaryExpr(Index size, const CustomNullaryOp& func);
|
||||
template<typename CustomNullaryOp>
|
||||
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
|
||||
static const CwiseNullaryOp<CustomNullaryOp, Derived>
|
||||
NullaryExpr(const CustomNullaryOp& func);
|
||||
|
||||
static const ConstantReturnType Zero(Index rows, Index cols);
|
||||
static const ConstantReturnType Zero(Index size);
|
||||
static const ConstantReturnType Zero();
|
||||
static const ConstantReturnType Ones(Index rows, Index cols);
|
||||
static const ConstantReturnType Ones(Index size);
|
||||
static const ConstantReturnType Ones();
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size);
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Zero();
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols);
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size);
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Ones();
|
||||
|
||||
void fill(const Scalar& value);
|
||||
Derived& setConstant(const Scalar& value);
|
||||
Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
|
||||
Derived& setLinSpaced(const Scalar& low, const Scalar& high);
|
||||
Derived& setZero();
|
||||
Derived& setOnes();
|
||||
Derived& setRandom();
|
||||
EIGEN_DEVICE_FUNC void fill(const Scalar& value);
|
||||
EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value);
|
||||
EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
|
||||
EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high);
|
||||
EIGEN_DEVICE_FUNC Derived& setZero();
|
||||
EIGEN_DEVICE_FUNC Derived& setOnes();
|
||||
EIGEN_DEVICE_FUNC Derived& setRandom();
|
||||
|
||||
template<typename OtherDerived>
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC
|
||||
bool isApprox(const DenseBase<OtherDerived>& other,
|
||||
const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
EIGEN_DEVICE_FUNC
|
||||
bool isMuchSmallerThan(const RealScalar& other,
|
||||
const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
template<typename OtherDerived>
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC
|
||||
bool isMuchSmallerThan(const DenseBase<OtherDerived>& other,
|
||||
const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
|
||||
bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
|
||||
inline bool hasNaN() const;
|
||||
inline bool allFinite() const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Derived& operator*=(const Scalar& other);
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Derived& operator/=(const Scalar& other);
|
||||
|
||||
typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
|
||||
@@ -359,6 +379,7 @@ template<typename Derived> class DenseBase
|
||||
* Notice that in the case of a plain matrix or vector (not an expression) this function just returns
|
||||
* a const reference, in order to avoid a useless copy.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE EvalReturnType eval() const
|
||||
{
|
||||
// Even though MSVC does not honor strong inlining when the return type
|
||||
@@ -371,6 +392,7 @@ template<typename Derived> class DenseBase
|
||||
*
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void swap(const DenseBase<OtherDerived>& other,
|
||||
int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase)
|
||||
{
|
||||
@@ -381,46 +403,52 @@ template<typename Derived> class DenseBase
|
||||
*
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void swap(PlainObjectBase<OtherDerived>& other)
|
||||
{
|
||||
SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
|
||||
}
|
||||
|
||||
|
||||
inline const NestByValue<Derived> nestByValue() const;
|
||||
inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
|
||||
inline ForceAlignedAccess<Derived> forceAlignedAccess();
|
||||
template<bool Enable> inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
|
||||
template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
|
||||
EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const;
|
||||
EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
|
||||
EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();
|
||||
template<bool Enable> EIGEN_DEVICE_FUNC
|
||||
inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
|
||||
template<bool Enable> EIGEN_DEVICE_FUNC
|
||||
inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
|
||||
|
||||
Scalar sum() const;
|
||||
Scalar mean() const;
|
||||
Scalar trace() const;
|
||||
EIGEN_DEVICE_FUNC Scalar sum() const;
|
||||
EIGEN_DEVICE_FUNC Scalar mean() const;
|
||||
EIGEN_DEVICE_FUNC Scalar trace() const;
|
||||
|
||||
Scalar prod() const;
|
||||
EIGEN_DEVICE_FUNC Scalar prod() const;
|
||||
|
||||
typename internal::traits<Derived>::Scalar minCoeff() const;
|
||||
typename internal::traits<Derived>::Scalar maxCoeff() const;
|
||||
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;
|
||||
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
|
||||
|
||||
template<typename IndexType>
|
||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
||||
typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
|
||||
template<typename IndexType>
|
||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
||||
typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;
|
||||
template<typename IndexType>
|
||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
||||
typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;
|
||||
template<typename IndexType>
|
||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
||||
typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;
|
||||
|
||||
template<typename BinaryOp>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::result_of<BinaryOp(typename internal::traits<Derived>::Scalar)>::type
|
||||
redux(const BinaryOp& func) const;
|
||||
|
||||
template<typename Visitor>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void visit(Visitor& func) const;
|
||||
|
||||
inline const WithFormat<Derived> format(const IOFormat& fmt) const;
|
||||
|
||||
/** \returns the unique coefficient of a 1x1 expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
CoeffReturnType value() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
|
||||
@@ -428,8 +456,8 @@ template<typename Derived> class DenseBase
|
||||
return derived().coeff(0,0);
|
||||
}
|
||||
|
||||
bool all(void) const;
|
||||
bool any(void) const;
|
||||
bool all() const;
|
||||
bool any() const;
|
||||
Index count() const;
|
||||
|
||||
typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
|
||||
@@ -478,27 +506,18 @@ template<typename Derived> class DenseBase
|
||||
# endif
|
||||
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
|
||||
Block<Derived> corner(CornerType type, Index cRows, Index cCols);
|
||||
const Block<Derived> corner(CornerType type, Index cRows, Index cCols) const;
|
||||
template<int CRows, int CCols>
|
||||
Block<Derived, CRows, CCols> corner(CornerType type);
|
||||
template<int CRows, int CCols>
|
||||
const Block<Derived, CRows, CCols> corner(CornerType type) const;
|
||||
|
||||
#endif // EIGEN2_SUPPORT
|
||||
|
||||
|
||||
// disable the use of evalTo for dense objects with a nice compilation error
|
||||
template<typename Dest> inline void evalTo(Dest& ) const
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void evalTo(Dest& ) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Dest,void>::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
|
||||
}
|
||||
|
||||
protected:
|
||||
/** Default constructor. Do nothing. */
|
||||
DenseBase()
|
||||
EIGEN_DEVICE_FUNC DenseBase()
|
||||
{
|
||||
/* Just checks for self-consistency of the flags.
|
||||
* Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down
|
||||
@@ -511,9 +530,9 @@ template<typename Derived> class DenseBase
|
||||
}
|
||||
|
||||
private:
|
||||
explicit DenseBase(int);
|
||||
DenseBase(int,int);
|
||||
template<typename OtherDerived> explicit DenseBase(const DenseBase<OtherDerived>&);
|
||||
EIGEN_DEVICE_FUNC explicit DenseBase(int);
|
||||
EIGEN_DEVICE_FUNC DenseBase(int,int);
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase<OtherDerived>&);
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -61,6 +61,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
using Base::size;
|
||||
using Base::derived;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const
|
||||
{
|
||||
return int(Derived::RowsAtCompileTime) == 1 ? 0
|
||||
@@ -69,6 +70,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
: inner;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const
|
||||
{
|
||||
return int(Derived::ColsAtCompileTime) == 1 ? 0
|
||||
@@ -91,6 +93,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
*
|
||||
* \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
eigen_internal_assert(row >= 0 && row < rows()
|
||||
@@ -98,6 +101,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
return derived().coeff(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
|
||||
{
|
||||
return coeff(rowIndexByOuterInner(outer, inner),
|
||||
@@ -108,6 +112,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
*
|
||||
* \sa operator()(Index,Index), operator[](Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const
|
||||
{
|
||||
eigen_assert(row >= 0 && row < rows()
|
||||
@@ -130,6 +135,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
* \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
coeff(Index index) const
|
||||
{
|
||||
@@ -146,13 +152,12 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
* z() const, w() const
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
operator[](Index index) const
|
||||
{
|
||||
#ifndef EIGEN2_SUPPORT
|
||||
EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
|
||||
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
|
||||
#endif
|
||||
eigen_assert(index >= 0 && index < size());
|
||||
return derived().coeff(index);
|
||||
}
|
||||
@@ -167,6 +172,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
* z() const, w() const
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
operator()(Index index) const
|
||||
{
|
||||
@@ -176,21 +182,25 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
|
||||
/** equivalent to operator[](0). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
x() const { return (*this)[0]; }
|
||||
|
||||
/** equivalent to operator[](1). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
y() const { return (*this)[1]; }
|
||||
|
||||
/** equivalent to operator[](2). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
z() const { return (*this)[2]; }
|
||||
|
||||
/** equivalent to operator[](3). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
w() const { return (*this)[3]; }
|
||||
|
||||
@@ -311,6 +321,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
*
|
||||
* \sa operator()(Index,Index), coeff(Index, Index) const, coeffRef(Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
eigen_internal_assert(row >= 0 && row < rows()
|
||||
@@ -318,6 +329,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
return derived().coeffRef(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
coeffRefByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
@@ -330,6 +342,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
* \sa operator[](Index)
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
operator()(Index row, Index col)
|
||||
{
|
||||
@@ -354,6 +367,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
* \sa operator[](Index), coeff(Index) const, coeffRef(Index,Index)
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
coeffRef(Index index)
|
||||
{
|
||||
@@ -368,13 +382,12 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
* \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
operator[](Index index)
|
||||
{
|
||||
#ifndef EIGEN2_SUPPORT
|
||||
EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
|
||||
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
|
||||
#endif
|
||||
eigen_assert(index >= 0 && index < size());
|
||||
return derived().coeffRef(index);
|
||||
}
|
||||
@@ -388,6 +401,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
* \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
operator()(Index index)
|
||||
{
|
||||
@@ -397,21 +411,25 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
|
||||
/** equivalent to operator[](0). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
x() { return (*this)[0]; }
|
||||
|
||||
/** equivalent to operator[](1). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
y() { return (*this)[1]; }
|
||||
|
||||
/** equivalent to operator[](2). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
z() { return (*this)[2]; }
|
||||
|
||||
/** equivalent to operator[](3). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
w() { return (*this)[3]; }
|
||||
|
||||
@@ -473,6 +491,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
*/
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
eigen_internal_assert(row >= 0 && row < rows()
|
||||
@@ -489,6 +508,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
*/
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
eigen_internal_assert(index >= 0 && index < size());
|
||||
@@ -497,6 +517,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void copyCoeffByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
const Index row = rowIndexByOuterInner(outer,inner);
|
||||
@@ -581,6 +602,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
|
||||
*
|
||||
* \sa outerStride(), rowStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const
|
||||
{
|
||||
return derived().innerStride();
|
||||
@@ -591,6 +613,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
|
||||
*
|
||||
* \sa innerStride(), rowStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const
|
||||
{
|
||||
return derived().outerStride();
|
||||
@@ -606,6 +629,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
|
||||
*
|
||||
* \sa innerStride(), outerStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rowStride() const
|
||||
{
|
||||
return Derived::IsRowMajor ? outerStride() : innerStride();
|
||||
@@ -615,6 +639,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
|
||||
*
|
||||
* \sa innerStride(), outerStride(), rowStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index colStride() const
|
||||
{
|
||||
return Derived::IsRowMajor ? innerStride() : outerStride();
|
||||
@@ -652,6 +677,7 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
|
||||
*
|
||||
* \sa outerStride(), rowStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const
|
||||
{
|
||||
return derived().innerStride();
|
||||
@@ -662,6 +688,7 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
|
||||
*
|
||||
* \sa innerStride(), rowStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const
|
||||
{
|
||||
return derived().outerStride();
|
||||
@@ -677,6 +704,7 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
|
||||
*
|
||||
* \sa innerStride(), outerStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rowStride() const
|
||||
{
|
||||
return Derived::IsRowMajor ? outerStride() : innerStride();
|
||||
@@ -686,6 +714,7 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
|
||||
*
|
||||
* \sa innerStride(), outerStride(), rowStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index colStride() const
|
||||
{
|
||||
return Derived::IsRowMajor ? innerStride() : outerStride();
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
|
||||
// Copyright (C) 2010-2013 Hauke Heibel <hauke.heibel@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
@@ -24,26 +24,38 @@ namespace internal {
|
||||
|
||||
struct constructor_without_unaligned_array_assert {};
|
||||
|
||||
template<typename T, int Size>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void check_static_allocation_size()
|
||||
{
|
||||
// if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit
|
||||
#if EIGEN_STACK_ALLOCATION_LIMIT
|
||||
EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
|
||||
#endif
|
||||
}
|
||||
|
||||
/** \internal
|
||||
* Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned:
|
||||
* to 16 bytes boundary if the total size is a multiple of 16 bytes.
|
||||
*/
|
||||
template <typename T, int Size, int MatrixOrArrayOptions,
|
||||
int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
|
||||
: (((Size*sizeof(T))%16)==0) ? 16
|
||||
: (((Size*sizeof(T))%EIGEN_ALIGN_BYTES)==0) ? EIGEN_ALIGN_BYTES
|
||||
: 0 >
|
||||
struct plain_array
|
||||
{
|
||||
T array[Size];
|
||||
|
||||
plain_array()
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Size * sizeof(T) <= 128 * 128 * 8, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
|
||||
plain_array(constructor_without_unaligned_array_assert)
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array(constructor_without_unaligned_array_assert)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Size * sizeof(T) <= 128 * 128 * 8, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -56,41 +68,43 @@ struct plain_array
|
||||
template<typename PtrType>
|
||||
EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; }
|
||||
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
|
||||
eigen_assert((reinterpret_cast<size_t>(eigen_unaligned_array_assert_workaround_gcc47(array)) & sizemask) == 0 \
|
||||
eigen_assert((reinterpret_cast<size_t>(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \
|
||||
&& "this assertion is explained here: " \
|
||||
"http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
|
||||
" **** READ THIS WEB PAGE !!! ****");
|
||||
#else
|
||||
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
|
||||
eigen_assert((reinterpret_cast<size_t>(array) & sizemask) == 0 \
|
||||
eigen_assert((reinterpret_cast<size_t>(array) & (sizemask)) == 0 \
|
||||
&& "this assertion is explained here: " \
|
||||
"http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
|
||||
" **** READ THIS WEB PAGE !!! ****");
|
||||
#endif
|
||||
|
||||
template <typename T, int Size, int MatrixOrArrayOptions>
|
||||
struct plain_array<T, Size, MatrixOrArrayOptions, 16>
|
||||
struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES>
|
||||
{
|
||||
EIGEN_USER_ALIGN16 T array[Size];
|
||||
EIGEN_USER_ALIGN_DEFAULT T array[Size];
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array()
|
||||
{
|
||||
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf);
|
||||
EIGEN_STATIC_ASSERT(Size * sizeof(T) <= 128 * 128 * 8, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
|
||||
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(EIGEN_ALIGN_BYTES-1);
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array(constructor_without_unaligned_array_assert)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Size * sizeof(T) <= 128 * 128 * 8, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, int MatrixOrArrayOptions, int Alignment>
|
||||
struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
|
||||
{
|
||||
EIGEN_USER_ALIGN16 T array[1];
|
||||
plain_array() {}
|
||||
plain_array(constructor_without_unaligned_array_assert) {}
|
||||
EIGEN_USER_ALIGN_DEFAULT T array[1];
|
||||
EIGEN_DEVICE_FUNC plain_array() {}
|
||||
EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
@@ -114,33 +128,44 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
|
||||
{
|
||||
internal::plain_array<T,Size,_Options> m_data;
|
||||
public:
|
||||
inline DenseStorage() {}
|
||||
inline DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
EIGEN_DEVICE_FUNC DenseStorage() {}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()) {}
|
||||
inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
|
||||
static inline DenseIndex rows(void) {return _Rows;}
|
||||
static inline DenseIndex cols(void) {return _Cols;}
|
||||
inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
inline const T *data() const { return m_data.array; }
|
||||
inline T *data() { return m_data.array; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage(const DenseStorage& other) : m_data(other.m_data) {}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other) m_data = other.m_data;
|
||||
return *this;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
|
||||
EIGEN_DEVICE_FUNC static DenseIndex rows(void) {return _Rows;}
|
||||
EIGEN_DEVICE_FUNC static DenseIndex cols(void) {return _Cols;}
|
||||
EIGEN_DEVICE_FUNC void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
EIGEN_DEVICE_FUNC void resize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
|
||||
};
|
||||
|
||||
// null matrix
|
||||
template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0, _Rows, _Cols, _Options>
|
||||
{
|
||||
public:
|
||||
inline DenseStorage() {}
|
||||
inline DenseStorage(internal::constructor_without_unaligned_array_assert) {}
|
||||
inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
inline void swap(DenseStorage& ) {}
|
||||
static inline DenseIndex rows(void) {return _Rows;}
|
||||
static inline DenseIndex cols(void) {return _Cols;}
|
||||
inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
inline const T *data() const { return 0; }
|
||||
inline T *data() { return 0; }
|
||||
EIGEN_DEVICE_FUNC DenseStorage() {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(internal::constructor_without_unaligned_array_assert) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
|
||||
EIGEN_DEVICE_FUNC DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}
|
||||
EIGEN_DEVICE_FUNC static DenseIndex rows(void) {return _Rows;}
|
||||
EIGEN_DEVICE_FUNC static DenseIndex cols(void) {return _Cols;}
|
||||
EIGEN_DEVICE_FUNC void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
EIGEN_DEVICE_FUNC void resize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return 0; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return 0; }
|
||||
};
|
||||
|
||||
// more specializations for null matrices; these are necessary to resolve ambiguities
|
||||
@@ -160,18 +185,29 @@ template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic
|
||||
DenseIndex m_rows;
|
||||
DenseIndex m_cols;
|
||||
public:
|
||||
inline DenseStorage() : m_rows(0), m_cols(0) {}
|
||||
inline DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
|
||||
DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
|
||||
inline DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) : m_rows(nbRows), m_cols(nbCols) {}
|
||||
inline void swap(DenseStorage& other)
|
||||
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
m_data = other.m_data;
|
||||
m_rows = other.m_rows;
|
||||
m_cols = other.m_cols;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) : m_rows(nbRows), m_cols(nbCols) {}
|
||||
void swap(DenseStorage& other)
|
||||
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
|
||||
inline DenseIndex rows() const {return m_rows;}
|
||||
inline DenseIndex cols() const {return m_cols;}
|
||||
inline void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
|
||||
inline void resize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
|
||||
inline const T *data() const { return m_data.array; }
|
||||
inline T *data() { return m_data.array; }
|
||||
EIGEN_DEVICE_FUNC DenseIndex rows() const {return m_rows;}
|
||||
EIGEN_DEVICE_FUNC DenseIndex cols() const {return m_cols;}
|
||||
void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
|
||||
void resize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
|
||||
};
|
||||
|
||||
// dynamic-size matrix with fixed-size storage and fixed width
|
||||
@@ -180,17 +216,27 @@ template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Si
|
||||
internal::plain_array<T,Size,_Options> m_data;
|
||||
DenseIndex m_rows;
|
||||
public:
|
||||
inline DenseStorage() : m_rows(0) {}
|
||||
inline DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
|
||||
DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
|
||||
inline DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex) : m_rows(nbRows) {}
|
||||
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
|
||||
inline DenseIndex rows(void) const {return m_rows;}
|
||||
inline DenseIndex cols(void) const {return _Cols;}
|
||||
inline void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
|
||||
inline void resize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
|
||||
inline const T *data() const { return m_data.array; }
|
||||
inline T *data() { return m_data.array; }
|
||||
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
m_data = other.m_data;
|
||||
m_rows = other.m_rows;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex) : m_rows(nbRows) {}
|
||||
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
|
||||
EIGEN_DEVICE_FUNC DenseIndex rows(void) const {return m_rows;}
|
||||
EIGEN_DEVICE_FUNC DenseIndex cols(void) const {return _Cols;}
|
||||
void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
|
||||
void resize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
|
||||
};
|
||||
|
||||
// dynamic-size matrix with fixed-size storage and fixed height
|
||||
@@ -199,17 +245,27 @@ template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Si
|
||||
internal::plain_array<T,Size,_Options> m_data;
|
||||
DenseIndex m_cols;
|
||||
public:
|
||||
inline DenseStorage() : m_cols(0) {}
|
||||
inline DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
|
||||
DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
|
||||
inline DenseStorage(DenseIndex, DenseIndex, DenseIndex nbCols) : m_cols(nbCols) {}
|
||||
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
|
||||
inline DenseIndex rows(void) const {return _Rows;}
|
||||
inline DenseIndex cols(void) const {return m_cols;}
|
||||
inline void conservativeResize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
|
||||
inline void resize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
|
||||
inline const T *data() const { return m_data.array; }
|
||||
inline T *data() { return m_data.array; }
|
||||
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
m_data = other.m_data;
|
||||
m_cols = other.m_cols;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
DenseStorage(DenseIndex, DenseIndex, DenseIndex nbCols) : m_cols(nbCols) {}
|
||||
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
|
||||
EIGEN_DEVICE_FUNC DenseIndex rows(void) const {return _Rows;}
|
||||
EIGEN_DEVICE_FUNC DenseIndex cols(void) const {return m_cols;}
|
||||
void conservativeResize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
|
||||
void resize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
|
||||
};
|
||||
|
||||
// purely dynamic matrix.
|
||||
@@ -219,18 +275,51 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
DenseIndex m_rows;
|
||||
DenseIndex m_cols;
|
||||
public:
|
||||
inline DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
|
||||
inline DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
|
||||
DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(0), m_rows(0), m_cols(0) {}
|
||||
inline DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
|
||||
DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows), m_cols(nbCols)
|
||||
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
|
||||
inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
|
||||
inline void swap(DenseStorage& other)
|
||||
DenseStorage(const DenseStorage& other)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols))
|
||||
, m_rows(other.m_rows)
|
||||
, m_cols(other.m_cols)
|
||||
{
|
||||
internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data);
|
||||
}
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
DenseStorage tmp(other);
|
||||
this->swap(tmp);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
DenseStorage(DenseStorage&& other)
|
||||
: m_data(std::move(other.m_data))
|
||||
, m_rows(std::move(other.m_rows))
|
||||
, m_cols(std::move(other.m_cols))
|
||||
{
|
||||
other.m_data = nullptr;
|
||||
}
|
||||
DenseStorage& operator=(DenseStorage&& other)
|
||||
{
|
||||
using std::swap;
|
||||
swap(m_data, other.m_data);
|
||||
swap(m_rows, other.m_rows);
|
||||
swap(m_cols, other.m_cols);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
|
||||
void swap(DenseStorage& other)
|
||||
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
|
||||
inline DenseIndex rows(void) const {return m_rows;}
|
||||
inline DenseIndex cols(void) const {return m_cols;}
|
||||
inline void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
|
||||
EIGEN_DEVICE_FUNC DenseIndex rows(void) const {return m_rows;}
|
||||
EIGEN_DEVICE_FUNC DenseIndex cols(void) const {return m_cols;}
|
||||
void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
|
||||
{
|
||||
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
|
||||
m_rows = nbRows;
|
||||
@@ -250,8 +339,8 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
m_rows = nbRows;
|
||||
m_cols = nbCols;
|
||||
}
|
||||
inline const T *data() const { return m_data; }
|
||||
inline T *data() { return m_data; }
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data; }
|
||||
};
|
||||
|
||||
// matrix with dynamic width and fixed height (so that matrix has dynamic size).
|
||||
@@ -260,15 +349,45 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
T *m_data;
|
||||
DenseIndex m_cols;
|
||||
public:
|
||||
inline DenseStorage() : m_data(0), m_cols(0) {}
|
||||
inline DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
|
||||
inline DenseStorage(DenseIndex size, DenseIndex, DenseIndex nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(nbCols)
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
|
||||
DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
|
||||
DenseStorage(DenseIndex size, DenseIndex, DenseIndex nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(nbCols)
|
||||
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
|
||||
inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
|
||||
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
|
||||
static inline DenseIndex rows(void) {return _Rows;}
|
||||
inline DenseIndex cols(void) const {return m_cols;}
|
||||
inline void conservativeResize(DenseIndex size, DenseIndex, DenseIndex nbCols)
|
||||
DenseStorage(const DenseStorage& other)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
|
||||
, m_cols(other.m_cols)
|
||||
{
|
||||
internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);
|
||||
}
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
DenseStorage tmp(other);
|
||||
this->swap(tmp);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
DenseStorage(DenseStorage&& other)
|
||||
: m_data(std::move(other.m_data))
|
||||
, m_cols(std::move(other.m_cols))
|
||||
{
|
||||
other.m_data = nullptr;
|
||||
}
|
||||
DenseStorage& operator=(DenseStorage&& other)
|
||||
{
|
||||
using std::swap;
|
||||
swap(m_data, other.m_data);
|
||||
swap(m_cols, other.m_cols);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
|
||||
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
|
||||
EIGEN_DEVICE_FUNC static DenseIndex rows(void) {return _Rows;}
|
||||
EIGEN_DEVICE_FUNC DenseIndex cols(void) const {return m_cols;}
|
||||
void conservativeResize(DenseIndex size, DenseIndex, DenseIndex nbCols)
|
||||
{
|
||||
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
|
||||
m_cols = nbCols;
|
||||
@@ -286,8 +405,8 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
}
|
||||
m_cols = nbCols;
|
||||
}
|
||||
inline const T *data() const { return m_data; }
|
||||
inline T *data() { return m_data; }
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data; }
|
||||
};
|
||||
|
||||
// matrix with dynamic height and fixed width (so that matrix has dynamic size).
|
||||
@@ -296,15 +415,45 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
T *m_data;
|
||||
DenseIndex m_rows;
|
||||
public:
|
||||
inline DenseStorage() : m_data(0), m_rows(0) {}
|
||||
inline DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
|
||||
inline DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows)
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
|
||||
DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
|
||||
DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows)
|
||||
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
|
||||
inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
|
||||
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
|
||||
inline DenseIndex rows(void) const {return m_rows;}
|
||||
static inline DenseIndex cols(void) {return _Cols;}
|
||||
inline void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex)
|
||||
DenseStorage(const DenseStorage& other)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
|
||||
, m_rows(other.m_rows)
|
||||
{
|
||||
internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);
|
||||
}
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
DenseStorage tmp(other);
|
||||
this->swap(tmp);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
DenseStorage(DenseStorage&& other)
|
||||
: m_data(std::move(other.m_data))
|
||||
, m_rows(std::move(other.m_rows))
|
||||
{
|
||||
other.m_data = nullptr;
|
||||
}
|
||||
DenseStorage& operator=(DenseStorage&& other)
|
||||
{
|
||||
using std::swap;
|
||||
swap(m_data, other.m_data);
|
||||
swap(m_rows, other.m_rows);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
|
||||
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
|
||||
EIGEN_DEVICE_FUNC DenseIndex rows(void) const {return m_rows;}
|
||||
EIGEN_DEVICE_FUNC static DenseIndex cols(void) {return _Cols;}
|
||||
void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex)
|
||||
{
|
||||
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
|
||||
m_rows = nbRows;
|
||||
@@ -322,8 +471,8 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
}
|
||||
m_rows = nbRows;
|
||||
}
|
||||
inline const T *data() const { return m_data; }
|
||||
inline T *data() { return m_data; }
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data; }
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -70,20 +70,30 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
|
||||
typedef typename internal::dense_xpr_base<Diagonal>::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}
|
||||
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const
|
||||
{ return m_index.value()<0 ? (std::min<Index>)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min<Index>)(m_matrix.rows(),m_matrix.cols()-m_index.value()); }
|
||||
{
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
return m_index.value()<0 ? (min)(Index(m_matrix.cols()),Index(m_matrix.rows()+m_index.value()))
|
||||
: (min)(Index(m_matrix.rows()),Index(m_matrix.cols()-m_index.value()));
|
||||
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return 1; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const
|
||||
{
|
||||
return m_matrix.outerStride() + 1;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const
|
||||
{
|
||||
return 0;
|
||||
@@ -95,47 +105,57 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index row, Index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index row, Index) const
|
||||
{
|
||||
return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index row, Index) const
|
||||
{
|
||||
return m_matrix.coeff(row+rowOffset(), row+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index idx)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index idx) const
|
||||
{
|
||||
return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index idx) const
|
||||
{
|
||||
return m_matrix.coeff(idx+rowOffset(), idx+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename MatrixType::Nested>::type&
|
||||
nestedExpression() const
|
||||
{
|
||||
return m_matrix;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
int index() const
|
||||
{
|
||||
return m_index.value();
|
||||
@@ -147,8 +167,11 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
|
||||
|
||||
private:
|
||||
// some compilers may fail to optimize std::max etc in case of compile-time constants...
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
|
||||
// triger a compile time error is someone try to call packet
|
||||
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
|
||||
|
||||
@@ -37,63 +37,64 @@ class DiagonalBase : public EigenBase<Derived>
|
||||
typedef DenseMatrixType DenseType;
|
||||
typedef DiagonalMatrix<Scalar,DiagonalVectorType::SizeAtCompileTime,DiagonalVectorType::MaxSizeAtCompileTime> PlainObject;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Derived& derived() { return *static_cast<Derived*>(this); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseMatrixType toDenseMatrix() const { return derived(); }
|
||||
template<typename DenseDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void evalTo(MatrixBase<DenseDerived> &other) const;
|
||||
template<typename DenseDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void addTo(MatrixBase<DenseDerived> &other) const
|
||||
{ other.diagonal() += diagonal(); }
|
||||
template<typename DenseDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void subTo(MatrixBase<DenseDerived> &other) const
|
||||
{ other.diagonal() -= diagonal(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalVectorType& diagonal() { return derived().diagonal(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return diagonal().size(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return diagonal().size(); }
|
||||
|
||||
/** \returns the diagonal matrix product of \c *this by the matrix \a matrix.
|
||||
*/
|
||||
template<typename MatrixDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const DiagonalProduct<MatrixDerived, Derived, OnTheLeft>
|
||||
operator*(const MatrixBase<MatrixDerived> &matrix) const
|
||||
{
|
||||
return DiagonalProduct<MatrixDerived, Derived, OnTheLeft>(matrix.derived(), derived());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
|
||||
inverse() const
|
||||
{
|
||||
return diagonal().cwiseInverse();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
|
||||
operator*(const Scalar& scalar) const
|
||||
{
|
||||
return diagonal() * scalar;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
friend inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
|
||||
operator*(const Scalar& scalar, const DiagonalBase& other)
|
||||
{
|
||||
return other.diagonal() * scalar;
|
||||
}
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<typename OtherDerived>
|
||||
bool isApprox(const DiagonalBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
|
||||
{
|
||||
return diagonal().isApprox(other.diagonal(), precision);
|
||||
}
|
||||
template<typename OtherDerived>
|
||||
bool isApprox(const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
|
||||
{
|
||||
return toDenseMatrix().isApprox(other, precision);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
@@ -151,24 +152,31 @@ class DiagonalMatrix
|
||||
public:
|
||||
|
||||
/** const version of diagonal(). */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const DiagonalVectorType& diagonal() const { return m_diagonal; }
|
||||
/** \returns a reference to the stored vector of diagonal coefficients. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalVectorType& diagonal() { return m_diagonal; }
|
||||
|
||||
/** Default constructor without initialization */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalMatrix() {}
|
||||
|
||||
/** Constructs a diagonal matrix with given dimension */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}
|
||||
|
||||
/** 2D constructor. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x,y) {}
|
||||
|
||||
/** 3D constructor. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {}
|
||||
|
||||
/** Copy constructor. */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalMatrix(const DiagonalBase<OtherDerived>& other) : m_diagonal(other.diagonal()) {}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
@@ -178,11 +186,13 @@ class DiagonalMatrix
|
||||
|
||||
/** generic constructor from expression of the diagonal coefficients */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline DiagonalMatrix(const MatrixBase<OtherDerived>& other) : m_diagonal(other)
|
||||
{}
|
||||
|
||||
/** Copy operator. */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
DiagonalMatrix& operator=(const DiagonalBase<OtherDerived>& other)
|
||||
{
|
||||
m_diagonal = other.diagonal();
|
||||
@@ -193,6 +203,7 @@ class DiagonalMatrix
|
||||
/** This is a special case of the templated operator=. Its purpose is to
|
||||
* prevent a default operator= from hiding the templated operator=.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
DiagonalMatrix& operator=(const DiagonalMatrix& other)
|
||||
{
|
||||
m_diagonal = other.diagonal();
|
||||
@@ -201,14 +212,19 @@ class DiagonalMatrix
|
||||
#endif
|
||||
|
||||
/** Resizes to given size. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void resize(Index size) { m_diagonal.resize(size); }
|
||||
/** Sets all coefficients to zero. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void setZero() { m_diagonal.setZero(); }
|
||||
/** Resizes and sets all coefficients to zero. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void setZero(Index size) { m_diagonal.setZero(size); }
|
||||
/** Sets this matrix to be the identity matrix of the current size. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void setIdentity() { m_diagonal.setOnes(); }
|
||||
/** Sets this matrix to be the identity matrix of the given size. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void setIdentity(Index size) { m_diagonal.setOnes(size); }
|
||||
};
|
||||
|
||||
@@ -255,9 +271,11 @@ class DiagonalWrapper
|
||||
#endif
|
||||
|
||||
/** Constructor from expression of diagonal coefficients to wrap. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {}
|
||||
|
||||
/** \returns a const reference to the wrapped expression of diagonal coefficients. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const DiagonalVectorType& diagonal() const { return m_diagonal; }
|
||||
|
||||
protected:
|
||||
|
||||
@@ -29,6 +29,7 @@ template<typename T, typename U,
|
||||
struct dot_nocheck
|
||||
{
|
||||
typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
|
||||
{
|
||||
return a.template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
|
||||
@@ -39,6 +40,7 @@ template<typename T, typename U>
|
||||
struct dot_nocheck<T, U, true>
|
||||
{
|
||||
typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
|
||||
{
|
||||
return a.transpose().template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
|
||||
@@ -59,6 +61,7 @@ struct dot_nocheck<T, U, true>
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
|
||||
MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
|
||||
{
|
||||
@@ -73,34 +76,6 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
|
||||
return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);
|
||||
}
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
/** \returns the dot product of *this with other, with the Eigen2 convention that the dot product is linear in the first variable
|
||||
* (conjugating the second variable). Of course this only makes a difference in the complex case.
|
||||
*
|
||||
* This method is only available in EIGEN2_SUPPORT mode.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* \sa dot()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
typename internal::traits<Derived>::Scalar
|
||||
MatrixBase<Derived>::eigen2_dot(const MatrixBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
|
||||
eigen_assert(size() == other.size());
|
||||
|
||||
return internal::dot_nocheck<OtherDerived,Derived>::run(other,*this);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
//---------- implementation of L2 norm and related functions ----------
|
||||
|
||||
/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm.
|
||||
@@ -164,6 +139,7 @@ template<typename Derived, int p>
|
||||
struct lpNorm_selector
|
||||
{
|
||||
typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
using std::pow;
|
||||
@@ -174,6 +150,7 @@ struct lpNorm_selector
|
||||
template<typename Derived>
|
||||
struct lpNorm_selector<Derived, 1>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
return m.cwiseAbs().sum();
|
||||
@@ -183,6 +160,7 @@ struct lpNorm_selector<Derived, 1>
|
||||
template<typename Derived>
|
||||
struct lpNorm_selector<Derived, 2>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
return m.norm();
|
||||
@@ -192,6 +170,7 @@ struct lpNorm_selector<Derived, 2>
|
||||
template<typename Derived>
|
||||
struct lpNorm_selector<Derived, Infinity>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
return m.cwiseAbs().maxCoeff();
|
||||
|
||||
@@ -31,29 +31,40 @@ template<typename Derived> struct EigenBase
|
||||
typedef typename internal::traits<Derived>::Index Index;
|
||||
|
||||
/** \returns a reference to the derived object */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& derived() { return *static_cast<Derived*>(this); }
|
||||
/** \returns a const reference to the derived object */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Derived& const_cast_derived() const
|
||||
{ return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Derived& const_derived() const
|
||||
{ return *static_cast<const Derived*>(this); }
|
||||
|
||||
/** \returns the number of rows. \sa cols(), RowsAtCompileTime */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return derived().rows(); }
|
||||
/** \returns the number of columns. \sa rows(), ColsAtCompileTime*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return derived().cols(); }
|
||||
/** \returns the number of coefficients, which is rows()*cols().
|
||||
* \sa rows(), cols(), SizeAtCompileTime. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index size() const { return rows() * cols(); }
|
||||
|
||||
/** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */
|
||||
template<typename Dest> inline void evalTo(Dest& dst) const
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void evalTo(Dest& dst) const
|
||||
{ derived().evalTo(dst); }
|
||||
|
||||
/** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */
|
||||
template<typename Dest> inline void addTo(Dest& dst) const
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void addTo(Dest& dst) const
|
||||
{
|
||||
// This is the default implementation,
|
||||
// derived class can reimplement it in a more optimized way.
|
||||
@@ -63,7 +74,9 @@ template<typename Derived> struct EigenBase
|
||||
}
|
||||
|
||||
/** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */
|
||||
template<typename Dest> inline void subTo(Dest& dst) const
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void subTo(Dest& dst) const
|
||||
{
|
||||
// This is the default implementation,
|
||||
// derived class can reimplement it in a more optimized way.
|
||||
@@ -73,7 +86,8 @@ template<typename Derived> struct EigenBase
|
||||
}
|
||||
|
||||
/** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */
|
||||
template<typename Dest> inline void applyThisOnTheRight(Dest& dst) const
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const
|
||||
{
|
||||
// This is the default implementation,
|
||||
// derived class can reimplement it in a more optimized way.
|
||||
@@ -81,7 +95,8 @@ template<typename Derived> struct EigenBase
|
||||
}
|
||||
|
||||
/** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */
|
||||
template<typename Dest> inline void applyThisOnTheLeft(Dest& dst) const
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const
|
||||
{
|
||||
// This is the default implementation,
|
||||
// derived class can reimplement it in a more optimized way.
|
||||
@@ -126,36 +141,6 @@ Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** replaces \c *this by \c *this * \a other.
|
||||
*
|
||||
* \returns a reference to \c *this
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline Derived&
|
||||
MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
other.derived().applyThisOnTheRight(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=().
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
other.derived().applyThisOnTheRight(derived());
|
||||
}
|
||||
|
||||
/** replaces \c *this by \c *this * \a other. */
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
other.derived().applyThisOnTheLeft(derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_EIGENBASE_H
|
||||
|
||||
@@ -1,985 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_FUNCTORS_H
|
||||
#define EIGEN_FUNCTORS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// associative functors:
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the sum of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, MatrixBase::sum()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_sum_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::padd(a,b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
|
||||
{ return internal::predux(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_sum_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasAdd
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the product of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux()
|
||||
*/
|
||||
template<typename LhsScalar,typename RhsScalar> struct scalar_product_op {
|
||||
enum {
|
||||
// TODO vectorize mixed product
|
||||
Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
|
||||
};
|
||||
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
|
||||
EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::pmul(a,b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
|
||||
{ return internal::predux_mul(a); }
|
||||
};
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
|
||||
enum {
|
||||
Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
|
||||
PacketAccess = scalar_product_op<LhsScalar,RhsScalar>::Vectorizable
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the conjugate product of two scalars
|
||||
*
|
||||
* This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y)
|
||||
*/
|
||||
template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op {
|
||||
|
||||
enum {
|
||||
Conj = NumTraits<LhsScalar>::IsComplex
|
||||
};
|
||||
|
||||
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
|
||||
EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
|
||||
{ return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }
|
||||
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
|
||||
};
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<LhsScalar>::MulCost,
|
||||
PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMul
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the min of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_min_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::min; return (min)(a, b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::pmin(a,b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
|
||||
{ return internal::predux_min(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_min_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasMin
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the max of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_max_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::max; return (max)(a, b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::pmax(a,b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
|
||||
{ return internal::predux_max(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_max_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasMax
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the hypot of two scalars
|
||||
*
|
||||
* \sa MatrixBase::stableNorm(), class Redux
|
||||
*/
|
||||
template<typename Scalar> struct scalar_hypot_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
|
||||
// typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
|
||||
{
|
||||
using std::max;
|
||||
using std::min;
|
||||
using std::sqrt;
|
||||
Scalar p = (max)(_x, _y);
|
||||
Scalar q = (min)(_x, _y);
|
||||
Scalar qp = q/p;
|
||||
return p * sqrt(Scalar(1) + qp*qp);
|
||||
}
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_hypot_op<Scalar> > {
|
||||
enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess=0 };
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the pow of two scalars
|
||||
*/
|
||||
template<typename Scalar, typename OtherScalar> struct scalar_binary_pow_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op)
|
||||
inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); }
|
||||
};
|
||||
template<typename Scalar, typename OtherScalar>
|
||||
struct functor_traits<scalar_binary_pow_op<Scalar,OtherScalar> > {
|
||||
enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
|
||||
};
|
||||
|
||||
// other binary functors:
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the difference of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, MatrixBase::operator-
|
||||
*/
|
||||
template<typename Scalar> struct scalar_difference_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::psub(a,b); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_difference_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasSub
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the quotient of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, Cwise::operator/()
|
||||
*/
|
||||
template<typename LhsScalar,typename RhsScalar> struct scalar_quotient_op {
|
||||
enum {
|
||||
// TODO vectorize mixed product
|
||||
Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv
|
||||
};
|
||||
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
|
||||
EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::pdiv(a,b); }
|
||||
};
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
|
||||
enum {
|
||||
Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost), // rough estimate!
|
||||
PacketAccess = scalar_quotient_op<LhsScalar,RhsScalar>::Vectorizable
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the and of two booleans
|
||||
*
|
||||
* \sa class CwiseBinaryOp, ArrayBase::operator&&
|
||||
*/
|
||||
struct scalar_boolean_and_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
|
||||
EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
|
||||
};
|
||||
template<> struct functor_traits<scalar_boolean_and_op> {
|
||||
enum {
|
||||
Cost = NumTraits<bool>::AddCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the or of two booleans
|
||||
*
|
||||
* \sa class CwiseBinaryOp, ArrayBase::operator||
|
||||
*/
|
||||
struct scalar_boolean_or_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
|
||||
EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
|
||||
};
|
||||
template<> struct functor_traits<scalar_boolean_or_op> {
|
||||
enum {
|
||||
Cost = NumTraits<bool>::AddCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
// unary functors:
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the opposite of a scalar
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::operator-
|
||||
*/
|
||||
template<typename Scalar> struct scalar_opposite_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op)
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pnegate(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_opposite_op<Scalar> >
|
||||
{ enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasNegate };
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the absolute value of a scalar
|
||||
*
|
||||
* \sa class CwiseUnaryOp, Cwise::abs
|
||||
*/
|
||||
template<typename Scalar> struct scalar_abs_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pabs(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_abs_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasAbs
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the squared absolute value of a scalar
|
||||
*
|
||||
* \sa class CwiseUnaryOp, Cwise::abs2
|
||||
*/
|
||||
template<typename Scalar> struct scalar_abs2_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pmul(a,a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_abs2_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasAbs2 }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the conjugate of a complex value
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::conjugate()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_conjugate_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_conjugate_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0,
|
||||
PacketAccess = packet_traits<Scalar>::HasConj
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to cast a scalar to another type
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::cast()
|
||||
*/
|
||||
template<typename Scalar, typename NewType>
|
||||
struct scalar_cast_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
||||
typedef NewType result_type;
|
||||
EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }
|
||||
};
|
||||
template<typename Scalar, typename NewType>
|
||||
struct functor_traits<scalar_cast_op<Scalar,NewType> >
|
||||
{ enum { Cost = is_same<Scalar, NewType>::value ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to extract the real part of a complex
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::real()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_real_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_real_op<Scalar> >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to extract the imaginary part of a complex
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::imag()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_imag_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_imag_op<Scalar> >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to extract the real part of a complex as a reference
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::real()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_real_ref_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast<Scalar*>(&a)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_real_ref_op<Scalar> >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to extract the imaginary part of a complex as a reference
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::imag()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_imag_ref_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast<Scalar*>(&a)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_imag_ref_op<Scalar> >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
*
|
||||
* \brief Template functor to compute the exponential of a scalar
|
||||
*
|
||||
* \sa class CwiseUnaryOp, Cwise::exp()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_exp_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
|
||||
inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_exp_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasExp }; };
|
||||
|
||||
/** \internal
|
||||
*
|
||||
* \brief Template functor to compute the logarithm of a scalar
|
||||
*
|
||||
* \sa class CwiseUnaryOp, Cwise::log()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_log_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
|
||||
inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_log_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to multiply a scalar by a fixed other one
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/
|
||||
*/
|
||||
/* NOTE why doing the pset1() in packetOp *is* an optimization ?
|
||||
* indeed it seems better to declare m_other as a Packet and do the pset1() once
|
||||
* in the constructor. However, in practice:
|
||||
* - GCC does not like m_other as a Packet and generate a load every time it needs it
|
||||
* - on the other hand GCC is able to moves the pset1() outside the loop :)
|
||||
* - simpler code ;)
|
||||
* (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y)
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_multiple_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { }
|
||||
EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pmul(a, pset1<Packet>(m_other)); }
|
||||
typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_multiple_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
|
||||
|
||||
template<typename Scalar1, typename Scalar2>
|
||||
struct scalar_multiple2_op {
|
||||
typedef typename scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
|
||||
EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { }
|
||||
EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; }
|
||||
typename add_const_on_value_type<typename NumTraits<Scalar2>::Nested>::type m_other;
|
||||
};
|
||||
template<typename Scalar1,typename Scalar2>
|
||||
struct functor_traits<scalar_multiple2_op<Scalar1,Scalar2> >
|
||||
{ enum { Cost = NumTraits<Scalar1>::MulCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to divide a scalar by a fixed other one
|
||||
*
|
||||
* This functor is used to implement the quotient of a matrix by
|
||||
* a scalar where the scalar type is not necessarily a floating point type.
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::operator/
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_quotient1_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {}
|
||||
EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; }
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pdiv(a, pset1<Packet>(m_other)); }
|
||||
typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_quotient1_op<Scalar> >
|
||||
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
|
||||
|
||||
// nullary functors
|
||||
|
||||
template<typename Scalar>
|
||||
struct scalar_constant_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; }
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return internal::pset1<Packet>(m_other); }
|
||||
const Scalar m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_constant_op<Scalar> >
|
||||
// FIXME replace this packet test by a safe one
|
||||
{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };
|
||||
|
||||
template<typename Scalar> struct scalar_identity_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? Scalar(1) : Scalar(0); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_identity_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
|
||||
|
||||
template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
|
||||
|
||||
// linear access for packet ops:
|
||||
// 1) initialization
|
||||
// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0])
|
||||
// 2) each step (where size is 1 for coeff access or PacketSize for packet access)
|
||||
// base += [size*step, ..., size*step]
|
||||
//
|
||||
// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
|
||||
// in order to avoid the padd() in operator() ?
|
||||
template <typename Scalar>
|
||||
struct linspaced_op_impl<Scalar,false>
|
||||
{
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
|
||||
linspaced_op_impl(const Scalar& low, const Scalar& step) :
|
||||
m_low(low), m_step(step),
|
||||
m_packetStep(pset1<Packet>(packet_traits<Scalar>::size*step)),
|
||||
m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (Index i) const
|
||||
{
|
||||
m_base = padd(m_base, pset1<Packet>(m_step));
|
||||
return m_low+Scalar(i)*m_step;
|
||||
}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); }
|
||||
|
||||
const Scalar m_low;
|
||||
const Scalar m_step;
|
||||
const Packet m_packetStep;
|
||||
mutable Packet m_base;
|
||||
};
|
||||
|
||||
// random access for packet ops:
|
||||
// 1) each step
|
||||
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
|
||||
template <typename Scalar>
|
||||
struct linspaced_op_impl<Scalar,true>
|
||||
{
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
|
||||
linspaced_op_impl(const Scalar& low, const Scalar& step) :
|
||||
m_low(low), m_step(step),
|
||||
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Scalar>(0)) {}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index i) const
|
||||
{ return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(i),m_interPacket))); }
|
||||
|
||||
const Scalar m_low;
|
||||
const Scalar m_step;
|
||||
const Packet m_lowPacket;
|
||||
const Packet m_stepPacket;
|
||||
const Packet m_interPacket;
|
||||
};
|
||||
|
||||
// ----- Linspace functor ----------------------------------------------------------------
|
||||
|
||||
// Forward declaration (we default to random access which does not really give
|
||||
// us a speed gain when using packet access but it allows to use the functor in
|
||||
// nested expressions).
|
||||
template <typename Scalar, bool RandomAccess = true> struct linspaced_op;
|
||||
template <typename Scalar, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,RandomAccess> >
|
||||
{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
|
||||
template <typename Scalar, bool RandomAccess> struct linspaced_op
|
||||
{
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
|
||||
|
||||
// We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
|
||||
// there row==0 and col is used for the actual iteration.
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const
|
||||
{
|
||||
eigen_assert(col==0 || row==0);
|
||||
return impl(col + row);
|
||||
}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); }
|
||||
|
||||
// We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
|
||||
// there row==0 and col is used for the actual iteration.
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
|
||||
{
|
||||
eigen_assert(col==0 || row==0);
|
||||
return impl.packetOp(col + row);
|
||||
}
|
||||
|
||||
// This proxy object handles the actual required temporaries, the different
|
||||
// implementations (random vs. sequential access) as well as the
|
||||
// correct piping to size 2/4 packet operations.
|
||||
const linspaced_op_impl<Scalar,RandomAccess> impl;
|
||||
};
|
||||
|
||||
// all functors allow linear access, except scalar_identity_op. So we fix here a quick meta
|
||||
// to indicate whether a functor allows linear access, just always answering 'yes' except for
|
||||
// scalar_identity_op.
|
||||
// FIXME move this to functor_traits adding a functor_default
|
||||
template<typename Functor> struct functor_has_linear_access { enum { ret = 1 }; };
|
||||
template<typename Scalar> struct functor_has_linear_access<scalar_identity_op<Scalar> > { enum { ret = 0 }; };
|
||||
|
||||
// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication
|
||||
// where the mixing of different types is handled by scalar_product_traits
|
||||
// In particular, real * complex<real> is allowed.
|
||||
// FIXME move this to functor_traits adding a functor_default
|
||||
template<typename Functor> struct functor_is_product_like { enum { ret = 0 }; };
|
||||
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
|
||||
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
|
||||
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_quotient_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to add a scalar to a fixed other one
|
||||
* \sa class CwiseUnaryOp, Array::operator+
|
||||
*/
|
||||
/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */
|
||||
template<typename Scalar>
|
||||
struct scalar_add_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { }
|
||||
inline scalar_add_op(const Scalar& other) : m_other(other) { }
|
||||
inline Scalar operator() (const Scalar& a) const { return a + m_other; }
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::padd(a, pset1<Packet>(m_other)); }
|
||||
const Scalar m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_add_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the square root of a scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::sqrt()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_sqrt_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
|
||||
inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_sqrt_op<Scalar> >
|
||||
{ enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasSqrt
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the cosine of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::cos()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_cos_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
|
||||
inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_cos_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasCos
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the sine of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::sin()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_sin_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
|
||||
inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_sin_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasSin
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the tan of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::tan()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_tan_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
|
||||
inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_tan_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasTan
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the arc cosine of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::acos()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_acos_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
|
||||
inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_acos_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasACos
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the arc sine of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::asin()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_asin_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
|
||||
inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_asin_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasASin
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to raise a scalar to a power
|
||||
* \sa class CwiseUnaryOp, Cwise::pow
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_pow_op {
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { }
|
||||
inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {}
|
||||
inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); }
|
||||
const Scalar m_exponent;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_pow_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the quotient between a scalar and array entries.
|
||||
* \sa class CwiseUnaryOp, Cwise::inverse()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_inverse_mult_op {
|
||||
scalar_inverse_mult_op(const Scalar& other) : m_other(other) {}
|
||||
inline Scalar operator() (const Scalar& a) const { return m_other / a; }
|
||||
template<typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pdiv(pset1<Packet>(m_other),a); }
|
||||
Scalar m_other;
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the inverse of a scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::inverse()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_inverse_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op)
|
||||
inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
|
||||
template<typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pdiv(pset1<Packet>(Scalar(1)),a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_inverse_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the square of a scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::square()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_square_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op)
|
||||
inline Scalar operator() (const Scalar& a) const { return a*a; }
|
||||
template<typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pmul(a,a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_square_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the cube of a scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::cube()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_cube_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op)
|
||||
inline Scalar operator() (const Scalar& a) const { return a*a*a; }
|
||||
template<typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pmul(a,pmul(a,a)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_cube_op<Scalar> >
|
||||
{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
|
||||
|
||||
// default functor traits for STL functors:
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::multiplies<T> >
|
||||
{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::divides<T> >
|
||||
{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::plus<T> >
|
||||
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::minus<T> >
|
||||
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::negate<T> >
|
||||
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::logical_or<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::logical_and<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::logical_not<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::greater<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::less<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::greater_equal<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::less_equal<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::equal_to<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::not_equal_to<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::binder2nd<T> >
|
||||
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::binder1st<T> >
|
||||
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::unary_negate<T> >
|
||||
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::binary_negate<T> >
|
||||
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
|
||||
#ifdef EIGEN_STDEXT_SUPPORT
|
||||
|
||||
template<typename T0,typename T1>
|
||||
struct functor_traits<std::project1st<T0,T1> >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
template<typename T0,typename T1>
|
||||
struct functor_traits<std::project2nd<T0,T1> >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
template<typename T0,typename T1>
|
||||
struct functor_traits<std::select2nd<std::pair<T0,T1> > >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
template<typename T0,typename T1>
|
||||
struct functor_traits<std::select1st<std::pair<T0,T1> > >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
template<typename T0,typename T1>
|
||||
struct functor_traits<std::unary_compose<T0,T1> >
|
||||
{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost, PacketAccess = false }; };
|
||||
|
||||
template<typename T0,typename T1,typename T2>
|
||||
struct functor_traits<std::binary_compose<T0,T1,T2> >
|
||||
{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost + functor_traits<T2>::Cost, PacketAccess = false }; };
|
||||
|
||||
#endif // EIGEN_STDEXT_SUPPORT
|
||||
|
||||
// allow to add new functors and specializations of functor_traits from outside Eigen.
|
||||
// this macro is really needed because functor_traits must be specialized after it is declared but before it is used...
|
||||
#ifdef EIGEN_FUNCTORS_PLUGIN
|
||||
#include EIGEN_FUNCTORS_PLUGIN
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_FUNCTORS_H
|
||||
@@ -19,9 +19,10 @@ namespace internal
|
||||
template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
|
||||
struct isApprox_selector
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
|
||||
{
|
||||
using std::min;
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
typename internal::nested<Derived,2>::type nested(x);
|
||||
typename internal::nested<OtherDerived,2>::type otherNested(y);
|
||||
return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
|
||||
@@ -31,6 +32,7 @@ struct isApprox_selector
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct isApprox_selector<Derived, OtherDerived, true>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&)
|
||||
{
|
||||
return x.matrix() == y.matrix();
|
||||
@@ -40,6 +42,7 @@ struct isApprox_selector<Derived, OtherDerived, true>
|
||||
template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
|
||||
struct isMuchSmallerThan_object_selector
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
|
||||
{
|
||||
return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum();
|
||||
@@ -49,6 +52,7 @@ struct isMuchSmallerThan_object_selector
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&)
|
||||
{
|
||||
return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
|
||||
@@ -58,6 +62,7 @@ struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
|
||||
template<typename Derived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
|
||||
struct isMuchSmallerThan_scalar_selector
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const typename Derived::RealScalar& y, const typename Derived::RealScalar& prec)
|
||||
{
|
||||
return x.cwiseAbs2().sum() <= numext::abs2(prec * y);
|
||||
@@ -67,6 +72,7 @@ struct isMuchSmallerThan_scalar_selector
|
||||
template<typename Derived>
|
||||
struct isMuchSmallerThan_scalar_selector<Derived, true>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const typename Derived::RealScalar&, const typename Derived::RealScalar&)
|
||||
{
|
||||
return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
|
||||
|
||||
@@ -66,8 +66,7 @@ template<typename Lhs, typename Rhs> struct product_type
|
||||
MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
|
||||
_Rhs::MaxRowsAtCompileTime),
|
||||
Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
|
||||
_Rhs::RowsAtCompileTime),
|
||||
LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
_Rhs::RowsAtCompileTime)
|
||||
};
|
||||
|
||||
// the splitting into different lines of code here, introducing the _select enums and the typedef below,
|
||||
@@ -232,7 +231,7 @@ EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest&
|
||||
// FIXME not very good if rhs is real and lhs complex while alpha is real too
|
||||
const Index cols = dest.cols();
|
||||
for (Index j=0; j<cols; ++j)
|
||||
func(dest.col(j), prod.rhs().coeff(j) * prod.lhs());
|
||||
func(dest.col(j), prod.rhs().coeff(0,j) * prod.lhs());
|
||||
}
|
||||
|
||||
// Row major
|
||||
@@ -243,7 +242,7 @@ EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest&
|
||||
// FIXME not very good if lhs is real and rhs complex while alpha is real too
|
||||
const Index rows = dest.rows();
|
||||
for (Index i=0; i<rows; ++i)
|
||||
func(dest.row(i), prod.lhs().coeff(i) * prod.rhs());
|
||||
func(dest.row(i), prod.lhs().coeff(i,0) * prod.rhs());
|
||||
}
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
@@ -397,7 +396,7 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
|
||||
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
|
||||
EIGEN_STRONG_INLINE Scalar* data() {
|
||||
return ForceAlignment
|
||||
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(15))) + 16)
|
||||
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES)
|
||||
: m_data.array;
|
||||
}
|
||||
#endif
|
||||
@@ -446,7 +445,7 @@ template<> struct gemv_selector<OnTheRight,ColMajor,true>
|
||||
if(!evalToDest)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
int size = dest.size();
|
||||
Index size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
if(!alphaIsCompatible)
|
||||
@@ -511,7 +510,7 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true>
|
||||
if(!DirectlyUseRhs)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
int size = actualRhs.size();
|
||||
Index size = actualRhs.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
|
||||
@@ -565,6 +564,40 @@ template<> struct gemv_selector<OnTheRight,RowMajor,false>
|
||||
*
|
||||
* \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
|
||||
*/
|
||||
#ifndef __CUDACC__
|
||||
|
||||
#ifdef EIGEN_TEST_EVALUATORS
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline const Product<Derived, OtherDerived>
|
||||
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
|
||||
{
|
||||
// A note regarding the function declaration: In MSVC, this function will sometimes
|
||||
// not be inlined since DenseStorage is an unwindable object for dynamic
|
||||
// matrices and product types are holding a member to store the result.
|
||||
// Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
|
||||
enum {
|
||||
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|
||||
|| OtherDerived::RowsAtCompileTime==Dynamic
|
||||
|| int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
|
||||
AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
|
||||
SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
|
||||
};
|
||||
// note to the lost user:
|
||||
// * for a dot product use: v1.dot(v2)
|
||||
// * for a coeff-wise product use: v1.cwiseProduct(v2)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
|
||||
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
|
||||
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
|
||||
#ifdef EIGEN_DEBUG_PRODUCT
|
||||
internal::product_type<Derived,OtherDerived>::debug();
|
||||
#endif
|
||||
|
||||
return Product<Derived, OtherDerived>(derived(), other.derived());
|
||||
}
|
||||
#else
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline const typename ProductReturnType<Derived, OtherDerived>::Type
|
||||
@@ -594,7 +627,9 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
|
||||
#endif
|
||||
return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
|
||||
*
|
||||
* The returned product will behave like any other expressions: the coefficients of the product will be
|
||||
|
||||
@@ -42,6 +42,8 @@ namespace internal {
|
||||
struct default_packet_traits
|
||||
{
|
||||
enum {
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
@@ -71,10 +73,12 @@ struct default_packet_traits
|
||||
template<typename T> struct packet_traits : default_packet_traits
|
||||
{
|
||||
typedef T type;
|
||||
typedef T half;
|
||||
enum {
|
||||
Vectorizable = 0,
|
||||
size = 1,
|
||||
AlignedOnScalar = 0
|
||||
AlignedOnScalar = 0,
|
||||
HasHalfPacket = 0
|
||||
};
|
||||
enum {
|
||||
HasAdd = 0,
|
||||
@@ -91,94 +95,149 @@ template<typename T> struct packet_traits : default_packet_traits
|
||||
};
|
||||
|
||||
/** \internal \returns a + b (coeff-wise) */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
padd(const Packet& a,
|
||||
const Packet& b) { return a+b; }
|
||||
|
||||
/** \internal \returns a - b (coeff-wise) */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
psub(const Packet& a,
|
||||
const Packet& b) { return a-b; }
|
||||
|
||||
/** \internal \returns -a (coeff-wise) */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pnegate(const Packet& a) { return -a; }
|
||||
|
||||
/** \internal \returns conj(a) (coeff-wise) */
|
||||
template<typename Packet> inline Packet
|
||||
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pconj(const Packet& a) { return numext::conj(a); }
|
||||
|
||||
/** \internal \returns a * b (coeff-wise) */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pmul(const Packet& a,
|
||||
const Packet& b) { return a*b; }
|
||||
|
||||
/** \internal \returns a / b (coeff-wise) */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pdiv(const Packet& a,
|
||||
const Packet& b) { return a/b; }
|
||||
|
||||
/** \internal \returns the min of \a a and \a b (coeff-wise) */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pmin(const Packet& a,
|
||||
const Packet& b) { using std::min; return (min)(a, b); }
|
||||
const Packet& b) { EIGEN_USING_STD_MATH(min); return (min)(a, b); }
|
||||
|
||||
/** \internal \returns the max of \a a and \a b (coeff-wise) */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pmax(const Packet& a,
|
||||
const Packet& b) { using std::max; return (max)(a, b); }
|
||||
const Packet& b) { EIGEN_USING_STD_MATH(max); return (max)(a, b); }
|
||||
|
||||
/** \internal \returns the absolute value of \a a */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pabs(const Packet& a) { using std::abs; return abs(a); }
|
||||
|
||||
/** \internal \returns the bitwise and of \a a and \a b */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pand(const Packet& a, const Packet& b) { return a & b; }
|
||||
|
||||
/** \internal \returns the bitwise or of \a a and \a b */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
por(const Packet& a, const Packet& b) { return a | b; }
|
||||
|
||||
/** \internal \returns the bitwise xor of \a a and \a b */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pxor(const Packet& a, const Packet& b) { return a ^ b; }
|
||||
|
||||
/** \internal \returns the bitwise andnot of \a a and \a b */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pandnot(const Packet& a, const Packet& b) { return a & (!b); }
|
||||
|
||||
/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
||||
|
||||
/** \internal \returns a packet version of \a *from, (un-aligned load) */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
||||
|
||||
/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
|
||||
|
||||
/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); }
|
||||
|
||||
/** \internal \returns a packet with elements of \a *from duplicated.
|
||||
* For instance, for a packet of 8 elements, 4 scalar will be read from \a *from and
|
||||
* duplicated to form: {from[0],from[0],from[1],from[1],,from[2],from[2],,from[3],from[3]}
|
||||
* For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
|
||||
* duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
|
||||
* Currently, this function is only used for scalar * complex products.
|
||||
*/
|
||||
template<typename Packet> inline Packet
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
||||
|
||||
/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
|
||||
template<typename Packet> inline Packet
|
||||
pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
|
||||
/** \internal \returns a packet with elements of \a *from quadrupled.
|
||||
* For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
|
||||
* replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
|
||||
* Currently, this function is only used in matrix products.
|
||||
* For packet-size smaller or equal to 4, this function is equivalent to pload1
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
ploadquad(const typename unpacket_traits<Packet>::type* from)
|
||||
{ return pload1<Packet>(from); }
|
||||
|
||||
/** \internal equivalent to
|
||||
* \code
|
||||
* a0 = pload1(a+0);
|
||||
* a1 = pload1(a+1);
|
||||
* a2 = pload1(a+2);
|
||||
* a3 = pload1(a+3);
|
||||
* \endcode
|
||||
* \sa pset1, pload1, ploaddup, pbroadcast2
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC
|
||||
inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
|
||||
Packet& a0, Packet& a1, Packet& a2, Packet& a3)
|
||||
{
|
||||
a0 = pload1<Packet>(a+0);
|
||||
a1 = pload1<Packet>(a+1);
|
||||
a2 = pload1<Packet>(a+2);
|
||||
a3 = pload1<Packet>(a+3);
|
||||
}
|
||||
|
||||
/** \internal equivalent to
|
||||
* \code
|
||||
* a0 = pload1(a+0);
|
||||
* a1 = pload1(a+1);
|
||||
* \endcode
|
||||
* \sa pset1, pload1, ploaddup, pbroadcast4
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC
|
||||
inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
|
||||
Packet& a0, Packet& a1)
|
||||
{
|
||||
a0 = pload1<Packet>(a+0);
|
||||
a1 = pload1<Packet>(a+1);
|
||||
}
|
||||
|
||||
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
|
||||
template<typename Scalar> inline typename packet_traits<Scalar>::type
|
||||
plset(const Scalar& a) { return a; }
|
||||
|
||||
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
|
||||
template<typename Scalar, typename Packet> inline void pstore(Scalar* to, const Packet& from)
|
||||
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
|
||||
{ (*to) = from; }
|
||||
|
||||
/** \internal copy the packet \a from to \a *to, (un-aligned store) */
|
||||
template<typename Scalar, typename Packet> inline void pstoreu(Scalar* to, const Packet& from)
|
||||
{ (*to) = from; }
|
||||
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
|
||||
{ (*to) = from; }
|
||||
|
||||
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, DenseIndex /*stride*/)
|
||||
{ return ploadu<Packet>(from); }
|
||||
|
||||
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, DenseIndex /*stride*/)
|
||||
{ pstore(to, from); }
|
||||
|
||||
/** \internal tries to do cache prefetching of \a addr */
|
||||
template<typename Scalar> inline void prefetch(const Scalar* addr)
|
||||
@@ -189,36 +248,45 @@ __builtin_prefetch(addr);
|
||||
}
|
||||
|
||||
/** \internal \returns the first element of a packet */
|
||||
template<typename Packet> inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
preduxp(const Packet* vecs) { return vecs[0]; }
|
||||
|
||||
/** \internal \returns the sum of the elements of \a a*/
|
||||
template<typename Packet> inline typename unpacket_traits<Packet>::type predux(const Packet& a)
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the sum of the elements of \a a by block of 4 elements.
|
||||
* For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
|
||||
* For packet-size smaller or equal to 4, this boils down to a noop.
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline
|
||||
typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
|
||||
predux4(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the product of the elements of \a a*/
|
||||
template<typename Packet> inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the min of the elements of \a a*/
|
||||
template<typename Packet> inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the max of the elements of \a a*/
|
||||
template<typename Packet> inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the reversed elements of \a a*/
|
||||
template<typename Packet> inline Packet preverse(const Packet& a)
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
|
||||
/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
|
||||
template<typename Packet> inline Packet pcplxflip(const Packet& a)
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
|
||||
{
|
||||
// FIXME: uncomment the following in case we drop the internal imag and real functions.
|
||||
// using std::imag;
|
||||
@@ -250,6 +318,10 @@ Packet pasin(const Packet& a) { using std::asin; return asin(a); }
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pacos(const Packet& a) { using std::acos; return acos(a); }
|
||||
|
||||
/** \internal \returns the atan of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet patan(const Packet& a) { using std::atan; return atan(a); }
|
||||
|
||||
/** \internal \returns the exp of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pexp(const Packet& a) { using std::exp; return exp(a); }
|
||||
@@ -275,7 +347,7 @@ inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename u
|
||||
}
|
||||
|
||||
/** \internal \returns a * b + c (coeff-wise) */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pmadd(const Packet& a,
|
||||
const Packet& b,
|
||||
const Packet& c)
|
||||
@@ -336,15 +408,33 @@ inline void palign(PacketType& first, const PacketType& second)
|
||||
* Fast complex products (GCC generates a function call which is very slow)
|
||||
***************************************************************************/
|
||||
|
||||
// Eigen+CUDA does not support complexes.
|
||||
#ifndef __CUDACC__
|
||||
|
||||
template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
|
||||
{ return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
|
||||
|
||||
template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
|
||||
{ return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
* PacketBlock, that is a collection of N packets where the number of words
|
||||
* in the packet is a multiple of N.
|
||||
***************************************************************************/
|
||||
template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
|
||||
Packet packet[N];
|
||||
};
|
||||
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
|
||||
// Nothing to do in the scalar case, i.e. a 1x1 matrix.
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_GENERIC_PACKET_MATH_H
|
||||
|
||||
|
||||
@@ -45,6 +45,7 @@ namespace Eigen
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op)
|
||||
|
||||
@@ -49,7 +49,7 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
|
||||
*/
|
||||
struct IOFormat
|
||||
{
|
||||
/** Default contructor, see class IOFormat for the meaning of the parameters */
|
||||
/** Default constructor, see class IOFormat for the meaning of the parameters */
|
||||
IOFormat(int _precision = StreamPrecision, int _flags = 0,
|
||||
const std::string& _coeffSeparator = " ",
|
||||
const std::string& _rowSeparator = "\n", const std::string& _rowPrefix="", const std::string& _rowSuffix="",
|
||||
@@ -57,6 +57,10 @@ struct IOFormat
|
||||
: matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator),
|
||||
rowSpacer(""), coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags)
|
||||
{
|
||||
// TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline
|
||||
// don't add rowSpacer if columns are not to be aligned
|
||||
if((flags & DontAlignCols))
|
||||
return;
|
||||
int i = int(matSuffix.length())-1;
|
||||
while (i>=0 && matSuffix[i]!='\n')
|
||||
{
|
||||
@@ -185,21 +189,22 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
|
||||
explicit_precision = fmt.precision;
|
||||
}
|
||||
|
||||
std::streamsize old_precision = 0;
|
||||
if(explicit_precision) old_precision = s.precision(explicit_precision);
|
||||
|
||||
bool align_cols = !(fmt.flags & DontAlignCols);
|
||||
if(align_cols)
|
||||
{
|
||||
// compute the largest width
|
||||
for(Index j = 1; j < m.cols(); ++j)
|
||||
for(Index j = 0; j < m.cols(); ++j)
|
||||
for(Index i = 0; i < m.rows(); ++i)
|
||||
{
|
||||
std::stringstream sstr;
|
||||
if(explicit_precision) sstr.precision(explicit_precision);
|
||||
sstr.copyfmt(s);
|
||||
sstr << m.coeff(i,j);
|
||||
width = std::max<Index>(width, Index(sstr.str().length()));
|
||||
}
|
||||
}
|
||||
std::streamsize old_precision = 0;
|
||||
if(explicit_precision) old_precision = s.precision(explicit_precision);
|
||||
s << fmt.matPrefix;
|
||||
for(Index i = 0; i < m.rows(); ++i)
|
||||
{
|
||||
|
||||
@@ -88,7 +88,7 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
|
||||
&& ( bool(IsDynamicSize)
|
||||
|| HasNoOuterStride
|
||||
|| ( OuterStrideAtCompileTime!=Dynamic
|
||||
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ),
|
||||
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ),
|
||||
Flags0 = TraitsBase::Flags & (~NestByRefBit),
|
||||
Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
|
||||
Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
|
||||
@@ -110,19 +110,17 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Map)
|
||||
|
||||
typedef typename Base::PointerType PointerType;
|
||||
#if EIGEN2_SUPPORT_STAGE <= STAGE30_FULL_EIGEN3_API
|
||||
typedef const Scalar* PointerArgType;
|
||||
inline PointerType cast_to_pointer_type(PointerArgType ptr) { return const_cast<PointerType>(ptr); }
|
||||
#else
|
||||
typedef PointerType PointerArgType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const
|
||||
{
|
||||
return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const
|
||||
{
|
||||
return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
|
||||
@@ -136,6 +134,7 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
|
||||
* \param dataPtr pointer to the array to map
|
||||
* \param a_stride optional Stride object, passing the strides.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType())
|
||||
: Base(cast_to_pointer_type(dataPtr)), m_stride(a_stride)
|
||||
{
|
||||
@@ -148,6 +147,7 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
|
||||
* \param a_size the size of the vector expression
|
||||
* \param a_stride optional Stride object, passing the strides.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Map(PointerArgType dataPtr, Index a_size, const StrideType& a_stride = StrideType())
|
||||
: Base(cast_to_pointer_type(dataPtr), a_size), m_stride(a_stride)
|
||||
{
|
||||
@@ -161,6 +161,7 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
|
||||
* \param nbCols the number of columns of the matrix expression
|
||||
* \param a_stride optional Stride object, passing the strides.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Map(PointerArgType dataPtr, Index nbRows, Index nbCols, const StrideType& a_stride = StrideType())
|
||||
: Base(cast_to_pointer_type(dataPtr), nbRows, nbCols), m_stride(a_stride)
|
||||
{
|
||||
@@ -173,19 +174,6 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
|
||||
StrideType m_stride;
|
||||
};
|
||||
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
inline Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>
|
||||
::Array(const Scalar *data)
|
||||
{
|
||||
this->_set_noalias(Eigen::Map<const Array>(data));
|
||||
}
|
||||
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
inline Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>
|
||||
::Matrix(const Scalar *data)
|
||||
{
|
||||
this->_set_noalias(Eigen::Map<const Matrix>(data));
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
|
||||
@@ -76,8 +76,8 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
|
||||
typedef typename Base::CoeffReturnType CoeffReturnType;
|
||||
|
||||
inline Index rows() const { return m_rows.value(); }
|
||||
inline Index cols() const { return m_cols.value(); }
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); }
|
||||
|
||||
/** Returns a pointer to the first coefficient of the matrix or vector.
|
||||
*
|
||||
@@ -87,22 +87,26 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
*/
|
||||
inline const Scalar* data() const { return m_data; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_data[colId * colStride() + rowId * rowStride()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeff(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
|
||||
return m_data[index * innerStride()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return this->m_data[colId * colStride() + rowId * rowStride()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
|
||||
@@ -123,12 +127,14 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
checkSanity();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline MapBase(PointerType dataPtr, Index vecSize)
|
||||
: m_data(dataPtr),
|
||||
m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)),
|
||||
@@ -140,6 +146,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
checkSanity();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols)
|
||||
: m_data(dataPtr), m_rows(nbRows), m_cols(nbCols)
|
||||
{
|
||||
@@ -151,12 +158,13 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
|
||||
protected:
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
void checkSanity() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
|
||||
internal::inner_stride_at_compile_time<Derived>::ret==1),
|
||||
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
|
||||
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0)
|
||||
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0)
|
||||
&& "data is not aligned");
|
||||
}
|
||||
|
||||
@@ -195,14 +203,18 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return this->m_data; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)
|
||||
{
|
||||
return this->m_data[col * colStride() + row * rowStride()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
|
||||
@@ -224,10 +236,11 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
|
||||
(this->m_data + index * innerStride(), val);
|
||||
}
|
||||
|
||||
explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
|
||||
inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
|
||||
inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) : Base(dataPtr, nbRows, nbCols) {}
|
||||
EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
|
||||
EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
|
||||
EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) : Base(dataPtr, nbRows, nbCols) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const MapBase& other)
|
||||
{
|
||||
Base::Base::operator=(other);
|
||||
@@ -237,6 +250,8 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
|
||||
using Base::Base::operator=;
|
||||
};
|
||||
|
||||
#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MAPBASE_H
|
||||
|
||||
@@ -12,6 +12,15 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
// On WINCE, std::abs is defined for int only, so let's defined our own overloads:
|
||||
// This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too.
|
||||
#if defined(_WIN32_WCE) && defined(_MSC_VER) && _MSC_VER<=1500
|
||||
long abs(long x) { return (labs(x)); }
|
||||
double abs(double x) { return (fabs(x)); }
|
||||
float abs(float x) { return (fabsf(x)); }
|
||||
long double abs(long double x) { return (fabsl(x)); }
|
||||
#endif
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \internal \struct global_math_functions_filtering_base
|
||||
@@ -62,6 +71,7 @@ template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
|
||||
struct real_default_impl
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const Scalar& x)
|
||||
{
|
||||
return x;
|
||||
@@ -72,6 +82,7 @@ template<typename Scalar>
|
||||
struct real_default_impl<Scalar,true>
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const Scalar& x)
|
||||
{
|
||||
using std::real;
|
||||
@@ -87,7 +98,6 @@ struct real_retval
|
||||
typedef typename NumTraits<Scalar>::Real type;
|
||||
};
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Implementation of imag *
|
||||
****************************************************************************/
|
||||
@@ -96,6 +106,7 @@ template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
|
||||
struct imag_default_impl
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const Scalar&)
|
||||
{
|
||||
return RealScalar(0);
|
||||
@@ -106,6 +117,7 @@ template<typename Scalar>
|
||||
struct imag_default_impl<Scalar,true>
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const Scalar& x)
|
||||
{
|
||||
using std::imag;
|
||||
@@ -129,10 +141,12 @@ template<typename Scalar>
|
||||
struct real_ref_impl
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar& run(Scalar& x)
|
||||
{
|
||||
return reinterpret_cast<RealScalar*>(&x)[0];
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline const RealScalar& run(const Scalar& x)
|
||||
{
|
||||
return reinterpret_cast<const RealScalar*>(&x)[0];
|
||||
@@ -153,10 +167,12 @@ template<typename Scalar, bool IsComplex>
|
||||
struct imag_ref_default_impl
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar& run(Scalar& x)
|
||||
{
|
||||
return reinterpret_cast<RealScalar*>(&x)[1];
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline const RealScalar& run(const Scalar& x)
|
||||
{
|
||||
return reinterpret_cast<RealScalar*>(&x)[1];
|
||||
@@ -166,10 +182,12 @@ struct imag_ref_default_impl
|
||||
template<typename Scalar>
|
||||
struct imag_ref_default_impl<Scalar, false>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Scalar run(Scalar&)
|
||||
{
|
||||
return Scalar(0);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline const Scalar run(const Scalar&)
|
||||
{
|
||||
return Scalar(0);
|
||||
@@ -192,6 +210,7 @@ struct imag_ref_retval
|
||||
template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
|
||||
struct conj_impl
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Scalar run(const Scalar& x)
|
||||
{
|
||||
return x;
|
||||
@@ -201,6 +220,7 @@ struct conj_impl
|
||||
template<typename Scalar>
|
||||
struct conj_impl<Scalar,true>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Scalar run(const Scalar& x)
|
||||
{
|
||||
using std::conj;
|
||||
@@ -222,6 +242,7 @@ template<typename Scalar>
|
||||
struct abs2_impl
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const Scalar& x)
|
||||
{
|
||||
return x*x;
|
||||
@@ -231,6 +252,7 @@ struct abs2_impl
|
||||
template<typename RealScalar>
|
||||
struct abs2_impl<std::complex<RealScalar> >
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const std::complex<RealScalar>& x)
|
||||
{
|
||||
return real(x)*real(x) + imag(x)*imag(x);
|
||||
@@ -251,6 +273,7 @@ template<typename Scalar, bool IsComplex>
|
||||
struct norm1_default_impl
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const Scalar& x)
|
||||
{
|
||||
using std::abs;
|
||||
@@ -261,6 +284,7 @@ struct norm1_default_impl
|
||||
template<typename Scalar>
|
||||
struct norm1_default_impl<Scalar, false>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Scalar run(const Scalar& x)
|
||||
{
|
||||
using std::abs;
|
||||
@@ -287,16 +311,23 @@ struct hypot_impl
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
static inline RealScalar run(const Scalar& x, const Scalar& y)
|
||||
{
|
||||
using std::max;
|
||||
using std::min;
|
||||
EIGEN_USING_STD_MATH(max);
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
using std::abs;
|
||||
using std::sqrt;
|
||||
RealScalar _x = abs(x);
|
||||
RealScalar _y = abs(y);
|
||||
RealScalar p = (max)(_x, _y);
|
||||
if(p==RealScalar(0)) return 0;
|
||||
RealScalar q = (min)(_x, _y);
|
||||
RealScalar qp = q/p;
|
||||
Scalar p, qp;
|
||||
if(_x>_y)
|
||||
{
|
||||
p = _x;
|
||||
qp = _y / p;
|
||||
}
|
||||
else
|
||||
{
|
||||
p = _y;
|
||||
qp = _x / p;
|
||||
}
|
||||
return p * sqrt(RealScalar(1) + qp*qp);
|
||||
}
|
||||
};
|
||||
@@ -332,37 +363,45 @@ inline NewType cast(const OldType& x)
|
||||
* Implementation of atanh2 *
|
||||
****************************************************************************/
|
||||
|
||||
template<typename Scalar, bool IsInteger>
|
||||
struct atanh2_default_impl
|
||||
template<typename Scalar>
|
||||
struct atanh2_impl
|
||||
{
|
||||
typedef Scalar retval;
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
static inline Scalar run(const Scalar& x, const Scalar& y)
|
||||
static inline Scalar run(const Scalar& x, const Scalar& r)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
|
||||
#if (__cplusplus >= 201103L) && !defined(__CYGWIN__)
|
||||
using std::log1p;
|
||||
return log1p(2 * x / (r - x)) / 2;
|
||||
#else
|
||||
using std::abs;
|
||||
using std::log;
|
||||
using std::sqrt;
|
||||
Scalar z = x / r;
|
||||
if (r == 0 || abs(z) > sqrt(NumTraits<Scalar>::epsilon()))
|
||||
return log((r + x) / (r - x)) / 2;
|
||||
else
|
||||
return z + z*z*z / 3;
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<typename RealScalar>
|
||||
struct atanh2_impl<std::complex<RealScalar> >
|
||||
{
|
||||
typedef std::complex<RealScalar> Scalar;
|
||||
static inline Scalar run(const Scalar& x, const Scalar& r)
|
||||
{
|
||||
using std::abs;
|
||||
using std::log;
|
||||
using std::norm;
|
||||
using std::sqrt;
|
||||
Scalar z = x / y;
|
||||
if (y == Scalar(0) || abs(z) > sqrt(NumTraits<RealScalar>::epsilon()))
|
||||
return RealScalar(0.5) * log((y + x) / (y - x));
|
||||
Scalar z = x / r;
|
||||
if (r == Scalar(0) || norm(z) > NumTraits<RealScalar>::epsilon())
|
||||
return RealScalar(0.5) * log((r + x) / (r - x));
|
||||
else
|
||||
return z + z*z*z / RealScalar(3);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar>
|
||||
struct atanh2_default_impl<Scalar, true>
|
||||
{
|
||||
static inline Scalar run(const Scalar&, const Scalar&)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
|
||||
return Scalar(0);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar>
|
||||
struct atanh2_impl : atanh2_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
|
||||
|
||||
template<typename Scalar>
|
||||
struct atanh2_retval
|
||||
{
|
||||
@@ -554,72 +593,84 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()
|
||||
namespace numext {
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x)
|
||||
{
|
||||
return internal::real_ref_impl<Scalar>::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x)
|
||||
{
|
||||
return internal::imag_ref_impl<Scalar>::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(atanh2, Scalar) atanh2(const Scalar& x, const Scalar& y)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(atanh2, Scalar)::run(x, y);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y);
|
||||
@@ -627,11 +678,22 @@ inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y)
|
||||
|
||||
// std::isfinite is non standard, so let's define our own version,
|
||||
// even though it is not very efficient.
|
||||
template<typename T> bool (isfinite)(const T& x)
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
bool (isfinite)(const T& x)
|
||||
{
|
||||
return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
bool (isfinite)(const std::complex<T>& x)
|
||||
{
|
||||
using std::real;
|
||||
using std::imag;
|
||||
return isfinite(real(x)) && isfinite(imag(x));
|
||||
}
|
||||
|
||||
} // end namespace numext
|
||||
|
||||
namespace internal {
|
||||
@@ -649,18 +711,20 @@ template<typename Scalar>
|
||||
struct scalar_fuzzy_default_impl<Scalar, false, false>
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
template<typename OtherScalar>
|
||||
template<typename OtherScalar> EIGEN_DEVICE_FUNC
|
||||
static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
|
||||
{
|
||||
using std::abs;
|
||||
return abs(x) <= abs(y) * prec;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
|
||||
{
|
||||
using std::min;
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
using std::abs;
|
||||
return abs(x - y) <= (min)(abs(x), abs(y)) * prec;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
|
||||
{
|
||||
return x <= y || isApprox(x, y, prec);
|
||||
@@ -671,15 +735,17 @@ template<typename Scalar>
|
||||
struct scalar_fuzzy_default_impl<Scalar, false, true>
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
template<typename OtherScalar>
|
||||
template<typename OtherScalar> EIGEN_DEVICE_FUNC
|
||||
static inline bool isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&)
|
||||
{
|
||||
return x == Scalar(0);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&)
|
||||
{
|
||||
return x == y;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&)
|
||||
{
|
||||
return x <= y;
|
||||
@@ -697,7 +763,7 @@ struct scalar_fuzzy_default_impl<Scalar, true, false>
|
||||
}
|
||||
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
|
||||
{
|
||||
using std::min;
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
return numext::abs2(x - y) <= (min)(numext::abs2(x), numext::abs2(y)) * prec * prec;
|
||||
}
|
||||
};
|
||||
@@ -705,21 +771,21 @@ struct scalar_fuzzy_default_impl<Scalar, true, false>
|
||||
template<typename Scalar>
|
||||
struct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
|
||||
|
||||
template<typename Scalar, typename OtherScalar>
|
||||
template<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC
|
||||
inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
|
||||
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
|
||||
{
|
||||
return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
template<typename Scalar> EIGEN_DEVICE_FUNC
|
||||
inline bool isApprox(const Scalar& x, const Scalar& y,
|
||||
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
|
||||
{
|
||||
return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
template<typename Scalar> EIGEN_DEVICE_FUNC
|
||||
inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y,
|
||||
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
|
||||
{
|
||||
@@ -742,17 +808,19 @@ template<> struct scalar_fuzzy_impl<bool>
|
||||
{
|
||||
typedef bool RealScalar;
|
||||
|
||||
template<typename OtherScalar>
|
||||
template<typename OtherScalar> EIGEN_DEVICE_FUNC
|
||||
static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&)
|
||||
{
|
||||
return !x;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline bool isApprox(bool x, bool y, bool)
|
||||
{
|
||||
return x == y;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&)
|
||||
{
|
||||
return (!x) || y;
|
||||
|
||||
@@ -151,6 +151,7 @@ class Matrix
|
||||
*
|
||||
* \callgraph
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other)
|
||||
{
|
||||
return Base::_set(other);
|
||||
@@ -167,6 +168,7 @@ class Matrix
|
||||
* remain row-vectors and vectors remain vectors.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix& operator=(const MatrixBase<OtherDerived>& other)
|
||||
{
|
||||
return Base::_set(other);
|
||||
@@ -179,12 +181,14 @@ class Matrix
|
||||
* \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
return Base::operator=(other);
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue<OtherDerived>& func)
|
||||
{
|
||||
return Base::operator=(func);
|
||||
@@ -200,6 +204,7 @@ class Matrix
|
||||
*
|
||||
* \sa resize(Index,Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix() : Base()
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -207,45 +212,85 @@ class Matrix
|
||||
}
|
||||
|
||||
// FIXME is it still needed
|
||||
EIGEN_DEVICE_FUNC
|
||||
Matrix(internal::constructor_without_unaligned_array_assert)
|
||||
: Base(internal::constructor_without_unaligned_array_assert())
|
||||
{ Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
|
||||
|
||||
/** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
|
||||
*
|
||||
* Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
|
||||
* it is redundant to pass the dimension here, so it makes more sense to use the default
|
||||
* constructor Matrix() instead.
|
||||
*/
|
||||
EIGEN_STRONG_INLINE explicit Matrix(Index dim)
|
||||
: Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim)
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
Matrix(Matrix&& other)
|
||||
: Base(std::move(other))
|
||||
{
|
||||
Base::_check_template_params();
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix)
|
||||
eigen_assert(dim >= 0);
|
||||
eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
|
||||
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
|
||||
if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
|
||||
Base::_set_noalias(other);
|
||||
}
|
||||
Matrix& operator=(Matrix&& other)
|
||||
{
|
||||
other.swap(*this);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
// This constructor is for both 1x1 matrices and dynamic vectors
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE explicit Matrix(const T& x)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::template _init1<T>(x);
|
||||
}
|
||||
|
||||
template<typename T0, typename T1>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::template _init2<T0,T1>(x, y);
|
||||
}
|
||||
#else
|
||||
/** \brief Constructs a fixed-sized matrix initialized with coefficients starting at \a data */
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit Matrix(const Scalar *data);
|
||||
|
||||
/** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
|
||||
*
|
||||
* This is useful for dynamic-size vectors. For fixed-size vectors,
|
||||
* it is redundant to pass these parameters, so one should use the default constructor
|
||||
* Matrix() instead.
|
||||
*
|
||||
* \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance,
|
||||
* calling Matrix<double,1,1>(1) will call the initialization constructor: Matrix(const Scalar&).
|
||||
* For fixed-size \c 1x1 matrices it is thefore recommended to use the default
|
||||
* constructor Matrix() instead, especilly when using one of the non standard
|
||||
* \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
|
||||
*/
|
||||
EIGEN_STRONG_INLINE explicit Matrix(Index dim);
|
||||
/** \brief Constructs an initialized 1x1 matrix with the given coefficient */
|
||||
Matrix(const Scalar& x);
|
||||
/** \brief Constructs an uninitialized matrix with \a rows rows and \a cols columns.
|
||||
*
|
||||
* This is useful for dynamic-size matrices. For fixed-size matrices,
|
||||
* it is redundant to pass these parameters, so one should use the default constructor
|
||||
* Matrix() instead. */
|
||||
* Matrix() instead.
|
||||
*
|
||||
* \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance,
|
||||
* calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y).
|
||||
* For fixed-size \c 1x2 or \c 2x1 vectors it is thefore recommended to use the default
|
||||
* constructor Matrix() instead, especilly when using one of the non standard
|
||||
* \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
Matrix(Index rows, Index cols);
|
||||
|
||||
/** \brief Constructs an initialized 2D vector with given coefficients */
|
||||
Matrix(const Scalar& x, const Scalar& y);
|
||||
#endif
|
||||
|
||||
/** \brief Constructs an initialized 3D vector with given coefficients */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -255,6 +300,7 @@ class Matrix
|
||||
m_storage.data()[2] = z;
|
||||
}
|
||||
/** \brief Constructs an initialized 4D vector with given coefficients */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -265,10 +311,10 @@ class Matrix
|
||||
m_storage.data()[3] = w;
|
||||
}
|
||||
|
||||
explicit Matrix(const Scalar *data);
|
||||
|
||||
/** \brief Constructor copying the value of the expression \a other */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const MatrixBase<OtherDerived>& other)
|
||||
: Base(other.rows() * other.cols(), other.rows(), other.cols())
|
||||
{
|
||||
@@ -281,6 +327,7 @@ class Matrix
|
||||
Base::_set_noalias(other);
|
||||
}
|
||||
/** \brief Copy constructor */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const Matrix& other)
|
||||
: Base(other.rows() * other.cols(), other.rows(), other.cols())
|
||||
{
|
||||
@@ -289,6 +336,7 @@ class Matrix
|
||||
}
|
||||
/** \brief Copy constructor with in-place evaluation */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const ReturnByValue<OtherDerived>& other)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -300,11 +348,12 @@ class Matrix
|
||||
* \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
|
||||
: Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::resize(other.rows(), other.cols());
|
||||
Base::_resize_to_match(other);
|
||||
// FIXME/CHECK: isn't *this = other.derived() more efficient. it allows to
|
||||
// go for pure _set() implementations, right?
|
||||
*this = other;
|
||||
@@ -315,26 +364,22 @@ class Matrix
|
||||
* of same type it is enough to swap the data pointers.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void swap(MatrixBase<OtherDerived> const & other)
|
||||
{ this->_swap(other.derived()); }
|
||||
|
||||
inline Index innerStride() const { return 1; }
|
||||
inline Index outerStride() const { return this->innerSize(); }
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
|
||||
|
||||
/////////// Geometry module ///////////
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Matrix& operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<typename OtherDerived>
|
||||
explicit Matrix(const eigen2_RotationBase<OtherDerived,ColsAtCompileTime>& r);
|
||||
template<typename OtherDerived>
|
||||
Matrix& operator=(const eigen2_RotationBase<OtherDerived,ColsAtCompileTime>& r);
|
||||
#endif
|
||||
|
||||
// allow to extend Matrix outside Eigen
|
||||
#ifdef EIGEN_MATRIX_PLUGIN
|
||||
#include EIGEN_MATRIX_PLUGIN
|
||||
|
||||
@@ -81,6 +81,7 @@ template<typename Derived> class MatrixBase
|
||||
using Base::operator-=;
|
||||
using Base::operator*=;
|
||||
using Base::operator/=;
|
||||
using Base::operator*;
|
||||
|
||||
typedef typename Base::CoeffReturnType CoeffReturnType;
|
||||
typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
|
||||
@@ -98,6 +99,7 @@ template<typename Derived> class MatrixBase
|
||||
|
||||
/** \returns the size of the main diagonal, which is min(rows(),cols()).
|
||||
* \sa rows(), cols(), SizeAtCompileTime. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index diagonalSize() const { return (std::min)(rows(),cols()); }
|
||||
|
||||
/** \brief The plain matrix type corresponding to this expression.
|
||||
@@ -145,38 +147,59 @@ template<typename Derived> class MatrixBase
|
||||
/** Special case of the template operator=, in order to prevent the compiler
|
||||
* from generating a default operator= (issue hit with g++ 4.1)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const MatrixBase& other);
|
||||
|
||||
// We cannot inherit here via Base::operator= since it is causing
|
||||
// trouble with MSVC.
|
||||
|
||||
template <typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const DenseBase<OtherDerived>& other);
|
||||
|
||||
template <typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const EigenBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const ReturnByValue<OtherDerived>& other);
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& lazyAssign(const ProductBase<ProductDerived, Lhs,Rhs>& other);
|
||||
|
||||
template<typename MatrixPower, typename Lhs, typename Rhs>
|
||||
Derived& lazyAssign(const MatrixPowerProduct<MatrixPower, Lhs,Rhs>& other);
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator+=(const MatrixBase<OtherDerived>& other);
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator-=(const MatrixBase<OtherDerived>& other);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename LazyProductReturnType<Derived,OtherDerived>::Type
|
||||
operator*(const MatrixBase<OtherDerived> &other) const
|
||||
{ return this->lazyProduct(other); }
|
||||
#else
|
||||
|
||||
#ifdef EIGEN_TEST_EVALUATORS
|
||||
template<typename OtherDerived>
|
||||
const Product<Derived,OtherDerived>
|
||||
operator*(const MatrixBase<OtherDerived> &other) const;
|
||||
#else
|
||||
template<typename OtherDerived>
|
||||
const typename ProductReturnType<Derived,OtherDerived>::Type
|
||||
operator*(const MatrixBase<OtherDerived> &other) const;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename LazyProductReturnType<Derived,OtherDerived>::Type
|
||||
lazyProduct(const MatrixBase<OtherDerived> &other) const;
|
||||
|
||||
@@ -190,39 +213,44 @@ template<typename Derived> class MatrixBase
|
||||
void applyOnTheRight(const EigenBase<OtherDerived>& other);
|
||||
|
||||
template<typename DiagonalDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const DiagonalProduct<Derived, DiagonalDerived, OnTheRight>
|
||||
operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
|
||||
dot(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<typename OtherDerived>
|
||||
Scalar eigen2_dot(const MatrixBase<OtherDerived>& other) const;
|
||||
#endif
|
||||
|
||||
RealScalar squaredNorm() const;
|
||||
RealScalar norm() const;
|
||||
EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;
|
||||
EIGEN_DEVICE_FUNC RealScalar norm() const;
|
||||
RealScalar stableNorm() const;
|
||||
RealScalar blueNorm() const;
|
||||
RealScalar hypotNorm() const;
|
||||
const PlainObject normalized() const;
|
||||
void normalize();
|
||||
EIGEN_DEVICE_FUNC const PlainObject normalized() const;
|
||||
EIGEN_DEVICE_FUNC void normalize();
|
||||
|
||||
const AdjointReturnType adjoint() const;
|
||||
void adjointInPlace();
|
||||
EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
|
||||
EIGEN_DEVICE_FUNC void adjointInPlace();
|
||||
|
||||
typedef Diagonal<Derived> DiagonalReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
DiagonalReturnType diagonal();
|
||||
typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
|
||||
|
||||
typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
ConstDiagonalReturnType diagonal() const;
|
||||
|
||||
template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
|
||||
template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
|
||||
|
||||
template<int Index> typename DiagonalIndexReturnType<Index>::Type diagonal();
|
||||
template<int Index> typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
|
||||
template<int Index>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename DiagonalIndexReturnType<Index>::Type diagonal();
|
||||
|
||||
template<int Index>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
|
||||
|
||||
// Note: The "MatrixBase::" prefixes are added to help MSVC9 to match these declarations with the later implementations.
|
||||
// On the other hand they confuse MSVC8...
|
||||
@@ -230,48 +258,51 @@ template<typename Derived> class MatrixBase
|
||||
typename MatrixBase::template DiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index);
|
||||
typename MatrixBase::template ConstDiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index) const;
|
||||
#else
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename DiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index);
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename ConstDiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index) const;
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<unsigned int Mode> typename internal::eigen2_part_return_type<Derived, Mode>::type part();
|
||||
template<unsigned int Mode> const typename internal::eigen2_part_return_type<Derived, Mode>::type part() const;
|
||||
|
||||
// huuuge hack. make Eigen2's matrix.part<Diagonal>() work in eigen3. Problem: Diagonal is now a class template instead
|
||||
// of an integer constant. Solution: overload the part() method template wrt template parameters list.
|
||||
template<template<typename T, int N> class U>
|
||||
const DiagonalWrapper<ConstDiagonalReturnType> part() const
|
||||
{ return diagonal().asDiagonal(); }
|
||||
#endif // EIGEN2_SUPPORT
|
||||
|
||||
template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
|
||||
template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
|
||||
|
||||
template<unsigned int Mode> typename TriangularViewReturnType<Mode>::Type triangularView();
|
||||
template<unsigned int Mode> typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
|
||||
template<unsigned int Mode>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename TriangularViewReturnType<Mode>::Type triangularView();
|
||||
template<unsigned int Mode>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
|
||||
|
||||
template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
|
||||
template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };
|
||||
|
||||
template<unsigned int UpLo> typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
|
||||
template<unsigned int UpLo> typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
|
||||
template<unsigned int UpLo>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
|
||||
template<unsigned int UpLo>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
|
||||
|
||||
const SparseView<Derived> sparseView(const Scalar& m_reference = Scalar(0),
|
||||
const typename NumTraits<Scalar>::Real& m_epsilon = NumTraits<Scalar>::dummy_precision()) const;
|
||||
static const IdentityReturnType Identity();
|
||||
static const IdentityReturnType Identity(Index rows, Index cols);
|
||||
static const BasisReturnType Unit(Index size, Index i);
|
||||
static const BasisReturnType Unit(Index i);
|
||||
static const BasisReturnType UnitX();
|
||||
static const BasisReturnType UnitY();
|
||||
static const BasisReturnType UnitZ();
|
||||
static const BasisReturnType UnitW();
|
||||
EIGEN_DEVICE_FUNC static const IdentityReturnType Identity();
|
||||
EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols);
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i);
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i);
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType UnitX();
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType UnitY();
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ();
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType UnitW();
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const DiagonalWrapper<const Derived> asDiagonal() const;
|
||||
const PermutationWrapper<const Derived> asPermutation() const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& setIdentity();
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& setIdentity(Index rows, Index cols);
|
||||
|
||||
bool isIdentity(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
@@ -310,42 +341,26 @@ template<typename Derived> class MatrixBase
|
||||
|
||||
Scalar trace() const;
|
||||
|
||||
/////////// Array module ///////////
|
||||
template<int p> EIGEN_DEVICE_FUNC RealScalar lpNorm() const;
|
||||
|
||||
template<int p> RealScalar lpNorm() const;
|
||||
|
||||
MatrixBase<Derived>& matrix() { return *this; }
|
||||
const MatrixBase<Derived>& matrix() const { return *this; }
|
||||
EIGEN_DEVICE_FUNC MatrixBase<Derived>& matrix() { return *this; }
|
||||
EIGEN_DEVICE_FUNC const MatrixBase<Derived>& matrix() const { return *this; }
|
||||
|
||||
/** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix
|
||||
* \sa ArrayBase::matrix() */
|
||||
ArrayWrapper<Derived> array() { return derived(); }
|
||||
const ArrayWrapper<const Derived> array() const { return derived(); }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return derived(); }
|
||||
/** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix
|
||||
* \sa ArrayBase::matrix() */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return derived(); }
|
||||
|
||||
/////////// LU module ///////////
|
||||
|
||||
const FullPivLU<PlainObject> fullPivLu() const;
|
||||
const PartialPivLU<PlainObject> partialPivLu() const;
|
||||
EIGEN_DEVICE_FUNC const FullPivLU<PlainObject> fullPivLu() const;
|
||||
EIGEN_DEVICE_FUNC const PartialPivLU<PlainObject> partialPivLu() const;
|
||||
|
||||
#if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS
|
||||
const LU<PlainObject> lu() const;
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
const LU<PlainObject> eigen2_lu() const;
|
||||
#endif
|
||||
|
||||
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
|
||||
const PartialPivLU<PlainObject> lu() const;
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<typename ResultType>
|
||||
void computeInverse(MatrixBase<ResultType> *result) const {
|
||||
*result = this->inverse();
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const internal::inverse_impl<Derived> inverse() const;
|
||||
template<typename ResultType>
|
||||
void computeInverseAndDetWithCheck(
|
||||
@@ -372,10 +387,6 @@ template<typename Derived> class MatrixBase
|
||||
const HouseholderQR<PlainObject> householderQr() const;
|
||||
const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
|
||||
const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
const QR<PlainObject> qr() const;
|
||||
#endif
|
||||
|
||||
EigenvaluesReturnType eigenvalues() const;
|
||||
RealScalar operatorNorm() const;
|
||||
@@ -384,10 +395,6 @@ template<typename Derived> class MatrixBase
|
||||
|
||||
JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
SVD<PlainObject> svd() const;
|
||||
#endif
|
||||
|
||||
/////////// Geometry module ///////////
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
@@ -398,20 +405,24 @@ template<typename Derived> class MatrixBase
|
||||
};
|
||||
#endif // EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename cross_product_return_type<OtherDerived>::type
|
||||
cross(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
PlainObject unitOrthogonal(void) const;
|
||||
|
||||
Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
|
||||
|
||||
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
|
||||
ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
|
||||
// put this as separate enum value to work around possible GCC 4.3 bug (?)
|
||||
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1?Vertical:Horizontal };
|
||||
typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
|
||||
HomogeneousReturnType homogeneous() const;
|
||||
#endif
|
||||
|
||||
enum {
|
||||
SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
|
||||
@@ -458,49 +469,15 @@ template<typename Derived> class MatrixBase
|
||||
const MatrixSquareRootReturnValue<Derived> sqrt() const;
|
||||
const MatrixLogarithmReturnValue<Derived> log() const;
|
||||
const MatrixPowerReturnValue<Derived> pow(const RealScalar& p) const;
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
Derived& operator+=(const Flagged<ProductBase<ProductDerived, Lhs,Rhs>, 0,
|
||||
EvalBeforeAssigningBit>& other);
|
||||
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
Derived& operator-=(const Flagged<ProductBase<ProductDerived, Lhs,Rhs>, 0,
|
||||
EvalBeforeAssigningBit>& other);
|
||||
|
||||
/** \deprecated because .lazy() is deprecated
|
||||
* Overloaded for cache friendly product evaluation */
|
||||
template<typename OtherDerived>
|
||||
Derived& lazyAssign(const Flagged<OtherDerived, 0, EvalBeforeAssigningBit>& other)
|
||||
{ return lazyAssign(other._expression()); }
|
||||
|
||||
template<unsigned int Added>
|
||||
const Flagged<Derived, Added, 0> marked() const;
|
||||
const Flagged<Derived, 0, EvalBeforeAssigningBit> lazy() const;
|
||||
|
||||
inline const Cwise<Derived> cwise() const;
|
||||
inline Cwise<Derived> cwise();
|
||||
|
||||
VectorBlock<Derived> start(Index size);
|
||||
const VectorBlock<const Derived> start(Index size) const;
|
||||
VectorBlock<Derived> end(Index size);
|
||||
const VectorBlock<const Derived> end(Index size) const;
|
||||
template<int Size> VectorBlock<Derived,Size> start();
|
||||
template<int Size> const VectorBlock<const Derived,Size> start() const;
|
||||
template<int Size> VectorBlock<Derived,Size> end();
|
||||
template<int Size> const VectorBlock<const Derived,Size> end() const;
|
||||
|
||||
Minor<Derived> minor(Index row, Index col);
|
||||
const Minor<Derived> minor(Index row, Index col) const;
|
||||
#endif
|
||||
const MatrixComplexPowerReturnValue<Derived> pow(const std::complex<RealScalar>& p) const;
|
||||
|
||||
protected:
|
||||
MatrixBase() : Base() {}
|
||||
EIGEN_DEVICE_FUNC MatrixBase() : Base() {}
|
||||
|
||||
private:
|
||||
explicit MatrixBase(int);
|
||||
MatrixBase(int,int);
|
||||
template<typename OtherDerived> explicit MatrixBase(const MatrixBase<OtherDerived>&);
|
||||
EIGEN_DEVICE_FUNC explicit MatrixBase(int);
|
||||
EIGEN_DEVICE_FUNC MatrixBase(int,int);
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase<OtherDerived>&);
|
||||
protected:
|
||||
// mixing arrays and matrices is not legal
|
||||
template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
|
||||
@@ -510,6 +487,51 @@ template<typename Derived> class MatrixBase
|
||||
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
|
||||
};
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of matrix base methods
|
||||
***************************************************************************/
|
||||
|
||||
/** replaces \c *this by \c *this * \a other.
|
||||
*
|
||||
* \returns a reference to \c *this
|
||||
*
|
||||
* Example: \include MatrixBase_applyOnTheRight.cpp
|
||||
* Output: \verbinclude MatrixBase_applyOnTheRight.out
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline Derived&
|
||||
MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
other.derived().applyThisOnTheRight(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=().
|
||||
*
|
||||
* Example: \include MatrixBase_applyOnTheRight.cpp
|
||||
* Output: \verbinclude MatrixBase_applyOnTheRight.out
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
other.derived().applyThisOnTheRight(derived());
|
||||
}
|
||||
|
||||
/** replaces \c *this by \a other * \c *this.
|
||||
*
|
||||
* Example: \include MatrixBase_applyOnTheLeft.cpp
|
||||
* Output: \verbinclude MatrixBase_applyOnTheLeft.out
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
other.derived().applyThisOnTheLeft(derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MATRIXBASE_H
|
||||
|
||||
@@ -37,11 +37,13 @@ class NoAlias
|
||||
/** Behaves like MatrixBase::lazyAssign(other)
|
||||
* \sa MatrixBase::lazyAssign() */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
|
||||
{ return internal::assign_selector<ExpressionType,OtherDerived,false>::run(m_expression,other.derived()); }
|
||||
|
||||
/** \sa MatrixBase::operator+= */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
|
||||
{
|
||||
typedef SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
|
||||
@@ -54,6 +56,7 @@ class NoAlias
|
||||
|
||||
/** \sa MatrixBase::operator-= */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
|
||||
{
|
||||
typedef SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
|
||||
@@ -66,10 +69,12 @@ class NoAlias
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
|
||||
{ other.derived().addTo(m_expression); return m_expression; }
|
||||
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
|
||||
{ other.derived().subTo(m_expression); return m_expression; }
|
||||
|
||||
@@ -78,6 +83,7 @@ class NoAlias
|
||||
{ return m_expression.derived() += CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
|
||||
|
||||
template<typename Lhs, typename Rhs, int NestingFlags>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator-=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other)
|
||||
{ return m_expression.derived() -= CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
|
||||
|
||||
@@ -86,6 +92,7 @@ class NoAlias
|
||||
{ return m_expression = func; }
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
ExpressionType& expression() const
|
||||
{
|
||||
return m_expression;
|
||||
|
||||
@@ -68,7 +68,16 @@ template<typename T> struct GenericNumTraits
|
||||
>::type NonInteger;
|
||||
typedef T Nested;
|
||||
|
||||
static inline Real epsilon() { return std::numeric_limits<T>::epsilon(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Real epsilon()
|
||||
{
|
||||
#if defined(__CUDA_ARCH__)
|
||||
return internal::device::numeric_limits<T>::epsilon();
|
||||
#else
|
||||
return std::numeric_limits<T>::epsilon();
|
||||
#endif
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Real dummy_precision()
|
||||
{
|
||||
// make sure to override this for floating-point types
|
||||
@@ -76,13 +85,6 @@ template<typename T> struct GenericNumTraits
|
||||
}
|
||||
static inline T highest() { return (std::numeric_limits<T>::max)(); }
|
||||
static inline T lowest() { return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)()); }
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
enum {
|
||||
HasFloatingPoint = !IsInteger
|
||||
};
|
||||
typedef NonInteger FloatingPoint;
|
||||
#endif
|
||||
};
|
||||
|
||||
template<typename T> struct NumTraits : GenericNumTraits<T>
|
||||
@@ -91,11 +93,13 @@ template<typename T> struct NumTraits : GenericNumTraits<T>
|
||||
template<> struct NumTraits<float>
|
||||
: GenericNumTraits<float>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline float dummy_precision() { return 1e-5f; }
|
||||
};
|
||||
|
||||
template<> struct NumTraits<double> : GenericNumTraits<double>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline double dummy_precision() { return 1e-12; }
|
||||
};
|
||||
|
||||
|
||||
@@ -66,11 +66,11 @@ class PermutationBase : public EigenBase<Derived>
|
||||
MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = Traits::MaxColsAtCompileTime
|
||||
};
|
||||
typedef typename Traits::Scalar Scalar;
|
||||
typedef typename Traits::StorageIndexType StorageIndexType;
|
||||
typedef typename Traits::Index Index;
|
||||
typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime,0,MaxRowsAtCompileTime,MaxColsAtCompileTime>
|
||||
typedef Matrix<StorageIndexType,RowsAtCompileTime,ColsAtCompileTime,0,MaxRowsAtCompileTime,MaxColsAtCompileTime>
|
||||
DenseMatrixType;
|
||||
typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,Index>
|
||||
typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,StorageIndexType>
|
||||
PlainPermutationType;
|
||||
using Base::derived;
|
||||
#endif
|
||||
@@ -147,7 +147,7 @@ class PermutationBase : public EigenBase<Derived>
|
||||
/** Sets *this to be the identity permutation matrix */
|
||||
void setIdentity()
|
||||
{
|
||||
for(Index i = 0; i < size(); ++i)
|
||||
for(StorageIndexType i = 0; i < size(); ++i)
|
||||
indices().coeffRef(i) = i;
|
||||
}
|
||||
|
||||
@@ -173,8 +173,8 @@ class PermutationBase : public EigenBase<Derived>
|
||||
eigen_assert(i>=0 && j>=0 && i<size() && j<size());
|
||||
for(Index k = 0; k < size(); ++k)
|
||||
{
|
||||
if(indices().coeff(k) == i) indices().coeffRef(k) = j;
|
||||
else if(indices().coeff(k) == j) indices().coeffRef(k) = i;
|
||||
if(indices().coeff(k) == i) indices().coeffRef(k) = StorageIndexType(j);
|
||||
else if(indices().coeff(k) == j) indices().coeffRef(k) = StorageIndexType(i);
|
||||
}
|
||||
return derived();
|
||||
}
|
||||
@@ -262,7 +262,7 @@ class PermutationBase : public EigenBase<Derived>
|
||||
*
|
||||
* \param SizeAtCompileTime the number of rows/cols, or Dynamic
|
||||
* \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
|
||||
* \param IndexType the interger type of the indices
|
||||
* \param StorageIndexType the integer type of the indices
|
||||
*
|
||||
* This class represents a permutation matrix, internally stored as a vector of integers.
|
||||
*
|
||||
@@ -270,17 +270,18 @@ class PermutationBase : public EigenBase<Derived>
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
|
||||
struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
|
||||
: traits<Matrix<IndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType>
|
||||
struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType> >
|
||||
: traits<Matrix<_StorageIndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
|
||||
{
|
||||
typedef IndexType Index;
|
||||
typedef Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
|
||||
typedef Matrix<_StorageIndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
typedef _StorageIndexType StorageIndexType;
|
||||
};
|
||||
}
|
||||
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
|
||||
class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType>
|
||||
class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType> >
|
||||
{
|
||||
typedef PermutationBase<PermutationMatrix> Base;
|
||||
typedef internal::traits<PermutationMatrix> Traits;
|
||||
@@ -288,6 +289,8 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename Traits::StorageIndexType StorageIndexType;
|
||||
typedef typename Traits::Index Index;
|
||||
#endif
|
||||
|
||||
inline PermutationMatrix()
|
||||
@@ -295,7 +298,7 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
|
||||
|
||||
/** Constructs an uninitialized permutation matrix of given size.
|
||||
*/
|
||||
inline PermutationMatrix(int size) : m_indices(size)
|
||||
inline PermutationMatrix(Index size) : m_indices(size)
|
||||
{}
|
||||
|
||||
/** Copy constructor. */
|
||||
@@ -384,18 +387,19 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
|
||||
|
||||
|
||||
namespace internal {
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
|
||||
struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess> >
|
||||
: traits<Matrix<IndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType, int _PacketAccess>
|
||||
struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType>,_PacketAccess> >
|
||||
: traits<Matrix<_StorageIndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
|
||||
{
|
||||
typedef IndexType Index;
|
||||
typedef Map<const Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
|
||||
typedef Map<const Matrix<_StorageIndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
typedef _StorageIndexType StorageIndexType;
|
||||
};
|
||||
}
|
||||
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
|
||||
class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess>
|
||||
: public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType, int _PacketAccess>
|
||||
class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType>,_PacketAccess>
|
||||
: public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType>,_PacketAccess> >
|
||||
{
|
||||
typedef PermutationBase<Map> Base;
|
||||
typedef internal::traits<Map> Traits;
|
||||
@@ -403,14 +407,15 @@ class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename IndicesType::Scalar Index;
|
||||
typedef typename IndicesType::Scalar StorageIndexType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
#endif
|
||||
|
||||
inline Map(const Index* indicesPtr)
|
||||
inline Map(const StorageIndexType* indicesPtr)
|
||||
: m_indices(indicesPtr)
|
||||
{}
|
||||
|
||||
inline Map(const Index* indicesPtr, Index size)
|
||||
inline Map(const StorageIndexType* indicesPtr, Index size)
|
||||
: m_indices(indicesPtr,size)
|
||||
{}
|
||||
|
||||
@@ -466,7 +471,8 @@ struct traits<PermutationWrapper<_IndicesType> >
|
||||
{
|
||||
typedef PermutationStorage StorageKind;
|
||||
typedef typename _IndicesType::Scalar Scalar;
|
||||
typedef typename _IndicesType::Scalar Index;
|
||||
typedef typename _IndicesType::Scalar StorageIndexType;
|
||||
typedef typename _IndicesType::Index Index;
|
||||
typedef _IndicesType IndicesType;
|
||||
enum {
|
||||
RowsAtCompileTime = _IndicesType::SizeAtCompileTime,
|
||||
@@ -553,7 +559,8 @@ struct permut_matrix_product_retval
|
||||
template<typename Dest> inline void evalTo(Dest& dst) const
|
||||
{
|
||||
const Index n = Side==OnTheLeft ? rows() : cols();
|
||||
|
||||
// FIXME we need an is_same for expression that is not sensitive to constness. For instance
|
||||
// is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
|
||||
if(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix))
|
||||
{
|
||||
// apply the permutation inplace
|
||||
|
||||
@@ -28,6 +28,7 @@ namespace internal {
|
||||
|
||||
template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_ALWAYS_INLINE void run(Index, Index)
|
||||
{
|
||||
}
|
||||
@@ -35,6 +36,7 @@ template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
|
||||
|
||||
template<> struct check_rows_cols_for_overflow<Dynamic> {
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols)
|
||||
{
|
||||
// http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
|
||||
@@ -47,7 +49,10 @@ template<> struct check_rows_cols_for_overflow<Dynamic> {
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Derived, typename OtherDerived = Derived, bool IsVector = bool(Derived::IsVectorAtCompileTime)> struct conservative_resize_like_impl;
|
||||
template <typename Derived,
|
||||
typename OtherDerived = Derived,
|
||||
bool IsVector = bool(Derived::IsVectorAtCompileTime) && bool(OtherDerived::IsVectorAtCompileTime)>
|
||||
struct conservative_resize_like_impl;
|
||||
|
||||
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct matrix_swap_impl;
|
||||
|
||||
@@ -126,12 +131,17 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::Flags & AlignedBit) != 0 };
|
||||
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Base& base() { return *static_cast<Base*>(this); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Base& base() const { return *static_cast<const Base*>(this); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const
|
||||
{
|
||||
if(Flags & RowMajorBit)
|
||||
@@ -140,11 +150,13 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
return m_storage.data()[rowId + colId * m_storage.rows()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
|
||||
{
|
||||
return m_storage.data()[index];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
if(Flags & RowMajorBit)
|
||||
@@ -153,11 +165,13 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
return m_storage.data()[rowId + colId * m_storage.rows()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_storage.data()[index];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
if(Flags & RowMajorBit)
|
||||
@@ -166,6 +180,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
return m_storage.data()[rowId + colId * m_storage.rows()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_storage.data()[index];
|
||||
@@ -229,6 +244,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
*
|
||||
* \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void resize(Index nbRows, Index nbCols)
|
||||
{
|
||||
eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,nbRows==RowsAtCompileTime)
|
||||
@@ -259,6 +275,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
*
|
||||
* \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void resize(Index size)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
|
||||
@@ -283,6 +300,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
*
|
||||
* \sa resize(Index,Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void resize(NoChange_t, Index nbCols)
|
||||
{
|
||||
resize(rows(), nbCols);
|
||||
@@ -296,6 +314,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
*
|
||||
* \sa resize(Index,Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void resize(Index nbRows, NoChange_t)
|
||||
{
|
||||
resize(nbRows, cols());
|
||||
@@ -309,6 +328,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* remain row-vectors and vectors remain vectors.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
|
||||
{
|
||||
const OtherDerived& other = _other.derived();
|
||||
@@ -336,6 +356,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* Matrices are resized relative to the top-left element. In case values need to be
|
||||
* appended to the matrix they will be uninitialized.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, Index nbCols)
|
||||
{
|
||||
internal::conservative_resize_like_impl<Derived>::run(*this, nbRows, nbCols);
|
||||
@@ -348,6 +369,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
*
|
||||
* In case the matrix is growing, new rows will be uninitialized.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, NoChange_t)
|
||||
{
|
||||
// Note: see the comment in conservativeResize(Index,Index)
|
||||
@@ -361,6 +383,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
*
|
||||
* In case the matrix is growing, new columns will be uninitialized.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index nbCols)
|
||||
{
|
||||
// Note: see the comment in conservativeResize(Index,Index)
|
||||
@@ -375,6 +398,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
*
|
||||
* When values are appended, they will be uninitialized.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void conservativeResize(Index size)
|
||||
{
|
||||
internal::conservative_resize_like_impl<Derived>::run(*this, size);
|
||||
@@ -390,6 +414,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* appended to the matrix they will copied from \c other.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
internal::conservative_resize_like_impl<Derived,OtherDerived>::run(*this, other);
|
||||
@@ -398,6 +423,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
/** This is a special case of the templated operator=. Its purpose is to
|
||||
* prevent a default operator= from hiding the templated operator=.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other)
|
||||
{
|
||||
return _set(other);
|
||||
@@ -405,6 +431,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
|
||||
/** \sa MatrixBase::lazyAssign() */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
_resize_to_match(other);
|
||||
@@ -412,12 +439,14 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue<OtherDerived>& func)
|
||||
{
|
||||
resize(func.rows(), func.cols());
|
||||
return Base::operator=(func);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE PlainObjectBase() : m_storage()
|
||||
{
|
||||
// _check_template_params();
|
||||
@@ -427,6 +456,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
// FIXME is it still needed ?
|
||||
/** \internal */
|
||||
EIGEN_DEVICE_FUNC
|
||||
PlainObjectBase(internal::constructor_without_unaligned_array_assert)
|
||||
: m_storage(internal::constructor_without_unaligned_array_assert())
|
||||
{
|
||||
@@ -434,6 +464,23 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
PlainObjectBase(PlainObjectBase&& other)
|
||||
: m_storage( std::move(other.m_storage) )
|
||||
{
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
PlainObjectBase& operator=(PlainObjectBase&& other)
|
||||
{
|
||||
using std::swap;
|
||||
swap(m_storage, other.m_storage);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE PlainObjectBase(Index a_size, Index nbRows, Index nbCols)
|
||||
: m_storage(a_size, nbRows, nbCols)
|
||||
{
|
||||
@@ -444,6 +491,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
/** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
_resize_to_match(other);
|
||||
@@ -453,6 +501,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
|
||||
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
|
||||
: m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
|
||||
{
|
||||
@@ -535,16 +584,16 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
//@}
|
||||
|
||||
using Base::setConstant;
|
||||
Derived& setConstant(Index size, const Scalar& value);
|
||||
Derived& setConstant(Index rows, Index cols, const Scalar& value);
|
||||
EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& value);
|
||||
EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& value);
|
||||
|
||||
using Base::setZero;
|
||||
Derived& setZero(Index size);
|
||||
Derived& setZero(Index rows, Index cols);
|
||||
EIGEN_DEVICE_FUNC Derived& setZero(Index size);
|
||||
EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols);
|
||||
|
||||
using Base::setOnes;
|
||||
Derived& setOnes(Index size);
|
||||
Derived& setOnes(Index rows, Index cols);
|
||||
EIGEN_DEVICE_FUNC Derived& setOnes(Index size);
|
||||
EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols);
|
||||
|
||||
using Base::setRandom;
|
||||
Derived& setRandom(Index size);
|
||||
@@ -563,6 +612,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* remain row-vectors and vectors remain vectors.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase<OtherDerived>& other)
|
||||
{
|
||||
#ifdef EIGEN_NO_AUTOMATIC_RESIZING
|
||||
@@ -590,6 +640,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* \internal
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
_set_selector(other.derived(), typename internal::conditional<static_cast<bool>(int(OtherDerived::Flags) & EvalBeforeAssigningBit), internal::true_type, internal::false_type>::type());
|
||||
@@ -597,9 +648,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::true_type&) { _set_noalias(other.eval()); }
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::false_type&) { _set_noalias(other); }
|
||||
|
||||
/** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which
|
||||
@@ -608,6 +661,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* \sa operator=(const MatrixBase<OtherDerived>&), _set()
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
// I don't think we need this resize call since the lazyAssign will anyways resize
|
||||
@@ -619,6 +673,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
}
|
||||
|
||||
template<typename T0, typename T1>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init2(Index nbRows, Index nbCols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
|
||||
@@ -626,13 +681,91 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
|
||||
resize(nbRows,nbCols);
|
||||
}
|
||||
|
||||
template<typename T0, typename T1>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init2(const Scalar& val0, const Scalar& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
|
||||
m_storage.data()[0] = val0;
|
||||
m_storage.data()[1] = val1;
|
||||
}
|
||||
|
||||
template<typename T0, typename T1>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1,
|
||||
typename internal::enable_if< (!internal::is_same<Index,Scalar>::value)
|
||||
&& (internal::is_same<T0,Index>::value)
|
||||
&& (internal::is_same<T1,Index>::value)
|
||||
&& Base::SizeAtCompileTime==2,T1>::type* = 0)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
|
||||
m_storage.data()[0] = Scalar(val0);
|
||||
m_storage.data()[1] = Scalar(val1);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if<Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value,T>::type* = 0)
|
||||
{
|
||||
// NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
|
||||
const bool is_integer = NumTraits<T>::IsInteger;
|
||||
EIGEN_STATIC_ASSERT(is_integer,
|
||||
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
|
||||
resize(size);
|
||||
}
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if<Base::SizeAtCompileTime==1 && internal::is_convertible<T, Scalar>::value,T>::type* = 0)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
|
||||
m_storage.data()[0] = val0;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const Index& val0,
|
||||
typename internal::enable_if< (!internal::is_same<Index,Scalar>::value)
|
||||
&& (internal::is_same<Index,T>::value)
|
||||
&& Base::SizeAtCompileTime==1
|
||||
&& internal::is_convertible<T, Scalar>::value,T*>::type* = 0)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
|
||||
m_storage.data()[0] = Scalar(val0);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const Scalar* data){
|
||||
this->_set_noalias(ConstMapType(data));
|
||||
}
|
||||
|
||||
template<typename T, typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const DenseBase<OtherDerived>& other){
|
||||
this->_set_noalias(other);
|
||||
}
|
||||
|
||||
template<typename T, typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const EigenBase<OtherDerived>& other){
|
||||
this->derived() = other;
|
||||
}
|
||||
|
||||
template<typename T, typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const ReturnByValue<OtherDerived>& other)
|
||||
{
|
||||
resize(other.rows(), other.cols());
|
||||
other.evalTo(this->derived());
|
||||
}
|
||||
|
||||
template<typename T, typename OtherDerived, int ColsAtCompileTime>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
|
||||
{
|
||||
this->derived() = r;
|
||||
}
|
||||
|
||||
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
|
||||
friend struct internal::matrix_swap_impl;
|
||||
@@ -641,6 +774,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* data pointers.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void _swap(DenseBase<OtherDerived> const & other)
|
||||
{
|
||||
enum { SwapPointers = internal::is_same<Derived, OtherDerived>::value && Base::SizeAtCompileTime==Dynamic };
|
||||
@@ -649,6 +783,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
|
||||
public:
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void _check_template_params()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
|
||||
@@ -668,8 +803,10 @@ private:
|
||||
enum { ThisConstantIsPrivateInPlainObjectBase };
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
template <typename Derived, typename OtherDerived, bool IsVector>
|
||||
struct internal::conservative_resize_like_impl
|
||||
struct conservative_resize_like_impl
|
||||
{
|
||||
typedef typename Derived::Index Index;
|
||||
static void run(DenseBase<Derived>& _this, Index rows, Index cols)
|
||||
@@ -729,11 +866,14 @@ struct internal::conservative_resize_like_impl
|
||||
}
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Here, the specialization for vectors inherits from the general matrix case
|
||||
// to allow calling .conservativeResize(rows,cols) on vectors.
|
||||
template <typename Derived, typename OtherDerived>
|
||||
struct conservative_resize_like_impl<Derived,OtherDerived,true>
|
||||
: conservative_resize_like_impl<Derived,OtherDerived,false>
|
||||
{
|
||||
using conservative_resize_like_impl<Derived,OtherDerived,false>::run;
|
||||
|
||||
typedef typename Derived::Index Index;
|
||||
static void run(DenseBase<Derived>& _this, Index size)
|
||||
{
|
||||
@@ -760,6 +900,7 @@ struct conservative_resize_like_impl<Derived,OtherDerived,true>
|
||||
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
|
||||
struct matrix_swap_impl
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(MatrixTypeA& a, MatrixTypeB& b)
|
||||
{
|
||||
a.base().swap(b);
|
||||
@@ -769,6 +910,7 @@ struct matrix_swap_impl
|
||||
template<typename MatrixTypeA, typename MatrixTypeB>
|
||||
struct matrix_swap_impl<MatrixTypeA, MatrixTypeB, true>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(MatrixTypeA& a, MatrixTypeB& b)
|
||||
{
|
||||
static_cast<typename MatrixTypeA::Base&>(a).m_storage.swap(static_cast<typename MatrixTypeB::Base&>(b).m_storage);
|
||||
|
||||
107
Eigen/src/Core/Product.h
Normal file
107
Eigen/src/Core/Product.h
Normal file
@@ -0,0 +1,107 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_PRODUCT_H
|
||||
#define EIGEN_PRODUCT_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
template<typename Lhs, typename Rhs> class Product;
|
||||
template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl;
|
||||
|
||||
/** \class Product
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the product of two arbitrary matrices or vectors
|
||||
*
|
||||
* \param Lhs the type of the left-hand side expression
|
||||
* \param Rhs the type of the right-hand side expression
|
||||
*
|
||||
* This class represents an expression of the product of two arbitrary matrices.
|
||||
*
|
||||
*/
|
||||
|
||||
// Use ProductReturnType to get correct traits, in particular vectorization flags
|
||||
namespace internal {
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct traits<Product<Lhs, Rhs> >
|
||||
: traits<typename ProductReturnType<Lhs, Rhs>::Type>
|
||||
{
|
||||
// We want A+B*C to be of type Product<Matrix, Sum> and not Product<Matrix, Matrix>
|
||||
// TODO: This flag should eventually go in a separate evaluator traits class
|
||||
enum {
|
||||
Flags = traits<typename ProductReturnType<Lhs, Rhs>::Type>::Flags & ~(EvalBeforeNestingBit | DirectAccessBit)
|
||||
};
|
||||
};
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
|
||||
typename internal::traits<Rhs>::StorageKind>::ret>
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename ProductImpl<
|
||||
Lhs, Rhs,
|
||||
typename internal::promote_storage_type<typename Lhs::StorageKind,
|
||||
typename Rhs::StorageKind>::ret>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
|
||||
|
||||
typedef typename Lhs::Nested LhsNested;
|
||||
typedef typename Rhs::Nested RhsNested;
|
||||
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
|
||||
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
|
||||
|
||||
Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
|
||||
{
|
||||
eigen_assert(lhs.cols() == rhs.rows()
|
||||
&& "invalid matrix product"
|
||||
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
|
||||
}
|
||||
|
||||
inline Index rows() const { return m_lhs.rows(); }
|
||||
inline Index cols() const { return m_rhs.cols(); }
|
||||
|
||||
const LhsNestedCleaned& lhs() const { return m_lhs; }
|
||||
const RhsNestedCleaned& rhs() const { return m_rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
LhsNested m_lhs;
|
||||
RhsNested m_rhs;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
class ProductImpl<Lhs,Rhs,Dense> : public internal::dense_xpr_base<Product<Lhs,Rhs> >::type
|
||||
{
|
||||
typedef Product<Lhs, Rhs> Derived;
|
||||
public:
|
||||
|
||||
typedef typename internal::dense_xpr_base<Product<Lhs, Rhs> >::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of matrix base methods
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
/** \internal used to test the evaluator only
|
||||
*/
|
||||
template<typename Lhs,typename Rhs>
|
||||
const Product<Lhs,Rhs>
|
||||
prod(const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
return Product<Lhs,Rhs>(lhs,rhs);
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PRODUCT_H
|
||||
@@ -131,17 +131,13 @@ class ProductBase : public MatrixBase<Derived>
|
||||
const Diagonal<FullyLazyCoeffBaseProductType,Dynamic> diagonal(Index index) const
|
||||
{ return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); }
|
||||
|
||||
// restrict coeff accessors to 1x1 expressions. No need to care about mutators here since this isnt a Lvalue expression
|
||||
// restrict coeff accessors to 1x1 expressions. No need to care about mutators here since this isn't an Lvalue expression
|
||||
typename Base::CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
return lhs().row(row).cwiseProduct(rhs().col(col).transpose()).sum();
|
||||
#else
|
||||
EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
|
||||
eigen_assert(this->rows() == 1 && this->cols() == 1);
|
||||
Matrix<Scalar,1,1> result = *this;
|
||||
return result.coeff(row,col);
|
||||
#endif
|
||||
}
|
||||
|
||||
typename Base::CoeffReturnType coeff(Index i) const
|
||||
|
||||
411
Eigen/src/Core/ProductEvaluators.h
Normal file
411
Eigen/src/Core/ProductEvaluators.h
Normal file
@@ -0,0 +1,411 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
|
||||
#ifndef EIGEN_PRODUCTEVALUATORS_H
|
||||
#define EIGEN_PRODUCTEVALUATORS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// We can evaluate the product either all at once, like GeneralProduct and its evalTo() function, or
|
||||
// traverse the matrix coefficient by coefficient, like CoeffBasedProduct. Use the existing logic
|
||||
// in ProductReturnType to decide.
|
||||
|
||||
template<typename XprType, typename ProductType>
|
||||
struct product_evaluator_dispatcher;
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct evaluator_impl<Product<Lhs, Rhs> >
|
||||
: product_evaluator_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type>
|
||||
{
|
||||
typedef Product<Lhs, Rhs> XprType;
|
||||
typedef product_evaluator_dispatcher<XprType, typename ProductReturnType<Lhs, Rhs>::Type> Base;
|
||||
|
||||
evaluator_impl(const XprType& xpr) : Base(xpr)
|
||||
{ }
|
||||
};
|
||||
|
||||
template<typename XprType, typename ProductType>
|
||||
struct product_evaluator_traits_dispatcher;
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct evaluator_traits<Product<Lhs, Rhs> >
|
||||
: product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type>
|
||||
{
|
||||
static const int AssumeAliasing = 1;
|
||||
};
|
||||
|
||||
// Case 1: Evaluate all at once
|
||||
//
|
||||
// We can view the GeneralProduct class as a part of the product evaluator.
|
||||
// Four sub-cases: InnerProduct, OuterProduct, GemmProduct and GemvProduct.
|
||||
// InnerProduct is special because GeneralProduct does not have an evalTo() method in this case.
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> >
|
||||
{
|
||||
static const int HasEvalTo = 0;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> >
|
||||
: public evaluator<typename Product<Lhs, Rhs>::PlainObject>::type
|
||||
{
|
||||
typedef Product<Lhs, Rhs> XprType;
|
||||
typedef typename XprType::PlainObject PlainObject;
|
||||
typedef typename evaluator<PlainObject>::type evaluator_base;
|
||||
|
||||
// TODO: Computation is too early (?)
|
||||
product_evaluator_dispatcher(const XprType& xpr) : evaluator_base(m_result)
|
||||
{
|
||||
m_result.coeffRef(0,0) = (xpr.lhs().transpose().cwiseProduct(xpr.rhs())).sum();
|
||||
}
|
||||
|
||||
protected:
|
||||
PlainObject m_result;
|
||||
};
|
||||
|
||||
// For the other three subcases, simply call the evalTo() method of GeneralProduct
|
||||
// TODO: GeneralProduct should take evaluators, not expression objects.
|
||||
|
||||
template<typename Lhs, typename Rhs, int ProductType>
|
||||
struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> >
|
||||
{
|
||||
static const int HasEvalTo = 1;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int ProductType>
|
||||
struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> >
|
||||
{
|
||||
typedef Product<Lhs, Rhs> XprType;
|
||||
typedef typename XprType::PlainObject PlainObject;
|
||||
typedef typename evaluator<PlainObject>::type evaluator_base;
|
||||
|
||||
product_evaluator_dispatcher(const XprType& xpr) : m_xpr(xpr)
|
||||
{ }
|
||||
|
||||
template<typename DstEvaluatorType, typename DstXprType>
|
||||
void evalTo(DstEvaluatorType /* not used */, DstXprType& dst) const
|
||||
{
|
||||
dst.resize(m_xpr.rows(), m_xpr.cols());
|
||||
GeneralProduct<Lhs, Rhs, ProductType>(m_xpr.lhs(), m_xpr.rhs()).evalTo(dst);
|
||||
}
|
||||
|
||||
protected:
|
||||
const XprType& m_xpr;
|
||||
};
|
||||
|
||||
// Case 2: Evaluate coeff by coeff
|
||||
//
|
||||
// This is mostly taken from CoeffBasedProduct.h
|
||||
// The main difference is that we add an extra argument to the etor_product_*_impl::run() function
|
||||
// for the inner dimension of the product, because evaluator object do not know their size.
|
||||
|
||||
template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct etor_product_coeff_impl;
|
||||
|
||||
template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl;
|
||||
|
||||
template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags>
|
||||
struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> >
|
||||
{
|
||||
static const int HasEvalTo = 0;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags>
|
||||
struct product_evaluator_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> >
|
||||
: evaluator_impl_base<Product<Lhs, Rhs> >
|
||||
{
|
||||
typedef Product<Lhs, Rhs> XprType;
|
||||
typedef CoeffBasedProduct<LhsNested, RhsNested, Flags> CoeffBasedProductType;
|
||||
|
||||
product_evaluator_dispatcher(const XprType& xpr)
|
||||
: m_lhsImpl(xpr.lhs()),
|
||||
m_rhsImpl(xpr.rhs()),
|
||||
m_innerDim(xpr.lhs().cols())
|
||||
{ }
|
||||
|
||||
typedef typename XprType::Index Index;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
|
||||
// Everything below here is taken from CoeffBasedProduct.h
|
||||
|
||||
enum {
|
||||
RowsAtCompileTime = traits<CoeffBasedProductType>::RowsAtCompileTime,
|
||||
PacketSize = packet_traits<Scalar>::size,
|
||||
InnerSize = traits<CoeffBasedProductType>::InnerSize,
|
||||
CoeffReadCost = traits<CoeffBasedProductType>::CoeffReadCost,
|
||||
Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
|
||||
CanVectorizeInner = traits<CoeffBasedProductType>::CanVectorizeInner
|
||||
};
|
||||
|
||||
typedef typename evaluator<Lhs>::type LhsEtorType;
|
||||
typedef typename evaluator<Rhs>::type RhsEtorType;
|
||||
typedef etor_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
|
||||
Unroll ? InnerSize-1 : Dynamic,
|
||||
LhsEtorType, RhsEtorType, Scalar> CoeffImpl;
|
||||
|
||||
const CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
Scalar res;
|
||||
CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
|
||||
* which is why we don't set the LinearAccessBit.
|
||||
*/
|
||||
const CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
Scalar res;
|
||||
const Index row = RowsAtCompileTime == 1 ? 0 : index;
|
||||
const Index col = RowsAtCompileTime == 1 ? index : 0;
|
||||
CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
const PacketReturnType packet(Index row, Index col) const
|
||||
{
|
||||
PacketScalar res;
|
||||
typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
|
||||
Unroll ? InnerSize-1 : Dynamic,
|
||||
LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl;
|
||||
PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
|
||||
return res;
|
||||
}
|
||||
|
||||
protected:
|
||||
typename evaluator<Lhs>::type m_lhsImpl;
|
||||
typename evaluator<Rhs>::type m_rhsImpl;
|
||||
|
||||
// TODO: Get rid of m_innerDim if known at compile time
|
||||
Index m_innerDim;
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Normal product .coeff() implementation (with meta-unrolling)
|
||||
***************************************************************************/
|
||||
|
||||
/**************************************
|
||||
*** Scalar path - no vectorization ***
|
||||
**************************************/
|
||||
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct etor_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
|
||||
{
|
||||
etor_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, innerDim, res);
|
||||
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct etor_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res)
|
||||
{
|
||||
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct etor_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res)
|
||||
{
|
||||
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
|
||||
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
|
||||
for(Index i = 1; i < innerDim; ++i)
|
||||
res += lhs.coeff(row, i) * rhs.coeff(i, col);
|
||||
}
|
||||
};
|
||||
|
||||
/*******************************************
|
||||
*** Scalar path with inner vectorization ***
|
||||
*******************************************/
|
||||
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
|
||||
struct etor_product_coeff_vectorized_unroller
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres)
|
||||
{
|
||||
etor_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
|
||||
pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet>
|
||||
struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres)
|
||||
{
|
||||
pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
|
||||
}
|
||||
};
|
||||
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct etor_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::PacketScalar Packet;
|
||||
typedef typename Lhs::Index Index;
|
||||
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
|
||||
{
|
||||
Packet pres;
|
||||
etor_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
|
||||
etor_product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, innerDim, res);
|
||||
res = predux(pres);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
|
||||
struct etor_product_coeff_vectorized_dyn_selector
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
|
||||
}
|
||||
};
|
||||
|
||||
// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
|
||||
// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
|
||||
template<typename Lhs, typename Rhs, int RhsCols>
|
||||
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int LhsRows>
|
||||
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.transpose().cwiseProduct(rhs).sum();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct etor_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res)
|
||||
{
|
||||
etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, innerDim, res);
|
||||
}
|
||||
};
|
||||
|
||||
/*******************
|
||||
*** Packet path ***
|
||||
*******************/
|
||||
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
||||
{
|
||||
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
|
||||
}
|
||||
};
|
||||
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
||||
{
|
||||
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
||||
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
||||
{
|
||||
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
|
||||
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
|
||||
for(Index i = 1; i < innerDim; ++i)
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
||||
{
|
||||
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
|
||||
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
|
||||
for(Index i = 1; i < innerDim; ++i)
|
||||
res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PRODUCT_EVALUATORS_H
|
||||
@@ -28,12 +28,18 @@ struct functor_traits<scalar_random_op<Scalar> >
|
||||
|
||||
/** \returns a random matrix expression
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* The parameters \a rows and \a cols are the number of rows and of columns of
|
||||
* the returned matrix. Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* \not_reentrant
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
|
||||
* it is redundant to pass \a rows and \a cols as arguments, so Random() should be used
|
||||
* instead.
|
||||
*
|
||||
*
|
||||
* Example: \include MatrixBase_random_int_int.cpp
|
||||
* Output: \verbinclude MatrixBase_random_int_int.out
|
||||
@@ -41,8 +47,10 @@ struct functor_traits<scalar_random_op<Scalar> >
|
||||
* This expression has the "evaluate before nesting" flag so that it will be evaluated into
|
||||
* a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
|
||||
* behavior with expressions involving random matrices.
|
||||
*
|
||||
* See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators.
|
||||
*
|
||||
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index), MatrixBase::Random()
|
||||
* \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
|
||||
@@ -52,11 +60,15 @@ DenseBase<Derived>::Random(Index rows, Index cols)
|
||||
}
|
||||
|
||||
/** \returns a random vector expression
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* The parameter \a size is the size of the returned vector.
|
||||
* Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* \only_for_vectors
|
||||
* \not_reentrant
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
|
||||
* it is redundant to pass \a size as argument, so Random() should be used
|
||||
@@ -69,7 +81,7 @@ DenseBase<Derived>::Random(Index rows, Index cols)
|
||||
* a temporary vector whenever it is nested in a larger expression. This prevents unexpected
|
||||
* behavior with expressions involving random matrices.
|
||||
*
|
||||
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random()
|
||||
* \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
|
||||
@@ -80,6 +92,9 @@ DenseBase<Derived>::Random(Index size)
|
||||
|
||||
/** \returns a fixed-size random matrix or vector expression
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
|
||||
* need to use the variants taking size arguments.
|
||||
*
|
||||
@@ -89,8 +104,10 @@ DenseBase<Derived>::Random(Index size)
|
||||
* This expression has the "evaluate before nesting" flag so that it will be evaluated into
|
||||
* a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
|
||||
* behavior with expressions involving random matrices.
|
||||
*
|
||||
* \not_reentrant
|
||||
*
|
||||
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random(Index)
|
||||
* \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
|
||||
@@ -101,6 +118,11 @@ DenseBase<Derived>::Random()
|
||||
|
||||
/** Sets all coefficients in this expression to random values.
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* \not_reentrant
|
||||
*
|
||||
* Example: \include MatrixBase_setRandom.cpp
|
||||
* Output: \verbinclude MatrixBase_setRandom.out
|
||||
*
|
||||
@@ -114,12 +136,16 @@ inline Derived& DenseBase<Derived>::setRandom()
|
||||
|
||||
/** Resizes to the given \a newSize, and sets all coefficients in this expression to random values.
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* \only_for_vectors
|
||||
* \not_reentrant
|
||||
*
|
||||
* Example: \include Matrix_setRandom_int.cpp
|
||||
* Output: \verbinclude Matrix_setRandom_int.out
|
||||
*
|
||||
* \sa MatrixBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, MatrixBase::Random()
|
||||
* \sa DenseBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, DenseBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
@@ -131,13 +157,18 @@ PlainObjectBase<Derived>::setRandom(Index newSize)
|
||||
|
||||
/** Resizes to the given size, and sets all coefficients in this expression to random values.
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* \not_reentrant
|
||||
*
|
||||
* \param nbRows the new number of rows
|
||||
* \param nbCols the new number of columns
|
||||
*
|
||||
* Example: \include Matrix_setRandom_int_int.cpp
|
||||
* Output: \verbinclude Matrix_setRandom_int_int.out
|
||||
*
|
||||
* \sa MatrixBase::setRandom(), setRandom(Index), class CwiseNullaryOp, MatrixBase::Random()
|
||||
* \sa DenseBase::setRandom(), setRandom(Index), class CwiseNullaryOp, DenseBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
|
||||
@@ -82,6 +82,7 @@ struct redux_novec_unroller
|
||||
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
|
||||
{
|
||||
return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
|
||||
@@ -99,6 +100,7 @@ struct redux_novec_unroller<Func, Derived, Start, 1>
|
||||
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
|
||||
{
|
||||
return mat.coeffByOuterInner(outer, inner);
|
||||
@@ -112,6 +114,7 @@ template<typename Func, typename Derived, int Start>
|
||||
struct redux_novec_unroller<Func, Derived, Start, 0>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
|
||||
};
|
||||
|
||||
@@ -170,6 +173,7 @@ struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename Derived::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
|
||||
{
|
||||
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
|
||||
@@ -203,7 +207,7 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
|
||||
const Index packetSize = packet_traits<Scalar>::size;
|
||||
const Index alignedStart = internal::first_aligned(mat);
|
||||
enum {
|
||||
alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
|
||||
alignment = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) || bool(Derived::Flags & AlignedBit)
|
||||
? Aligned : Unaligned
|
||||
};
|
||||
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
|
||||
@@ -299,10 +303,15 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
|
||||
{
|
||||
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
|
||||
Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
|
||||
if (VectorizedSize != Size)
|
||||
res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
|
||||
return res;
|
||||
if (VectorizedSize > 0) {
|
||||
Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
|
||||
if (VectorizedSize != Size)
|
||||
res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
|
||||
return res;
|
||||
}
|
||||
else {
|
||||
return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -19,17 +19,17 @@ template<typename PlainObjectType, int Options = 0,
|
||||
/** \class Ref
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief A matrix or vector expression mapping an existing expressions
|
||||
* \brief A matrix or vector expression mapping an existing expression
|
||||
*
|
||||
* \tparam PlainObjectType the equivalent matrix type of the mapped data
|
||||
* \tparam Options specifies whether the pointer is \c #Aligned, or \c #Unaligned.
|
||||
* The default is \c #Unaligned.
|
||||
* \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
|
||||
* but accept a variable outer stride (leading dimension).
|
||||
* but accepts a variable outer stride (leading dimension).
|
||||
* This can be overridden by specifying strides.
|
||||
* The type passed here must be a specialization of the Stride template, see examples below.
|
||||
*
|
||||
* This class permits to write non template functions taking Eigen's object as parameters while limiting the number of copies.
|
||||
* This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
|
||||
* A Ref<> object can represent either a const expression or a l-value:
|
||||
* \code
|
||||
* // in-out argument:
|
||||
@@ -39,10 +39,10 @@ template<typename PlainObjectType, int Options = 0,
|
||||
* void foo2(const Ref<const VectorXf>& x);
|
||||
* \endcode
|
||||
*
|
||||
* In the in-out case, the input argument must satisfies the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
|
||||
* In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
|
||||
* By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
|
||||
* Likewise, a Ref<MatrixXf> can reference any column major dense matrix expression of float whose column's elements are contiguously stored with
|
||||
* the possibility to have a constant space inbetween each column, i.e.: the inner stride mmust be equal to 1, but the outer-stride (or leading dimension),
|
||||
* Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with
|
||||
* the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)
|
||||
* can be greater than the number of rows.
|
||||
*
|
||||
* In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
|
||||
@@ -58,15 +58,15 @@ template<typename PlainObjectType, int Options = 0,
|
||||
* foo2(A.col().segment(2,4)); // No temporary
|
||||
* \endcode
|
||||
*
|
||||
* The range of inputs that can be referenced without temporary can be enlarged using the last two template parameter.
|
||||
* The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters.
|
||||
* Here is an example accepting an innerstride!=1:
|
||||
* \code
|
||||
* // in-out argument:
|
||||
* void foo3(Ref<VectorXf,0,InnerStride<> > x);
|
||||
* foo3(A.row()); // OK
|
||||
* \endcode
|
||||
* The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involved more
|
||||
* expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overloads internally calling a
|
||||
* The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
|
||||
* expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a
|
||||
* template function, e.g.:
|
||||
* \code
|
||||
* // in the .h:
|
||||
@@ -94,13 +94,14 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
|
||||
typedef _PlainObjectType PlainObjectType;
|
||||
typedef _StrideType StrideType;
|
||||
enum {
|
||||
Options = _Options
|
||||
Options = _Options,
|
||||
Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit
|
||||
};
|
||||
|
||||
template<typename Derived> struct match {
|
||||
enum {
|
||||
HasDirectAccess = internal::has_direct_access<Derived>::ret,
|
||||
StorageOrderMatch = PlainObjectType::IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)),
|
||||
StorageOrderMatch = PlainObjectType::IsVectorAtCompileTime || Derived::IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)),
|
||||
InnerStrideMatch = int(StrideType::InnerStrideAtCompileTime)==int(Dynamic)
|
||||
|| int(StrideType::InnerStrideAtCompileTime)==int(Derived::InnerStrideAtCompileTime)
|
||||
|| (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1),
|
||||
@@ -111,7 +112,7 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
|
||||
};
|
||||
typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;
|
||||
};
|
||||
|
||||
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
@@ -171,8 +172,12 @@ protected:
|
||||
}
|
||||
else
|
||||
::new (static_cast<Base*>(this)) Base(expr.data(), expr.rows(), expr.cols());
|
||||
::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(),
|
||||
StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride());
|
||||
|
||||
if(Expression::IsVectorAtCompileTime && (!PlainObjectType::IsVectorAtCompileTime) && ((Expression::Flags&RowMajorBit)!=(PlainObjectType::Flags&RowMajorBit)))
|
||||
::new (&m_stride) StrideBase(expr.innerStride(), StrideType::InnerStrideAtCompileTime==0?0:1);
|
||||
else
|
||||
::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(),
|
||||
StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride());
|
||||
}
|
||||
|
||||
StrideBase m_stride;
|
||||
|
||||
@@ -57,10 +57,11 @@ template<typename Derived> class ReturnByValue
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue)
|
||||
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void evalTo(Dest& dst) const
|
||||
{ static_cast<const Derived*>(this)->evalTo(dst); }
|
||||
inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
|
||||
inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
#define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT
|
||||
|
||||
@@ -69,17 +69,23 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
};
|
||||
typedef typename MatrixType::PlainObject PlainObject;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
|
||||
{}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const { return m_matrix.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const { return m_matrix.innerStride(); }
|
||||
|
||||
/** \sa MatrixBase::coeff()
|
||||
* \warning the coordinates must fit into the referenced triangular part
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar coeff(Index row, Index col) const
|
||||
{
|
||||
Base::check_coordinates_internal(row, col);
|
||||
@@ -89,6 +95,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
/** \sa MatrixBase::coeffRef()
|
||||
* \warning the coordinates must fit into the referenced triangular part
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
Base::check_coordinates_internal(row, col);
|
||||
@@ -96,13 +103,17 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MatrixTypeNestedCleaned& _expression() const { return m_matrix; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
|
||||
|
||||
/** Efficient self-adjoint matrix times vector/matrix product */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
SelfadjointProductMatrix<MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime>
|
||||
operator*(const MatrixBase<OtherDerived>& rhs) const
|
||||
{
|
||||
@@ -113,6 +124,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
|
||||
/** Efficient vector/matrix times self-adjoint matrix product */
|
||||
template<typename OtherDerived> friend
|
||||
EIGEN_DEVICE_FUNC
|
||||
SelfadjointProductMatrix<OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false>
|
||||
operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView& rhs)
|
||||
{
|
||||
@@ -132,6 +144,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
* \sa rankUpdate(const MatrixBase<DerivedU>&, Scalar)
|
||||
*/
|
||||
template<typename DerivedU, typename DerivedV>
|
||||
EIGEN_DEVICE_FUNC
|
||||
SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha = Scalar(1));
|
||||
|
||||
/** Perform a symmetric rank K update of the selfadjoint matrix \c *this:
|
||||
@@ -145,6 +158,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
* \sa rankUpdate(const MatrixBase<DerivedU>&, const MatrixBase<DerivedV>&, Scalar)
|
||||
*/
|
||||
template<typename DerivedU>
|
||||
EIGEN_DEVICE_FUNC
|
||||
SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
|
||||
|
||||
/////////// Cholesky module ///////////
|
||||
@@ -159,31 +173,10 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
/** Return type of eigenvalues() */
|
||||
typedef Matrix<RealScalar, internal::traits<MatrixType>::ColsAtCompileTime, 1> EigenvaluesReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EigenvaluesReturnType eigenvalues() const;
|
||||
EIGEN_DEVICE_FUNC
|
||||
RealScalar operatorNorm() const;
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<typename OtherDerived>
|
||||
SelfAdjointView& operator=(const MatrixBase<OtherDerived>& other)
|
||||
{
|
||||
enum {
|
||||
OtherPart = UpLo == Upper ? StrictlyLower : StrictlyUpper
|
||||
};
|
||||
m_matrix.const_cast_derived().template triangularView<UpLo>() = other;
|
||||
m_matrix.const_cast_derived().template triangularView<OtherPart>() = other.adjoint();
|
||||
return *this;
|
||||
}
|
||||
template<typename OtherMatrixType, unsigned int OtherMode>
|
||||
SelfAdjointView& operator=(const TriangularView<OtherMatrixType, OtherMode>& other)
|
||||
{
|
||||
enum {
|
||||
OtherPart = UpLo == Upper ? StrictlyLower : StrictlyUpper
|
||||
};
|
||||
m_matrix.const_cast_derived().template triangularView<UpLo>() = other.toDenseMatrix();
|
||||
m_matrix.const_cast_derived().template triangularView<OtherPart>() = other.toDenseMatrix().adjoint();
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
protected:
|
||||
MatrixTypeNested m_matrix;
|
||||
@@ -209,6 +202,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
|
||||
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);
|
||||
@@ -223,6 +217,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
|
||||
template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
@@ -234,6 +229,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
|
||||
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);
|
||||
@@ -248,6 +244,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
|
||||
template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
@@ -255,6 +252,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
@@ -272,6 +270,7 @@ struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dyn
|
||||
template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
|
||||
@@ -35,7 +35,7 @@ struct traits<SelfCwiseBinaryOp<BinaryOp,Lhs,Rhs> >
|
||||
enum {
|
||||
// Note that it is still a good idea to preserve the DirectAccessBit
|
||||
// so that assign can correctly align the data.
|
||||
Flags = traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit),
|
||||
Flags = traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&AlignedBit) | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit),
|
||||
OuterStrideAtCompileTime = Lhs::OuterStrideAtCompileTime,
|
||||
InnerStrideAtCompileTime = Lhs::InnerStrideAtCompileTime
|
||||
};
|
||||
@@ -52,21 +52,24 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
|
||||
|
||||
typedef typename internal::packet_traits<Scalar>::type Packet;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline SelfCwiseBinaryOp(Lhs& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}
|
||||
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
inline Index outerStride() const { return m_matrix.outerStride(); }
|
||||
inline Index innerStride() const { return m_matrix.innerStride(); }
|
||||
inline const Scalar* data() const { return m_matrix.data(); }
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_matrix.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_matrix.innerStride(); }
|
||||
EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_matrix.data(); }
|
||||
|
||||
// note that this function is needed by assign to correctly align loads/stores
|
||||
// TODO make Assign use .data()
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(Lhs)
|
||||
return m_matrix.const_cast_derived().coeffRef(row, col);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index row, Index col) const
|
||||
{
|
||||
return m_matrix.coeffRef(row, col);
|
||||
@@ -74,17 +77,20 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
|
||||
|
||||
// note that this function is needed by assign to correctly align loads/stores
|
||||
// TODO make Assign use .data()
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(Lhs)
|
||||
return m_matrix.const_cast_derived().coeffRef(index);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_matrix.const_cast_derived().coeffRef(index);
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
OtherDerived& _other = other.const_cast_derived();
|
||||
@@ -95,6 +101,7 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
OtherDerived& _other = other.const_cast_derived();
|
||||
@@ -125,6 +132,7 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
|
||||
// reimplement lazyAssign to handle complex *= real
|
||||
// see CwiseBinaryOp ctor for details
|
||||
template<typename RhsDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE SelfCwiseBinaryOp& lazyAssign(const DenseBase<RhsDerived>& rhs)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs,RhsDerived)
|
||||
@@ -144,17 +152,20 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
|
||||
// overloaded to honor evaluation of special matrices
|
||||
// maybe another solution would be to not use SelfCwiseBinaryOp
|
||||
// at first...
|
||||
EIGEN_DEVICE_FUNC
|
||||
SelfCwiseBinaryOp& operator=(const Rhs& _rhs)
|
||||
{
|
||||
typename internal::nested<Rhs>::type rhs(_rhs);
|
||||
return Base::operator=(rhs);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Lhs& expression() const
|
||||
{
|
||||
return m_matrix;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const BinaryOp& functor() const
|
||||
{
|
||||
return m_functor;
|
||||
@@ -177,18 +188,30 @@ inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
inline Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
|
||||
{
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
|
||||
tmp = PlainObject::Constant(rows(),cols(),other);
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
inline Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
|
||||
{
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
|
||||
tmp = PlainObject::Constant(rows(),cols(),other);
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
|
||||
{
|
||||
typedef typename internal::conditional<NumTraits<Scalar>::IsInteger,
|
||||
internal::scalar_quotient_op<Scalar>,
|
||||
internal::scalar_product_op<Scalar> >::type BinOp;
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
SelfCwiseBinaryOp<BinOp, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
|
||||
Scalar actual_other;
|
||||
if(NumTraits<Scalar>::IsInteger) actual_other = other;
|
||||
else actual_other = Scalar(1)/other;
|
||||
tmp = PlainObject::Constant(rows(),cols(), actual_other);
|
||||
SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
|
||||
tmp = PlainObject::Constant(rows(),cols(), other);
|
||||
return derived();
|
||||
}
|
||||
|
||||
|
||||
@@ -17,16 +17,38 @@ namespace internal {
|
||||
template<typename ExpressionType, typename Scalar>
|
||||
inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
|
||||
{
|
||||
Scalar max = bl.cwiseAbs().maxCoeff();
|
||||
if (max>scale)
|
||||
using std::max;
|
||||
Scalar maxCoeff = bl.cwiseAbs().maxCoeff();
|
||||
|
||||
if(maxCoeff>scale)
|
||||
{
|
||||
ssq = ssq * numext::abs2(scale/max);
|
||||
scale = max;
|
||||
invScale = Scalar(1)/scale;
|
||||
ssq = ssq * numext::abs2(scale/maxCoeff);
|
||||
Scalar tmp = Scalar(1)/maxCoeff;
|
||||
if(tmp > NumTraits<Scalar>::highest())
|
||||
{
|
||||
invScale = NumTraits<Scalar>::highest();
|
||||
scale = Scalar(1)/invScale;
|
||||
}
|
||||
else if(maxCoeff>NumTraits<Scalar>::highest()) // we got a INF
|
||||
{
|
||||
invScale = Scalar(1);
|
||||
scale = maxCoeff;
|
||||
}
|
||||
else
|
||||
{
|
||||
scale = maxCoeff;
|
||||
invScale = tmp;
|
||||
}
|
||||
}
|
||||
// TODO if the max is much much smaller than the current scale,
|
||||
else if(maxCoeff!=maxCoeff) // we got a NaN
|
||||
{
|
||||
scale = maxCoeff;
|
||||
}
|
||||
|
||||
// TODO if the maxCoeff is much much smaller than the current scale,
|
||||
// then we can neglect this sub vector
|
||||
ssq += (bl*invScale).squaredNorm();
|
||||
if(scale>Scalar(0)) // if scale==0, then bl is 0
|
||||
ssq += (bl*invScale).squaredNorm();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
@@ -36,13 +58,13 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
|
||||
typedef typename Derived::RealScalar RealScalar;
|
||||
typedef typename Derived::Index Index;
|
||||
using std::pow;
|
||||
using std::min;
|
||||
using std::max;
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
EIGEN_USING_STD_MATH(max);
|
||||
using std::sqrt;
|
||||
using std::abs;
|
||||
const Derived& vec(_vec.derived());
|
||||
static bool initialized = false;
|
||||
static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr;
|
||||
static RealScalar b1, b2, s1m, s2m, rbig, relerr;
|
||||
if(!initialized)
|
||||
{
|
||||
int ibeta, it, iemin, iemax, iexp;
|
||||
@@ -71,7 +93,6 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
|
||||
iexp = - ((iemax+it)/2);
|
||||
s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
|
||||
|
||||
overfl = rbig*s2m; // overflow boundary for abig
|
||||
eps = RealScalar(pow(double(ibeta), 1-it));
|
||||
relerr = sqrt(eps); // tolerance for neglecting asml
|
||||
initialized = true;
|
||||
@@ -88,13 +109,13 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
|
||||
else if(ax < b1) asml += numext::abs2(ax*s1m);
|
||||
else amed += numext::abs2(ax);
|
||||
}
|
||||
if(amed!=amed)
|
||||
return amed; // we got a NaN
|
||||
if(abig > RealScalar(0))
|
||||
{
|
||||
abig = sqrt(abig);
|
||||
if(abig > overfl)
|
||||
{
|
||||
return rbig;
|
||||
}
|
||||
if(abig > rbig) // overflow, or *this contains INF values
|
||||
return abig; // return INF
|
||||
if(amed > RealScalar(0))
|
||||
{
|
||||
abig = abig/s2m;
|
||||
@@ -139,7 +160,7 @@ template<typename Derived>
|
||||
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
|
||||
MatrixBase<Derived>::stableNorm() const
|
||||
{
|
||||
using std::min;
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
using std::sqrt;
|
||||
const Index blockSize = 4096;
|
||||
RealScalar scale(0);
|
||||
|
||||
@@ -51,6 +51,7 @@ class Stride
|
||||
};
|
||||
|
||||
/** Default constructor, for use when strides are fixed at compile time */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Stride()
|
||||
: m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
|
||||
{
|
||||
@@ -58,6 +59,7 @@ class Stride
|
||||
}
|
||||
|
||||
/** Constructor allowing to pass the strides at runtime */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Stride(Index outerStride, Index innerStride)
|
||||
: m_outer(outerStride), m_inner(innerStride)
|
||||
{
|
||||
@@ -65,13 +67,16 @@ class Stride
|
||||
}
|
||||
|
||||
/** Copy constructor */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Stride(const Stride& other)
|
||||
: m_outer(other.outer()), m_inner(other.inner())
|
||||
{}
|
||||
|
||||
/** \returns the outer stride */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outer() const { return m_outer.value(); }
|
||||
/** \returns the inner stride */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index inner() const { return m_inner.value(); }
|
||||
|
||||
protected:
|
||||
@@ -87,8 +92,8 @@ class InnerStride : public Stride<0, Value>
|
||||
typedef Stride<0, Value> Base;
|
||||
public:
|
||||
typedef DenseIndex Index;
|
||||
InnerStride() : Base() {}
|
||||
InnerStride(Index v) : Base(0, v) {}
|
||||
EIGEN_DEVICE_FUNC InnerStride() : Base() {}
|
||||
EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {}
|
||||
};
|
||||
|
||||
/** \brief Convenience specialization of Stride to specify only an outer stride
|
||||
@@ -99,8 +104,8 @@ class OuterStride : public Stride<Value, 0>
|
||||
typedef Stride<Value, 0> Base;
|
||||
public:
|
||||
typedef DenseIndex Index;
|
||||
OuterStride() : Base() {}
|
||||
OuterStride(Index v) : Base(v,0) {}
|
||||
EIGEN_DEVICE_FUNC OuterStride() : Base() {}
|
||||
EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v,0) {}
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -33,11 +33,16 @@ template<typename ExpressionType> class SwapWrapper
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(SwapWrapper)
|
||||
typedef typename internal::packet_traits<Scalar>::type Packet;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline SwapWrapper(ExpressionType& xpr) : m_expression(xpr) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_expression.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return m_expression.cols(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const { return m_expression.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
typedef typename internal::conditional<
|
||||
@@ -46,30 +51,37 @@ template<typename ExpressionType> class SwapWrapper
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return m_expression.data(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_expression.coeffRef(index);
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void copyCoeff(Index rowId, Index colId, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
OtherDerived& _other = other.const_cast_derived();
|
||||
@@ -81,6 +93,7 @@ template<typename ExpressionType> class SwapWrapper
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
OtherDerived& _other = other.const_cast_derived();
|
||||
@@ -115,6 +128,7 @@ template<typename ExpressionType> class SwapWrapper
|
||||
_other.template writePacket<LoadMode>(index, tmp);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
ExpressionType& expression() const { return m_expression; }
|
||||
|
||||
protected:
|
||||
|
||||
@@ -62,18 +62,21 @@ template<typename MatrixType> class Transpose
|
||||
typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Transpose(MatrixType& a_matrix) : m_matrix(a_matrix) {}
|
||||
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
|
||||
|
||||
inline Index rows() const { return m_matrix.cols(); }
|
||||
inline Index cols() const { return m_matrix.rows(); }
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.cols(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.rows(); }
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename MatrixType::Nested>::type&
|
||||
nestedExpression() const { return m_matrix; }
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::remove_all<typename MatrixType::Nested>::type&
|
||||
nestedExpression() { return m_matrix.const_cast_derived(); }
|
||||
|
||||
@@ -106,8 +109,8 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl)
|
||||
|
||||
inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
|
||||
inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
|
||||
|
||||
typedef typename internal::conditional<
|
||||
internal::is_lvalue<MatrixType>::value,
|
||||
@@ -118,33 +121,39 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
|
||||
inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
|
||||
inline const Scalar* data() const { return derived().nestedExpression().data(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
return derived().nestedExpression().const_cast_derived().coeffRef(colId, rowId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
return derived().nestedExpression().const_cast_derived().coeffRef(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return derived().nestedExpression().coeffRef(colId, rowId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return derived().nestedExpression().coeffRef(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return derived().nestedExpression().coeff(colId, rowId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return derived().nestedExpression().coeff(index);
|
||||
@@ -284,7 +293,8 @@ struct inplace_transpose_selector<MatrixType,false> { // non square matrix
|
||||
* Notice however that this method is only useful if you want to replace a matrix by its own transpose.
|
||||
* If you just need the transpose of a matrix, use transpose().
|
||||
*
|
||||
* \note if the matrix is not square, then \c *this must be a resizable matrix.
|
||||
* \note if the matrix is not square, then \c *this must be a resizable matrix.
|
||||
* This excludes (non-square) fixed-size matrices, block-expressions and maps.
|
||||
*
|
||||
* \sa transpose(), adjoint(), adjointInPlace() */
|
||||
template<typename Derived>
|
||||
@@ -315,6 +325,7 @@ inline void DenseBase<Derived>::transposeInPlace()
|
||||
* If you just need the adjoint of a matrix, use adjoint().
|
||||
*
|
||||
* \note if the matrix is not square, then \c *this must be a resizable matrix.
|
||||
* This excludes (non-square) fixed-size matrices, block-expressions and maps.
|
||||
*
|
||||
* \sa transpose(), adjoint(), transposeInPlace() */
|
||||
template<typename Derived>
|
||||
|
||||
@@ -53,7 +53,8 @@ class TranspositionsBase
|
||||
public:
|
||||
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename IndicesType::Scalar Index;
|
||||
typedef typename IndicesType::Scalar StorageIndexType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
|
||||
Derived& derived() { return *static_cast<Derived*>(this); }
|
||||
const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||
@@ -81,17 +82,17 @@ class TranspositionsBase
|
||||
inline Index size() const { return indices().size(); }
|
||||
|
||||
/** Direct access to the underlying index vector */
|
||||
inline const Index& coeff(Index i) const { return indices().coeff(i); }
|
||||
inline const StorageIndexType& coeff(Index i) const { return indices().coeff(i); }
|
||||
/** Direct access to the underlying index vector */
|
||||
inline Index& coeffRef(Index i) { return indices().coeffRef(i); }
|
||||
inline StorageIndexType& coeffRef(Index i) { return indices().coeffRef(i); }
|
||||
/** Direct access to the underlying index vector */
|
||||
inline const Index& operator()(Index i) const { return indices()(i); }
|
||||
inline const StorageIndexType& operator()(Index i) const { return indices()(i); }
|
||||
/** Direct access to the underlying index vector */
|
||||
inline Index& operator()(Index i) { return indices()(i); }
|
||||
inline StorageIndexType& operator()(Index i) { return indices()(i); }
|
||||
/** Direct access to the underlying index vector */
|
||||
inline const Index& operator[](Index i) const { return indices()(i); }
|
||||
inline const StorageIndexType& operator[](Index i) const { return indices()(i); }
|
||||
/** Direct access to the underlying index vector */
|
||||
inline Index& operator[](Index i) { return indices()(i); }
|
||||
inline StorageIndexType& operator[](Index i) { return indices()(i); }
|
||||
|
||||
/** const version of indices(). */
|
||||
const IndicesType& indices() const { return derived().indices(); }
|
||||
@@ -99,7 +100,7 @@ class TranspositionsBase
|
||||
IndicesType& indices() { return derived().indices(); }
|
||||
|
||||
/** Resizes to given size. */
|
||||
inline void resize(int newSize)
|
||||
inline void resize(Index newSize)
|
||||
{
|
||||
indices().resize(newSize);
|
||||
}
|
||||
@@ -107,7 +108,7 @@ class TranspositionsBase
|
||||
/** Sets \c *this to represents an identity transformation */
|
||||
void setIdentity()
|
||||
{
|
||||
for(int i = 0; i < indices().size(); ++i)
|
||||
for(StorageIndexType i = 0; i < indices().size(); ++i)
|
||||
coeffRef(i) = i;
|
||||
}
|
||||
|
||||
@@ -144,23 +145,26 @@ class TranspositionsBase
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
|
||||
struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType>
|
||||
struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndexType> >
|
||||
{
|
||||
typedef IndexType Index;
|
||||
typedef Matrix<Index, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
|
||||
typedef Matrix<_StorageIndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
typedef _StorageIndexType StorageIndexType;
|
||||
};
|
||||
}
|
||||
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
|
||||
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType>
|
||||
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndexType> >
|
||||
{
|
||||
typedef internal::traits<Transpositions> Traits;
|
||||
public:
|
||||
|
||||
typedef TranspositionsBase<Transpositions> Base;
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename IndicesType::Scalar Index;
|
||||
typedef typename IndicesType::Scalar StorageIndexType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
|
||||
|
||||
inline Transpositions() {}
|
||||
|
||||
@@ -215,30 +219,32 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim
|
||||
|
||||
|
||||
namespace internal {
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
|
||||
struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,_PacketAccess> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType, int _PacketAccess>
|
||||
struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndexType>,_PacketAccess> >
|
||||
{
|
||||
typedef IndexType Index;
|
||||
typedef Map<const Matrix<Index,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;
|
||||
typedef Map<const Matrix<_StorageIndexType,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
typedef _StorageIndexType StorageIndexType;
|
||||
};
|
||||
}
|
||||
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int PacketAccess>
|
||||
class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,PacketAccess>
|
||||
: public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,PacketAccess> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType, int PacketAccess>
|
||||
class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndexType>,PacketAccess>
|
||||
: public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndexType>,PacketAccess> >
|
||||
{
|
||||
typedef internal::traits<Map> Traits;
|
||||
public:
|
||||
|
||||
typedef TranspositionsBase<Map> Base;
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename IndicesType::Scalar Index;
|
||||
typedef typename IndicesType::Scalar StorageIndexType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
|
||||
inline Map(const Index* indicesPtr)
|
||||
inline Map(const StorageIndexType* indicesPtr)
|
||||
: m_indices(indicesPtr)
|
||||
{}
|
||||
|
||||
inline Map(const Index* indicesPtr, Index size)
|
||||
inline Map(const StorageIndexType* indicesPtr, Index size)
|
||||
: m_indices(indicesPtr,size)
|
||||
{}
|
||||
|
||||
@@ -275,7 +281,8 @@ namespace internal {
|
||||
template<typename _IndicesType>
|
||||
struct traits<TranspositionsWrapper<_IndicesType> >
|
||||
{
|
||||
typedef typename _IndicesType::Scalar Index;
|
||||
typedef typename _IndicesType::Scalar StorageIndexType;
|
||||
typedef typename _IndicesType::Index Index;
|
||||
typedef _IndicesType IndicesType;
|
||||
};
|
||||
}
|
||||
@@ -289,7 +296,8 @@ class TranspositionsWrapper
|
||||
|
||||
typedef TranspositionsBase<TranspositionsWrapper> Base;
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename IndicesType::Scalar Index;
|
||||
typedef typename IndicesType::Scalar StorageIndexType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
|
||||
inline TranspositionsWrapper(IndicesType& a_indices)
|
||||
: m_indices(a_indices)
|
||||
@@ -363,24 +371,25 @@ struct transposition_matrix_product_retval
|
||||
{
|
||||
typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
|
||||
typedef typename TranspositionType::Index Index;
|
||||
typedef typename TranspositionType::StorageIndexType StorageIndexType;
|
||||
|
||||
transposition_matrix_product_retval(const TranspositionType& tr, const MatrixType& matrix)
|
||||
: m_transpositions(tr), m_matrix(matrix)
|
||||
{}
|
||||
|
||||
inline int rows() const { return m_matrix.rows(); }
|
||||
inline int cols() const { return m_matrix.cols(); }
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
|
||||
template<typename Dest> inline void evalTo(Dest& dst) const
|
||||
{
|
||||
const int size = m_transpositions.size();
|
||||
Index j = 0;
|
||||
const Index size = m_transpositions.size();
|
||||
StorageIndexType j = 0;
|
||||
|
||||
if(!(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix)))
|
||||
dst = m_matrix;
|
||||
|
||||
for(int k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
|
||||
if((j=m_transpositions.coeff(k))!=k)
|
||||
for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
|
||||
if(Index(j=m_transpositions.coeff(k))!=k)
|
||||
{
|
||||
if(Side==OnTheLeft)
|
||||
dst.row(k).swap(dst.row(j));
|
||||
|
||||
@@ -44,29 +44,39 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
|
||||
typedef typename internal::traits<Derived>::DenseMatrixType DenseMatrixType;
|
||||
typedef DenseMatrixType DenseType;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline TriangularBase() { eigen_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return derived().rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return derived().cols(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const { return derived().outerStride(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const { return derived().innerStride(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar coeff(Index row, Index col) const { return derived().coeff(row,col); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index row, Index col) { return derived().coeffRef(row,col); }
|
||||
|
||||
/** \see MatrixBase::copyCoeff(row,col)
|
||||
*/
|
||||
template<typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, Other& other)
|
||||
{
|
||||
derived().coeffRef(row, col) = other.coeff(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar operator()(Index row, Index col) const
|
||||
{
|
||||
check_coordinates(row, col);
|
||||
return coeff(row,col);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& operator()(Index row, Index col)
|
||||
{
|
||||
check_coordinates(row, col);
|
||||
@@ -74,15 +84,20 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Derived& derived() { return *static_cast<Derived*>(this); }
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
template<typename DenseDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void evalTo(MatrixBase<DenseDerived> &other) const;
|
||||
template<typename DenseDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void evalToLazy(MatrixBase<DenseDerived> &other) const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseMatrixType toDenseMatrix() const
|
||||
{
|
||||
DenseMatrixType res(rows(), cols());
|
||||
@@ -189,36 +204,52 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
| (Mode & (ZeroDiag))
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline TriangularView(const MatrixType& matrix) : m_matrix(matrix)
|
||||
{}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const { return m_matrix.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const { return m_matrix.innerStride(); }
|
||||
|
||||
/** \sa MatrixBase::operator+=() */
|
||||
template<typename Other> TriangularView& operator+=(const DenseBase<Other>& other) { return *this = m_matrix + other.derived(); }
|
||||
/** \sa MatrixBase::operator+=() */
|
||||
template<typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& operator+=(const DenseBase<Other>& other) { return *this = m_matrix + other.derived(); }
|
||||
/** \sa MatrixBase::operator-=() */
|
||||
template<typename Other> TriangularView& operator-=(const DenseBase<Other>& other) { return *this = m_matrix - other.derived(); }
|
||||
template<typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& operator-=(const DenseBase<Other>& other) { return *this = m_matrix - other.derived(); }
|
||||
/** \sa MatrixBase::operator*=() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix * other; }
|
||||
/** \sa MatrixBase::operator/=() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix / other; }
|
||||
|
||||
/** \sa MatrixBase::fill() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
void fill(const Scalar& value) { setConstant(value); }
|
||||
/** \sa MatrixBase::setConstant() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& setConstant(const Scalar& value)
|
||||
{ return *this = MatrixType::Constant(rows(), cols(), value); }
|
||||
/** \sa MatrixBase::setZero() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& setZero() { return setConstant(Scalar(0)); }
|
||||
/** \sa MatrixBase::setOnes() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& setOnes() { return setConstant(Scalar(1)); }
|
||||
|
||||
/** \sa MatrixBase::coeff()
|
||||
* \warning the coordinates must fit into the referenced triangular part
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar coeff(Index row, Index col) const
|
||||
{
|
||||
Base::check_coordinates_internal(row, col);
|
||||
@@ -228,49 +259,62 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
/** \sa MatrixBase::coeffRef()
|
||||
* \warning the coordinates must fit into the referenced triangular part
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
Base::check_coordinates_internal(row, col);
|
||||
return m_matrix.const_cast_derived().coeffRef(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
|
||||
|
||||
/** Assigns a triangular matrix to a triangular part of a dense matrix */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& operator=(const TriangularBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& operator=(const MatrixBase<OtherDerived>& other);
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& operator=(const TriangularView& other)
|
||||
{ return *this = other.nestedExpression(); }
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void lazyAssign(const TriangularBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void lazyAssign(const MatrixBase<OtherDerived>& other);
|
||||
|
||||
/** \sa MatrixBase::conjugate() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline TriangularView<MatrixConjugateReturnType,Mode> conjugate()
|
||||
{ return m_matrix.conjugate(); }
|
||||
/** \sa MatrixBase::conjugate() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const
|
||||
{ return m_matrix.conjugate(); }
|
||||
|
||||
/** \sa MatrixBase::adjoint() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
|
||||
{ return m_matrix.adjoint(); }
|
||||
|
||||
/** \sa MatrixBase::transpose() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline TriangularView<Transpose<MatrixType>,TransposeMode> transpose()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
return m_matrix.const_cast_derived().transpose();
|
||||
}
|
||||
/** \sa MatrixBase::transpose() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const
|
||||
{
|
||||
return m_matrix.transpose();
|
||||
@@ -278,74 +322,53 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
|
||||
/** Efficient triangular matrix times vector/matrix product */
|
||||
template<typename OtherDerived>
|
||||
TriangularProduct<Mode,true,MatrixType,false,OtherDerived, OtherDerived::IsVectorAtCompileTime>
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularProduct<Mode, true, MatrixType, false, OtherDerived, OtherDerived::ColsAtCompileTime==1>
|
||||
operator*(const MatrixBase<OtherDerived>& rhs) const
|
||||
{
|
||||
return TriangularProduct
|
||||
<Mode,true,MatrixType,false,OtherDerived,OtherDerived::IsVectorAtCompileTime>
|
||||
<Mode, true, MatrixType, false, OtherDerived, OtherDerived::ColsAtCompileTime==1>
|
||||
(m_matrix, rhs.derived());
|
||||
}
|
||||
|
||||
/** Efficient vector/matrix times triangular matrix product */
|
||||
template<typename OtherDerived> friend
|
||||
TriangularProduct<Mode,false,OtherDerived,OtherDerived::IsVectorAtCompileTime,MatrixType,false>
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularProduct<Mode, false, OtherDerived, OtherDerived::RowsAtCompileTime==1, MatrixType, false>
|
||||
operator*(const MatrixBase<OtherDerived>& lhs, const TriangularView& rhs)
|
||||
{
|
||||
return TriangularProduct
|
||||
<Mode,false,OtherDerived,OtherDerived::IsVectorAtCompileTime,MatrixType,false>
|
||||
<Mode, false, OtherDerived, OtherDerived::RowsAtCompileTime==1, MatrixType, false>
|
||||
(lhs.derived(),rhs.m_matrix);
|
||||
}
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<typename OtherDerived>
|
||||
struct eigen2_product_return_type
|
||||
{
|
||||
typedef typename TriangularView<MatrixType,Mode>::DenseMatrixType DenseMatrixType;
|
||||
typedef typename OtherDerived::PlainObject::DenseType OtherPlainObject;
|
||||
typedef typename ProductReturnType<DenseMatrixType, OtherPlainObject>::Type ProdRetType;
|
||||
typedef typename ProdRetType::PlainObject type;
|
||||
};
|
||||
template<typename OtherDerived>
|
||||
const typename eigen2_product_return_type<OtherDerived>::type
|
||||
operator*(const EigenBase<OtherDerived>& rhs) const
|
||||
{
|
||||
typename OtherDerived::PlainObject::DenseType rhsPlainObject;
|
||||
rhs.evalTo(rhsPlainObject);
|
||||
return this->toDenseMatrix() * rhsPlainObject;
|
||||
}
|
||||
template<typename OtherMatrixType>
|
||||
bool isApprox(const TriangularView<OtherMatrixType, Mode>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
|
||||
{
|
||||
return this->toDenseMatrix().isApprox(other.toDenseMatrix(), precision);
|
||||
}
|
||||
template<typename OtherDerived>
|
||||
bool isApprox(const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
|
||||
{
|
||||
return this->toDenseMatrix().isApprox(other, precision);
|
||||
}
|
||||
#endif // EIGEN2_SUPPORT
|
||||
|
||||
template<int Side, typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const internal::triangular_solve_retval<Side,TriangularView, Other>
|
||||
solve(const MatrixBase<Other>& other) const;
|
||||
|
||||
template<int Side, typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void solveInPlace(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
template<typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const internal::triangular_solve_retval<OnTheLeft,TriangularView, Other>
|
||||
solve(const MatrixBase<Other>& other) const
|
||||
{ return solve<OnTheLeft>(other); }
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void solveInPlace(const MatrixBase<OtherDerived>& other) const
|
||||
{ return solveInPlace<OnTheLeft>(other); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
|
||||
return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
|
||||
@@ -353,18 +376,21 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void swap(TriangularBase<OtherDerived> const & other)
|
||||
{
|
||||
TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived());
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void swap(MatrixBase<OtherDerived> const & other)
|
||||
{
|
||||
SwapWrapper<MatrixType> swaper(const_cast<MatrixType&>(m_matrix));
|
||||
TriangularView<SwapWrapper<MatrixType>,Mode>(swaper).lazyAssign(other.derived());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Scalar determinant() const
|
||||
{
|
||||
if (Mode & UnitDiag)
|
||||
@@ -377,6 +403,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
|
||||
// TODO simplify the following:
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularView& operator=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
|
||||
{
|
||||
setZero();
|
||||
@@ -384,12 +411,14 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
}
|
||||
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularView& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
|
||||
{
|
||||
return assignProduct(other,1);
|
||||
}
|
||||
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularView& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
|
||||
{
|
||||
return assignProduct(other,-1);
|
||||
@@ -397,6 +426,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
|
||||
|
||||
template<typename ProductDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularView& operator=(const ScaledProduct<ProductDerived>& other)
|
||||
{
|
||||
setZero();
|
||||
@@ -404,12 +434,14 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
}
|
||||
|
||||
template<typename ProductDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularView& operator+=(const ScaledProduct<ProductDerived>& other)
|
||||
{
|
||||
return assignProduct(other,other.alpha());
|
||||
}
|
||||
|
||||
template<typename ProductDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularView& operator-=(const ScaledProduct<ProductDerived>& other)
|
||||
{
|
||||
return assignProduct(other,-other.alpha());
|
||||
@@ -418,6 +450,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
protected:
|
||||
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha);
|
||||
|
||||
MatrixTypeNested m_matrix;
|
||||
@@ -439,6 +472,7 @@ struct triangular_assignment_selector
|
||||
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src);
|
||||
@@ -467,6 +501,7 @@ struct triangular_assignment_selector
|
||||
template<typename Derived1, typename Derived2, unsigned int Mode, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
@@ -475,6 +510,7 @@ struct triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearO
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
@@ -493,6 +529,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
@@ -512,6 +549,7 @@ struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
@@ -530,6 +568,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
@@ -548,6 +587,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
@@ -568,6 +608,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
@@ -710,41 +751,6 @@ void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
|
||||
* Implementation of MatrixBase methods
|
||||
***************************************************************************/
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
|
||||
// implementation of part<>(), including the SelfAdjoint case.
|
||||
|
||||
namespace internal {
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
struct eigen2_part_return_type
|
||||
{
|
||||
typedef TriangularView<MatrixType, Mode> type;
|
||||
};
|
||||
|
||||
template<typename MatrixType>
|
||||
struct eigen2_part_return_type<MatrixType, SelfAdjoint>
|
||||
{
|
||||
typedef SelfAdjointView<MatrixType, Upper> type;
|
||||
};
|
||||
}
|
||||
|
||||
/** \deprecated use MatrixBase::triangularView() */
|
||||
template<typename Derived>
|
||||
template<unsigned int Mode>
|
||||
const typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Derived>::part() const
|
||||
{
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \deprecated use MatrixBase::triangularView() */
|
||||
template<typename Derived>
|
||||
template<unsigned int Mode>
|
||||
typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Derived>::part()
|
||||
{
|
||||
return derived();
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \returns an expression of a triangular view extracted from the current matrix
|
||||
*
|
||||
|
||||
@@ -72,6 +72,7 @@ template<typename VectorType, int Size> class VectorBlock
|
||||
|
||||
/** Dynamic-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline VectorBlock(VectorType& vector, Index start, Index size)
|
||||
: Base(vector,
|
||||
IsColVector ? start : 0, IsColVector ? 0 : start,
|
||||
@@ -82,6 +83,7 @@ template<typename VectorType, int Size> class VectorBlock
|
||||
|
||||
/** Fixed-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline VectorBlock(VectorType& vector, Index start)
|
||||
: Base(vector, IsColVector ? start : 0, IsColVector ? 0 : start)
|
||||
{
|
||||
|
||||
@@ -50,7 +50,7 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
|
||||
MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,
|
||||
Flags0 = (unsigned int)_MatrixTypeNested::Flags & HereditaryBits,
|
||||
Flags = (Flags0 & ~RowMajorBit) | (RowsAtCompileTime == 1 ? RowMajorBit : 0),
|
||||
TraversalSize = Direction==Vertical ? RowsAtCompileTime : ColsAtCompileTime
|
||||
TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime : MatrixType::ColsAtCompileTime
|
||||
};
|
||||
#if EIGEN_GNUC_AT_LEAST(3,4)
|
||||
typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
|
||||
@@ -58,7 +58,8 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
|
||||
typedef typename MemberOp::template Cost<InputScalar,TraversalSize> CostOpType;
|
||||
#endif
|
||||
enum {
|
||||
CoeffReadCost = TraversalSize * traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
|
||||
CoeffReadCost = TraversalSize==Dynamic ? Dynamic
|
||||
: TraversalSize * traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -301,6 +302,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** \returns a row (or column) vector expression of the squared norm
|
||||
* of each column (or row) of the referenced expression.
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* Example: \include PartialRedux_squaredNorm.cpp
|
||||
* Output: \verbinclude PartialRedux_squaredNorm.out
|
||||
@@ -311,6 +313,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** \returns a row (or column) vector expression of the norm
|
||||
* of each column (or row) of the referenced expression.
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* Example: \include PartialRedux_norm.cpp
|
||||
* Output: \verbinclude PartialRedux_norm.out
|
||||
@@ -322,7 +325,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** \returns a row (or column) vector expression of the norm
|
||||
* of each column (or row) of the referenced expression, using
|
||||
* blue's algorithm.
|
||||
* Blue's algorithm.
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* \sa DenseBase::blueNorm() */
|
||||
const typename ReturnType<internal::member_blueNorm,RealScalar>::Type blueNorm() const
|
||||
@@ -332,6 +336,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
/** \returns a row (or column) vector expression of the norm
|
||||
* of each column (or row) of the referenced expression, avoiding
|
||||
* underflow and overflow.
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* \sa DenseBase::stableNorm() */
|
||||
const typename ReturnType<internal::member_stableNorm,RealScalar>::Type stableNorm() const
|
||||
@@ -341,6 +346,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
/** \returns a row (or column) vector expression of the norm
|
||||
* of each column (or row) of the referenced expression, avoiding
|
||||
* underflow and overflow using a concatenation of hypot() calls.
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* \sa DenseBase::hypotNorm() */
|
||||
const typename ReturnType<internal::member_hypotNorm,RealScalar>::Type hypotNorm() const
|
||||
@@ -365,6 +371,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** \returns a row (or column) vector expression representing
|
||||
* whether \b all coefficients of each respective column (or row) are \c true.
|
||||
* This expression can be assigned to a vector with entries of type \c bool.
|
||||
*
|
||||
* \sa DenseBase::all() */
|
||||
const typename ReturnType<internal::member_all>::Type all() const
|
||||
@@ -372,6 +379,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** \returns a row (or column) vector expression representing
|
||||
* whether \b at \b least one coefficient of each respective column (or row) is \c true.
|
||||
* This expression can be assigned to a vector with entries of type \c bool.
|
||||
*
|
||||
* \sa DenseBase::any() */
|
||||
const typename ReturnType<internal::member_any>::Type any() const
|
||||
@@ -379,6 +387,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** \returns a row (or column) vector expression representing
|
||||
* the number of \c true coefficients of each respective column (or row).
|
||||
* This expression can be assigned to a vector whose entries have the same type as is used to
|
||||
* index entries of the original matrix; for dense matrices, this is \c std::ptrdiff_t .
|
||||
*
|
||||
* Example: \include PartialRedux_count.cpp
|
||||
* Output: \verbinclude PartialRedux_count.out
|
||||
@@ -550,9 +560,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/////////// Geometry module ///////////
|
||||
|
||||
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
|
||||
Homogeneous<ExpressionType,Direction> homogeneous() const;
|
||||
#endif
|
||||
|
||||
typedef typename ExpressionType::PlainObject CrossReturnType;
|
||||
template<typename OtherDerived>
|
||||
|
||||
@@ -194,7 +194,7 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
internal::min_coeff_visitor<Derived> minVisitor;
|
||||
this->visit(minVisitor);
|
||||
*index = (RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row;
|
||||
*index = IndexType((RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row);
|
||||
return minVisitor.res;
|
||||
}
|
||||
|
||||
|
||||
6
Eigen/src/Core/arch/AVX/CMakeLists.txt
Normal file
6
Eigen/src/Core/arch/AVX/CMakeLists.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
FILE(GLOB Eigen_Core_arch_AVX_SRCS "*.h")
|
||||
|
||||
INSTALL(FILES
|
||||
${Eigen_Core_arch_AVX_SRCS}
|
||||
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AVX COMPONENT Devel
|
||||
)
|
||||
463
Eigen/src/Core/arch/AVX/Complex.h
Normal file
463
Eigen/src/Core/arch/AVX/Complex.h
Normal file
@@ -0,0 +1,463 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_COMPLEX_AVX_H
|
||||
#define EIGEN_COMPLEX_AVX_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
//---------- float ----------
|
||||
struct Packet4cf
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet4cf(const __m256& a) : v(a) {}
|
||||
__m256 v;
|
||||
};
|
||||
|
||||
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
{
|
||||
typedef Packet4cf type;
|
||||
typedef Packet2cf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 4,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4}; typedef Packet2cf half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pnegate(const Packet4cf& a)
|
||||
{
|
||||
return Packet4cf(pnegate(a.v));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a)
|
||||
{
|
||||
const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000));
|
||||
return Packet4cf(_mm256_xor_ps(a.v,mask));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pmul<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
|
||||
{
|
||||
__m256 tmp1 = _mm256_mul_ps(_mm256_moveldup_ps(a.v), b.v);
|
||||
__m256 tmp2 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2,3,0,1)));
|
||||
__m256 result = _mm256_addsub_ps(tmp1, tmp2);
|
||||
return Packet4cf(result);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pand <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_and_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf por <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_or_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pxor <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_xor_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_andnot_ps(a.v,b.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pload <Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(pload<Packet8f>(&numext::real_ref(*from))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(ploadu<Packet8f>(&numext::real_ref(*from))); }
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)
|
||||
{
|
||||
return Packet4cf(_mm256_castpd_ps(_mm256_broadcast_sd((const double*)(const void*)&from)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)
|
||||
{
|
||||
// FIXME The following might be optimized using _mm256_movedup_pd
|
||||
Packet2cf a = ploaddup<Packet2cf>(from);
|
||||
Packet2cf b = ploaddup<Packet2cf>(from+1);
|
||||
return Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(a.v), b.v, 1));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, DenseIndex stride)
|
||||
{
|
||||
return Packet4cf(_mm256_set_ps(std::imag(from[3*stride]), std::real(from[3*stride]),
|
||||
std::imag(from[2*stride]), std::real(from[2*stride]),
|
||||
std::imag(from[1*stride]), std::real(from[1*stride]),
|
||||
std::imag(from[0*stride]), std::real(from[0*stride])));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, DenseIndex stride)
|
||||
{
|
||||
__m128 low = _mm256_extractf128_ps(from.v, 0);
|
||||
to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(low, low, 1)));
|
||||
to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(low, low, 3)));
|
||||
|
||||
__m128 high = _mm256_extractf128_ps(from.v, 1);
|
||||
to[stride*2] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 0)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(high, high, 1)));
|
||||
to[stride*3] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)));
|
||||
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet4cf>(const Packet4cf& a)
|
||||
{
|
||||
return pfirst(Packet2cf(_mm256_castps256_ps128(a.v)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf preverse(const Packet4cf& a) {
|
||||
__m128 low = _mm256_extractf128_ps(a.v, 0);
|
||||
__m128 high = _mm256_extractf128_ps(a.v, 1);
|
||||
__m128d lowd = _mm_castps_pd(low);
|
||||
__m128d highd = _mm_castps_pd(high);
|
||||
low = _mm_castpd_ps(_mm_shuffle_pd(lowd,lowd,0x1));
|
||||
high = _mm_castpd_ps(_mm_shuffle_pd(highd,highd,0x1));
|
||||
__m256 result = _mm256_setzero_ps();
|
||||
result = _mm256_insertf128_ps(result, low, 1);
|
||||
result = _mm256_insertf128_ps(result, high, 0);
|
||||
return Packet4cf(result);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(const Packet4cf& a)
|
||||
{
|
||||
return predux(padd(Packet2cf(_mm256_extractf128_ps(a.v,0)),
|
||||
Packet2cf(_mm256_extractf128_ps(a.v,1))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf preduxp<Packet4cf>(const Packet4cf* vecs)
|
||||
{
|
||||
Packet8f t0 = _mm256_shuffle_ps(vecs[0].v, vecs[0].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
||||
Packet8f t1 = _mm256_shuffle_ps(vecs[1].v, vecs[1].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
||||
t0 = _mm256_hadd_ps(t0,t1);
|
||||
Packet8f t2 = _mm256_shuffle_ps(vecs[2].v, vecs[2].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
||||
Packet8f t3 = _mm256_shuffle_ps(vecs[3].v, vecs[3].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
||||
t2 = _mm256_hadd_ps(t2,t3);
|
||||
|
||||
t1 = _mm256_permute2f128_ps(t0,t2, 0 + (2<<4));
|
||||
t3 = _mm256_permute2f128_ps(t0,t2, 1 + (3<<4));
|
||||
|
||||
return Packet4cf(_mm256_add_ps(t1,t3));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a)
|
||||
{
|
||||
return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)),
|
||||
Packet2cf(_mm256_extractf128_ps(a.v, 1))));
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4cf>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet4cf& first, const Packet4cf& second)
|
||||
{
|
||||
if (Offset==0) return;
|
||||
palign_impl<Offset*2,Packet8f>::run(first.v, second.v);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4cf, Packet4cf, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
||||
{
|
||||
return internal::pmul(a, pconj(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4cf, Packet4cf, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
||||
{
|
||||
return internal::pmul(pconj(a), b);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
||||
{
|
||||
return pconj(internal::pmul(a, b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet8f, Packet4cf, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const
|
||||
{ return Packet4cf(Eigen::internal::pmul(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4cf, Packet8f, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const
|
||||
{ return Packet4cf(Eigen::internal::pmul(x.v, y)); }
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
|
||||
{
|
||||
Packet4cf num = pmul(a, pconj(b));
|
||||
__m256 tmp = _mm256_mul_ps(b.v, b.v);
|
||||
__m256 tmp2 = _mm256_shuffle_ps(tmp,tmp,0xB1);
|
||||
__m256 denom = _mm256_add_ps(tmp, tmp2);
|
||||
return Packet4cf(_mm256_div_ps(num.v, denom));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x)
|
||||
{
|
||||
return Packet4cf(_mm256_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0 ,1)));
|
||||
}
|
||||
|
||||
//---------- double ----------
|
||||
struct Packet2cd
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet2cd(const __m256d& a) : v(a) {}
|
||||
__m256d v;
|
||||
};
|
||||
|
||||
template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
{
|
||||
typedef Packet2cd type;
|
||||
typedef Packet1cd half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 0,
|
||||
size = 2,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2}; typedef Packet1cd half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pnegate(const Packet2cd& a) { return Packet2cd(pnegate(a.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a)
|
||||
{
|
||||
const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000,0x0,0x0,0x0,0x80000000,0x0,0x0,0x0));
|
||||
return Packet2cd(_mm256_xor_pd(a.v,mask));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pmul<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
|
||||
{
|
||||
__m256d tmp1 = _mm256_shuffle_pd(a.v,a.v,0x0);
|
||||
__m256d even = _mm256_mul_pd(tmp1, b.v);
|
||||
__m256d tmp2 = _mm256_shuffle_pd(a.v,a.v,0xF);
|
||||
__m256d tmp3 = _mm256_shuffle_pd(b.v,b.v,0x5);
|
||||
__m256d odd = _mm256_mul_pd(tmp2, tmp3);
|
||||
return Packet2cd(_mm256_addsub_pd(even, odd));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pand <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_and_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd por <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_or_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pxor <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_xor_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_andnot_pd(a.v,b.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pload <Packet2cd>(const std::complex<double>* from)
|
||||
{ EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(pload<Packet4d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<double>* from)
|
||||
{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(ploadu<Packet4d>((const double*)from)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from)
|
||||
{
|
||||
// in case casting to a __m128d* is really not safe, then we can still fallback to this version: (much slower though)
|
||||
// return Packet2cd(_mm256_loadu2_m128d((const double*)&from,(const double*)&from));
|
||||
return Packet2cd(_mm256_broadcast_pd((const __m128d*)(const void*)&from));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex<double>* from) { return pset1<Packet2cd>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, DenseIndex stride)
|
||||
{
|
||||
return Packet2cd(_mm256_set_pd(std::imag(from[1*stride]), std::real(from[1*stride]),
|
||||
std::imag(from[0*stride]), std::real(from[0*stride])));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, DenseIndex stride)
|
||||
{
|
||||
__m128d low = _mm256_extractf128_pd(from.v, 0);
|
||||
to[stride*0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));
|
||||
__m128d high = _mm256_extractf128_pd(from.v, 1);
|
||||
to[stride*1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet2cd>(const Packet2cd& a)
|
||||
{
|
||||
__m128d low = _mm256_extractf128_pd(a.v, 0);
|
||||
EIGEN_ALIGN16 double res[2];
|
||||
_mm_store_pd(res, low);
|
||||
return std::complex<double>(res[0],res[1]);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd preverse(const Packet2cd& a) {
|
||||
__m256d result = _mm256_permute2f128_pd(a.v, a.v, 1);
|
||||
return Packet2cd(result);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(const Packet2cd& a)
|
||||
{
|
||||
return predux(padd(Packet1cd(_mm256_extractf128_pd(a.v,0)),
|
||||
Packet1cd(_mm256_extractf128_pd(a.v,1))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd preduxp<Packet2cd>(const Packet2cd* vecs)
|
||||
{
|
||||
Packet4d t0 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 0 + (2<<4));
|
||||
Packet4d t1 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 1 + (3<<4));
|
||||
|
||||
return Packet2cd(_mm256_add_pd(t0,t1));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a)
|
||||
{
|
||||
return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v,0)),
|
||||
Packet1cd(_mm256_extractf128_pd(a.v,1))));
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2cd>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet2cd& first, const Packet2cd& second)
|
||||
{
|
||||
if (Offset==0) return;
|
||||
palign_impl<Offset*2,Packet4d>::run(first.v, second.v);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cd, Packet2cd, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
||||
{
|
||||
return internal::pmul(a, pconj(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cd, Packet2cd, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
||||
{
|
||||
return internal::pmul(pconj(a), b);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
||||
{
|
||||
return pconj(internal::pmul(a, b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4d, Packet2cd, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const
|
||||
{ return Packet2cd(Eigen::internal::pmul(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cd, Packet4d, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const
|
||||
{ return Packet2cd(Eigen::internal::pmul(x.v, y)); }
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
|
||||
{
|
||||
Packet2cd num = pmul(a, pconj(b));
|
||||
__m256d tmp = _mm256_mul_pd(b.v, b.v);
|
||||
__m256d denom = _mm256_hadd_pd(tmp, tmp);
|
||||
return Packet2cd(_mm256_div_pd(num.v, denom));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x)
|
||||
{
|
||||
return Packet2cd(_mm256_shuffle_pd(x.v, x.v, 0x5));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4cf,4>& kernel) {
|
||||
__m256d P0 = _mm256_castps_pd(kernel.packet[0].v);
|
||||
__m256d P1 = _mm256_castps_pd(kernel.packet[1].v);
|
||||
__m256d P2 = _mm256_castps_pd(kernel.packet[2].v);
|
||||
__m256d P3 = _mm256_castps_pd(kernel.packet[3].v);
|
||||
|
||||
__m256d T0 = _mm256_shuffle_pd(P0, P1, 15);
|
||||
__m256d T1 = _mm256_shuffle_pd(P0, P1, 0);
|
||||
__m256d T2 = _mm256_shuffle_pd(P2, P3, 15);
|
||||
__m256d T3 = _mm256_shuffle_pd(P2, P3, 0);
|
||||
|
||||
kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 32));
|
||||
kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 49));
|
||||
kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 32));
|
||||
kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet2cd,2>& kernel) {
|
||||
__m256d tmp = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0+(2<<4));
|
||||
kernel.packet[1].v = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1+(3<<4));
|
||||
kernel.packet[0].v = tmp;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_COMPLEX_AVX_H
|
||||
564
Eigen/src/Core/arch/AVX/PacketMath.h
Normal file
564
Eigen/src/Core/arch/AVX/PacketMath.h
Normal file
@@ -0,0 +1,564 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_PACKET_MATH_AVX_H
|
||||
#define EIGEN_PACKET_MATH_AVX_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
#ifndef EIGEN_HAS_FUSED_MADD
|
||||
#define EIGEN_HAS_FUSED_MADD 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
typedef __m256 Packet8f;
|
||||
typedef __m256i Packet8i;
|
||||
typedef __m256d Packet4d;
|
||||
|
||||
template<> struct is_arithmetic<__m256> { enum { value = true }; };
|
||||
template<> struct is_arithmetic<__m256i> { enum { value = true }; };
|
||||
template<> struct is_arithmetic<__m256d> { enum { value = true }; };
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \
|
||||
const Packet8f p8f_##NAME = pset1<Packet8f>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \
|
||||
const Packet4d p4d_##NAME = pset1<Packet4d>(X)
|
||||
|
||||
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet8f type;
|
||||
typedef Packet4f half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=8,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasDiv = 1,
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
HasExp = 0,
|
||||
HasSqrt = 0
|
||||
};
|
||||
};
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
{
|
||||
typedef Packet4d type;
|
||||
typedef Packet2d half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasDiv = 1,
|
||||
HasExp = 0
|
||||
};
|
||||
};
|
||||
|
||||
/* Proper support for integers is only provided by AVX2. In the meantime, we'll
|
||||
use SSE instructions and packets to deal with integers.
|
||||
template<> struct packet_traits<int> : default_packet_traits
|
||||
{
|
||||
typedef Packet8i type;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=8
|
||||
};
|
||||
};
|
||||
*/
|
||||
|
||||
template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; enum {size=8}; };
|
||||
template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4}; };
|
||||
template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8}; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8i pset1<Packet8i>(const int& from) { return _mm256_set1_epi32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float* from) { return _mm256_broadcast_ss(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f plset<float>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d plset<double>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
|
||||
{
|
||||
return _mm256_sub_ps(_mm256_set1_ps(0.0),a);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a)
|
||||
{
|
||||
return _mm256_sub_pd(_mm256_set1_pd(0.0),a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pconj(const Packet4d& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet8i pconj(const Packet8i& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmul<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_mul_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pmul<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_mul_pd(a,b); }
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pdiv<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_div_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pdiv<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_div_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, const Packet8i& /*b*/)
|
||||
{ eigen_assert(false && "packet integer division are not supported by AVX");
|
||||
return pset1<Packet8i>(0);
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
// clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
|
||||
// and gcc stupidly generates a vfmadd132ps instruction,
|
||||
// so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
|
||||
// the result of the product.
|
||||
Packet8f res = c;
|
||||
__asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
|
||||
return res;
|
||||
#else
|
||||
return _mm256_fmadd_ps(a,b,c);
|
||||
#endif
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
// see above
|
||||
Packet4d res = c;
|
||||
__asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
|
||||
return res;
|
||||
#else
|
||||
return _mm256_fmadd_pd(a,b,c);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f por<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_or_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d por<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_or_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pxor<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_xor_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pxor<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_xor_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pandnot<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_andnot_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pandnot<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_andnot_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pload<Packet8f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pload<Packet4d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8i pload<Packet8i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); }
|
||||
|
||||
// Loads 4 floats from memory a returns the packet {a0, a0 a1, a1, a2, a2, a3, a3}
|
||||
template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)
|
||||
{
|
||||
// TODO try to find a way to avoid the need of a temporary register
|
||||
// Packet8f tmp = _mm256_castps128_ps256(_mm_loadu_ps(from));
|
||||
// tmp = _mm256_insertf128_ps(tmp, _mm_movehl_ps(_mm256_castps256_ps128(tmp),_mm256_castps256_ps128(tmp)), 1);
|
||||
// return _mm256_unpacklo_ps(tmp,tmp);
|
||||
|
||||
// _mm256_insertf128_ps is very slow on Haswell, thus:
|
||||
Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from);
|
||||
// mimic an "inplace" permutation of the lower 128bits using a blend
|
||||
tmp = _mm256_blend_ps(tmp,_mm256_castps128_ps256(_mm_permute_ps( _mm256_castps256_ps128(tmp), _MM_SHUFFLE(1,0,1,0))), 15);
|
||||
// then we can perform a consistent permutation on the global register to get everything in shape:
|
||||
return _mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2));
|
||||
}
|
||||
// Loads 2 doubles from memory a returns the packet {a0, a0 a1, a1}
|
||||
template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from)
|
||||
{
|
||||
Packet4d tmp = _mm256_broadcast_pd((const __m128d*)(const void*)from);
|
||||
return _mm256_permute_pd(tmp, 3<<2);
|
||||
}
|
||||
|
||||
// Loads 2 floats from memory a returns the packet {a0, a0 a0, a0, a1, a1, a1, a1}
|
||||
template<> EIGEN_STRONG_INLINE Packet8f ploadquad<Packet8f>(const float* from)
|
||||
{
|
||||
Packet8f tmp = _mm256_castps128_ps256(_mm_broadcast_ss(from));
|
||||
return _mm256_insertf128_ps(tmp, _mm_broadcast_ss(from+1), 1);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
|
||||
|
||||
// NOTE: leverage _mm256_i32gather_ps and _mm256_i32gather_pd if AVX2 instructions are available
|
||||
// NOTE: for the record the following seems to be slower: return _mm256_i32gather_ps(from, _mm256_set1_epi32(stride), 4);
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, DenseIndex stride)
|
||||
{
|
||||
return _mm256_set_ps(from[7*stride], from[6*stride], from[5*stride], from[4*stride],
|
||||
from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, DenseIndex stride)
|
||||
{
|
||||
return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, DenseIndex stride)
|
||||
{
|
||||
__m128 low = _mm256_extractf128_ps(from, 0);
|
||||
to[stride*0] = _mm_cvtss_f32(low);
|
||||
to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1));
|
||||
to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 2));
|
||||
to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3));
|
||||
|
||||
__m128 high = _mm256_extractf128_ps(from, 1);
|
||||
to[stride*4] = _mm_cvtss_f32(high);
|
||||
to[stride*5] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1));
|
||||
to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2));
|
||||
to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, DenseIndex stride)
|
||||
{
|
||||
__m128d low = _mm256_extractf128_pd(from, 0);
|
||||
to[stride*0] = _mm_cvtsd_f64(low);
|
||||
to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1));
|
||||
__m128d high = _mm256_extractf128_pd(from, 1);
|
||||
to[stride*2] = _mm_cvtsd_f64(high);
|
||||
to[stride*3] = _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore1<Packet8f>(float* to, const float& a)
|
||||
{
|
||||
Packet8f pa = pset1<Packet8f>(a);
|
||||
pstore(to, pa);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void pstore1<Packet4d>(double* to, const double& a)
|
||||
{
|
||||
Packet4d pa = pset1<Packet4d>(a);
|
||||
pstore(to, pa);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
|
||||
{
|
||||
Packet8i pa = pset1<Packet8i>(a);
|
||||
pstore(to, pa);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
|
||||
return _mm_cvtss_f32(_mm256_castps256_ps128(a));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double pfirst<Packet4d>(const Packet4d& a) {
|
||||
return _mm_cvtsd_f64(_mm256_castpd256_pd128(a));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int pfirst<Packet8i>(const Packet8i& a) {
|
||||
return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
|
||||
}
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f preverse(const Packet8f& a)
|
||||
{
|
||||
__m256 tmp = _mm256_shuffle_ps(a,a,0x1b);
|
||||
return _mm256_permute2f128_ps(tmp, tmp, 1);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
|
||||
{
|
||||
__m256d tmp = _mm256_shuffle_pd(a,a,5);
|
||||
return _mm256_permute2f128_pd(tmp, tmp, 1);
|
||||
|
||||
__m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
|
||||
return _mm256_permute_pd(swap_halves,5);
|
||||
}
|
||||
|
||||
// pabs should be ok
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a)
|
||||
{
|
||||
const Packet8f mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
|
||||
return _mm256_and_ps(a,mask);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a)
|
||||
{
|
||||
const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
|
||||
return _mm256_and_pd(a,mask);
|
||||
}
|
||||
|
||||
// preduxp should be ok
|
||||
// FIXME: why is this ok? why isn't the simply implementation working as expected?
|
||||
template<> EIGEN_STRONG_INLINE Packet8f preduxp<Packet8f>(const Packet8f* vecs)
|
||||
{
|
||||
__m256 hsum1 = _mm256_hadd_ps(vecs[0], vecs[1]);
|
||||
__m256 hsum2 = _mm256_hadd_ps(vecs[2], vecs[3]);
|
||||
__m256 hsum3 = _mm256_hadd_ps(vecs[4], vecs[5]);
|
||||
__m256 hsum4 = _mm256_hadd_ps(vecs[6], vecs[7]);
|
||||
|
||||
__m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
|
||||
__m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
|
||||
__m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
|
||||
__m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
|
||||
|
||||
__m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
|
||||
__m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
|
||||
__m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
|
||||
__m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
|
||||
|
||||
__m256 sum1 = _mm256_add_ps(perm1, hsum5);
|
||||
__m256 sum2 = _mm256_add_ps(perm2, hsum6);
|
||||
__m256 sum3 = _mm256_add_ps(perm3, hsum7);
|
||||
__m256 sum4 = _mm256_add_ps(perm4, hsum8);
|
||||
|
||||
__m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
|
||||
__m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
|
||||
|
||||
__m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
|
||||
return final;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
|
||||
{
|
||||
Packet4d tmp0, tmp1;
|
||||
|
||||
tmp0 = _mm256_hadd_pd(vecs[0], vecs[1]);
|
||||
tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
|
||||
|
||||
tmp1 = _mm256_hadd_pd(vecs[2], vecs[3]);
|
||||
tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
|
||||
|
||||
return _mm256_blend_pd(tmp0, tmp1, 0xC);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
|
||||
{
|
||||
Packet8f tmp0 = _mm256_hadd_ps(a,_mm256_permute2f128_ps(a,a,1));
|
||||
tmp0 = _mm256_hadd_ps(tmp0,tmp0);
|
||||
return pfirst(_mm256_hadd_ps(tmp0, tmp0));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
|
||||
{
|
||||
Packet4d tmp0 = _mm256_hadd_pd(a,_mm256_permute2f128_pd(a,a,1));
|
||||
return pfirst(_mm256_hadd_pd(tmp0,tmp0));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f predux4<Packet8f>(const Packet8f& a)
|
||||
{
|
||||
return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_mul<Packet8f>(const Packet8f& a)
|
||||
{
|
||||
Packet8f tmp;
|
||||
tmp = _mm256_mul_ps(a, _mm256_permute2f128_ps(a,a,1));
|
||||
tmp = _mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
|
||||
return pfirst(_mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux_mul<Packet4d>(const Packet4d& a)
|
||||
{
|
||||
Packet4d tmp;
|
||||
tmp = _mm256_mul_pd(a, _mm256_permute2f128_pd(a,a,1));
|
||||
return pfirst(_mm256_mul_pd(tmp, _mm256_shuffle_pd(tmp,tmp,1)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_min<Packet8f>(const Packet8f& a)
|
||||
{
|
||||
Packet8f tmp = _mm256_min_ps(a, _mm256_permute2f128_ps(a,a,1));
|
||||
tmp = _mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
|
||||
return pfirst(_mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux_min<Packet4d>(const Packet4d& a)
|
||||
{
|
||||
Packet4d tmp = _mm256_min_pd(a, _mm256_permute2f128_pd(a,a,1));
|
||||
return pfirst(_mm256_min_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_max<Packet8f>(const Packet8f& a)
|
||||
{
|
||||
Packet8f tmp = _mm256_max_ps(a, _mm256_permute2f128_ps(a,a,1));
|
||||
tmp = _mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
|
||||
return pfirst(_mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a)
|
||||
{
|
||||
Packet4d tmp = _mm256_max_pd(a, _mm256_permute2f128_pd(a,a,1));
|
||||
return pfirst(_mm256_max_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
|
||||
}
|
||||
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet8f>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet8f& first, const Packet8f& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 1);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
|
||||
first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0x88);
|
||||
}
|
||||
else if (Offset==2)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 3);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
|
||||
first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0xcc);
|
||||
}
|
||||
else if (Offset==3)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 7);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
|
||||
first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0xee);
|
||||
}
|
||||
else if (Offset==4)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 15);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
|
||||
first = _mm256_permute_ps(_mm256_permute2f128_ps (tmp, tmp, 1), _MM_SHUFFLE(3,2,1,0));
|
||||
}
|
||||
else if (Offset==5)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 31);
|
||||
first = _mm256_permute2f128_ps(first, first, 1);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
|
||||
first = _mm256_permute2f128_ps(tmp, tmp, 1);
|
||||
first = _mm256_blend_ps(tmp, first, 0x88);
|
||||
}
|
||||
else if (Offset==6)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 63);
|
||||
first = _mm256_permute2f128_ps(first, first, 1);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
|
||||
first = _mm256_permute2f128_ps(tmp, tmp, 1);
|
||||
first = _mm256_blend_ps(tmp, first, 0xcc);
|
||||
}
|
||||
else if (Offset==7)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 127);
|
||||
first = _mm256_permute2f128_ps(first, first, 1);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
|
||||
first = _mm256_permute2f128_ps(tmp, tmp, 1);
|
||||
first = _mm256_blend_ps(tmp, first, 0xee);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4d>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet4d& first, const Packet4d& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
first = _mm256_blend_pd(first, second, 1);
|
||||
__m256d tmp = _mm256_permute_pd(first, 5);
|
||||
first = _mm256_permute2f128_pd(tmp, tmp, 1);
|
||||
first = _mm256_blend_pd(tmp, first, 0xA);
|
||||
}
|
||||
else if (Offset==2)
|
||||
{
|
||||
first = _mm256_blend_pd(first, second, 3);
|
||||
first = _mm256_permute2f128_pd(first, first, 1);
|
||||
}
|
||||
else if (Offset==3)
|
||||
{
|
||||
first = _mm256_blend_pd(first, second, 7);
|
||||
__m256d tmp = _mm256_permute_pd(first, 5);
|
||||
first = _mm256_permute2f128_pd(tmp, tmp, 1);
|
||||
first = _mm256_blend_pd(tmp, first, 5);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet8f,8>& kernel) {
|
||||
__m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
|
||||
__m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
|
||||
__m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
|
||||
__m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
|
||||
__m256 T4 = _mm256_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
|
||||
__m256 T5 = _mm256_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
|
||||
__m256 T6 = _mm256_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
|
||||
__m256 T7 = _mm256_unpackhi_ps(kernel.packet[6], kernel.packet[7]);
|
||||
__m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
|
||||
__m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
|
||||
__m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
|
||||
__m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
|
||||
__m256 S4 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(1,0,1,0));
|
||||
__m256 S5 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(3,2,3,2));
|
||||
__m256 S6 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(1,0,1,0));
|
||||
__m256 S7 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(3,2,3,2));
|
||||
kernel.packet[0] = _mm256_permute2f128_ps(S0, S4, 0x20);
|
||||
kernel.packet[1] = _mm256_permute2f128_ps(S1, S5, 0x20);
|
||||
kernel.packet[2] = _mm256_permute2f128_ps(S2, S6, 0x20);
|
||||
kernel.packet[3] = _mm256_permute2f128_ps(S3, S7, 0x20);
|
||||
kernel.packet[4] = _mm256_permute2f128_ps(S0, S4, 0x31);
|
||||
kernel.packet[5] = _mm256_permute2f128_ps(S1, S5, 0x31);
|
||||
kernel.packet[6] = _mm256_permute2f128_ps(S2, S6, 0x31);
|
||||
kernel.packet[7] = _mm256_permute2f128_ps(S3, S7, 0x31);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet8f,4>& kernel) {
|
||||
__m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
|
||||
__m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
|
||||
__m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
|
||||
__m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
|
||||
|
||||
__m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
|
||||
__m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
|
||||
__m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
|
||||
__m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
|
||||
|
||||
kernel.packet[0] = _mm256_permute2f128_ps(S0, S1, 0x20);
|
||||
kernel.packet[1] = _mm256_permute2f128_ps(S2, S3, 0x20);
|
||||
kernel.packet[2] = _mm256_permute2f128_ps(S0, S1, 0x31);
|
||||
kernel.packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4d,4>& kernel) {
|
||||
__m256d T0 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 15);
|
||||
__m256d T1 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);
|
||||
__m256d T2 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 15);
|
||||
__m256d T3 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);
|
||||
|
||||
kernel.packet[1] = _mm256_permute2f128_pd(T0, T2, 32);
|
||||
kernel.packet[3] = _mm256_permute2f128_pd(T0, T2, 49);
|
||||
kernel.packet[0] = _mm256_permute2f128_pd(T1, T3, 32);
|
||||
kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PACKET_MATH_AVX_H
|
||||
@@ -16,11 +16,14 @@ namespace internal {
|
||||
|
||||
static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
||||
static Packet16uc p16uc_COMPLEX_RE = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
|
||||
static Packet16uc p16uc_COMPLEX_IM = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
|
||||
static Packet16uc p16uc_COMPLEX_IM = vec_sld(p16uc_DUPLICATE, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
|
||||
static Packet16uc p16uc_COMPLEX_REV = vec_sld(p16uc_REVERSE, p16uc_REVERSE, 8);//{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
|
||||
static Packet16uc p16uc_COMPLEX_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
|
||||
static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 1));//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
|
||||
static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 2), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 3));//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
|
||||
static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_COMPLEX_RE, (Packet4ui)p16uc_COMPLEX_IM);//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
|
||||
static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_COMPLEX_RE, (Packet4ui)p16uc_COMPLEX_IM);//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
|
||||
static Packet16uc p16uc_COMPLEX_MASK16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8);//{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
|
||||
static Packet16uc p16uc_COMPLEX_TRANSPOSE_0 = vec_add(p16uc_PSET_HI, p16uc_COMPLEX_MASK16);//{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
|
||||
static Packet16uc p16uc_COMPLEX_TRANSPOSE_1 = vec_add(p16uc_PSET_LO, p16uc_COMPLEX_MASK16);//{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};
|
||||
|
||||
//---------- float ----------
|
||||
struct Packet2cf
|
||||
@@ -33,6 +36,7 @@ struct Packet2cf
|
||||
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
{
|
||||
typedef Packet2cf type;
|
||||
typedef Packet2cf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
@@ -51,7 +55,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
@@ -65,6 +69,22 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||
af[0] = from[0*stride];
|
||||
af[1] = from[1*stride];
|
||||
return Packet2cf(vec_ld(0, (const float*)af));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||
vec_st(from.v, 0, (float*)af);
|
||||
to[0*stride] = af[0];
|
||||
to[1*stride] = af[1];
|
||||
}
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_add(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_sub(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
|
||||
@@ -210,6 +230,13 @@ template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x
|
||||
return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
|
||||
{
|
||||
Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_0);
|
||||
kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_1);
|
||||
kernel.packet[0].v = tmp;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
132
Eigen/src/Core/arch/AltiVec/PacketMath.h
Normal file → Executable file
132
Eigen/src/Core/arch/AltiVec/PacketMath.h
Normal file → Executable file
@@ -1,7 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Konstantinos Margaritis <markos@codex.gr>
|
||||
// Copyright (C) 2008-2014 Konstantinos Margaritis <markos@freevec.org>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
@@ -18,6 +18,10 @@ namespace internal {
|
||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_FUSED_MADD
|
||||
#define EIGEN_HAS_FUSED_MADD 1
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_FUSE_CJMADD
|
||||
#define EIGEN_HAS_FUSE_CJMADD 1
|
||||
#endif
|
||||
@@ -56,29 +60,32 @@ typedef __vector unsigned char Packet16uc;
|
||||
#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
|
||||
|
||||
// Define global static constants:
|
||||
static Packet4f p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 };
|
||||
static Packet4i p4i_COUNTDOWN = { 3, 2, 1, 0 };
|
||||
static Packet16uc p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
|
||||
static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
|
||||
static Packet16uc p16uc_DUPLICATE = {0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7};
|
||||
static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
|
||||
static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
|
||||
static Packet16uc p16uc_REVERSE = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
|
||||
static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0); //{ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}
|
||||
static Packet16uc p16uc_DUPLICATE = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7};
|
||||
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0);
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0);
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1);
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16);
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1);
|
||||
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0);
|
||||
static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1);
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}
|
||||
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}
|
||||
static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000}
|
||||
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet4f type;
|
||||
typedef Packet4f half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
HasHalfPacket=0,
|
||||
|
||||
// FIXME check the Has*
|
||||
HasDiv = 1,
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
@@ -89,6 +96,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
template<> struct packet_traits<int> : default_packet_traits
|
||||
{
|
||||
typedef Packet4i type;
|
||||
typedef Packet4i half;
|
||||
enum {
|
||||
// FIXME check the Has*
|
||||
Vectorizable = 1,
|
||||
@@ -97,8 +105,8 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
|
||||
/*
|
||||
inline std::ostream & operator <<(std::ostream & s, const Packet4f & v)
|
||||
{
|
||||
@@ -144,6 +152,7 @@ inline std::ostream & operator <<(std::ostream & s, const Packetbi & v)
|
||||
return s;
|
||||
}
|
||||
*/
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
|
||||
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
|
||||
float EIGEN_ALIGN16 af[4];
|
||||
@@ -161,6 +170,65 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
|
||||
return vc;
|
||||
}
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet4f>(const float *a,
|
||||
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
|
||||
{
|
||||
a3 = vec_ld(0,a);
|
||||
a0 = vec_splat(a3, 0);
|
||||
a1 = vec_splat(a3, 1);
|
||||
a2 = vec_splat(a3, 2);
|
||||
a3 = vec_splat(a3, 3);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet4i>(const int *a,
|
||||
Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
|
||||
{
|
||||
a3 = vec_ld(0,a);
|
||||
a0 = vec_splat(a3, 0);
|
||||
a1 = vec_splat(a3, 1);
|
||||
a2 = vec_splat(a3, 2);
|
||||
a3 = vec_splat(a3, 3);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride)
|
||||
{
|
||||
float EIGEN_ALIGN16 af[4];
|
||||
af[0] = from[0*stride];
|
||||
af[1] = from[1*stride];
|
||||
af[2] = from[2*stride];
|
||||
af[3] = from[3*stride];
|
||||
return vec_ld(0, af);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride)
|
||||
{
|
||||
int EIGEN_ALIGN16 ai[4];
|
||||
ai[0] = from[0*stride];
|
||||
ai[1] = from[1*stride];
|
||||
ai[2] = from[2*stride];
|
||||
ai[3] = from[3*stride];
|
||||
return vec_ld(0, ai);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride)
|
||||
{
|
||||
float EIGEN_ALIGN16 af[4];
|
||||
vec_st(from, 0, af);
|
||||
to[0*stride] = af[0];
|
||||
to[1*stride] = af[1];
|
||||
to[2*stride] = af[2];
|
||||
to[3*stride] = af[3];
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride)
|
||||
{
|
||||
int EIGEN_ALIGN16 ai[4];
|
||||
vec_st(from, 0, ai);
|
||||
to[0*stride] = ai[0];
|
||||
to[1*stride] = ai[1];
|
||||
to[2*stride] = ai[2];
|
||||
to[3*stride] = ai[3];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return vec_add(pset1<Packet4f>(a), p4f_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return vec_add(pset1<Packet4i>(a), p4i_COUNTDOWN); }
|
||||
|
||||
@@ -286,15 +354,15 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
|
||||
{
|
||||
Packet4f p;
|
||||
if((ptrdiff_t(&from) % 16) == 0) p = pload<Packet4f>(from);
|
||||
else p = ploadu<Packet4f>(from);
|
||||
if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from);
|
||||
else p = ploadu<Packet4f>(from);
|
||||
return vec_perm(p, p, p16uc_DUPLICATE);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
|
||||
{
|
||||
Packet4i p;
|
||||
if((ptrdiff_t(&from) % 16) == 0) p = pload<Packet4i>(from);
|
||||
else p = ploadu<Packet4i>(from);
|
||||
if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from);
|
||||
else p = ploadu<Packet4i>(from);
|
||||
return vec_perm(p, p, p16uc_DUPLICATE);
|
||||
}
|
||||
|
||||
@@ -494,6 +562,32 @@ struct palign_impl<Offset,Packet4i>
|
||||
}
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4f,4>& kernel) {
|
||||
Packet4f t0, t1, t2, t3;
|
||||
t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
|
||||
t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
|
||||
t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
|
||||
t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
|
||||
kernel.packet[0] = vec_mergeh(t0, t2);
|
||||
kernel.packet[1] = vec_mergel(t0, t2);
|
||||
kernel.packet[2] = vec_mergeh(t1, t3);
|
||||
kernel.packet[3] = vec_mergel(t1, t3);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4i,4>& kernel) {
|
||||
Packet4i t0, t1, t2, t3;
|
||||
t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
|
||||
t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
|
||||
t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
|
||||
t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
|
||||
kernel.packet[0] = vec_mergeh(t0, t2);
|
||||
kernel.packet[1] = vec_mergel(t0, t2);
|
||||
kernel.packet[2] = vec_mergeh(t1, t3);
|
||||
kernel.packet[3] = vec_mergel(t1, t3);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
ADD_SUBDIRECTORY(SSE)
|
||||
ADD_SUBDIRECTORY(AltiVec)
|
||||
ADD_SUBDIRECTORY(NEON)
|
||||
ADD_SUBDIRECTORY(AVX)
|
||||
ADD_SUBDIRECTORY(Default)
|
||||
|
||||
@@ -28,6 +28,7 @@ struct Packet2cf
|
||||
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
{
|
||||
typedef Packet2cf type;
|
||||
typedef Packet2cf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
@@ -46,7 +47,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
@@ -110,7 +111,23 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { __pld((float *)addr); }
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride)
|
||||
{
|
||||
Packet4f res;
|
||||
res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
|
||||
res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
|
||||
res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
|
||||
res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
|
||||
return Packet2cf(res);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride)
|
||||
{
|
||||
to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
|
||||
to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
@@ -246,6 +263,14 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
|
||||
return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet2cf,2>& kernel) {
|
||||
float32x4_t tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
|
||||
kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
|
||||
kernel.packet[1].v = tmp;
|
||||
}
|
||||
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -48,14 +48,25 @@ typedef uint32x4_t Packet4ui;
|
||||
#define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y}
|
||||
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
|
||||
#endif
|
||||
|
||||
#ifndef __pld
|
||||
#define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" );
|
||||
|
||||
|
||||
// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function
|
||||
// which available on LLVM and GCC (at least)
|
||||
#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
|
||||
#define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
|
||||
#elif defined __pld
|
||||
#define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
|
||||
#elif !defined(__aarch64__)
|
||||
#define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
|
||||
#else
|
||||
// by default no explicit prefetching
|
||||
#define EIGEN_ARM_PREFETCH(ADDR)
|
||||
#endif
|
||||
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet4f type;
|
||||
typedef Packet4f half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
@@ -73,6 +84,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
template<> struct packet_traits<int> : default_packet_traits
|
||||
{
|
||||
typedef Packet4i type;
|
||||
typedef Packet4i half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
@@ -85,12 +97,13 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
// workaround gcc 4.2, 4.3 and 4.4 compilatin issue
|
||||
EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
|
||||
EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
|
||||
EIGEN_STRONG_INLINE float32x2_t vld1_dup_f32 (const float* x) { return ::vld1_dup_f32 ((const float32_t*)x); }
|
||||
EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q_f32((float32_t*)to,from); }
|
||||
EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
|
||||
#endif
|
||||
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); }
|
||||
@@ -209,8 +222,42 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& f
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { __pld(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { __pld(addr); }
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride)
|
||||
{
|
||||
Packet4f res;
|
||||
res = vsetq_lane_f32(from[0*stride], res, 0);
|
||||
res = vsetq_lane_f32(from[1*stride], res, 1);
|
||||
res = vsetq_lane_f32(from[2*stride], res, 2);
|
||||
res = vsetq_lane_f32(from[3*stride], res, 3);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride)
|
||||
{
|
||||
Packet4i res;
|
||||
res = vsetq_lane_s32(from[0*stride], res, 0);
|
||||
res = vsetq_lane_s32(from[1*stride], res, 1);
|
||||
res = vsetq_lane_s32(from[2*stride], res, 2);
|
||||
res = vsetq_lane_s32(from[3*stride], res, 3);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride)
|
||||
{
|
||||
to[stride*0] = vgetq_lane_f32(from, 0);
|
||||
to[stride*1] = vgetq_lane_f32(from, 1);
|
||||
to[stride*2] = vgetq_lane_f32(from, 2);
|
||||
to[stride*3] = vgetq_lane_f32(from, 3);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride)
|
||||
{
|
||||
to[stride*0] = vgetq_lane_s32(from, 0);
|
||||
to[stride*1] = vgetq_lane_s32(from, 1);
|
||||
to[stride*2] = vgetq_lane_s32(from, 2);
|
||||
to[stride*3] = vgetq_lane_s32(from, 3);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ARM_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ARM_PREFETCH(addr); }
|
||||
|
||||
// FIXME only store the 2 first elements ?
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
|
||||
@@ -375,6 +422,7 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
|
||||
a_lo = vget_low_s32(a);
|
||||
a_hi = vget_high_s32(a);
|
||||
max = vpmax_s32(a_lo, a_hi);
|
||||
max = vpmax_s32(max, max);
|
||||
|
||||
return vget_lane_s32(max, 0);
|
||||
}
|
||||
@@ -400,9 +448,30 @@ PALIGN_NEON(0,Packet4i,vextq_s32)
|
||||
PALIGN_NEON(1,Packet4i,vextq_s32)
|
||||
PALIGN_NEON(2,Packet4i,vextq_s32)
|
||||
PALIGN_NEON(3,Packet4i,vextq_s32)
|
||||
|
||||
|
||||
#undef PALIGN_NEON
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4f,4>& kernel) {
|
||||
float32x4x2_t tmp1 = vzipq_f32(kernel.packet[0], kernel.packet[1]);
|
||||
float32x4x2_t tmp2 = vzipq_f32(kernel.packet[2], kernel.packet[3]);
|
||||
|
||||
kernel.packet[0] = vcombine_f32(vget_low_f32(tmp1.val[0]), vget_low_f32(tmp2.val[0]));
|
||||
kernel.packet[1] = vcombine_f32(vget_high_f32(tmp1.val[0]), vget_high_f32(tmp2.val[0]));
|
||||
kernel.packet[2] = vcombine_f32(vget_low_f32(tmp1.val[1]), vget_low_f32(tmp2.val[1]));
|
||||
kernel.packet[3] = vcombine_f32(vget_high_f32(tmp1.val[1]), vget_high_f32(tmp2.val[1]));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4i,4>& kernel) {
|
||||
int32x4x2_t tmp1 = vzipq_s32(kernel.packet[0], kernel.packet[1]);
|
||||
int32x4x2_t tmp2 = vzipq_s32(kernel.packet[2], kernel.packet[3]);
|
||||
kernel.packet[0] = vcombine_s32(vget_low_s32(tmp1.val[0]), vget_low_s32(tmp2.val[0]));
|
||||
kernel.packet[1] = vcombine_s32(vget_high_s32(tmp1.val[0]), vget_high_s32(tmp2.val[0]));
|
||||
kernel.packet[2] = vcombine_s32(vget_low_s32(tmp1.val[1]), vget_low_s32(tmp2.val[1]));
|
||||
kernel.packet[3] = vcombine_s32(vget_high_s32(tmp1.val[1]), vget_high_s32(tmp2.val[1]));
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -22,13 +22,18 @@ struct Packet2cf
|
||||
__m128 v;
|
||||
};
|
||||
|
||||
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
|
||||
// to leverage AVX instructions.
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
{
|
||||
typedef Packet2cf type;
|
||||
typedef Packet2cf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 2,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
@@ -42,8 +47,9 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
|
||||
@@ -104,8 +110,23 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); }
|
||||
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride)
|
||||
{
|
||||
return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]),
|
||||
std::imag(from[0*stride]), std::real(from[0*stride])));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride)
|
||||
{
|
||||
to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
|
||||
to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
|
||||
@@ -124,7 +145,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Pack
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(_mm_castps_pd(a.v)))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v))))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
@@ -214,7 +235,7 @@ template<> struct conj_helper<Packet4f, Packet2cf, false,false>
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
|
||||
{ return Packet2cf(Eigen::internal::pmul(x, y.v)); }
|
||||
{ return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet4f, false,false>
|
||||
@@ -223,7 +244,7 @@ template<> struct conj_helper<Packet2cf, Packet4f, false,false>
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
|
||||
{ return Packet2cf(Eigen::internal::pmul(x.v, y)); }
|
||||
{ return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); }
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
@@ -248,13 +269,18 @@ struct Packet1cd
|
||||
__m128d v;
|
||||
};
|
||||
|
||||
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
|
||||
// to leverage AVX instructions.
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
{
|
||||
typedef Packet1cd type;
|
||||
typedef Packet1cd half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 0,
|
||||
size = 1,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
@@ -268,12 +294,13 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; };
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(a.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
|
||||
{
|
||||
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
|
||||
@@ -311,8 +338,8 @@ template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<dou
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
|
||||
|
||||
// FIXME force unaligned store, this is a temporary fix
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
|
||||
@@ -410,7 +437,7 @@ template<> struct conj_helper<Packet2d, Packet1cd, false,false>
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
|
||||
{ return Packet1cd(Eigen::internal::pmul(x, y.v)); }
|
||||
{ return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet2d, false,false>
|
||||
@@ -419,7 +446,7 @@ template<> struct conj_helper<Packet1cd, Packet2d, false,false>
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
|
||||
{ return Packet1cd(Eigen::internal::pmul(x.v, y)); }
|
||||
{ return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); }
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
@@ -432,7 +459,17 @@ template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, con
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
|
||||
{
|
||||
return Packet1cd(preverse(x.v));
|
||||
return Packet1cd(preverse(Packet2d(x.v)));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet2cf,2>& kernel) {
|
||||
__m128d w1 = _mm_castps_pd(kernel.packet[0].v);
|
||||
__m128d w2 = _mm_castps_pd(kernel.packet[1].v);
|
||||
|
||||
__m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
|
||||
kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
|
||||
kernel.packet[1].v = tmp;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
@@ -63,7 +63,7 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
|
||||
x = _mm_or_ps(x, p4f_half);
|
||||
|
||||
emm0 = _mm_sub_epi32(emm0, p4i_0x7f);
|
||||
Packet4f e = padd(_mm_cvtepi32_ps(emm0), p4f_1);
|
||||
Packet4f e = padd(Packet4f(_mm_cvtepi32_ps(emm0)), p4f_1);
|
||||
|
||||
/* part2:
|
||||
if( x < SQRTHF ) {
|
||||
@@ -72,9 +72,9 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
|
||||
} else { x = x - 1.0; }
|
||||
*/
|
||||
Packet4f mask = _mm_cmplt_ps(x, p4f_cephes_SQRTHF);
|
||||
Packet4f tmp = _mm_and_ps(x, mask);
|
||||
Packet4f tmp = pand(x, mask);
|
||||
x = psub(x, p4f_1);
|
||||
e = psub(e, _mm_and_ps(p4f_1, mask));
|
||||
e = psub(e, pand(p4f_1, mask));
|
||||
x = padd(x, tmp);
|
||||
|
||||
Packet4f x2 = pmul(x,x);
|
||||
@@ -126,7 +126,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
|
||||
|
||||
Packet4f tmp = _mm_setzero_ps(), fx;
|
||||
Packet4f tmp, fx;
|
||||
Packet4i emm0;
|
||||
|
||||
// clamp x
|
||||
@@ -138,6 +138,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
fx = _mm_floor_ps(fx);
|
||||
#else
|
||||
tmp = _mm_setzero_ps();
|
||||
emm0 = _mm_cvttps_epi32(fx);
|
||||
tmp = _mm_cvtepi32_ps(emm0);
|
||||
/* if greater, substract 1 */
|
||||
@@ -166,7 +167,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
|
||||
emm0 = _mm_cvttps_epi32(fx);
|
||||
emm0 = _mm_add_epi32(emm0, p4i_0x7f);
|
||||
emm0 = _mm_slli_epi32(emm0, 23);
|
||||
return pmul(y, _mm_castsi128_ps(emm0));
|
||||
return pmul(y, Packet4f(_mm_castsi128_ps(emm0)));
|
||||
}
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
@@ -195,7 +196,7 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0);
|
||||
|
||||
Packet2d tmp = _mm_setzero_pd(), fx;
|
||||
Packet2d tmp, fx;
|
||||
Packet4i emm0;
|
||||
|
||||
// clamp x
|
||||
@@ -206,6 +207,7 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
fx = _mm_floor_pd(fx);
|
||||
#else
|
||||
tmp = _mm_setzero_pd();
|
||||
emm0 = _mm_cvttpd_epi32(fx);
|
||||
tmp = _mm_cvtepi32_pd(emm0);
|
||||
/* if greater, substract 1 */
|
||||
@@ -239,7 +241,7 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
emm0 = _mm_add_epi32(emm0, p4i_1023_0);
|
||||
emm0 = _mm_slli_epi32(emm0, 20);
|
||||
emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
|
||||
return pmul(x, _mm_castsi128_pd(emm0));
|
||||
return pmul(x, Packet2d(_mm_castsi128_pd(emm0)));
|
||||
}
|
||||
|
||||
/* evaluation of 4 sines at onces, using SSE2 intrinsics.
|
||||
@@ -279,7 +281,7 @@ Packet4f psin<Packet4f>(const Packet4f& _x)
|
||||
_EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI
|
||||
|
||||
Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y;
|
||||
Packet4f xmm1, xmm2, xmm3, sign_bit, y;
|
||||
|
||||
Packet4i emm0, emm2;
|
||||
sign_bit = x;
|
||||
@@ -378,7 +380,7 @@ Packet4f pcos<Packet4f>(const Packet4f& _x)
|
||||
_EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI
|
||||
|
||||
Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
|
||||
Packet4f xmm1, xmm2, xmm3, y;
|
||||
Packet4i emm0, emm2;
|
||||
|
||||
x = pabs(x);
|
||||
@@ -442,8 +444,11 @@ Packet4f pcos<Packet4f>(const Packet4f& _x)
|
||||
return _mm_xor_ps(y, sign_bit);
|
||||
}
|
||||
|
||||
#if EIGEN_FAST_MATH
|
||||
|
||||
// This is based on Quake3's fast inverse square root.
|
||||
// For detail see here: http://www.beyond3d.com/content/articles/8/
|
||||
// It lacks 1 (or 2 bits in some rare cases) of precision, and does not handle negative, +inf, or denormalized numbers correctly.
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f psqrt<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
@@ -457,6 +462,14 @@ Packet4f psqrt<Packet4f>(const Packet4f& _x)
|
||||
return pmul(_x,x);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
|
||||
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
226
Eigen/src/Core/arch/SSE/PacketMath.h
Normal file → Executable file
226
Eigen/src/Core/arch/SSE/PacketMath.h
Normal file → Executable file
@@ -22,9 +22,41 @@ namespace internal {
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
#ifndef EIGEN_HAS_FUSED_MADD
|
||||
#define EIGEN_HAS_FUSED_MADD 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined EIGEN_VECTORIZE_AVX && defined __GNUC__ && !(defined __clang__ || defined __INTEL_COMPILER)
|
||||
// With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
|
||||
// have overloads for both types without linking error.
|
||||
// One solution is to increase ABI version using -fabi-version=4 (or greater).
|
||||
// To workaround this inconvenince, we rather wrap 128bit types into the following helper
|
||||
// structure:
|
||||
// TODO disable this wrapper if abi-versio>=4, but to detect that without asking the user to define a macro?
|
||||
template<typename T>
|
||||
struct eigen_packet_wrapper
|
||||
{
|
||||
EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
|
||||
EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
|
||||
EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
|
||||
EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
|
||||
EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
|
||||
m_val = v;
|
||||
return *this;
|
||||
}
|
||||
|
||||
T m_val;
|
||||
};
|
||||
typedef eigen_packet_wrapper<__m128> Packet4f;
|
||||
typedef eigen_packet_wrapper<__m128i> Packet4i;
|
||||
typedef eigen_packet_wrapper<__m128d> Packet2d;
|
||||
#else
|
||||
typedef __m128 Packet4f;
|
||||
typedef __m128i Packet4i;
|
||||
typedef __m128d Packet2d;
|
||||
#endif
|
||||
|
||||
template<> struct is_arithmetic<__m128> { enum { value = true }; };
|
||||
template<> struct is_arithmetic<__m128i> { enum { value = true }; };
|
||||
@@ -58,13 +90,18 @@ template<> struct is_arithmetic<__m128d> { enum { value = true }; };
|
||||
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
|
||||
|
||||
|
||||
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
|
||||
// to leverage AVX instructions.
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet4f type;
|
||||
typedef Packet4f half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasDiv = 1,
|
||||
HasSin = EIGEN_FAST_MATH,
|
||||
@@ -77,18 +114,23 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
{
|
||||
typedef Packet2d type;
|
||||
typedef Packet2d half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=2,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasDiv = 1,
|
||||
HasExp = 1
|
||||
HasExp = 1,
|
||||
HasSqrt = 1
|
||||
};
|
||||
};
|
||||
#endif
|
||||
template<> struct packet_traits<int> : default_packet_traits
|
||||
{
|
||||
typedef Packet4i type;
|
||||
typedef Packet4i half;
|
||||
enum {
|
||||
// FIXME check the Has*
|
||||
Vectorizable = 1,
|
||||
@@ -97,9 +139,9 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER==1500)
|
||||
// Workaround MSVC 9 internal compiler error.
|
||||
@@ -109,13 +151,26 @@ template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { re
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
|
||||
#else
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
|
||||
#endif
|
||||
|
||||
// GCC generates a shufps instruction for _mm_set1_ps/_mm_load1_ps instead of the more efficient pshufd instruction.
|
||||
// However, using inrinsics for pset1 makes gcc to generate crappy code in some cases (see bug 203)
|
||||
// Using inline assembly is also not an option because then gcc fails to reorder properly the instructions.
|
||||
// Therefore, we introduced the pload1 functions to be used in product kernels for which bug 203 does not apply.
|
||||
// Also note that with AVX, we want it to generate a vbroadcastss.
|
||||
#if (defined __GNUC__) && (!defined __INTEL_COMPILER) && (!defined __clang__) && (!defined __AVX__)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
|
||||
return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
|
||||
#endif
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
|
||||
@@ -138,7 +193,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
|
||||
{
|
||||
return psub(_mm_setr_epi32(0,0,0,0), a);
|
||||
return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
|
||||
@@ -172,6 +227,10 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
|
||||
|
||||
// for some weird raisons, it has to be overloaded for packet of integers
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
|
||||
@@ -217,7 +276,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, con
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
|
||||
@@ -235,7 +294,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { E
|
||||
#endif
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from)); }
|
||||
#else
|
||||
// Fast unaligned loads. Note that here we cannot directly use intrinsics: this would
|
||||
// require pointer casting to incompatible pointer types and leads to invalid code
|
||||
@@ -244,14 +303,17 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { E
|
||||
// TODO: do the same for MSVC (ICC is compatible)
|
||||
// NOTE: with the code below, MSVC's compiler crashes!
|
||||
|
||||
#if defined(__GNUC__) && defined(__i386__)
|
||||
#if defined(__GNUC__) && (defined(__i386__) || (defined(__x86_64) && EIGEN_GNUC_AT_LEAST(4, 8)))
|
||||
// bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd
|
||||
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
|
||||
#define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 1
|
||||
#elif defined(__clang__)
|
||||
// bug 201: Segfaults in __mm_loadh_pd with clang 2.8
|
||||
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
|
||||
#define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0
|
||||
#else
|
||||
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 0
|
||||
#define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
|
||||
@@ -282,7 +344,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
|
||||
{
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD
|
||||
#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
|
||||
return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from));
|
||||
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
|
||||
#else
|
||||
__m128d res;
|
||||
res = _mm_load_sd((const double*)(from)) ;
|
||||
@@ -301,38 +363,77 @@ template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
|
||||
{
|
||||
Packet4i tmp;
|
||||
tmp = _mm_loadl_epi64(reinterpret_cast<const Packet4i*>(from));
|
||||
tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
|
||||
return vec4i_swizzle1(tmp, 0, 0, 1, 1);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<Packet4i*>(to), from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE
|
||||
#if EIGEN_AVOID_CUSTOM_UNALIGNED_STORES
|
||||
_mm_storeu_pd(to, from);
|
||||
#else
|
||||
_mm_storel_pd((to), from);
|
||||
_mm_storeh_pd((to+1), from);
|
||||
#endif
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), Packet2d(_mm_castps_pd(from))); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), Packet2d(_mm_castsi128_pd(from))); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride)
|
||||
{
|
||||
return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, DenseIndex stride)
|
||||
{
|
||||
return _mm_set_pd(from[1*stride], from[0*stride]);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride)
|
||||
{
|
||||
return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride)
|
||||
{
|
||||
to[stride*0] = _mm_cvtss_f32(from);
|
||||
to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
|
||||
to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
|
||||
to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, DenseIndex stride)
|
||||
{
|
||||
to[stride*0] = _mm_cvtsd_f64(from);
|
||||
to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride)
|
||||
{
|
||||
to[stride*0] = _mm_cvtsi128_si32(from);
|
||||
to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
|
||||
to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
|
||||
to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castps_pd(from)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castsi128_pd(from)); }
|
||||
|
||||
// some compilers might be tempted to perform multiple moves instead of using a vector path.
|
||||
template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
|
||||
{
|
||||
Packet4f pa = _mm_set_ss(a);
|
||||
pstore(to, vec4f_swizzle1(pa,0,0,0,0));
|
||||
pstore(to, Packet4f(vec4f_swizzle1(pa,0,0,0,0)));
|
||||
}
|
||||
// some compilers might be tempted to perform multiple moves instead of using a vector path.
|
||||
template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
|
||||
{
|
||||
Packet2d pa = _mm_set_sd(a);
|
||||
pstore(to, vec2d_swizzle1(pa,0,0));
|
||||
pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
|
||||
}
|
||||
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && defined(_WIN64) && !defined(__INTEL_COMPILER)
|
||||
// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
|
||||
@@ -379,6 +480,38 @@ template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
|
||||
#endif
|
||||
}
|
||||
|
||||
// with AVX, the default implementations based on pload1 are faster
|
||||
#ifndef __AVX__
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet4f>(const float *a,
|
||||
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
|
||||
{
|
||||
a3 = pload<Packet4f>(a);
|
||||
a0 = vec4f_swizzle1(a3, 0,0,0,0);
|
||||
a1 = vec4f_swizzle1(a3, 1,1,1,1);
|
||||
a2 = vec4f_swizzle1(a3, 2,2,2,2);
|
||||
a3 = vec4f_swizzle1(a3, 3,3,3,3);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet2d>(const double *a,
|
||||
Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
a0 = _mm_loaddup_pd(a+0);
|
||||
a1 = _mm_loaddup_pd(a+1);
|
||||
a2 = _mm_loaddup_pd(a+2);
|
||||
a3 = _mm_loaddup_pd(a+3);
|
||||
#else
|
||||
a1 = pload<Packet2d>(a);
|
||||
a0 = vec2d_swizzle1(a1, 0,0);
|
||||
a1 = vec2d_swizzle1(a1, 1,1);
|
||||
a3 = pload<Packet2d>(a+2);
|
||||
a2 = vec2d_swizzle1(a3, 0,0);
|
||||
a3 = vec2d_swizzle1(a3, 1,1);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
|
||||
{
|
||||
vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
|
||||
@@ -406,10 +539,10 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f tmp0 = _mm_hadd_ps(a,a);
|
||||
return pfirst(_mm_hadd_ps(tmp0, tmp0));
|
||||
return pfirst<Packet4f>(_mm_hadd_ps(tmp0, tmp0));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return pfirst(_mm_hadd_pd(a, a)); }
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return pfirst<Packet2d>(_mm_hadd_pd(a, a)); }
|
||||
|
||||
// SSSE3 version:
|
||||
// EIGEN_STRONG_INLINE float predux(const Packet4i& a)
|
||||
@@ -452,7 +585,7 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
|
||||
return pfirst(tmp) + pfirst(_mm_shuffle_epi32(tmp, 1));
|
||||
return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
|
||||
@@ -475,11 +608,11 @@ template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
|
||||
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
|
||||
return pfirst(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
return pfirst<Packet4f>(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
return pfirst(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
@@ -495,42 +628,52 @@ template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
|
||||
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
|
||||
return pfirst(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
return pfirst<Packet4f>(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
return pfirst(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
|
||||
return pfirst<Packet4i>(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
|
||||
#else
|
||||
// after some experiments, it is seems this is the fastest way to implement it
|
||||
// for GCC (eg., it does not like using std::min after the pstore !!)
|
||||
EIGEN_ALIGN16 int aux[4];
|
||||
pstore(aux, a);
|
||||
register int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
|
||||
register int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
|
||||
int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
|
||||
int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
|
||||
return aux0<aux2 ? aux0 : aux2;
|
||||
#endif // EIGEN_VECTORIZE_SSE4_1
|
||||
}
|
||||
|
||||
// max
|
||||
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
|
||||
return pfirst(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
return pfirst<Packet4f>(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
return pfirst(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
|
||||
return pfirst<Packet4i>(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
|
||||
#else
|
||||
// after some experiments, it is seems this is the fastest way to implement it
|
||||
// for GCC (eg., it does not like using std::min after the pstore !!)
|
||||
EIGEN_ALIGN16 int aux[4];
|
||||
pstore(aux, a);
|
||||
register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
|
||||
register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
|
||||
int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
|
||||
int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
|
||||
return aux0>aux2 ? aux0 : aux2;
|
||||
#endif // EIGEN_VECTORIZE_SSE4_1
|
||||
}
|
||||
|
||||
#if (defined __GNUC__)
|
||||
@@ -641,6 +784,31 @@ struct palign_impl<Offset,Packet2d>
|
||||
};
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4f,4>& kernel) {
|
||||
_MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet2d,2>& kernel) {
|
||||
__m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
|
||||
kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
|
||||
kernel.packet[1] = tmp;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4i,4>& kernel) {
|
||||
__m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
|
||||
__m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
|
||||
__m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
|
||||
__m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);
|
||||
|
||||
kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);
|
||||
kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);
|
||||
kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
|
||||
kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
167
Eigen/src/Core/functors/AssignmentFunctors.h
Normal file
167
Eigen/src/Core/functors/AssignmentFunctors.h
Normal file
@@ -0,0 +1,167 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_ASSIGNMENT_FUNCTORS_H
|
||||
#define EIGEN_ASSIGNMENT_FUNCTORS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment
|
||||
*
|
||||
*/
|
||||
template<typename Scalar> struct assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a = b; }
|
||||
|
||||
template<int Alignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
|
||||
{ internal::pstoret<Scalar,Packet,Alignment>(a,b); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<assign_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::ReadCost,
|
||||
PacketAccess = packet_traits<Scalar>::IsVectorized
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment with addition
|
||||
*
|
||||
*/
|
||||
template<typename Scalar> struct add_assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(add_assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a += b; }
|
||||
|
||||
template<int Alignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
|
||||
{ internal::pstoret<Scalar,Packet,Alignment>(a,internal::padd(internal::ploadt<Packet,Alignment>(a),b)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<add_assign_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasAdd
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment with subtraction
|
||||
*
|
||||
*/
|
||||
template<typename Scalar> struct sub_assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a -= b; }
|
||||
|
||||
template<int Alignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
|
||||
{ internal::pstoret<Scalar,Packet,Alignment>(a,internal::psub(internal::ploadt<Packet,Alignment>(a),b)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<sub_assign_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasAdd
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment with multiplication
|
||||
*
|
||||
*/
|
||||
template<typename Scalar> struct mul_assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a *= b; }
|
||||
|
||||
template<int Alignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
|
||||
{ internal::pstoret<Scalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<mul_assign_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasMul
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment with diviving
|
||||
*
|
||||
*/
|
||||
template<typename Scalar> struct div_assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(div_assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a /= b; }
|
||||
|
||||
template<int Alignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
|
||||
{ internal::pstoret<Scalar,Packet,Alignment>(a,internal::pdiv(internal::ploadt<Packet,Alignment>(a),b)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<div_assign_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasMul
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment with swaping
|
||||
*
|
||||
* It works as follow. For a non-vectorized evaluation loop, we have:
|
||||
* for(i) func(A.coeffRef(i), B.coeff(i));
|
||||
* where B is a SwapWrapper expression. The trick is to make SwapWrapper::coeff behaves like a non-const coeffRef.
|
||||
* Actually, SwapWrapper might not even be needed since even if B is a plain expression, since it has to be writable
|
||||
* B.coeff already returns a const reference to the underlying scalar value.
|
||||
*
|
||||
* The case of a vectorized loop is more tricky:
|
||||
* for(i,j) func.assignPacket<A_Align>(&A.coeffRef(i,j), B.packet<B_Align>(i,j));
|
||||
* Here, B must be a SwapWrapper whose packet function actually returns a proxy object holding a Scalar*,
|
||||
* the actual alignment and Packet type.
|
||||
*
|
||||
*/
|
||||
template<typename Scalar> struct swap_assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
|
||||
{
|
||||
using std::swap;
|
||||
swap(a,const_cast<Scalar&>(b));
|
||||
}
|
||||
|
||||
template<int LhsAlignment, int RhsAlignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void swapPacket(Scalar* a, Scalar* b) const
|
||||
{
|
||||
Packet tmp = internal::ploadt<Packet,RhsAlignment>(b);
|
||||
internal::pstoret<Scalar,Packet,RhsAlignment>(b, internal::ploadt<Packet,LhsAlignment>(a));
|
||||
internal::pstoret<Scalar,Packet,LhsAlignment>(a, tmp);
|
||||
}
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<swap_assign_op<Scalar> > {
|
||||
enum {
|
||||
Cost = 3 * NumTraits<Scalar>::ReadCost,
|
||||
PacketAccess = packet_traits<Scalar>::IsVectorized
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#endif // EIGEN_ASSIGNMENT_FUNCTORS_H
|
||||
456
Eigen/src/Core/functors/BinaryFunctors.h
Normal file
456
Eigen/src/Core/functors/BinaryFunctors.h
Normal file
@@ -0,0 +1,456 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_BINARY_FUNCTORS_H
|
||||
#define EIGEN_BINARY_FUNCTORS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
//---------- associative binary functors ----------
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the sum of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, DenseBase::sum()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_sum_op {
|
||||
// typedef Scalar result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::padd(a,b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
|
||||
{ return internal::predux(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_sum_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasAdd
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template specialization to deprecate the summation of boolean expressions.
|
||||
* This is required to solve Bug 426.
|
||||
* \sa DenseBase::count(), DenseBase::any(), ArrayBase::cast(), MatrixBase::cast()
|
||||
*/
|
||||
template<> struct scalar_sum_op<bool> : scalar_sum_op<int> {
|
||||
EIGEN_DEPRECATED
|
||||
scalar_sum_op() {}
|
||||
};
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the product of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux()
|
||||
*/
|
||||
template<typename LhsScalar,typename RhsScalar> struct scalar_product_op {
|
||||
enum {
|
||||
// TODO vectorize mixed product
|
||||
Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
|
||||
};
|
||||
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::pmul(a,b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
|
||||
{ return internal::predux_mul(a); }
|
||||
};
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
|
||||
enum {
|
||||
Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
|
||||
PacketAccess = scalar_product_op<LhsScalar,RhsScalar>::Vectorizable
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the conjugate product of two scalars
|
||||
*
|
||||
* This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y)
|
||||
*/
|
||||
template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op {
|
||||
|
||||
enum {
|
||||
Conj = NumTraits<LhsScalar>::IsComplex
|
||||
};
|
||||
|
||||
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
|
||||
{ return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }
|
||||
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
|
||||
};
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<LhsScalar>::MulCost,
|
||||
PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMul
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the min of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_min_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { EIGEN_USING_STD_MATH(min); return (min)(a, b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::pmin(a,b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
|
||||
{ return internal::predux_min(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_min_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasMin
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the max of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_max_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { EIGEN_USING_STD_MATH(max); return (max)(a, b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::pmax(a,b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
|
||||
{ return internal::predux_max(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_max_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasMax
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the hypot of two scalars
|
||||
*
|
||||
* \sa MatrixBase::stableNorm(), class Redux
|
||||
*/
|
||||
template<typename Scalar> struct scalar_hypot_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
|
||||
// typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
|
||||
{
|
||||
EIGEN_USING_STD_MATH(max);
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
using std::sqrt;
|
||||
Scalar p, qp;
|
||||
if(_x>_y)
|
||||
{
|
||||
p = _x;
|
||||
qp = _y / p;
|
||||
}
|
||||
else
|
||||
{
|
||||
p = _y;
|
||||
qp = _x / p;
|
||||
}
|
||||
return p * sqrt(Scalar(1) + qp*qp);
|
||||
}
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_hypot_op<Scalar> > {
|
||||
enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess=0 };
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the pow of two scalars
|
||||
*/
|
||||
template<typename Scalar, typename OtherScalar> struct scalar_binary_pow_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op)
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); }
|
||||
};
|
||||
template<typename Scalar, typename OtherScalar>
|
||||
struct functor_traits<scalar_binary_pow_op<Scalar,OtherScalar> > {
|
||||
enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
|
||||
};
|
||||
|
||||
|
||||
|
||||
//---------- non associative binary functors ----------
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the difference of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, MatrixBase::operator-
|
||||
*/
|
||||
template<typename Scalar> struct scalar_difference_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::psub(a,b); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_difference_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasSub
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the quotient of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, Cwise::operator/()
|
||||
*/
|
||||
template<typename LhsScalar,typename RhsScalar> struct scalar_quotient_op {
|
||||
enum {
|
||||
// TODO vectorize mixed product
|
||||
Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv
|
||||
};
|
||||
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::pdiv(a,b); }
|
||||
};
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
|
||||
enum {
|
||||
Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost), // rough estimate!
|
||||
PacketAccess = scalar_quotient_op<LhsScalar,RhsScalar>::Vectorizable
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the and of two booleans
|
||||
*
|
||||
* \sa class CwiseBinaryOp, ArrayBase::operator&&
|
||||
*/
|
||||
struct scalar_boolean_and_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
|
||||
};
|
||||
template<> struct functor_traits<scalar_boolean_and_op> {
|
||||
enum {
|
||||
Cost = NumTraits<bool>::AddCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the or of two booleans
|
||||
*
|
||||
* \sa class CwiseBinaryOp, ArrayBase::operator||
|
||||
*/
|
||||
struct scalar_boolean_or_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
|
||||
};
|
||||
template<> struct functor_traits<scalar_boolean_or_op> {
|
||||
enum {
|
||||
Cost = NumTraits<bool>::AddCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
//---------- binary functors bound to a constant, thus appearing as a unary functor ----------
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to multiply a scalar by a fixed other one
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/
|
||||
*/
|
||||
/* NOTE why doing the pset1() in packetOp *is* an optimization ?
|
||||
* indeed it seems better to declare m_other as a Packet and do the pset1() once
|
||||
* in the constructor. However, in practice:
|
||||
* - GCC does not like m_other as a Packet and generate a load every time it needs it
|
||||
* - on the other hand GCC is able to moves the pset1() outside the loop :)
|
||||
* - simpler code ;)
|
||||
* (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y)
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_multiple_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pmul(a, pset1<Packet>(m_other)); }
|
||||
typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_multiple_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
|
||||
|
||||
template<typename Scalar1, typename Scalar2>
|
||||
struct scalar_multiple2_op {
|
||||
typedef typename scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; }
|
||||
typename add_const_on_value_type<typename NumTraits<Scalar2>::Nested>::type m_other;
|
||||
};
|
||||
template<typename Scalar1,typename Scalar2>
|
||||
struct functor_traits<scalar_multiple2_op<Scalar1,Scalar2> >
|
||||
{ enum { Cost = NumTraits<Scalar1>::MulCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to divide a scalar by a fixed other one
|
||||
*
|
||||
* This functor is used to implement the quotient of a matrix by
|
||||
* a scalar where the scalar type is not necessarily a floating point type.
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::operator/
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_quotient1_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; }
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pdiv(a, pset1<Packet>(m_other)); }
|
||||
typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_quotient1_op<Scalar> >
|
||||
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
|
||||
|
||||
// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication
|
||||
// where the mixing of different types is handled by scalar_product_traits
|
||||
// In particular, real * complex<real> is allowed.
|
||||
// FIXME move this to functor_traits adding a functor_default
|
||||
template<typename Functor> struct functor_is_product_like { enum { ret = 0 }; };
|
||||
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
|
||||
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
|
||||
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_quotient_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to add a scalar to a fixed other one
|
||||
* \sa class CwiseUnaryOp, Array::operator+
|
||||
*/
|
||||
/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */
|
||||
template<typename Scalar>
|
||||
struct scalar_add_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
EIGEN_DEVICE_FUNC inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_DEVICE_FUNC inline scalar_add_op(const Scalar& other) : m_other(other) { }
|
||||
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a + m_other; }
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::padd(a, pset1<Packet>(m_other)); }
|
||||
const Scalar m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_add_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to subtract a fixed scalar to another one
|
||||
* \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_rsub_op
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_sub_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline scalar_sub_op(const scalar_sub_op& other) : m_other(other.m_other) { }
|
||||
inline scalar_sub_op(const Scalar& other) : m_other(other) { }
|
||||
inline Scalar operator() (const Scalar& a) const { return a - m_other; }
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::psub(a, pset1<Packet>(m_other)); }
|
||||
const Scalar m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_sub_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to subtract a scalar to fixed another one
|
||||
* \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_sub_op
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_rsub_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline scalar_rsub_op(const scalar_rsub_op& other) : m_other(other.m_other) { }
|
||||
inline scalar_rsub_op(const Scalar& other) : m_other(other) { }
|
||||
inline Scalar operator() (const Scalar& a) const { return m_other - a; }
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::psub(pset1<Packet>(m_other), a); }
|
||||
const Scalar m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_rsub_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to raise a scalar to a power
|
||||
* \sa class CwiseUnaryOp, Cwise::pow
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_pow_op {
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { }
|
||||
inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {}
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); }
|
||||
const Scalar m_exponent;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_pow_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the quotient between a scalar and array entries.
|
||||
* \sa class CwiseUnaryOp, Cwise::inverse()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_inverse_mult_op {
|
||||
scalar_inverse_mult_op(const Scalar& other) : m_other(other) {}
|
||||
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other / a; }
|
||||
template<typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pdiv(pset1<Packet>(m_other),a); }
|
||||
Scalar m_other;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_BINARY_FUNCTORS_H
|
||||
6
Eigen/src/Core/functors/CMakeLists.txt
Normal file
6
Eigen/src/Core/functors/CMakeLists.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
FILE(GLOB Eigen_Core_Functor_SRCS "*.h")
|
||||
|
||||
INSTALL(FILES
|
||||
${Eigen_Core_Functor_SRCS}
|
||||
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/functors COMPONENT Devel
|
||||
)
|
||||
158
Eigen/src/Core/functors/NullaryFunctors.h
Normal file
158
Eigen/src/Core/functors/NullaryFunctors.h
Normal file
@@ -0,0 +1,158 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_NULLARY_FUNCTORS_H
|
||||
#define EIGEN_NULLARY_FUNCTORS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Scalar>
|
||||
struct scalar_constant_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; }
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return internal::pset1<Packet>(m_other); }
|
||||
const Scalar m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_constant_op<Scalar> >
|
||||
// FIXME replace this packet test by a safe one
|
||||
{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };
|
||||
|
||||
template<typename Scalar> struct scalar_identity_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? Scalar(1) : Scalar(0); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_identity_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
|
||||
|
||||
template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
|
||||
|
||||
// linear access for packet ops:
|
||||
// 1) initialization
|
||||
// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0])
|
||||
// 2) each step (where size is 1 for coeff access or PacketSize for packet access)
|
||||
// base += [size*step, ..., size*step]
|
||||
//
|
||||
// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
|
||||
// in order to avoid the padd() in operator() ?
|
||||
template <typename Scalar>
|
||||
struct linspaced_op_impl<Scalar,false>
|
||||
{
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
|
||||
linspaced_op_impl(const Scalar& low, const Scalar& step) :
|
||||
m_low(low), m_step(step),
|
||||
m_packetStep(pset1<Packet>(packet_traits<Scalar>::size*step)),
|
||||
m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const
|
||||
{
|
||||
m_base = padd(m_base, pset1<Packet>(m_step));
|
||||
return m_low+Scalar(i)*m_step;
|
||||
}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); }
|
||||
|
||||
const Scalar m_low;
|
||||
const Scalar m_step;
|
||||
const Packet m_packetStep;
|
||||
mutable Packet m_base;
|
||||
};
|
||||
|
||||
// random access for packet ops:
|
||||
// 1) each step
|
||||
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
|
||||
template <typename Scalar>
|
||||
struct linspaced_op_impl<Scalar,true>
|
||||
{
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
|
||||
linspaced_op_impl(const Scalar& low, const Scalar& step) :
|
||||
m_low(low), m_step(step),
|
||||
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Scalar>(0)) {}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index i) const
|
||||
{ return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(Scalar(i)),m_interPacket))); }
|
||||
|
||||
const Scalar m_low;
|
||||
const Scalar m_step;
|
||||
const Packet m_lowPacket;
|
||||
const Packet m_stepPacket;
|
||||
const Packet m_interPacket;
|
||||
};
|
||||
|
||||
// ----- Linspace functor ----------------------------------------------------------------
|
||||
|
||||
// Forward declaration (we default to random access which does not really give
|
||||
// us a speed gain when using packet access but it allows to use the functor in
|
||||
// nested expressions).
|
||||
template <typename Scalar, bool RandomAccess = true> struct linspaced_op;
|
||||
template <typename Scalar, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,RandomAccess> >
|
||||
{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
|
||||
template <typename Scalar, bool RandomAccess> struct linspaced_op
|
||||
{
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
|
||||
|
||||
// We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
|
||||
// there row==0 and col is used for the actual iteration.
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const
|
||||
{
|
||||
eigen_assert(col==0 || row==0);
|
||||
return impl(col + row);
|
||||
}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); }
|
||||
|
||||
// We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
|
||||
// there row==0 and col is used for the actual iteration.
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
|
||||
{
|
||||
eigen_assert(col==0 || row==0);
|
||||
return impl.packetOp(col + row);
|
||||
}
|
||||
|
||||
// This proxy object handles the actual required temporaries, the different
|
||||
// implementations (random vs. sequential access) as well as the
|
||||
// correct piping to size 2/4 packet operations.
|
||||
const linspaced_op_impl<Scalar,RandomAccess> impl;
|
||||
};
|
||||
|
||||
// all functors allow linear access, except scalar_identity_op. So we fix here a quick meta
|
||||
// to indicate whether a functor allows linear access, just always answering 'yes' except for
|
||||
// scalar_identity_op.
|
||||
// FIXME move this to functor_traits adding a functor_default
|
||||
template<typename Functor> struct functor_has_linear_access { enum { ret = 1 }; };
|
||||
template<typename Scalar> struct functor_has_linear_access<scalar_identity_op<Scalar> > { enum { ret = 0 }; };
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_NULLARY_FUNCTORS_H
|
||||
129
Eigen/src/Core/functors/StlFunctors.h
Normal file
129
Eigen/src/Core/functors/StlFunctors.h
Normal file
@@ -0,0 +1,129 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_STL_FUNCTORS_H
|
||||
#define EIGEN_STL_FUNCTORS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// default functor traits for STL functors:
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::multiplies<T> >
|
||||
{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::divides<T> >
|
||||
{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::plus<T> >
|
||||
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::minus<T> >
|
||||
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::negate<T> >
|
||||
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::logical_or<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::logical_and<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::logical_not<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::greater<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::less<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::greater_equal<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::less_equal<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::equal_to<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::not_equal_to<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::binder2nd<T> >
|
||||
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::binder1st<T> >
|
||||
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::unary_negate<T> >
|
||||
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::binary_negate<T> >
|
||||
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
|
||||
#ifdef EIGEN_STDEXT_SUPPORT
|
||||
|
||||
template<typename T0,typename T1>
|
||||
struct functor_traits<std::project1st<T0,T1> >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
template<typename T0,typename T1>
|
||||
struct functor_traits<std::project2nd<T0,T1> >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
template<typename T0,typename T1>
|
||||
struct functor_traits<std::select2nd<std::pair<T0,T1> > >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
template<typename T0,typename T1>
|
||||
struct functor_traits<std::select1st<std::pair<T0,T1> > >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
template<typename T0,typename T1>
|
||||
struct functor_traits<std::unary_compose<T0,T1> >
|
||||
{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost, PacketAccess = false }; };
|
||||
|
||||
template<typename T0,typename T1,typename T2>
|
||||
struct functor_traits<std::binary_compose<T0,T1,T2> >
|
||||
{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost + functor_traits<T2>::Cost, PacketAccess = false }; };
|
||||
|
||||
#endif // EIGEN_STDEXT_SUPPORT
|
||||
|
||||
// allow to add new functors and specializations of functor_traits from outside Eigen.
|
||||
// this macro is really needed because functor_traits must be specialized after it is declared but before it is used...
|
||||
#ifdef EIGEN_FUNCTORS_PLUGIN
|
||||
#include EIGEN_FUNCTORS_PLUGIN
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_STL_FUNCTORS_H
|
||||
396
Eigen/src/Core/functors/UnaryFunctors.h
Normal file
396
Eigen/src/Core/functors/UnaryFunctors.h
Normal file
@@ -0,0 +1,396 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_UNARY_FUNCTORS_H
|
||||
#define EIGEN_UNARY_FUNCTORS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the opposite of a scalar
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::operator-
|
||||
*/
|
||||
template<typename Scalar> struct scalar_opposite_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pnegate(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_opposite_op<Scalar> >
|
||||
{ enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasNegate };
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the absolute value of a scalar
|
||||
*
|
||||
* \sa class CwiseUnaryOp, Cwise::abs
|
||||
*/
|
||||
template<typename Scalar> struct scalar_abs_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pabs(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_abs_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasAbs
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the squared absolute value of a scalar
|
||||
*
|
||||
* \sa class CwiseUnaryOp, Cwise::abs2
|
||||
*/
|
||||
template<typename Scalar> struct scalar_abs2_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pmul(a,a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_abs2_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasAbs2 }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the conjugate of a complex value
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::conjugate()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_conjugate_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_conjugate_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0,
|
||||
PacketAccess = packet_traits<Scalar>::HasConj
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to cast a scalar to another type
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::cast()
|
||||
*/
|
||||
template<typename Scalar, typename NewType>
|
||||
struct scalar_cast_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
||||
typedef NewType result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }
|
||||
};
|
||||
template<typename Scalar, typename NewType>
|
||||
struct functor_traits<scalar_cast_op<Scalar,NewType> >
|
||||
{ enum { Cost = is_same<Scalar, NewType>::value ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to extract the real part of a complex
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::real()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_real_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_real_op<Scalar> >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to extract the imaginary part of a complex
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::imag()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_imag_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_imag_op<Scalar> >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to extract the real part of a complex as a reference
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::real()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_real_ref_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast<Scalar*>(&a)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_real_ref_op<Scalar> >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to extract the imaginary part of a complex as a reference
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::imag()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_imag_ref_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast<Scalar*>(&a)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_imag_ref_op<Scalar> >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
*
|
||||
* \brief Template functor to compute the exponential of a scalar
|
||||
*
|
||||
* \sa class CwiseUnaryOp, Cwise::exp()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_exp_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_exp_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasExp }; };
|
||||
|
||||
/** \internal
|
||||
*
|
||||
* \brief Template functor to compute the logarithm of a scalar
|
||||
*
|
||||
* \sa class CwiseUnaryOp, Cwise::log()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_log_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_log_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog }; };
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the square root of a scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::sqrt()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_sqrt_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_sqrt_op<Scalar> >
|
||||
{ enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasSqrt
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the cosine of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::cos()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_cos_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
|
||||
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_cos_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasCos
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the sine of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::sin()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_sin_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_sin_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasSin
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the tan of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::tan()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_tan_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_tan_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasTan
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the arc cosine of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::acos()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_acos_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_acos_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasACos
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the arc sine of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::asin()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_asin_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_asin_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasASin
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the atan of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::atan()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_atan_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op)
|
||||
inline const Scalar operator() (const Scalar& a) const { using std::atan; return atan(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::patan(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_atan_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasATan
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the inverse of a scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::inverse()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_inverse_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op)
|
||||
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
|
||||
template<typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pdiv(pset1<Packet>(Scalar(1)),a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_inverse_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the square of a scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::square()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_square_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op)
|
||||
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a; }
|
||||
template<typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pmul(a,a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_square_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the cube of a scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::cube()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_cube_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op)
|
||||
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a*a; }
|
||||
template<typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pmul(a,pmul(a,a)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_cube_op<Scalar> >
|
||||
{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
|
||||
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_FUNCTORS_H
|
||||
@@ -85,7 +85,8 @@ struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
|
||||
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
|
||||
| (EvalToRowMajor ? RowMajorBit : 0)
|
||||
| NestingFlags
|
||||
| (LhsFlags & RhsFlags & AlignedBit)
|
||||
| (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0)
|
||||
| (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0)
|
||||
// TODO enable vectorization for mixed types
|
||||
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
|
||||
|
||||
@@ -140,11 +141,13 @@ class CoeffBasedProduct
|
||||
|
||||
public:
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffBasedProduct(const CoeffBasedProduct& other)
|
||||
: Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
|
||||
{}
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
|
||||
: m_lhs(lhs), m_rhs(rhs)
|
||||
{
|
||||
@@ -157,9 +160,10 @@ class CoeffBasedProduct
|
||||
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
|
||||
EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
|
||||
{
|
||||
Scalar res;
|
||||
@@ -170,6 +174,7 @@ class CoeffBasedProduct
|
||||
/* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
|
||||
* which is why we don't set the LinearAccessBit.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
|
||||
{
|
||||
Scalar res;
|
||||
@@ -191,22 +196,26 @@ class CoeffBasedProduct
|
||||
}
|
||||
|
||||
// Implicit conversion to the nested type (trigger the evaluation of the product)
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE operator const PlainObject& () const
|
||||
{
|
||||
m_result.lazyAssign(*this);
|
||||
return m_result;
|
||||
}
|
||||
|
||||
const _LhsNested& lhs() const { return m_lhs; }
|
||||
const _RhsNested& rhs() const { return m_rhs; }
|
||||
EIGEN_DEVICE_FUNC const _LhsNested& lhs() const { return m_lhs; }
|
||||
EIGEN_DEVICE_FUNC const _RhsNested& rhs() const { return m_rhs; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
|
||||
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
|
||||
|
||||
template<int DiagonalIndex>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
|
||||
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
|
||||
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
|
||||
|
||||
@@ -239,6 +248,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
||||
{
|
||||
product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
|
||||
@@ -250,6 +260,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
||||
{
|
||||
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
|
||||
@@ -260,6 +271,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
|
||||
{
|
||||
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
|
||||
@@ -305,7 +317,6 @@ struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, Re
|
||||
{
|
||||
Packet pres;
|
||||
product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
|
||||
product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
|
||||
res = predux(pres);
|
||||
}
|
||||
};
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -23,6 +23,8 @@ template<
|
||||
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
|
||||
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
|
||||
{
|
||||
typedef gebp_traits<RhsScalar,LhsScalar> Traits;
|
||||
|
||||
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
static EIGEN_STRONG_INLINE void run(
|
||||
Index rows, Index cols, Index depth,
|
||||
@@ -51,6 +53,8 @@ template<
|
||||
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
|
||||
{
|
||||
|
||||
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
|
||||
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
static void run(Index rows, Index cols, Index depth,
|
||||
const LhsScalar* _lhs, Index lhsStride,
|
||||
@@ -63,11 +67,9 @@ static void run(Index rows, Index cols, Index depth,
|
||||
const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
|
||||
const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
|
||||
|
||||
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
|
||||
Index kc = blocking.kc(); // cache block size along the K direction
|
||||
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
||||
//Index nc = blocking.nc(); // cache block size along the N direction
|
||||
Index nc = (std::min)(cols,blocking.nc()); // cache block size along the N direction
|
||||
|
||||
gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
||||
gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
|
||||
@@ -80,68 +82,68 @@ static void run(Index rows, Index cols, Index depth,
|
||||
Index tid = omp_get_thread_num();
|
||||
Index threads = omp_get_num_threads();
|
||||
|
||||
std::size_t sizeA = kc*mc;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, 0);
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar, w, sizeW, 0);
|
||||
LhsScalar* blockA = blocking.blockA();
|
||||
eigen_internal_assert(blockA!=0);
|
||||
|
||||
RhsScalar* blockB = blocking.blockB();
|
||||
eigen_internal_assert(blockB!=0);
|
||||
|
||||
std::size_t sizeB = kc*nc;
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);
|
||||
|
||||
// For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
|
||||
for(Index k=0; k<depth; k+=kc)
|
||||
{
|
||||
const Index actual_kc = (std::min)(k+kc,depth)-k; // => rows of B', and cols of the A'
|
||||
|
||||
// In order to reduce the chance that a thread has to wait for the other,
|
||||
// let's start by packing A'.
|
||||
pack_lhs(blockA, &lhs(0,k), lhsStride, actual_kc, mc);
|
||||
// let's start by packing B'.
|
||||
pack_rhs(blockB, &rhs(k,0), rhsStride, actual_kc, nc);
|
||||
|
||||
// Pack B_k to B' in a parallel fashion:
|
||||
// each thread packs the sub block B_k,j to B'_j where j is the thread id.
|
||||
// Pack A_k to A' in a parallel fashion:
|
||||
// each thread packs the sub block A_k,i to A'_i where i is the thread id.
|
||||
|
||||
// However, before copying to B'_j, we have to make sure that no other thread is still using it,
|
||||
// However, before copying to A'_i, we have to make sure that no other thread is still using it,
|
||||
// i.e., we test that info[tid].users equals 0.
|
||||
// Then, we set info[tid].users to the number of threads to mark that all other threads are going to use it.
|
||||
while(info[tid].users!=0) {}
|
||||
info[tid].users += threads;
|
||||
|
||||
pack_lhs(blockA+info[tid].lhs_start*actual_kc, &lhs(info[tid].lhs_start,k), lhsStride, actual_kc, info[tid].lhs_length);
|
||||
|
||||
pack_rhs(blockB+info[tid].rhs_start*actual_kc, &rhs(k,info[tid].rhs_start), rhsStride, actual_kc, info[tid].rhs_length);
|
||||
|
||||
// Notify the other threads that the part B'_j is ready to go.
|
||||
// Notify the other threads that the part A'_i is ready to go.
|
||||
info[tid].sync = k;
|
||||
|
||||
// Computes C_i += A' * B' per B'_j
|
||||
|
||||
// Computes C_i += A' * B' per A'_i
|
||||
for(Index shift=0; shift<threads; ++shift)
|
||||
{
|
||||
Index j = (tid+shift)%threads;
|
||||
Index i = (tid+shift)%threads;
|
||||
|
||||
// At this point we have to make sure that B'_j has been updated by the thread j,
|
||||
// At this point we have to make sure that A'_i has been updated by the thread i,
|
||||
// we use testAndSetOrdered to mimic a volatile access.
|
||||
// However, no need to wait for the B' part which has been updated by the current thread!
|
||||
if(shift>0)
|
||||
while(info[j].sync!=k) {}
|
||||
|
||||
gebp(res+info[j].rhs_start*resStride, resStride, blockA, blockB+info[j].rhs_start*actual_kc, mc, actual_kc, info[j].rhs_length, alpha, -1,-1,0,0, w);
|
||||
while(info[i].sync!=k) {}
|
||||
gebp(res+info[i].lhs_start, resStride, blockA+info[i].lhs_start*actual_kc, blockB, info[i].lhs_length, actual_kc, nc, alpha);
|
||||
}
|
||||
|
||||
// Then keep going as usual with the remaining A'
|
||||
for(Index i=mc; i<rows; i+=mc)
|
||||
// Then keep going as usual with the remaining B'
|
||||
for(Index j=nc; j<cols; j+=nc)
|
||||
{
|
||||
const Index actual_mc = (std::min)(i+mc,rows)-i;
|
||||
const Index actual_nc = (std::min)(j+nc,cols)-j;
|
||||
|
||||
// pack A_i,k to A'
|
||||
pack_lhs(blockA, &lhs(i,k), lhsStride, actual_kc, actual_mc);
|
||||
// pack B_k,j to B'
|
||||
pack_rhs(blockB, &rhs(k,j), rhsStride, actual_kc, actual_nc);
|
||||
|
||||
// C_i += A' * B'
|
||||
gebp(res+i, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1,-1,0,0, w);
|
||||
// C_j += A' * B'
|
||||
gebp(res+j*resStride, resStride, blockA, blockB, rows, actual_kc, actual_nc, alpha);
|
||||
}
|
||||
|
||||
// Release all the sub blocks B'_j of B' for the current thread,
|
||||
// Release all the sub blocks A'_i of A' for the current thread,
|
||||
// i.e., we simply decrement the number of users by 1
|
||||
for(Index j=0; j<threads; ++j)
|
||||
#pragma omp critical
|
||||
{
|
||||
for(Index i=0; i<threads; ++i)
|
||||
#pragma omp atomic
|
||||
--(info[j].users);
|
||||
--(info[i].users);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -151,38 +153,34 @@ static void run(Index rows, Index cols, Index depth,
|
||||
|
||||
// this is the sequential version!
|
||||
std::size_t sizeA = kc*mc;
|
||||
std::size_t sizeB = kc*cols;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = kc*nc;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockW, sizeW, blocking.blockW());
|
||||
|
||||
// For each horizontal panel of the rhs, and corresponding panel of the lhs...
|
||||
// (==GEMM_VAR1)
|
||||
for(Index k2=0; k2<depth; k2+=kc)
|
||||
{
|
||||
const Index actual_kc = (std::min)(k2+kc,depth)-k2;
|
||||
|
||||
// OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
|
||||
// => Pack rhs's panel into a sequential chunk of memory (L2 caching)
|
||||
// Note that this panel will be read as many times as the number of blocks in the lhs's
|
||||
// vertical panel which is, in practice, a very low number.
|
||||
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
|
||||
// => Pack lhs's panel into a sequential chunk of memory (L2/L3 caching)
|
||||
// Note that this panel will be read as many times as the number of blocks in the rhs's
|
||||
// horizontal panel which is, in practice, a very low number.
|
||||
pack_lhs(blockA, &lhs(0,k2), lhsStride, actual_kc, rows);
|
||||
|
||||
// For each mc x kc block of the lhs's vertical panel...
|
||||
// (==GEPP_VAR1)
|
||||
for(Index i2=0; i2<rows; i2+=mc)
|
||||
// For each kc x nc block of the rhs's horizontal panel...
|
||||
for(Index j2=0; j2<cols; j2+=nc)
|
||||
{
|
||||
const Index actual_mc = (std::min)(i2+mc,rows)-i2;
|
||||
const Index actual_nc = (std::min)(j2+nc,cols)-j2;
|
||||
|
||||
// We pack the lhs's block into a sequential chunk of memory (L1 caching)
|
||||
// We pack the rhs's block into a sequential chunk of memory (L2 caching)
|
||||
// Note that this block will be read a very high number of times, which is equal to the number of
|
||||
// micro vertical panel of the large rhs's panel (e.g., cols/4 times).
|
||||
pack_lhs(blockA, &lhs(i2,k2), lhsStride, actual_kc, actual_mc);
|
||||
// micro horizontal panel of the large rhs's panel (e.g., rows/12 times).
|
||||
pack_rhs(blockB, &rhs(k2,j2), rhsStride, actual_kc, actual_nc);
|
||||
|
||||
// Everything is packed, we can now call the block * panel kernel:
|
||||
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
|
||||
// Everything is packed, we can now call the panel * block kernel:
|
||||
gebp(res+j2*resStride, resStride, blockA, blockB, rows, actual_kc, actual_nc, alpha);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -203,14 +201,13 @@ struct traits<GeneralProduct<Lhs,Rhs,GemmProduct> >
|
||||
template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
|
||||
struct gemm_functor
|
||||
{
|
||||
gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha,
|
||||
BlockingType& blocking)
|
||||
gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha, BlockingType& blocking)
|
||||
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
|
||||
{}
|
||||
|
||||
void initParallelSession() const
|
||||
{
|
||||
m_blocking.allocateB();
|
||||
m_blocking.allocateA();
|
||||
}
|
||||
|
||||
void operator() (Index row, Index rows, Index col=0, Index cols=-1, GemmParallelInfo<Index>* info=0) const
|
||||
@@ -224,6 +221,8 @@ struct gemm_functor
|
||||
(Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
|
||||
m_actualAlpha, m_blocking, info);
|
||||
}
|
||||
|
||||
typedef typename Gemm::Traits Traits;
|
||||
|
||||
protected:
|
||||
const Lhs& m_lhs;
|
||||
@@ -245,7 +244,6 @@ class level3_blocking
|
||||
protected:
|
||||
LhsScalar* m_blockA;
|
||||
RhsScalar* m_blockB;
|
||||
RhsScalar* m_blockW;
|
||||
|
||||
DenseIndex m_mc;
|
||||
DenseIndex m_nc;
|
||||
@@ -254,7 +252,7 @@ class level3_blocking
|
||||
public:
|
||||
|
||||
level3_blocking()
|
||||
: m_blockA(0), m_blockB(0), m_blockW(0), m_mc(0), m_nc(0), m_kc(0)
|
||||
: m_blockA(0), m_blockB(0), m_mc(0), m_nc(0), m_kc(0)
|
||||
{}
|
||||
|
||||
inline DenseIndex mc() const { return m_mc; }
|
||||
@@ -263,7 +261,6 @@ class level3_blocking
|
||||
|
||||
inline LhsScalar* blockA() { return m_blockA; }
|
||||
inline RhsScalar* blockB() { return m_blockB; }
|
||||
inline RhsScalar* blockW() { return m_blockW; }
|
||||
};
|
||||
|
||||
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
|
||||
@@ -282,29 +279,25 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
||||
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
enum {
|
||||
SizeA = ActualRows * MaxDepth,
|
||||
SizeB = ActualCols * MaxDepth,
|
||||
SizeW = MaxDepth * Traits::WorkSpaceFactor
|
||||
SizeB = ActualCols * MaxDepth
|
||||
};
|
||||
|
||||
EIGEN_ALIGN16 LhsScalar m_staticA[SizeA];
|
||||
EIGEN_ALIGN16 RhsScalar m_staticB[SizeB];
|
||||
EIGEN_ALIGN16 RhsScalar m_staticW[SizeW];
|
||||
EIGEN_ALIGN_DEFAULT LhsScalar m_staticA[SizeA];
|
||||
EIGEN_ALIGN_DEFAULT RhsScalar m_staticB[SizeB];
|
||||
|
||||
public:
|
||||
|
||||
gemm_blocking_space(DenseIndex /*rows*/, DenseIndex /*cols*/, DenseIndex /*depth*/)
|
||||
gemm_blocking_space(DenseIndex /*rows*/, DenseIndex /*cols*/, DenseIndex /*depth*/, bool /*full_rows*/ = false)
|
||||
{
|
||||
this->m_mc = ActualRows;
|
||||
this->m_nc = ActualCols;
|
||||
this->m_kc = MaxDepth;
|
||||
this->m_blockA = m_staticA;
|
||||
this->m_blockB = m_staticB;
|
||||
this->m_blockW = m_staticW;
|
||||
}
|
||||
|
||||
inline void allocateA() {}
|
||||
inline void allocateB() {}
|
||||
inline void allocateW() {}
|
||||
inline void allocateAll() {}
|
||||
};
|
||||
|
||||
@@ -323,20 +316,28 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
||||
|
||||
DenseIndex m_sizeA;
|
||||
DenseIndex m_sizeB;
|
||||
DenseIndex m_sizeW;
|
||||
|
||||
public:
|
||||
|
||||
gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth)
|
||||
gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth, bool full_rows = false)
|
||||
{
|
||||
this->m_mc = Transpose ? cols : rows;
|
||||
this->m_nc = Transpose ? rows : cols;
|
||||
this->m_kc = depth;
|
||||
|
||||
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc);
|
||||
if(full_rows)
|
||||
{
|
||||
DenseIndex m = this->m_mc;
|
||||
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc);
|
||||
}
|
||||
else // full columns
|
||||
{
|
||||
DenseIndex n = this->m_nc;
|
||||
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, n);
|
||||
}
|
||||
|
||||
m_sizeA = this->m_mc * this->m_kc;
|
||||
m_sizeB = this->m_kc * this->m_nc;
|
||||
m_sizeW = this->m_kc*Traits::WorkSpaceFactor;
|
||||
}
|
||||
|
||||
void allocateA()
|
||||
@@ -351,24 +352,16 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
||||
this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
|
||||
}
|
||||
|
||||
void allocateW()
|
||||
{
|
||||
if(this->m_blockW==0)
|
||||
this->m_blockW = aligned_new<RhsScalar>(m_sizeW);
|
||||
}
|
||||
|
||||
void allocateAll()
|
||||
{
|
||||
allocateA();
|
||||
allocateB();
|
||||
allocateW();
|
||||
}
|
||||
|
||||
~gemm_blocking_space()
|
||||
{
|
||||
aligned_delete(this->m_blockA, m_sizeA);
|
||||
aligned_delete(this->m_blockB, m_sizeB);
|
||||
aligned_delete(this->m_blockW, m_sizeW);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -393,7 +386,37 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
|
||||
typedef internal::scalar_product_op<LhsScalar,RhsScalar> BinOp;
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
|
||||
}
|
||||
|
||||
template<typename Dest>
|
||||
inline void evalTo(Dest& dst) const
|
||||
{
|
||||
if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
|
||||
dst.noalias() = m_lhs .lazyProduct( m_rhs );
|
||||
else
|
||||
{
|
||||
dst.setZero();
|
||||
scaleAndAddTo(dst,Scalar(1));
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Dest>
|
||||
inline void addTo(Dest& dst) const
|
||||
{
|
||||
if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
|
||||
dst.noalias() += m_lhs .lazyProduct( m_rhs );
|
||||
else
|
||||
scaleAndAddTo(dst,Scalar(1));
|
||||
}
|
||||
|
||||
template<typename Dest>
|
||||
inline void subTo(Dest& dst) const
|
||||
{
|
||||
if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
|
||||
dst.noalias() -= m_lhs .lazyProduct( m_rhs );
|
||||
else
|
||||
scaleAndAddTo(dst,Scalar(-1));
|
||||
}
|
||||
|
||||
template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
|
||||
{
|
||||
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
|
||||
@@ -416,7 +439,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
|
||||
(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
|
||||
_ActualLhsType, _ActualRhsType, Dest, BlockingType> GemmFunctor;
|
||||
|
||||
BlockingType blocking(dst.rows(), dst.cols(), lhs.cols());
|
||||
BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), true);
|
||||
|
||||
internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit);
|
||||
}
|
||||
|
||||
@@ -73,11 +73,8 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
|
||||
if(mc > Traits::nr)
|
||||
mc = (mc/Traits::nr)*Traits::nr;
|
||||
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = sizeW + kc*size;
|
||||
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0);
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar, allocatedBlockB, sizeB, 0);
|
||||
RhsScalar* blockB = allocatedBlockB + sizeW;
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, kc*size, 0);
|
||||
|
||||
gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
||||
gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
|
||||
@@ -103,15 +100,15 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
|
||||
// 3 - after the diagonal => processed with gebp or skipped
|
||||
if (UpLo==Lower)
|
||||
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, (std::min)(size,i2), alpha,
|
||||
-1, -1, 0, 0, allocatedBlockB);
|
||||
-1, -1, 0, 0);
|
||||
|
||||
sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB);
|
||||
sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha);
|
||||
|
||||
if (UpLo==Upper)
|
||||
{
|
||||
Index j2 = i2+actual_mc;
|
||||
gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, (std::max)(Index(0), size-j2), alpha,
|
||||
-1, -1, 0, 0, allocatedBlockB);
|
||||
-1, -1, 0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -136,7 +133,7 @@ struct tribb_kernel
|
||||
enum {
|
||||
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
|
||||
};
|
||||
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha, RhsScalar* workspace)
|
||||
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha)
|
||||
{
|
||||
gebp_kernel<LhsScalar, RhsScalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
|
||||
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer;
|
||||
@@ -150,7 +147,7 @@ struct tribb_kernel
|
||||
|
||||
if(UpLo==Upper)
|
||||
gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha,
|
||||
-1, -1, 0, 0, workspace);
|
||||
-1, -1, 0, 0);
|
||||
|
||||
// selfadjoint micro block
|
||||
{
|
||||
@@ -158,7 +155,7 @@ struct tribb_kernel
|
||||
buffer.setZero();
|
||||
// 1 - apply the kernel on the temporary buffer
|
||||
gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha,
|
||||
-1, -1, 0, 0, workspace);
|
||||
-1, -1, 0, 0);
|
||||
// 2 - triangular accumulation
|
||||
for(Index j1=0; j1<actualBlockSize; ++j1)
|
||||
{
|
||||
@@ -173,7 +170,7 @@ struct tribb_kernel
|
||||
{
|
||||
Index i = j+actualBlockSize;
|
||||
gebp_kernel(res+j*resStride+i, resStride, blockA+depth*i, actual_b, size-i, depth, actualBlockSize, alpha,
|
||||
-1, -1, 0, 0, workspace);
|
||||
-1, -1, 0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -268,6 +265,8 @@ template<typename MatrixType, unsigned int UpLo>
|
||||
template<typename ProductDerived, typename _Lhs, typename _Rhs>
|
||||
TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha)
|
||||
{
|
||||
eigen_assert(m_matrix.rows() == prod.rows() && m_matrix.cols() == prod.cols());
|
||||
|
||||
general_product_to_triangular_selector<MatrixType, ProductDerived, UpLo, (_Lhs::ColsAtCompileTime==1) || (_Rhs::RowsAtCompileTime==1)>::run(m_matrix.const_cast_derived(), prod.derived(), alpha);
|
||||
|
||||
return *this;
|
||||
|
||||
@@ -53,6 +53,8 @@ template< \
|
||||
int RhsStorageOrder, bool ConjugateRhs> \
|
||||
struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \
|
||||
{ \
|
||||
typedef gebp_traits<EIGTYPE,EIGTYPE> Traits; \
|
||||
\
|
||||
static void run(Index rows, Index cols, Index depth, \
|
||||
const EIGTYPE* _lhs, Index lhsStride, \
|
||||
const EIGTYPE* _rhs, Index rhsStride, \
|
||||
|
||||
@@ -26,6 +26,34 @@ namespace internal {
|
||||
* |real |cplx |real | alpha is converted to a cplx when calling the run function, no vectorization
|
||||
* |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp
|
||||
* |cplx |real |real | optimal case, vectorization possible via real-cplx mul
|
||||
*
|
||||
* Accesses to the matrix coefficients follow the following logic:
|
||||
*
|
||||
* - if all columns have the same alignment then
|
||||
* - if the columns have the same alignment as the result vector, then easy! (-> AllAligned case)
|
||||
* - otherwise perform unaligned loads only (-> NoneAligned case)
|
||||
* - otherwise
|
||||
* - if even columns have the same alignment then
|
||||
* // odd columns are guaranteed to have the same alignment too
|
||||
* - if even or odd columns have the same alignment as the result, then
|
||||
* // for a register size of 2 scalars, this is guarantee to be the case (e.g., SSE with double)
|
||||
* - perform half aligned and half unaligned loads (-> EvenAligned case)
|
||||
* - otherwise perform unaligned loads only (-> NoneAligned case)
|
||||
* - otherwise, if the register size is 4 scalars (e.g., SSE with float) then
|
||||
* - one over 4 consecutive columns is guaranteed to be aligned with the result vector,
|
||||
* perform simple aligned loads for this column and aligned loads plus re-alignment for the other. (-> FirstAligned case)
|
||||
* // this re-alignment is done by the palign function implemented for SSE in Eigen/src/Core/arch/SSE/PacketMath.h
|
||||
* - otherwise,
|
||||
* // if we get here, this means the register size is greater than 4 (e.g., AVX with floats),
|
||||
* // we currently fall back to the NoneAligned case
|
||||
*
|
||||
* The same reasoning apply for the transposed case.
|
||||
*
|
||||
* The last case (PacketSize>4) could probably be improved by generalizing the FirstAligned case, but since we do not support AVX yet...
|
||||
* One might also wonder why in the EvenAligned case we perform unaligned loads instead of using the aligned-loads plus re-alignment
|
||||
* strategy as in the FirstAligned case. The reason is that we observed that unaligned loads on a 8 byte boundary are not too slow
|
||||
* compared to unaligned loads on a 4 byte boundary.
|
||||
*
|
||||
*/
|
||||
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
|
||||
struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
|
||||
@@ -52,11 +80,8 @@ EIGEN_DONT_INLINE static void run(
|
||||
Index rows, Index cols,
|
||||
const LhsScalar* lhs, Index lhsStride,
|
||||
const RhsScalar* rhs, Index rhsIncr,
|
||||
ResScalar* res, Index
|
||||
#ifdef EIGEN_INTERNAL_DEBUGGING
|
||||
resIncr
|
||||
#endif
|
||||
, RhsScalar alpha);
|
||||
ResScalar* res, Index resIncr,
|
||||
RhsScalar alpha);
|
||||
};
|
||||
|
||||
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
|
||||
@@ -64,12 +89,10 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,Co
|
||||
Index rows, Index cols,
|
||||
const LhsScalar* lhs, Index lhsStride,
|
||||
const RhsScalar* rhs, Index rhsIncr,
|
||||
ResScalar* res, Index
|
||||
#ifdef EIGEN_INTERNAL_DEBUGGING
|
||||
resIncr
|
||||
#endif
|
||||
, RhsScalar alpha)
|
||||
ResScalar* res, Index resIncr,
|
||||
RhsScalar alpha)
|
||||
{
|
||||
EIGEN_UNUSED_VARIABLE(resIncr);
|
||||
eigen_internal_assert(resIncr==1);
|
||||
#ifdef _EIGEN_ACCUMULATE_PACKETS
|
||||
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
|
||||
@@ -118,6 +141,12 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,Co
|
||||
alignedSize = 0;
|
||||
alignedStart = 0;
|
||||
}
|
||||
else if(LhsPacketSize > 4)
|
||||
{
|
||||
// TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
|
||||
// Currently, it seems to be better to perform unaligned loads anyway
|
||||
alignmentPattern = NoneAligned;
|
||||
}
|
||||
else if (LhsPacketSize>1)
|
||||
{
|
||||
eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize);
|
||||
@@ -265,7 +294,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,Co
|
||||
// process aligned result's coeffs
|
||||
if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0)
|
||||
for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
|
||||
pstore(&res[i], pcj.pmadd(ploadu<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
|
||||
pstore(&res[i], pcj.pmadd(pload<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
|
||||
else
|
||||
for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
|
||||
pstore(&res[i], pcj.pmadd(ploadu<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
|
||||
@@ -322,7 +351,7 @@ EIGEN_DONT_INLINE static void run(
|
||||
Index rows, Index cols,
|
||||
const LhsScalar* lhs, Index lhsStride,
|
||||
const RhsScalar* rhs, Index rhsIncr,
|
||||
ResScalar* res, Index resIncr,
|
||||
ResScalar* res, Index resIncr,
|
||||
ResScalar alpha);
|
||||
};
|
||||
|
||||
@@ -336,6 +365,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,Co
|
||||
{
|
||||
EIGEN_UNUSED_VARIABLE(rhsIncr);
|
||||
eigen_internal_assert(rhsIncr==1);
|
||||
|
||||
#ifdef _EIGEN_ACCUMULATE_PACKETS
|
||||
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
|
||||
#endif
|
||||
@@ -381,6 +411,11 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,Co
|
||||
alignedSize = 0;
|
||||
alignedStart = 0;
|
||||
}
|
||||
else if(LhsPacketSize > 4)
|
||||
{
|
||||
// TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
|
||||
alignmentPattern = NoneAligned;
|
||||
}
|
||||
else if (LhsPacketSize>1)
|
||||
{
|
||||
eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize);
|
||||
@@ -418,7 +453,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,Co
|
||||
Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
|
||||
for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
|
||||
{
|
||||
EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0);
|
||||
EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
|
||||
ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
|
||||
|
||||
// this helps the compiler generating good binary code
|
||||
@@ -527,7 +562,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,Co
|
||||
{
|
||||
for (Index i=start; i<end; ++i)
|
||||
{
|
||||
EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0);
|
||||
EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
|
||||
ResPacket ptmp0 = pset1<ResPacket>(tmp0);
|
||||
const LhsScalar* lhs0 = lhs + i*lhsStride;
|
||||
// process first unaligned result's coeffs
|
||||
|
||||
@@ -73,13 +73,13 @@ namespace internal {
|
||||
|
||||
template<typename Index> struct GemmParallelInfo
|
||||
{
|
||||
GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {}
|
||||
GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
|
||||
|
||||
int volatile sync;
|
||||
int volatile users;
|
||||
|
||||
Index rhs_start;
|
||||
Index rhs_length;
|
||||
Index lhs_start;
|
||||
Index lhs_length;
|
||||
};
|
||||
|
||||
template<bool Condition, typename Functor, typename Index>
|
||||
@@ -107,7 +107,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
|
||||
if((!Condition) || (omp_get_num_threads()>1))
|
||||
return func(0,rows, 0,cols);
|
||||
|
||||
Index size = transpose ? cols : rows;
|
||||
Index size = transpose ? rows : cols;
|
||||
|
||||
// 2- compute the maximal number of threads from the size of the product:
|
||||
// FIXME this has to be fine tuned
|
||||
@@ -126,26 +126,25 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
|
||||
std::swap(rows,cols);
|
||||
|
||||
Index blockCols = (cols / threads) & ~Index(0x3);
|
||||
Index blockRows = (rows / threads) & ~Index(0x7);
|
||||
Index blockRows = (rows / threads);
|
||||
blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
|
||||
|
||||
GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads];
|
||||
|
||||
#pragma omp parallel for schedule(static,1) num_threads(threads)
|
||||
for(Index i=0; i<threads; ++i)
|
||||
#pragma omp parallel num_threads(threads)
|
||||
{
|
||||
Index i = omp_get_thread_num();
|
||||
Index r0 = i*blockRows;
|
||||
Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows;
|
||||
|
||||
Index c0 = i*blockCols;
|
||||
Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols;
|
||||
|
||||
info[i].rhs_start = c0;
|
||||
info[i].rhs_length = actualBlockCols;
|
||||
info[i].lhs_start = r0;
|
||||
info[i].lhs_length = actualBlockRows;
|
||||
|
||||
if(transpose)
|
||||
func(0, cols, r0, actualBlockRows, info);
|
||||
else
|
||||
func(r0, actualBlockRows, 0,cols, info);
|
||||
if(transpose) func(c0, actualBlockCols, 0, rows, info);
|
||||
else func(0, rows, c0, actualBlockCols, info);
|
||||
}
|
||||
|
||||
delete[] info;
|
||||
|
||||
@@ -15,7 +15,7 @@ namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
// pack a selfadjoint block diagonal for use with the gebp_kernel
|
||||
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
|
||||
template<typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
|
||||
struct symm_pack_lhs
|
||||
{
|
||||
template<int BlockRows> inline
|
||||
@@ -45,25 +45,32 @@ struct symm_pack_lhs
|
||||
}
|
||||
void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
|
||||
{
|
||||
enum { PacketSize = packet_traits<Scalar>::size };
|
||||
const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
|
||||
Index count = 0;
|
||||
Index peeled_mc = (rows/Pack1)*Pack1;
|
||||
for(Index i=0; i<peeled_mc; i+=Pack1)
|
||||
{
|
||||
pack<Pack1>(blockA, lhs, cols, i, count);
|
||||
}
|
||||
|
||||
if(rows-peeled_mc>=Pack2)
|
||||
{
|
||||
pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
|
||||
peeled_mc += Pack2;
|
||||
}
|
||||
//Index peeled_mc3 = (rows/Pack1)*Pack1;
|
||||
|
||||
const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
|
||||
const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
|
||||
const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
|
||||
|
||||
if(Pack1>=3*PacketSize)
|
||||
for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
|
||||
pack<3*PacketSize>(blockA, lhs, cols, i, count);
|
||||
|
||||
if(Pack1>=2*PacketSize)
|
||||
for(Index i=peeled_mc3; i<peeled_mc2; i+=2*PacketSize)
|
||||
pack<2*PacketSize>(blockA, lhs, cols, i, count);
|
||||
|
||||
if(Pack1>=1*PacketSize)
|
||||
for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
|
||||
pack<1*PacketSize>(blockA, lhs, cols, i, count);
|
||||
|
||||
// do the same with mr==1
|
||||
for(Index i=peeled_mc; i<rows; i++)
|
||||
for(Index i=peeled_mc1; i<rows; i++)
|
||||
{
|
||||
for(Index k=0; k<i; k++)
|
||||
blockA[count++] = lhs(i, k); // normal
|
||||
blockA[count++] = lhs(i, k); // normal
|
||||
|
||||
blockA[count++] = numext::real(lhs(i, i)); // real (diagonal)
|
||||
|
||||
@@ -82,7 +89,8 @@ struct symm_pack_rhs
|
||||
Index end_k = k2 + rows;
|
||||
Index count = 0;
|
||||
const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
|
||||
Index packet_cols = (cols/nr)*nr;
|
||||
Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
|
||||
Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
|
||||
|
||||
// first part: normal case
|
||||
for(Index j2=0; j2<k2; j2+=nr)
|
||||
@@ -91,79 +99,151 @@ struct symm_pack_rhs
|
||||
{
|
||||
blockB[count+0] = rhs(k,j2+0);
|
||||
blockB[count+1] = rhs(k,j2+1);
|
||||
if (nr==4)
|
||||
if (nr>=4)
|
||||
{
|
||||
blockB[count+2] = rhs(k,j2+2);
|
||||
blockB[count+3] = rhs(k,j2+3);
|
||||
}
|
||||
if (nr>=8)
|
||||
{
|
||||
blockB[count+4] = rhs(k,j2+4);
|
||||
blockB[count+5] = rhs(k,j2+5);
|
||||
blockB[count+6] = rhs(k,j2+6);
|
||||
blockB[count+7] = rhs(k,j2+7);
|
||||
}
|
||||
count += nr;
|
||||
}
|
||||
}
|
||||
|
||||
// second part: diagonal block
|
||||
for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
|
||||
Index end8 = nr>=8 ? (std::min)(k2+rows,packet_cols8) : k2;
|
||||
if(nr>=8)
|
||||
{
|
||||
// again we can split vertically in three different parts (transpose, symmetric, normal)
|
||||
// transpose
|
||||
for(Index k=k2; k<j2; k++)
|
||||
for(Index j2=k2; j2<end8; j2+=8)
|
||||
{
|
||||
blockB[count+0] = numext::conj(rhs(j2+0,k));
|
||||
blockB[count+1] = numext::conj(rhs(j2+1,k));
|
||||
if (nr==4)
|
||||
// again we can split vertically in three different parts (transpose, symmetric, normal)
|
||||
// transpose
|
||||
for(Index k=k2; k<j2; k++)
|
||||
{
|
||||
blockB[count+0] = numext::conj(rhs(j2+0,k));
|
||||
blockB[count+1] = numext::conj(rhs(j2+1,k));
|
||||
blockB[count+2] = numext::conj(rhs(j2+2,k));
|
||||
blockB[count+3] = numext::conj(rhs(j2+3,k));
|
||||
blockB[count+4] = numext::conj(rhs(j2+4,k));
|
||||
blockB[count+5] = numext::conj(rhs(j2+5,k));
|
||||
blockB[count+6] = numext::conj(rhs(j2+6,k));
|
||||
blockB[count+7] = numext::conj(rhs(j2+7,k));
|
||||
count += 8;
|
||||
}
|
||||
count += nr;
|
||||
}
|
||||
// symmetric
|
||||
Index h = 0;
|
||||
for(Index k=j2; k<j2+nr; k++)
|
||||
{
|
||||
// normal
|
||||
for (Index w=0 ; w<h; ++w)
|
||||
blockB[count+w] = rhs(k,j2+w);
|
||||
|
||||
blockB[count+h] = numext::real(rhs(k,k));
|
||||
|
||||
// transpose
|
||||
for (Index w=h+1 ; w<nr; ++w)
|
||||
blockB[count+w] = numext::conj(rhs(j2+w,k));
|
||||
count += nr;
|
||||
++h;
|
||||
}
|
||||
// normal
|
||||
for(Index k=j2+nr; k<end_k; k++)
|
||||
{
|
||||
blockB[count+0] = rhs(k,j2+0);
|
||||
blockB[count+1] = rhs(k,j2+1);
|
||||
if (nr==4)
|
||||
// symmetric
|
||||
Index h = 0;
|
||||
for(Index k=j2; k<j2+8; k++)
|
||||
{
|
||||
// normal
|
||||
for (Index w=0 ; w<h; ++w)
|
||||
blockB[count+w] = rhs(k,j2+w);
|
||||
|
||||
blockB[count+h] = numext::real(rhs(k,k));
|
||||
|
||||
// transpose
|
||||
for (Index w=h+1 ; w<8; ++w)
|
||||
blockB[count+w] = numext::conj(rhs(j2+w,k));
|
||||
count += 8;
|
||||
++h;
|
||||
}
|
||||
// normal
|
||||
for(Index k=j2+8; k<end_k; k++)
|
||||
{
|
||||
blockB[count+0] = rhs(k,j2+0);
|
||||
blockB[count+1] = rhs(k,j2+1);
|
||||
blockB[count+2] = rhs(k,j2+2);
|
||||
blockB[count+3] = rhs(k,j2+3);
|
||||
blockB[count+4] = rhs(k,j2+4);
|
||||
blockB[count+5] = rhs(k,j2+5);
|
||||
blockB[count+6] = rhs(k,j2+6);
|
||||
blockB[count+7] = rhs(k,j2+7);
|
||||
count += 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(nr>=4)
|
||||
{
|
||||
for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4)
|
||||
{
|
||||
// again we can split vertically in three different parts (transpose, symmetric, normal)
|
||||
// transpose
|
||||
for(Index k=k2; k<j2; k++)
|
||||
{
|
||||
blockB[count+0] = numext::conj(rhs(j2+0,k));
|
||||
blockB[count+1] = numext::conj(rhs(j2+1,k));
|
||||
blockB[count+2] = numext::conj(rhs(j2+2,k));
|
||||
blockB[count+3] = numext::conj(rhs(j2+3,k));
|
||||
count += 4;
|
||||
}
|
||||
// symmetric
|
||||
Index h = 0;
|
||||
for(Index k=j2; k<j2+4; k++)
|
||||
{
|
||||
// normal
|
||||
for (Index w=0 ; w<h; ++w)
|
||||
blockB[count+w] = rhs(k,j2+w);
|
||||
|
||||
blockB[count+h] = numext::real(rhs(k,k));
|
||||
|
||||
// transpose
|
||||
for (Index w=h+1 ; w<4; ++w)
|
||||
blockB[count+w] = numext::conj(rhs(j2+w,k));
|
||||
count += 4;
|
||||
++h;
|
||||
}
|
||||
// normal
|
||||
for(Index k=j2+4; k<end_k; k++)
|
||||
{
|
||||
blockB[count+0] = rhs(k,j2+0);
|
||||
blockB[count+1] = rhs(k,j2+1);
|
||||
blockB[count+2] = rhs(k,j2+2);
|
||||
blockB[count+3] = rhs(k,j2+3);
|
||||
count += 4;
|
||||
}
|
||||
count += nr;
|
||||
}
|
||||
}
|
||||
|
||||
// third part: transposed
|
||||
for(Index j2=k2+rows; j2<packet_cols; j2+=nr)
|
||||
if(nr>=8)
|
||||
{
|
||||
for(Index k=k2; k<end_k; k++)
|
||||
for(Index j2=k2+rows; j2<packet_cols8; j2+=8)
|
||||
{
|
||||
blockB[count+0] = numext::conj(rhs(j2+0,k));
|
||||
blockB[count+1] = numext::conj(rhs(j2+1,k));
|
||||
if (nr==4)
|
||||
for(Index k=k2; k<end_k; k++)
|
||||
{
|
||||
blockB[count+0] = numext::conj(rhs(j2+0,k));
|
||||
blockB[count+1] = numext::conj(rhs(j2+1,k));
|
||||
blockB[count+2] = numext::conj(rhs(j2+2,k));
|
||||
blockB[count+3] = numext::conj(rhs(j2+3,k));
|
||||
blockB[count+4] = numext::conj(rhs(j2+4,k));
|
||||
blockB[count+5] = numext::conj(rhs(j2+5,k));
|
||||
blockB[count+6] = numext::conj(rhs(j2+6,k));
|
||||
blockB[count+7] = numext::conj(rhs(j2+7,k));
|
||||
count += 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(nr>=4)
|
||||
{
|
||||
for(Index j2=(std::max)(packet_cols8,k2+rows); j2<packet_cols4; j2+=4)
|
||||
{
|
||||
for(Index k=k2; k<end_k; k++)
|
||||
{
|
||||
blockB[count+0] = numext::conj(rhs(j2+0,k));
|
||||
blockB[count+1] = numext::conj(rhs(j2+1,k));
|
||||
blockB[count+2] = numext::conj(rhs(j2+2,k));
|
||||
blockB[count+3] = numext::conj(rhs(j2+3,k));
|
||||
count += 4;
|
||||
}
|
||||
count += nr;
|
||||
}
|
||||
}
|
||||
|
||||
// copy the remaining columns one at a time (=> the same with nr==1)
|
||||
for(Index j2=packet_cols; j2<cols; ++j2)
|
||||
for(Index j2=packet_cols4; j2<cols; ++j2)
|
||||
{
|
||||
// transpose
|
||||
Index half = (std::min)(end_k,j2);
|
||||
@@ -261,11 +341,10 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
|
||||
// kc must smaller than mc
|
||||
kc = (std::min)(kc,mc);
|
||||
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = sizeW + kc*cols;
|
||||
std::size_t sizeB = kc*cols;
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
|
||||
Scalar* blockB = allocatedBlockB + sizeW;
|
||||
Scalar* blockB = allocatedBlockB;
|
||||
|
||||
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
||||
symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
||||
@@ -348,11 +427,10 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
|
||||
Index mc = rows; // cache block size along the M direction
|
||||
Index nc = cols; // cache block size along the N direction
|
||||
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = sizeW + kc*cols;
|
||||
std::size_t sizeB = kc*cols;
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
|
||||
Scalar* blockB = allocatedBlockB + sizeW;
|
||||
Scalar* blockB = allocatedBlockB;
|
||||
|
||||
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
||||
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
||||
@@ -422,11 +500,11 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
|
||||
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
|
||||
internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
|
||||
::run(
|
||||
lhs.rows(), rhs.cols(), // sizes
|
||||
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
|
||||
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
|
||||
&dst.coeffRef(0,0), dst.outerStride(), // result info
|
||||
actualAlpha // alpha
|
||||
lhs.rows(), rhs.cols(), // sizes
|
||||
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
|
||||
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
|
||||
&dst.coeffRef(0,0), dst.outerStride(), // result info
|
||||
actualAlpha // alpha
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -79,8 +79,8 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
|
||||
for (Index j=FirstTriangular ? bound : 0;
|
||||
j<(FirstTriangular ? size : bound);j+=2)
|
||||
{
|
||||
register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
|
||||
register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
|
||||
const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
|
||||
const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
|
||||
|
||||
Scalar t0 = cjAlpha * rhs[j];
|
||||
Packet ptmp0 = pset1<Packet>(t0);
|
||||
@@ -113,9 +113,9 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
|
||||
|
||||
for (size_t i=starti; i<alignedStart; ++i)
|
||||
{
|
||||
res[i] += t0 * A0[i] + t1 * A1[i];
|
||||
t2 += numext::conj(A0[i]) * rhs[i];
|
||||
t3 += numext::conj(A1[i]) * rhs[i];
|
||||
res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
|
||||
t2 += cj1.pmul(A0[i], rhs[i]);
|
||||
t3 += cj1.pmul(A1[i], rhs[i]);
|
||||
}
|
||||
// Yes this an optimization for gcc 4.3 and 4.4 (=> huge speed up)
|
||||
// gcc 4.2 does this optimization automatically.
|
||||
@@ -147,7 +147,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
|
||||
}
|
||||
for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
|
||||
{
|
||||
register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
|
||||
const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
|
||||
|
||||
Scalar t1 = cjAlpha * rhs[j];
|
||||
Scalar t2(0);
|
||||
@@ -218,7 +218,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
|
||||
if(!EvalToDest)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
int size = dest.size();
|
||||
Index size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
MappedDest(actualDestPtr, dest.size()) = dest;
|
||||
@@ -227,7 +227,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
|
||||
if(!UseRhs)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
int size = rhs.size();
|
||||
Index size = rhs.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
|
||||
|
||||
@@ -125,11 +125,9 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
|
||||
std::size_t sizeA = kc*mc;
|
||||
std::size_t sizeB = kc*cols;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
|
||||
|
||||
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
|
||||
triangularBuffer.setZero();
|
||||
@@ -187,7 +185,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
pack_lhs(blockA, triangularBuffer.data(), triangularBuffer.outerStride(), actualPanelWidth, actualPanelWidth);
|
||||
|
||||
gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols, alpha,
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset);
|
||||
|
||||
// GEBP with remaining micro panel
|
||||
if (lengthTarget>0)
|
||||
@@ -197,7 +195,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget);
|
||||
|
||||
gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha,
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -211,7 +209,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
|
||||
(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
|
||||
|
||||
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
|
||||
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -265,12 +263,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
||||
|
||||
std::size_t sizeA = kc*mc;
|
||||
std::size_t sizeB = kc*cols;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = kc*cols+EIGEN_ALIGN_BYTES/sizeof(Scalar);
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
|
||||
|
||||
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
|
||||
triangularBuffer.setZero();
|
||||
@@ -304,6 +300,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
|
||||
|
||||
Scalar* geb = blockB+ts*ts;
|
||||
geb = geb + internal::first_aligned(geb,EIGEN_ALIGN_BYTES/sizeof(Scalar));
|
||||
|
||||
pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, actual_kc, rs);
|
||||
|
||||
@@ -357,14 +354,13 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
actual_mc, panelLength, actualPanelWidth,
|
||||
alpha,
|
||||
actual_kc, actual_kc, // strides
|
||||
blockOffset, blockOffset,// offsets
|
||||
blockW); // workspace
|
||||
blockOffset, blockOffset);// offsets
|
||||
}
|
||||
}
|
||||
gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride,
|
||||
blockA, geb, actual_mc, actual_kc, rs,
|
||||
alpha,
|
||||
-1, -1, 0, 0, blockW);
|
||||
-1, -1, 0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -109,7 +109,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
|
||||
/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
|
||||
if (rows != depth) { \
|
||||
\
|
||||
int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
|
||||
int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \
|
||||
\
|
||||
if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
|
||||
/* Most likely no benefit to call TRMM or GEMM from MKL*/ \
|
||||
@@ -223,7 +223,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
|
||||
/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
|
||||
if (cols != depth) { \
|
||||
\
|
||||
int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
|
||||
int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \
|
||||
\
|
||||
if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
|
||||
/* Most likely no benefit to call TRMM or GEMM from MKL*/ \
|
||||
|
||||
@@ -322,7 +322,7 @@ template<> struct trmv_selector<RowMajor>
|
||||
if(!DirectlyUseRhs)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
int size = actualRhs.size();
|
||||
Index size = actualRhs.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
|
||||
|
||||
@@ -129,7 +129,6 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
|
||||
MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
|
||||
/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \
|
||||
if (size<(std::max)(rows,cols)) { \
|
||||
typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \
|
||||
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
|
||||
x = x_tmp.data(); \
|
||||
if (size<rows) { \
|
||||
@@ -214,7 +213,6 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
|
||||
MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
|
||||
/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \
|
||||
if (size<(std::max)(rows,cols)) { \
|
||||
typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \
|
||||
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
|
||||
x = x_tmp.data(); \
|
||||
if (size<rows) { \
|
||||
|
||||
@@ -66,11 +66,9 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
|
||||
|
||||
std::size_t sizeA = kc*mc;
|
||||
std::size_t sizeB = kc*cols;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
|
||||
|
||||
conj_if<Conjugate> conj;
|
||||
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
|
||||
@@ -158,7 +156,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
|
||||
pack_lhs(blockA, &tri(startTarget,startBlock), triStride, actualPanelWidth, lengthTarget);
|
||||
|
||||
gebp_kernel(&other(startTarget,j2), otherStride, blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1),
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -174,7 +172,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
|
||||
{
|
||||
pack_lhs(blockA, &tri(i2, IsLower ? k2 : k2-kc), triStride, actual_kc, actual_mc);
|
||||
|
||||
gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0, blockW);
|
||||
gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -215,11 +213,9 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conj
|
||||
|
||||
std::size_t sizeA = kc*mc;
|
||||
std::size_t sizeB = kc*size;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
|
||||
|
||||
conj_if<Conjugate> conj;
|
||||
gebp_kernel<Scalar,Scalar, Index, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
|
||||
@@ -285,8 +281,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conj
|
||||
actual_mc, panelLength, actualPanelWidth,
|
||||
Scalar(-1),
|
||||
actual_kc, actual_kc, // strides
|
||||
panelOffset, panelOffset, // offsets
|
||||
blockW); // workspace
|
||||
panelOffset, panelOffset); // offsets
|
||||
}
|
||||
|
||||
// unblocked triangular solve
|
||||
@@ -317,7 +312,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conj
|
||||
if (rs>0)
|
||||
gebp_kernel(_other+i2+startPanel*otherStride, otherStride, blockA, geb,
|
||||
actual_mc, actual_kc, rs, Scalar(-1),
|
||||
-1, -1, 0, 0, blockW);
|
||||
-1, -1, 0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user