mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Compare commits
1746 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
477d1e8192 | ||
|
|
0eaff8fdf2 | ||
|
|
582c96691b | ||
|
|
0b22158d9f | ||
|
|
dafdb0d8a8 | ||
|
|
1d1686c62b | ||
|
|
ad95b924d0 | ||
|
|
9499684320 | ||
|
|
5b6a31626b | ||
|
|
bc3fee2d8e | ||
|
|
eaa9223277 | ||
|
|
c9ba1165e7 | ||
|
|
dd2d5d67ff | ||
|
|
404322b64f | ||
|
|
ce37bae2cd | ||
|
|
3900dbc341 | ||
|
|
5f586c2bd0 | ||
|
|
215f88a417 | ||
|
|
2257f40f4a | ||
|
|
9e0fa0ef6d | ||
|
|
0fddbf3dc7 | ||
|
|
eda635bd58 | ||
|
|
26197bb467 | ||
|
|
772e59d475 | ||
|
|
e8f83cbb5d | ||
|
|
dce584d799 | ||
|
|
0bcef9557d | ||
|
|
2b3c876b2a | ||
|
|
a05f6aad0e | ||
|
|
59187285e1 | ||
|
|
1dd074ea7e | ||
|
|
24fa7a01bd | ||
|
|
e236d3443c | ||
|
|
4ec8833220 | ||
|
|
dd3685cc6a | ||
|
|
487a6e6515 | ||
|
|
75f0b8aae3 | ||
|
|
23aca8a586 | ||
|
|
28bf2bf070 | ||
|
|
0164f4c682 | ||
|
|
bbff608a42 | ||
|
|
ea56d2ff2c | ||
|
|
a4c8701e9a | ||
|
|
a9bb9796e0 | ||
|
|
449883be74 | ||
|
|
0a08d4c60b | ||
|
|
4086187e49 | ||
|
|
91864f85d3 | ||
|
|
c3597106ab | ||
|
|
aed1d6597f | ||
|
|
b6f04a2dd4 | ||
|
|
a9aa3bcf50 | ||
|
|
32b8da66e3 | ||
|
|
eb94179ea3 | ||
|
|
52a7386aef | ||
|
|
8cada1d894 | ||
|
|
6e4a664c42 | ||
|
|
1cd1a96d56 | ||
|
|
86ab00cdcf | ||
|
|
65f09be8d2 | ||
|
|
400d756b82 | ||
|
|
9d31798a84 | ||
|
|
723ed92e0e | ||
|
|
0a7de0b273 | ||
|
|
d6b9bc1ccd | ||
|
|
0eff51e2ed | ||
|
|
1b7dd46d94 | ||
|
|
b2eb1bf3dc | ||
|
|
fe48c25682 | ||
|
|
0ba6da3470 | ||
|
|
a287140f72 | ||
|
|
4d89ec8a00 | ||
|
|
441760f239 | ||
|
|
664162fb8a | ||
|
|
aa3c761002 | ||
|
|
94f2cfc9c7 | ||
|
|
4a13d79df6 | ||
|
|
463176cc44 | ||
|
|
5aab97fba6 | ||
|
|
89abc6806d | ||
|
|
baf793ebaa | ||
|
|
b4ddafcfac | ||
|
|
1079967710 | ||
|
|
eeac81b8c0 | ||
|
|
e80bc2ddb0 | ||
|
|
db3903498d | ||
|
|
dcc14bee64 | ||
|
|
b88c1117d4 | ||
|
|
912cb3d660 | ||
|
|
1b345b0895 | ||
|
|
1b95717358 | ||
|
|
d57430dd73 | ||
|
|
73985ead27 | ||
|
|
436a111792 | ||
|
|
afc55b1885 | ||
|
|
a5c2d8a3cc | ||
|
|
f8bfe10613 | ||
|
|
fc7180cda8 | ||
|
|
4d226ab5b5 | ||
|
|
ad086b03e4 | ||
|
|
dad177be01 | ||
|
|
55b4fd1d40 | ||
|
|
a354c3ca59 | ||
|
|
d46a36cc84 | ||
|
|
0ebe3808ca | ||
|
|
47d1b4a609 | ||
|
|
ba05572dcb | ||
|
|
5c3995769c | ||
|
|
fbe672d599 | ||
|
|
ca0ba0d9a4 | ||
|
|
c80587c92b | ||
|
|
3f1d0cdc22 | ||
|
|
78e93ac1ad | ||
|
|
3e37166d0b | ||
|
|
0585b2965d | ||
|
|
e6e77ed08b | ||
|
|
b238f387b4 | ||
|
|
c8db17301e | ||
|
|
a07bb428df | ||
|
|
598de8b193 | ||
|
|
e44519744e | ||
|
|
0a6ae41555 | ||
|
|
b730952414 | ||
|
|
7a0e96b80d | ||
|
|
51af6ae971 | ||
|
|
0a9ad6fc72 | ||
|
|
d5f88e2357 | ||
|
|
0b4b0f11e8 | ||
|
|
306daa24a3 | ||
|
|
8471cf1996 | ||
|
|
b0c5bfdf78 | ||
|
|
2ebb314fa7 | ||
|
|
530f20c21a | ||
|
|
c3ce4f9ac0 | ||
|
|
7d64e6752c | ||
|
|
0a4c4d40b4 | ||
|
|
3ecb343dc3 | ||
|
|
6ed571744b | ||
|
|
97feea9d39 | ||
|
|
ca6a2a5248 | ||
|
|
5f2dd503ff | ||
|
|
1644bafe29 | ||
|
|
b15a5dc3f4 | ||
|
|
aad72f3c6d | ||
|
|
3e194a6a73 | ||
|
|
58146be99b | ||
|
|
13fc18d3a2 | ||
|
|
2634f9386c | ||
|
|
9e8f07d7b5 | ||
|
|
b027d7a8cf | ||
|
|
b11aab5fcc | ||
|
|
53c77061f0 | ||
|
|
e8e56c7642 | ||
|
|
40f62974b7 | ||
|
|
cf20b30d65 | ||
|
|
03b63e182c | ||
|
|
d3943cd50c | ||
|
|
8fb162fc85 | ||
|
|
e36cb91c99 | ||
|
|
2e188dd4d4 | ||
|
|
15380f9a87 | ||
|
|
692b30ca95 | ||
|
|
050c681bdd | ||
|
|
e742da8b28 | ||
|
|
524fa4c46f | ||
|
|
737e4152c3 | ||
|
|
d0ee2267d6 | ||
|
|
a94791b69a | ||
|
|
ac63d6891c | ||
|
|
7e4a6754b2 | ||
|
|
38b6048e14 | ||
|
|
e74612b9a0 | ||
|
|
78d2926508 | ||
|
|
2e2f48e30e | ||
|
|
f939c351cb | ||
|
|
091d373ee9 | ||
|
|
471075f7ad | ||
|
|
5c366fe1d7 | ||
|
|
86711497c4 | ||
|
|
47150af1c8 | ||
|
|
89e315152c | ||
|
|
7f0599b6eb | ||
|
|
5727e4d89c | ||
|
|
5266ff8966 | ||
|
|
5c68051cd7 | ||
|
|
4860727ac2 | ||
|
|
507b661106 | ||
|
|
a498ff7df6 | ||
|
|
8ba3c41fcf | ||
|
|
a7473d6d5a | ||
|
|
5e64cea896 | ||
|
|
33fba3f08d | ||
|
|
bfc264abe8 | ||
|
|
e2e9cdd169 | ||
|
|
d485d12c51 | ||
|
|
48c635e223 | ||
|
|
9f3276981c | ||
|
|
80b5133789 | ||
|
|
4131074818 | ||
|
|
cb5cd69872 | ||
|
|
78b569f685 | ||
|
|
9c2b6c049b | ||
|
|
6f3cd529af | ||
|
|
d7f9679a34 | ||
|
|
ae1385c7e4 | ||
|
|
73b0012945 | ||
|
|
c84084c0c0 | ||
|
|
4387433acf | ||
|
|
aad20d700d | ||
|
|
8b69d5d730 | ||
|
|
ed7a220b04 | ||
|
|
ceee1c008b | ||
|
|
698ff69450 | ||
|
|
7f67e6dfdb | ||
|
|
765615609d | ||
|
|
3ed67cb0bb | ||
|
|
6af5ac7e27 | ||
|
|
2f6d1607c8 | ||
|
|
881b90e984 | ||
|
|
616a7a1912 | ||
|
|
409e887d78 | ||
|
|
9d6d0dff8f | ||
|
|
8b84801f7f | ||
|
|
422530946f | ||
|
|
67b4f45836 | ||
|
|
27f3970453 | ||
|
|
3860a0bc8f | ||
|
|
33500050c3 | ||
|
|
27d7628f16 | ||
|
|
2bda1b0d93 | ||
|
|
dd602e62c8 | ||
|
|
f3a00dd2b5 | ||
|
|
892afb9416 | ||
|
|
779774f98c | ||
|
|
6565f8d60f | ||
|
|
48dfe98abd | ||
|
|
fe29157d02 | ||
|
|
f6ac51a054 | ||
|
|
00d4e65f00 | ||
|
|
86caba838d | ||
|
|
b9f7a17e47 | ||
|
|
1301d744f8 | ||
|
|
2a69290ddb | ||
|
|
3946768916 | ||
|
|
77e27fbeee | ||
|
|
2ada122bc6 | ||
|
|
8f2bdde373 | ||
|
|
ba0f844d6b | ||
|
|
9bcdc8b756 | ||
|
|
50e3bbfc90 | ||
|
|
ca3746c6f8 | ||
|
|
8bde7da086 | ||
|
|
66cbabafed | ||
|
|
4b377715d7 | ||
|
|
aecc51a3e8 | ||
|
|
1fc3a21ed0 | ||
|
|
9fa2c8650e | ||
|
|
ac5377e161 | ||
|
|
5269d11935 | ||
|
|
26f9907542 | ||
|
|
608b1acd6d | ||
|
|
b2c6dc48d9 | ||
|
|
8a66ca4b10 | ||
|
|
59e9edfbf1 | ||
|
|
3ada6e4bed | ||
|
|
c3ca9b1e76 | ||
|
|
5dcc6d301a | ||
|
|
c771df6bc3 | ||
|
|
b91e021172 | ||
|
|
cb81975714 | ||
|
|
bf03820339 | ||
|
|
de05a18fe0 | ||
|
|
4cc2c73e6a | ||
|
|
ce3557ca69 | ||
|
|
6edd2e2851 | ||
|
|
4adeababf9 | ||
|
|
18f6e47815 | ||
|
|
ee62f168e6 | ||
|
|
ca7f061a5f | ||
|
|
50e203c717 | ||
|
|
fa9049a544 | ||
|
|
b33144e4df | ||
|
|
c0d56a543e | ||
|
|
488ad7dd1b | ||
|
|
779faaaeba | ||
|
|
1c8347e554 | ||
|
|
ff47717f25 | ||
|
|
309190cf02 | ||
|
|
c10620b2b0 | ||
|
|
73c8f2f697 | ||
|
|
e4d4d15588 | ||
|
|
4dfd888c92 | ||
|
|
028e299577 | ||
|
|
5f50f12d2c | ||
|
|
8321dcce76 | ||
|
|
eb6ba00cc8 | ||
|
|
a618094b62 | ||
|
|
228ae29591 | ||
|
|
471eac5399 | ||
|
|
d780983f59 | ||
|
|
85fb517eaf | ||
|
|
447f269561 | ||
|
|
b046a3f87d | ||
|
|
3cb914f332 | ||
|
|
e1642f485c | ||
|
|
19a95b3309 | ||
|
|
dabc81751f | ||
|
|
e13071dd13 | ||
|
|
d123717e21 | ||
|
|
87a8a1975e | ||
|
|
13df3441ae | ||
|
|
373c340b71 | ||
|
|
cadd124d73 | ||
|
|
05b0518077 | ||
|
|
adf864fec0 | ||
|
|
5a6be66cef | ||
|
|
13e93ca8b7 | ||
|
|
6c05c3dd49 | ||
|
|
49c0390ce0 | ||
|
|
d6c8366d84 | ||
|
|
039e225f7f | ||
|
|
c53f783705 | ||
|
|
ef54723dbe | ||
|
|
46475eff9a | ||
|
|
72a4d49315 | ||
|
|
f9f32e9e2d | ||
|
|
3d946e42b3 | ||
|
|
221f619bea | ||
|
|
a1e092d1e8 | ||
|
|
836fa25a82 | ||
|
|
84cf6e42ca | ||
|
|
7ae819123c | ||
|
|
218c37beb4 | ||
|
|
efe2c225c9 | ||
|
|
3456247437 | ||
|
|
8c48d42530 | ||
|
|
e7fbbc2748 | ||
|
|
1e2ab8b0b3 | ||
|
|
9c9e23858e | ||
|
|
cffe8bbff7 | ||
|
|
c57317035a | ||
|
|
1f84f0d33a | ||
|
|
68e803a26e | ||
|
|
e074f720c7 | ||
|
|
2915e1fc5d | ||
|
|
7e029d1d6e | ||
|
|
a93e354d92 | ||
|
|
6cd7b9ea6b | ||
|
|
8f4b4ad5fb | ||
|
|
35a8e94577 | ||
|
|
0decc31aa8 | ||
|
|
fd9caa1bc2 | ||
|
|
68d1897e8a | ||
|
|
fe60856fed | ||
|
|
0f56b5a6de | ||
|
|
965e595f02 | ||
|
|
1329c55875 | ||
|
|
441b7eaab2 | ||
|
|
8132a12625 | ||
|
|
bde9b456dc | ||
|
|
326320ec7b | ||
|
|
ea2e968257 | ||
|
|
0a6a50d1b0 | ||
|
|
00b2666853 | ||
|
|
504a4404f1 | ||
|
|
e47a8928ec | ||
|
|
6739f6bb1b | ||
|
|
ef3de20481 | ||
|
|
b3151bca40 | ||
|
|
a4c266f827 | ||
|
|
82147cefff | ||
|
|
068ccab9fe | ||
|
|
581b6472d1 | ||
|
|
59bacfe520 | ||
|
|
34ae80179a | ||
|
|
2556565b4b | ||
|
|
30dd6f5e34 | ||
|
|
fe73648c98 | ||
|
|
9636a8ed43 | ||
|
|
c83b754ee0 | ||
|
|
e3a8dfb02f | ||
|
|
64e68cbe87 | ||
|
|
5157ce8cbf | ||
|
|
aee693ac52 | ||
|
|
72096f3bd4 | ||
|
|
3e4a33d4ba | ||
|
|
1031223c09 | ||
|
|
5cf1e4c79b | ||
|
|
fe4b927e9c | ||
|
|
fe778427f2 | ||
|
|
5eea1c7f97 | ||
|
|
9506343349 | ||
|
|
b50d8f8c4a | ||
|
|
fad9828769 | ||
|
|
373bb12dc6 | ||
|
|
17b9a55d98 | ||
|
|
ca2cee2739 | ||
|
|
d92df04ce8 | ||
|
|
81099ef482 | ||
|
|
a20b58845f | ||
|
|
819d0cea1b | ||
|
|
f4404777ff | ||
|
|
fd220dd8b0 | ||
|
|
e256acec7c | ||
|
|
7995cec90c | ||
|
|
02fe89f5ef | ||
|
|
2693fd54bf | ||
|
|
c5b893f434 | ||
|
|
5e51a361fe | ||
|
|
ca5effa16c | ||
|
|
4057f9b1fc | ||
|
|
5fbe7aa604 | ||
|
|
a72752caac | ||
|
|
cc2f6d68b1 | ||
|
|
188590db82 | ||
|
|
8972323c08 | ||
|
|
5d94dc85e5 | ||
|
|
0d7039319c | ||
|
|
d3d7c6245d | ||
|
|
0eece608b4 | ||
|
|
f6b3cf8de9 | ||
|
|
9d16b6e1cf | ||
|
|
fd2f989b1d | ||
|
|
c9e3e438eb | ||
|
|
95113cb15c | ||
|
|
7f7e84aa36 | ||
|
|
429028b652 | ||
|
|
6b89fa802c | ||
|
|
c581c8fa79 | ||
|
|
8021aed89e | ||
|
|
757971e7ea | ||
|
|
c9425492c8 | ||
|
|
0592b4cfbf | ||
|
|
9c663e4ee8 | ||
|
|
0c06077efa | ||
|
|
4d54e3dd33 | ||
|
|
3d3d34e442 | ||
|
|
34b483e25d | ||
|
|
6d5daf32f5 | ||
|
|
f9598d73b5 | ||
|
|
fd1117f2be | ||
|
|
9908020d36 | ||
|
|
4184a3e544 | ||
|
|
1b2049fbda | ||
|
|
b118bc76eb | ||
|
|
c96af5381f | ||
|
|
e1c7c5968a | ||
|
|
8fffc81606 | ||
|
|
5f03584752 | ||
|
|
1b0353c659 | ||
|
|
c6b0de2c21 | ||
|
|
72744d93ef | ||
|
|
32d95e86c9 | ||
|
|
60d5980a41 | ||
|
|
d7a0e52478 | ||
|
|
7acf23c14c | ||
|
|
24af67a6cc | ||
|
|
395c835f4b | ||
|
|
d075d122ea | ||
|
|
47afc9a365 | ||
|
|
0f350a8b7e | ||
|
|
bf91a44f4a | ||
|
|
82798162c0 | ||
|
|
87fbda812f | ||
|
|
01d12d3e82 | ||
|
|
007edee1ac | ||
|
|
9b76be9d21 | ||
|
|
72950effdf | ||
|
|
7b4abc2b1d | ||
|
|
b64b9d0172 | ||
|
|
5e4dda8a12 | ||
|
|
87d480d785 | ||
|
|
7722913475 | ||
|
|
fd057f86b3 | ||
|
|
9e8476ef22 | ||
|
|
4b4c296d6e | ||
|
|
e3bf874c83 | ||
|
|
0f89c6d6b5 | ||
|
|
18884f17d7 | ||
|
|
79574e384e | ||
|
|
6a3c451c1c | ||
|
|
0c3fe4aca5 | ||
|
|
db9b154193 | ||
|
|
461cd819c2 | ||
|
|
5ea0864c81 | ||
|
|
20f7ef2f89 | ||
|
|
b4343aa67e | ||
|
|
e2aa58b631 | ||
|
|
263993a7b6 | ||
|
|
9ab35d8ba4 | ||
|
|
19614497ae | ||
|
|
c98bac2966 | ||
|
|
013a904237 | ||
|
|
40eb97516c | ||
|
|
03b71c273e | ||
|
|
3a2dd352ae | ||
|
|
0bc020be9d | ||
|
|
a96a7ce3f7 | ||
|
|
bec35f4c55 | ||
|
|
fd60966310 | ||
|
|
7d636349dc | ||
|
|
8b3fc31b55 | ||
|
|
3e348fdcf9 | ||
|
|
131ee4bb8e | ||
|
|
194daa3048 | ||
|
|
18c35747ce | ||
|
|
ef05463fcf | ||
|
|
9f7caa7e7d | ||
|
|
3c795c6923 | ||
|
|
57113e00f9 | ||
|
|
599f8ba617 | ||
|
|
544935101a | ||
|
|
59bf2774a3 | ||
|
|
2f7e2614e7 | ||
|
|
8b7431d8fd | ||
|
|
69378eed0b | ||
|
|
c684e37d32 | ||
|
|
179ebb88f9 | ||
|
|
5d2dada197 | ||
|
|
f5e780fb05 | ||
|
|
66917299a9 | ||
|
|
5ca2457fa5 | ||
|
|
9b68ed4537 | ||
|
|
c3b23d7dbf | ||
|
|
8ec4d6480d | ||
|
|
5b3a6f51d3 | ||
|
|
d2b5a19e0f | ||
|
|
367ef66af3 | ||
|
|
155d8d8603 | ||
|
|
43696ede8f | ||
|
|
b39fd8217f | ||
|
|
ec02af1047 | ||
|
|
fbcfc2f862 | ||
|
|
7f7839c12f | ||
|
|
32a41ee659 | ||
|
|
75e80792cc | ||
|
|
dacc544b84 | ||
|
|
b74e45906c | ||
|
|
ce9fc0ce14 | ||
|
|
440020474c | ||
|
|
e61cee7a50 | ||
|
|
91b3039013 | ||
|
|
0fa9e4a15c | ||
|
|
672076db5d | ||
|
|
78f37ca03c | ||
|
|
d161b8f03a | ||
|
|
cb2d8b8fa6 | ||
|
|
b2a47641ce | ||
|
|
b047ca765f | ||
|
|
85699850d9 | ||
|
|
6126886a67 | ||
|
|
328c5d876a | ||
|
|
38fb606052 | ||
|
|
1a9f92e781 | ||
|
|
75c333f94c | ||
|
|
c52c8d76da | ||
|
|
d937a420a2 | ||
|
|
2d5731e40a | ||
|
|
49b005181a | ||
|
|
130f891bb0 | ||
|
|
7944d4431f | ||
|
|
647a51b426 | ||
|
|
a452dedb4f | ||
|
|
18c67df31c | ||
|
|
1569a7d7ab | ||
|
|
2b17f34574 | ||
|
|
841e075154 | ||
|
|
0425118e2a | ||
|
|
9537e8b118 | ||
|
|
eeb0d880ee | ||
|
|
d476cadbb8 | ||
|
|
cfff370549 | ||
|
|
c50c73cae2 | ||
|
|
3852351793 | ||
|
|
6dd9077070 | ||
|
|
ce90647fa5 | ||
|
|
fa39f81b48 | ||
|
|
cd577a275c | ||
|
|
deb45ad4bc | ||
|
|
a9c1e4d7b7 | ||
|
|
d39df320d2 | ||
|
|
361dbd246d | ||
|
|
360a743a10 | ||
|
|
55fc04e8b5 | ||
|
|
bf2d5edecc | ||
|
|
7c6561485a | ||
|
|
be107e387b | ||
|
|
76faf4a965 | ||
|
|
a3f7edf7e7 | ||
|
|
a29a2cb4ff | ||
|
|
f8fcd6b32d | ||
|
|
c58df31747 | ||
|
|
de32f8d656 | ||
|
|
8c34b5a0e3 | ||
|
|
b410d46482 | ||
|
|
b80379bda0 | ||
|
|
8e198d6835 | ||
|
|
b055590e91 | ||
|
|
72c95383e0 | ||
|
|
67c12531e5 | ||
|
|
eb91345d64 | ||
|
|
4794834397 | ||
|
|
c55035b9c0 | ||
|
|
7d495d890a | ||
|
|
aedc5be1d6 | ||
|
|
4e7c3af874 | ||
|
|
101ea26f5e | ||
|
|
c4d10e921f | ||
|
|
76236cdea4 | ||
|
|
1004c4df99 | ||
|
|
70dad84b73 | ||
|
|
62134082aa | ||
|
|
5d38203735 | ||
|
|
396d9cfb6e | ||
|
|
a9bb653a68 | ||
|
|
a8c08e8b8e | ||
|
|
756ac4a93d | ||
|
|
f925dba3d9 | ||
|
|
12350d3ac7 | ||
|
|
bcc0f38f98 | ||
|
|
f57fd78e30 | ||
|
|
f5b1c73945 | ||
|
|
deb8306e60 | ||
|
|
64fcfd314f | ||
|
|
39781dc1e2 | ||
|
|
6edfe8771b | ||
|
|
6e1c086593 | ||
|
|
3c12e24164 | ||
|
|
7a9ef7bbb4 | ||
|
|
2ca2ffb65e | ||
|
|
4c61f00838 | ||
|
|
06206482d9 | ||
|
|
a3a4714aba | ||
|
|
83904a21c1 | ||
|
|
65d33e5898 | ||
|
|
a05607875a | ||
|
|
fabae6c9a1 | ||
|
|
5de8d7036b | ||
|
|
5fdd703629 | ||
|
|
2e238bafb6 | ||
|
|
0028049380 | ||
|
|
86aedc9282 | ||
|
|
db0118342c | ||
|
|
84b2060a9e | ||
|
|
2c462f4201 | ||
|
|
3d71d3918e | ||
|
|
7ef9f47b58 | ||
|
|
ea75dba201 | ||
|
|
33f0340188 | ||
|
|
e30133e439 | ||
|
|
df24f4a01d | ||
|
|
9137f560f0 | ||
|
|
66e99ab6a1 | ||
|
|
1f1e0b9e30 | ||
|
|
5b95b4daf9 | ||
|
|
d7e3e4bb04 | ||
|
|
c53687dd14 | ||
|
|
f1f2ff8208 | ||
|
|
76308e7fd2 | ||
|
|
bd21243821 | ||
|
|
1fc2746417 | ||
|
|
37638dafd7 | ||
|
|
66796e843d | ||
|
|
4434b16694 | ||
|
|
14a112ee15 | ||
|
|
8f92c26319 | ||
|
|
aa33446dac | ||
|
|
e2b3836326 | ||
|
|
2bd59b0e0d | ||
|
|
c1f9ca9254 | ||
|
|
15890c304e | ||
|
|
a20d2ec1c0 | ||
|
|
0beabb4776 | ||
|
|
df095cab10 | ||
|
|
9fc8379328 | ||
|
|
9dd9d58273 | ||
|
|
8fd57a97f2 | ||
|
|
d6d39c7ddb | ||
|
|
8d97ba6b22 | ||
|
|
e8b922ca63 | ||
|
|
82293f38d6 | ||
|
|
fe62c06d9b | ||
|
|
969b8959a0 | ||
|
|
f2c2465acc | ||
|
|
c3c8ad8046 | ||
|
|
39baff850c | ||
|
|
02db4e1a82 | ||
|
|
c21eaedce6 | ||
|
|
36a4500822 | ||
|
|
c2a102345f | ||
|
|
dc03b8f3a1 | ||
|
|
5b77481d58 | ||
|
|
53feb73b45 | ||
|
|
2c00ac0b53 | ||
|
|
811aadbe00 | ||
|
|
52e4cbf539 | ||
|
|
2aaaf22623 | ||
|
|
6021c90fdf | ||
|
|
8b6f53222b | ||
|
|
d616a81294 | ||
|
|
61a32f2a4c | ||
|
|
fbd7ed6ff7 | ||
|
|
0ed08fd281 | ||
|
|
8f6fedc55f | ||
|
|
c3cada38e2 | ||
|
|
360e311b66 | ||
|
|
873e6ac54b | ||
|
|
d27b0ad4c8 | ||
|
|
cd221a62ee | ||
|
|
3c69afca4c | ||
|
|
89099b0cf7 | ||
|
|
afd33539dd | ||
|
|
77e652d8ad | ||
|
|
da4970ead2 | ||
|
|
099b354ca7 | ||
|
|
5aeb3687c4 | ||
|
|
b6e306f189 | ||
|
|
1d3b253329 | ||
|
|
d79eee05ef | ||
|
|
2c1b56f4c1 | ||
|
|
c4bd3b1f21 | ||
|
|
e2946d962d | ||
|
|
e96d36d4cd | ||
|
|
abc815798b | ||
|
|
5707537592 | ||
|
|
3a5d6a3c38 | ||
|
|
f2c86384f4 | ||
|
|
22a035db95 | ||
|
|
e0cb73b46b | ||
|
|
1ae2567861 | ||
|
|
094f4a56c8 | ||
|
|
1a47844529 | ||
|
|
36369ab63c | ||
|
|
28fcb5ca2a | ||
|
|
b24cf21235 | ||
|
|
c1c7f06c35 | ||
|
|
22d02c9855 | ||
|
|
41dcd047d7 | ||
|
|
8288b0aec2 | ||
|
|
7ff5fadcc0 | ||
|
|
e8cef383b7 | ||
|
|
35df3a32eb | ||
|
|
db62719eda | ||
|
|
fdcad686ee | ||
|
|
30d97c03ce | ||
|
|
e08f54e9eb | ||
|
|
c7f54b11ec | ||
|
|
bebc5a2147 | ||
|
|
00c29c2cae | ||
|
|
2f56d91063 | ||
|
|
2a44a70142 | ||
|
|
f253e19296 | ||
|
|
2ee306e44a | ||
|
|
37197b602b | ||
|
|
27f0434233 | ||
|
|
40e4637d79 | ||
|
|
cc1ab64f29 | ||
|
|
2d7ed54ba2 | ||
|
|
e1fca8866e | ||
|
|
9b0aaf5113 | ||
|
|
3585ff585e | ||
|
|
037a463fd5 | ||
|
|
efeb89dcdb | ||
|
|
457204cb83 | ||
|
|
0322c66a3f | ||
|
|
3ac4045272 | ||
|
|
66556d0e05 | ||
|
|
034aa3b2c0 | ||
|
|
58026905ae | ||
|
|
ed783872ab | ||
|
|
bcfff64f9e | ||
|
|
f57260a997 | ||
|
|
2cd32be70b | ||
|
|
bbf9109e25 | ||
|
|
2a1bff67fd | ||
|
|
0835667329 | ||
|
|
b5d6b52a4d | ||
|
|
d041a528da | ||
|
|
cb26784d07 | ||
|
|
ff4a289572 | ||
|
|
3f715e1701 | ||
|
|
9216abe28d | ||
|
|
0fd953c217 | ||
|
|
e68e165a23 | ||
|
|
78390e4189 | ||
|
|
64bb7576eb | ||
|
|
40a16282c7 | ||
|
|
6136f4fdd4 | ||
|
|
e617711306 | ||
|
|
334e76537f | ||
|
|
b517ab349b | ||
|
|
646872cb3b | ||
|
|
3dfc391a61 | ||
|
|
3d0741f027 | ||
|
|
33a94f5dc7 | ||
|
|
6bc684ab6a | ||
|
|
283e33dea4 | ||
|
|
a5a3ba2b80 | ||
|
|
5ba0ebe7c9 | ||
|
|
7d980d74e5 | ||
|
|
5d51a7f12c | ||
|
|
7aa5bc9558 | ||
|
|
a09cbf9905 | ||
|
|
88654762da | ||
|
|
718521d5cf | ||
|
|
b5a7603822 | ||
|
|
25a03c02d6 | ||
|
|
0851d5d210 | ||
|
|
6a15e14cda | ||
|
|
ccaace03c9 | ||
|
|
c3410804cd | ||
|
|
abd1c1af7a | ||
|
|
1395056fc0 | ||
|
|
48bf5ec216 | ||
|
|
f43ae88892 | ||
|
|
8d6bd5691b | ||
|
|
998f2efc58 | ||
|
|
c028d96089 | ||
|
|
0ba32f99bd | ||
|
|
7a9d9cde94 | ||
|
|
eb0cc2573a | ||
|
|
2f656ce447 | ||
|
|
b1e080c752 | ||
|
|
5624219b6b | ||
|
|
7df811cfe5 | ||
|
|
bb3ff8e9d9 | ||
|
|
84df9142e7 | ||
|
|
21d692d054 | ||
|
|
8456bbbadb | ||
|
|
b507b82326 | ||
|
|
1fa15ceee6 | ||
|
|
548a487800 | ||
|
|
43790e009b | ||
|
|
1fbfab27a9 | ||
|
|
448d9d943c | ||
|
|
5a71eb5985 | ||
|
|
747e3290c0 | ||
|
|
f519fca72b | ||
|
|
86ae94462e | ||
|
|
997c335970 | ||
|
|
ebf6ada5ee | ||
|
|
0bb61b04ca | ||
|
|
0dbd68145f | ||
|
|
00228f2506 | ||
|
|
e7e64c3277 | ||
|
|
5fa27574dd | ||
|
|
a910bcee43 | ||
|
|
8d06c02ffd | ||
|
|
86da77cb9b | ||
|
|
92fc6add43 | ||
|
|
2d74ef9682 | ||
|
|
ccc7563ac5 | ||
|
|
575bc44c3f | ||
|
|
ccb408ee6a | ||
|
|
6761c64d60 | ||
|
|
7a54032408 | ||
|
|
ce12562710 | ||
|
|
b6ed8244b4 | ||
|
|
73693b5de6 | ||
|
|
df9a5e13c6 | ||
|
|
6a2916df80 | ||
|
|
a226f6af6b | ||
|
|
ee7da3c7c5 | ||
|
|
b6b8578a67 | ||
|
|
a80d875916 | ||
|
|
83ef39e055 | ||
|
|
b789a26804 | ||
|
|
83dfb40f66 | ||
|
|
97605c7b27 | ||
|
|
069a0b04d7 | ||
|
|
c4fc8b70ec | ||
|
|
7aa3557d31 | ||
|
|
5005b27fc8 | ||
|
|
989e419328 | ||
|
|
e55deb21c5 | ||
|
|
ae9688f313 | ||
|
|
2a54b70d45 | ||
|
|
a071629fec | ||
|
|
2f9401b061 | ||
|
|
09653e1f82 | ||
|
|
fae0493f98 | ||
|
|
886445ce4d | ||
|
|
595e890391 | ||
|
|
b6a517c47d | ||
|
|
518149e868 | ||
|
|
56a1757d74 | ||
|
|
9091351dbe | ||
|
|
02f76dae2d | ||
|
|
131e5a1a4a | ||
|
|
70195a5ff7 | ||
|
|
09a19c33a8 | ||
|
|
1a1ce6ff61 | ||
|
|
2150f13d65 | ||
|
|
7268b10203 | ||
|
|
8d4ef391b0 | ||
|
|
bda21407dd | ||
|
|
33ca7e3c8d | ||
|
|
217d984abc | ||
|
|
08348b4e48 | ||
|
|
cbb14ed47e | ||
|
|
6a5717dc74 | ||
|
|
0f61343893 | ||
|
|
3bfc9b47ca | ||
|
|
4ede059de1 | ||
|
|
bf185c3c28 | ||
|
|
661e710092 | ||
|
|
0eb69b7552 | ||
|
|
0b9e3dcd06 | ||
|
|
6bf8273bc0 | ||
|
|
4013b8feca | ||
|
|
75bd2bd32d | ||
|
|
4670d7d5ce | ||
|
|
c3859a2b58 | ||
|
|
ba95e43ea2 | ||
|
|
dc7dbc2df7 | ||
|
|
05c365fb16 | ||
|
|
691614bd2c | ||
|
|
a2d94fc216 | ||
|
|
8adf5cc70f | ||
|
|
1660e749b4 | ||
|
|
a11bd82dc3 | ||
|
|
c54ae65c83 | ||
|
|
69a8a4e1f3 | ||
|
|
678a17ba79 | ||
|
|
e3d053e14e | ||
|
|
9a48688d37 | ||
|
|
0451940fa4 | ||
|
|
910e013506 | ||
|
|
f81e413180 | ||
|
|
28d5572658 | ||
|
|
2aba40d208 | ||
|
|
a4d6e8fef0 | ||
|
|
7875437ca0 | ||
|
|
f363e533aa | ||
|
|
06d774bf58 | ||
|
|
b300a84989 | ||
|
|
dacb469bc9 | ||
|
|
62b710072e | ||
|
|
dd2b45feed | ||
|
|
be78aea6b3 | ||
|
|
75a94b9662 | ||
|
|
968ec1c2ae | ||
|
|
e2ca478485 | ||
|
|
f899e08946 | ||
|
|
4c05fb03a3 | ||
|
|
577a07a86e | ||
|
|
2c5568a757 | ||
|
|
6c3e5b85bc | ||
|
|
aad9a04da4 | ||
|
|
da50419df8 | ||
|
|
73ef5371e4 | ||
|
|
8a9228ed9b | ||
|
|
b1bd53aa6b | ||
|
|
d6c9596fd8 | ||
|
|
17fe7f354e | ||
|
|
e5f71aa6b2 | ||
|
|
44f592dceb | ||
|
|
2b890ae618 | ||
|
|
d217217842 | ||
|
|
f100d1494c | ||
|
|
d14105f158 | ||
|
|
c0882ef4d9 | ||
|
|
9d1dbd1ec0 | ||
|
|
a8c0405cf5 | ||
|
|
4f53178e62 | ||
|
|
1131a984a6 | ||
|
|
46bcb70969 | ||
|
|
c07404f6a1 | ||
|
|
ba32ded021 | ||
|
|
3b8da4be5a | ||
|
|
2f28ccbea3 | ||
|
|
7a4bd337d9 | ||
|
|
07a247dcf4 | ||
|
|
fa5a8f055a | ||
|
|
ef3ac9d05a | ||
|
|
d7b75e8d86 | ||
|
|
5e89ded685 | ||
|
|
5f85662ad8 | ||
|
|
d37ee89ca8 | ||
|
|
0f3c4c8ff4 | ||
|
|
a524a26fdc | ||
|
|
dacb23277e | ||
|
|
a5d4545083 | ||
|
|
40d1e2f8c7 | ||
|
|
87294c84a6 | ||
|
|
6ed7a7281c | ||
|
|
62f9093b31 | ||
|
|
8ed26120c8 | ||
|
|
950158f6d1 | ||
|
|
ee0459300b | ||
|
|
3ec81fc00f | ||
|
|
2b917291d9 | ||
|
|
09b9e951e3 | ||
|
|
1a325ef71c | ||
|
|
1a97fd8b4e | ||
|
|
c61170e87d | ||
|
|
318e65e0ae | ||
|
|
f629fe95c8 | ||
|
|
66b215b742 | ||
|
|
25141b69d4 | ||
|
|
ff33798acd | ||
|
|
463738ccbe | ||
|
|
6744d776ba | ||
|
|
1f48f47ab7 | ||
|
|
3dddd34133 | ||
|
|
4a164d2c46 | ||
|
|
fd9401f260 | ||
|
|
3f80696ae1 | ||
|
|
5c372d19e3 | ||
|
|
4bbc97be5e | ||
|
|
a3256d78d8 | ||
|
|
33adce5c3a | ||
|
|
79b900375f | ||
|
|
f670613e4b | ||
|
|
6015422ee6 | ||
|
|
32ffce04fc | ||
|
|
e5b2ef47d5 | ||
|
|
2dde1b1028 | ||
|
|
a792cd357d | ||
|
|
80200a1828 | ||
|
|
c7c2054bb5 | ||
|
|
1d0238375d | ||
|
|
2b72163028 | ||
|
|
04f954956d | ||
|
|
5b1106c56b | ||
|
|
7129d998db | ||
|
|
b9ea40c30d | ||
|
|
884c075058 | ||
|
|
a278414d1b | ||
|
|
f953c60705 | ||
|
|
50968a0a3e | ||
|
|
84543c8be2 | ||
|
|
5fbcfe5eb4 | ||
|
|
e4fe611e2c | ||
|
|
c8e8f93d6c | ||
|
|
1a16fb1532 | ||
|
|
7cff898e0a | ||
|
|
6c43c49e4a | ||
|
|
eb669f989f | ||
|
|
2a7115daca | ||
|
|
3718bf654b | ||
|
|
40c9923a8a | ||
|
|
1d23430628 | ||
|
|
1e80bddde3 | ||
|
|
0e8fc31087 | ||
|
|
a62e924656 | ||
|
|
18e6f67426 | ||
|
|
07ac4f7e02 | ||
|
|
9624a1ea3d | ||
|
|
6fbedf5a4e | ||
|
|
bebb89acfa | ||
|
|
9c064b5a97 | ||
|
|
1372156c41 | ||
|
|
aeb5494a0b | ||
|
|
00dfe18487 | ||
|
|
a8e8837ba7 | ||
|
|
78a51abc12 | ||
|
|
d2e95492e7 | ||
|
|
235e83aba6 | ||
|
|
68897c52f3 | ||
|
|
20f387fafa | ||
|
|
7718749fee | ||
|
|
5379d2b594 | ||
|
|
5912ad877c | ||
|
|
2b6e3de02f | ||
|
|
6f23e945f6 | ||
|
|
72510c80e1 | ||
|
|
7b3d7acebe | ||
|
|
5c13765ee3 | ||
|
|
c7167fee0e | ||
|
|
f6003f0873 | ||
|
|
3551dea887 | ||
|
|
d8a3bdaa24 | ||
|
|
d402adc3d7 | ||
|
|
ea7087ef31 | ||
|
|
36f5a10198 | ||
|
|
10b69810d1 | ||
|
|
d6105b53b8 | ||
|
|
703251f10f | ||
|
|
39211ba46b | ||
|
|
2986253259 | ||
|
|
d5de1a8220 | ||
|
|
87ca15c4e8 | ||
|
|
2c9e4fa417 | ||
|
|
7d1391d049 | ||
|
|
feef39e2d1 | ||
|
|
f4e12272f1 | ||
|
|
a95e1a273e | ||
|
|
bf3f6688f0 | ||
|
|
473c8380ea | ||
|
|
42a3352a3b | ||
|
|
6f960b83ff | ||
|
|
b7716c0328 | ||
|
|
2630d97c62 | ||
|
|
512ba0ac76 | ||
|
|
06447e0a39 | ||
|
|
bbb8854bf7 | ||
|
|
e9b12cc1f7 | ||
|
|
eaeb6ca93a | ||
|
|
aa1ba8bbd2 | ||
|
|
e49945ced4 | ||
|
|
25d05c4b8f | ||
|
|
53121c0119 | ||
|
|
b67c983291 | ||
|
|
e3a184785c | ||
|
|
3b76df64fc | ||
|
|
8bfe739cd2 | ||
|
|
6498dadc2f | ||
|
|
d6e596174d | ||
|
|
748c4c4599 | ||
|
|
833efb39bf | ||
|
|
e939b087fe | ||
|
|
1744b5b5d2 | ||
|
|
91bf925fc1 | ||
|
|
0483430283 | ||
|
|
097d1e8823 | ||
|
|
fec4c334ba | ||
|
|
ddabc992fa | ||
|
|
8191f373be | ||
|
|
6a9ca88e7e | ||
|
|
4e8e5888d7 | ||
|
|
675e0a2224 | ||
|
|
fc6a0ebb1c | ||
|
|
643b697649 | ||
|
|
1f70bd4134 | ||
|
|
096e355f8e | ||
|
|
be80fb49fc | ||
|
|
7a8176587b | ||
|
|
4a92b590a0 | ||
|
|
ee6c69733a | ||
|
|
7f4826890c | ||
|
|
de057ebe54 | ||
|
|
af2161cdb4 | ||
|
|
a05a683d83 | ||
|
|
5da90fc8dd | ||
|
|
01bd577288 | ||
|
|
89a3dc35a3 | ||
|
|
995f202cea | ||
|
|
8d22967bd9 | ||
|
|
3394379319 | ||
|
|
0d2a532fc3 | ||
|
|
0b81a18d12 | ||
|
|
2d072b38c1 | ||
|
|
cd2b667ac8 | ||
|
|
3bd16457e1 | ||
|
|
63102ee43d | ||
|
|
7c47d3e663 | ||
|
|
166b56bc61 | ||
|
|
2f2801f096 | ||
|
|
d962fe6a99 | ||
|
|
c34e55c62b | ||
|
|
7d5b17087f | ||
|
|
a6d08be9b2 | ||
|
|
283c51cd5e | ||
|
|
d51803a728 | ||
|
|
fd872aefb3 | ||
|
|
0b5546d182 | ||
|
|
2d5bb375b7 | ||
|
|
a02ec09511 | ||
|
|
c912b1d28c | ||
|
|
74f64838c5 | ||
|
|
737644366f | ||
|
|
dc45aaeb93 | ||
|
|
8db269e055 | ||
|
|
b89d3f78b2 | ||
|
|
48308ed801 | ||
|
|
cfb34d808b | ||
|
|
df838736e2 | ||
|
|
14ea7c7ec7 | ||
|
|
532fdf24cb | ||
|
|
165150e896 | ||
|
|
7be1eaad1e | ||
|
|
58c1dbff19 | ||
|
|
cf7e73addd | ||
|
|
10bdd8e378 | ||
|
|
7781f865cb | ||
|
|
72abfa11dd | ||
|
|
4d07064a3d | ||
|
|
2bba4ee2cf | ||
|
|
317384b397 | ||
|
|
726bd5f077 | ||
|
|
a350c25a39 | ||
|
|
4d7e230d2f | ||
|
|
bc0ad363c6 | ||
|
|
2d41dc9622 | ||
|
|
644d0f91d2 | ||
|
|
988344daf1 | ||
|
|
d7eeee0c1d | ||
|
|
513c372960 | ||
|
|
86e0ed81f8 | ||
|
|
158fea0f5e | ||
|
|
03f2997a11 | ||
|
|
b0143de177 | ||
|
|
b97911dd18 | ||
|
|
c4179dd470 | ||
|
|
1108b4f218 | ||
|
|
b8245cc325 | ||
|
|
2b457f8e5e | ||
|
|
30242b7565 | ||
|
|
9d51f7c457 | ||
|
|
f54137606e | ||
|
|
fb8dccc23e | ||
|
|
91414e0042 | ||
|
|
1aa89fb855 | ||
|
|
80eba21ad0 | ||
|
|
eb0ae602bd | ||
|
|
ffd770ce94 | ||
|
|
3cb0a237c1 | ||
|
|
49960adbdd | ||
|
|
57239f4a81 | ||
|
|
dd5d390daf | ||
|
|
3da495e6b9 | ||
|
|
0ea7ab4f62 | ||
|
|
92b7f7b650 | ||
|
|
f197813f37 | ||
|
|
0f5cc504fe | ||
|
|
4c859181da | ||
|
|
c36ab19902 | ||
|
|
b575fb1d02 | ||
|
|
8c8a79cec1 | ||
|
|
af4ef540bf | ||
|
|
791e5cfb69 | ||
|
|
4f1a7e51c1 | ||
|
|
bc68fc2fe7 | ||
|
|
483aaad10a | ||
|
|
1b40abbf99 | ||
|
|
01b5333e44 | ||
|
|
aa45ad2aac | ||
|
|
56df5ef1d7 | ||
|
|
1bcd82e31b | ||
|
|
09ad31aa85 | ||
|
|
1841d6d4c3 | ||
|
|
7b7d2a9fa5 | ||
|
|
e02b784ec3 | ||
|
|
c38295f0a0 | ||
|
|
6772f653c3 | ||
|
|
1bc81f7889 | ||
|
|
78f83d6f6a | ||
|
|
01e7298fe6 | ||
|
|
f48011119e | ||
|
|
ed6b9d08f1 | ||
|
|
74f91ed06c | ||
|
|
65716e99a5 | ||
|
|
d94f6ba965 | ||
|
|
a86c9f037b | ||
|
|
0968e925a0 | ||
|
|
044efea965 | ||
|
|
2e4e4cb74d | ||
|
|
41434a8a85 | ||
|
|
92693b50eb | ||
|
|
9bc9396e88 | ||
|
|
393bc3b16b | ||
|
|
81d340984a | ||
|
|
bff8cbad06 | ||
|
|
7a570e50ef | ||
|
|
7168afde5e | ||
|
|
2062ee2d26 | ||
|
|
fc3660285f | ||
|
|
0e68882604 | ||
|
|
6971146ca9 | ||
|
|
9642fd7a93 | ||
|
|
28e02996df | ||
|
|
3d1e857327 | ||
|
|
de7d92c259 | ||
|
|
002cf0d1c9 | ||
|
|
bc2b802751 | ||
|
|
e7a468c5b7 | ||
|
|
6a31b7be3e | ||
|
|
65a7113a36 | ||
|
|
f9ad25e4d8 | ||
|
|
8ef3181f15 | ||
|
|
7a07d6aa2b | ||
|
|
a9a6710e15 | ||
|
|
e91f255301 | ||
|
|
db5c14de42 | ||
|
|
b224771f40 | ||
|
|
8e03333f06 | ||
|
|
6c08943d9f | ||
|
|
134d750eab | ||
|
|
7bd551b3a9 | ||
|
|
bb0e73c191 | ||
|
|
2db4a04827 | ||
|
|
dd514de8a9 | ||
|
|
9a7ece9caf | ||
|
|
edc679f6c6 | ||
|
|
53d498ef06 | ||
|
|
e10e126cd0 | ||
|
|
70eb70f5f8 | ||
|
|
7b98de1f15 | ||
|
|
afb81b7ded | ||
|
|
95b8961a9b | ||
|
|
f7329619da | ||
|
|
46aa9772fc | ||
|
|
f1f7181f53 | ||
|
|
1f69a1b65f | ||
|
|
ab9b749b45 | ||
|
|
5a51366ea5 | ||
|
|
fcf59e1c37 | ||
|
|
97a1f1c273 | ||
|
|
9550be925d | ||
|
|
b1a9afe9a9 | ||
|
|
e29c9676b1 | ||
|
|
eecd914864 | ||
|
|
1ca8c1ec97 | ||
|
|
0423b66187 | ||
|
|
048c4d6efd | ||
|
|
b72ffcb05e | ||
|
|
25f69cb932 | ||
|
|
c5b98a58b8 | ||
|
|
456e038a4e | ||
|
|
86d45a3c83 | ||
|
|
8fd4241377 | ||
|
|
a685a6beed | ||
|
|
3149b5b148 | ||
|
|
b2100b83ad | ||
|
|
f05fb449b8 | ||
|
|
1d566417d2 | ||
|
|
836e92a051 | ||
|
|
b084133dbf | ||
|
|
6d30683113 | ||
|
|
5e7de771e3 | ||
|
|
73220d2bb0 | ||
|
|
5f17de3393 | ||
|
|
14f0fde51f | ||
|
|
46177c8d64 | ||
|
|
6d6413f768 | ||
|
|
5a427a94a9 | ||
|
|
a81b88bef7 | ||
|
|
551ff11d0d | ||
|
|
8768c063f5 | ||
|
|
e09eb835db | ||
|
|
3b614a2358 | ||
|
|
dd6dcad6c2 | ||
|
|
0bb5de05a1 | ||
|
|
769685e74e | ||
|
|
7f87cc3a3b | ||
|
|
5707004d6b | ||
|
|
e5f25622e2 | ||
|
|
9f5740cbc1 | ||
|
|
5238e03fe1 | ||
|
|
9a54c3e32b | ||
|
|
05bbca079a | ||
|
|
6093eb9ff5 | ||
|
|
57b263c5b9 | ||
|
|
23aed8f2e4 | ||
|
|
0b9e0abc96 | ||
|
|
c23e0be18f | ||
|
|
ec35068edc | ||
|
|
60d9df11c1 | ||
|
|
4e49fd5eb9 | ||
|
|
667fcc2b53 | ||
|
|
4416a5dcff | ||
|
|
c561eeb7bf | ||
|
|
174edf976b | ||
|
|
2c50fc878e | ||
|
|
7ea35bfa1c | ||
|
|
deea866bbd | ||
|
|
5cf4558c0a | ||
|
|
dac58d7c35 | ||
|
|
1032441c6f | ||
|
|
1da10a7358 | ||
|
|
2de8cc9122 | ||
|
|
ab3dc0b0fe | ||
|
|
6afea46838 | ||
|
|
3fccef6f50 | ||
|
|
fedaf19262 | ||
|
|
dfa80b2060 | ||
|
|
bee9efc203 | ||
|
|
68ac5c1738 | ||
|
|
56a3ada670 | ||
|
|
b2075cb7a2 | ||
|
|
3284842045 | ||
|
|
e9bea614ec | ||
|
|
609b3337a7 | ||
|
|
1031b31571 | ||
|
|
8e6faab51e | ||
|
|
ac2e6e0d03 | ||
|
|
caa54d888f | ||
|
|
93485d86bc | ||
|
|
002824e32d | ||
|
|
2cd32cad27 | ||
|
|
d9d05dd96e | ||
|
|
af199b4658 | ||
|
|
c36c09169e | ||
|
|
7a01cb8e4b | ||
|
|
91e1375ba9 | ||
|
|
055000a424 | ||
|
|
1d9256f7db | ||
|
|
8cb9bfab87 | ||
|
|
f442a5a5b3 | ||
|
|
72d2cf642e | ||
|
|
6270d851e3 | ||
|
|
5cd00068c0 | ||
|
|
257b640463 | ||
|
|
584832cb3c | ||
|
|
e644f60907 | ||
|
|
95fceb6452 | ||
|
|
ed69cbeef0 | ||
|
|
96a24b05cc | ||
|
|
203490017f | ||
|
|
9ff269a1d3 | ||
|
|
1e6fe6f046 | ||
|
|
8eb127022b | ||
|
|
d5e2ec7447 | ||
|
|
d90a2dac5e | ||
|
|
485823b5f5 | ||
|
|
2af04f1a57 | ||
|
|
6fa35bbd28 | ||
|
|
46fc23f91c | ||
|
|
6f0992c05b | ||
|
|
f3643eec57 | ||
|
|
670db7988d | ||
|
|
180156ba1a | ||
|
|
5c4901b83a | ||
|
|
f268db1c4b | ||
|
|
a08d2ff0c9 | ||
|
|
f3352e0fb0 | ||
|
|
f7cb755299 | ||
|
|
dc26459b99 | ||
|
|
cd042dbbfd | ||
|
|
ac5d706a94 | ||
|
|
0606a0a39b | ||
|
|
f36c0c2c65 | ||
|
|
7151bd8768 | ||
|
|
1304e1fb5e | ||
|
|
17b9fbed34 | ||
|
|
8ce46f9d89 | ||
|
|
832380c455 | ||
|
|
06a2bc7c9c | ||
|
|
f6f057bb7d | ||
|
|
8e1f1ba6a6 | ||
|
|
c8b4c4b48a | ||
|
|
6eff3e5185 | ||
|
|
4252af6897 | ||
|
|
2f5f56a820 | ||
|
|
0a537cb2d8 | ||
|
|
b35d1a122e | ||
|
|
9e3f3a2d27 | ||
|
|
de345eff2e | ||
|
|
17e93ba148 | ||
|
|
3628f7655d | ||
|
|
eeac46f980 | ||
|
|
c569cfe12a | ||
|
|
8cc9232b9a | ||
|
|
2d35c0cb5f | ||
|
|
33e2373f01 | ||
|
|
6d8b1dce06 | ||
|
|
1dfaafe28a | ||
|
|
b6fdf7468c | ||
|
|
9d6f1ad398 | ||
|
|
bfb3fcd94f | ||
|
|
2d523332b3 | ||
|
|
e6ee18d6b4 | ||
|
|
2ac59e5d36 | ||
|
|
9a21b38ccc | ||
|
|
964a95bf5e | ||
|
|
72ab7879f7 | ||
|
|
e88535634d | ||
|
|
970751ece3 | ||
|
|
6323851ea9 | ||
|
|
bb8811c655 | ||
|
|
5cc0dd5f44 | ||
|
|
a9cc6a06b9 | ||
|
|
d69946183d | ||
|
|
24d291cf16 | ||
|
|
53f60e0afc | ||
|
|
414efa47d3 | ||
|
|
c2bf2f56ef | ||
|
|
a4c76f8d34 | ||
|
|
16ec450ca1 | ||
|
|
019fff9a00 | ||
|
|
86d6201d7b | ||
|
|
d904c8ac8f | ||
|
|
010afe1619 | ||
|
|
8e599bc098 | ||
|
|
c6a12d1dc6 | ||
|
|
4d4211c04e | ||
|
|
d2cba52015 | ||
|
|
fb00a4af2b | ||
|
|
5b2d287878 | ||
|
|
e8e1d504d6 | ||
|
|
62a1c911cd | ||
|
|
093f2b3c01 | ||
|
|
3ca1ae2bb7 | ||
|
|
2e39cc40a4 | ||
|
|
23f69ab936 | ||
|
|
6c9cf117c1 | ||
|
|
bcdcdace48 | ||
|
|
659fc9c159 | ||
|
|
d5d7798b9d | ||
|
|
f535378995 | ||
|
|
4ab63a3f6f | ||
|
|
727ff26960 | ||
|
|
1cbb79cdfd | ||
|
|
bcbde37a11 | ||
|
|
f933f69021 | ||
|
|
5d82e47ef6 | ||
|
|
af8436b196 | ||
|
|
d7742d22e4 | ||
|
|
ac26e1aaf3 | ||
|
|
492fe7ce02 | ||
|
|
b70db60e4d | ||
|
|
5fb04ab2da | ||
|
|
d9a6f86cc0 | ||
|
|
70dc14e4e1 | ||
|
|
c301f99208 | ||
|
|
eb6d9aea0e | ||
|
|
040cf33e8f | ||
|
|
c85fbfd0b7 | ||
|
|
dc413dbe8a | ||
|
|
783018d8f6 | ||
|
|
99cde88341 | ||
|
|
ff0a83aaf8 | ||
|
|
aedea349aa | ||
|
|
f0fdefa96f | ||
|
|
d93b71a301 | ||
|
|
ef66f2887b | ||
|
|
85b6d82b49 | ||
|
|
64ce78c2ec | ||
|
|
0ce5d32be5 | ||
|
|
922b5f527b | ||
|
|
6b5dff875e | ||
|
|
00f9ef6c76 | ||
|
|
264f8141f8 | ||
|
|
11bb71c8fc | ||
|
|
ff1157bcbf | ||
|
|
ec469700dc | ||
|
|
6e0a86194c | ||
|
|
e1d219e5c9 | ||
|
|
2c3224924b | ||
|
|
e80ed948e1 | ||
|
|
6720b38fbf | ||
|
|
3f1ee45833 | ||
|
|
70be6f6531 | ||
|
|
4a2ddfb81d | ||
|
|
d142165942 | ||
|
|
a4e4542b89 | ||
|
|
3ba8a3ab1a | ||
|
|
483082ef6e | ||
|
|
bd21aba181 | ||
|
|
9de155d153 | ||
|
|
32088c06a1 | ||
|
|
2053478c56 | ||
|
|
d0db95f730 | ||
|
|
ba27c8a7de | ||
|
|
4281eb1e2c | ||
|
|
102fa96a96 | ||
|
|
1bc207c528 | ||
|
|
8ed1553d20 | ||
|
|
699634890a | ||
|
|
15084cf1ac | ||
|
|
d4a9e61569 | ||
|
|
d8d37349c3 | ||
|
|
e8ccc06fe5 | ||
|
|
963f2d2a8f | ||
|
|
c5d25bf1d0 | ||
|
|
e4f83bae5d | ||
|
|
10bea90c4a | ||
|
|
d3f533b395 | ||
|
|
3fde202215 | ||
|
|
211d350fc3 | ||
|
|
bd2e5a788a | ||
|
|
120e13b1b6 | ||
|
|
a68864b6bc | ||
|
|
8217281ae4 | ||
|
|
c8d5f21941 | ||
|
|
7b3044d086 | ||
|
|
acce4dd050 | ||
|
|
b908e071a8 | ||
|
|
c1d900af61 | ||
|
|
12f8bd12a2 | ||
|
|
270c4e1ecd | ||
|
|
c4e47630b1 | ||
|
|
f50bb1e6f3 | ||
|
|
ddf64babde | ||
|
|
df15fbc452 | ||
|
|
9bcadb7fd1 | ||
|
|
b4d87fff4a | ||
|
|
2bad3e78d9 | ||
|
|
7802a6bb1c | ||
|
|
4bf9eaf77a | ||
|
|
291069e885 | ||
|
|
47ca9dc809 | ||
|
|
55a5204319 | ||
|
|
4865e1e732 | ||
|
|
9dfbd4fe8d | ||
|
|
5973bcf939 | ||
|
|
c8d94ae944 | ||
|
|
9c8f7dfe94 | ||
|
|
9aa6fae123 | ||
|
|
9ac8e8c6a1 | ||
|
|
6da5d87f92 | ||
|
|
9801c959e6 | ||
|
|
21b5345782 | ||
|
|
fecea26d93 | ||
|
|
02db1228ed | ||
|
|
412bb5a631 | ||
|
|
0f8d26c6a9 | ||
|
|
cfa21f8123 | ||
|
|
6850eab33b | ||
|
|
aa8c6a251e | ||
|
|
5b0a9ee003 | ||
|
|
639b1d864a | ||
|
|
44d4674955 | ||
|
|
5eb2790be0 | ||
|
|
8328caa618 | ||
|
|
2f9e6314b1 | ||
|
|
e58827d2ed | ||
|
|
c10021c00a | ||
|
|
b114e6fd3b | ||
|
|
869b4443ac | ||
|
|
e3a15a03a4 | ||
|
|
bd207ce11e | ||
|
|
acf6f7af6b | ||
|
|
cc482e32f1 | ||
|
|
19e437daf0 | ||
|
|
1cf85bd875 | ||
|
|
369d6d1ae3 | ||
|
|
0caa4b1531 | ||
|
|
cb4e53ff7f | ||
|
|
9f94e030c1 | ||
|
|
3aeeca32af | ||
|
|
4beb447e27 | ||
|
|
5358c38589 | ||
|
|
6a44ccb58b | ||
|
|
06971223ef | ||
|
|
9b6c72958a | ||
|
|
73aec9219b | ||
|
|
7b68cf2e0f | ||
|
|
c33479324c | ||
|
|
ee37eb4eed | ||
|
|
7cae8918c0 | ||
|
|
8dca9f97e3 | ||
|
|
34340458cb | ||
|
|
690bc950f7 | ||
|
|
f2a842294f | ||
|
|
7ce932edd3 | ||
|
|
62f7e77711 | ||
|
|
47076bf00e | ||
|
|
ebd3388ee6 | ||
|
|
ed8ade9c65 | ||
|
|
4c5e96aab6 | ||
|
|
db237d0c75 | ||
|
|
0b7169d1f7 | ||
|
|
234a1094b7 | ||
|
|
915e7667cd | ||
|
|
2832175a68 | ||
|
|
df79c00901 | ||
|
|
6d472d8375 | ||
|
|
b3b722905f | ||
|
|
5b7713dd33 | ||
|
|
63fb66f53a | ||
|
|
6a75e7e0d5 | ||
|
|
34057cff23 | ||
|
|
0461f0153e | ||
|
|
aed4cb1269 | ||
|
|
c1a42c2d0d | ||
|
|
0366478df8 | ||
|
|
3cfd16f3af | ||
|
|
67f44365ea | ||
|
|
a282eb1363 | ||
|
|
7832485575 | ||
|
|
8fe2532e70 | ||
|
|
9f013a9d86 | ||
|
|
79b69b7444 | ||
|
|
d920d57f38 | ||
|
|
bd7d901da9 | ||
|
|
bbdabbb379 | ||
|
|
c5e6900400 | ||
|
|
f894736d61 | ||
|
|
4f7714d72c | ||
|
|
01c55d37e6 | ||
|
|
0504c56ea7 | ||
|
|
b523771a24 | ||
|
|
2c3b13eded | ||
|
|
2ccb1c8634 | ||
|
|
780623261e | ||
|
|
91678f489a | ||
|
|
403a7cb6c3 | ||
|
|
b557662e58 | ||
|
|
8b9dc9f0df | ||
|
|
e76904af1b | ||
|
|
d726e864ac | ||
|
|
3358dfd5dd | ||
|
|
53749ff415 | ||
|
|
f9d71a1729 | ||
|
|
6639b7d6e8 | ||
|
|
0cb2ca5de2 | ||
|
|
213459d818 | ||
|
|
ee738321aa | ||
|
|
54bf582303 | ||
|
|
99093c0fe0 | ||
|
|
cfff40b1d4 | ||
|
|
515dee0baf | ||
|
|
715f6f049f | ||
|
|
8b0d1eb0f7 | ||
|
|
9900782e88 | ||
|
|
70404e07c2 | ||
|
|
addb7066e8 | ||
|
|
eadc377b3f | ||
|
|
29bb599e03 | ||
|
|
162ccb2938 | ||
|
|
5fae3750b5 | ||
|
|
b84cefe61d | ||
|
|
16dd82ed51 | ||
|
|
978c379ed7 | ||
|
|
25f2b8d824 | ||
|
|
f2471f31e0 | ||
|
|
afb35385bf | ||
|
|
14897600b7 | ||
|
|
cef81c9084 | ||
|
|
f7362772e3 | ||
|
|
d2e288ae50 | ||
|
|
bdcbc66a5c | ||
|
|
a1e08fb2a5 | ||
|
|
9c7d96697b | ||
|
|
e7e6d01810 | ||
|
|
b5d2078c4a | ||
|
|
3504ae47ca | ||
|
|
1c3e78319d | ||
|
|
9f9d8d2f62 | ||
|
|
b74887d5f2 | ||
|
|
6ffb208c77 | ||
|
|
994d1c60b9 | ||
|
|
b407948a77 | ||
|
|
a6c243617b | ||
|
|
51be91f15e | ||
|
|
95dd423cca | ||
|
|
c006ecace1 | ||
|
|
bfed274df3 | ||
|
|
b091b7e6ea | ||
|
|
fabd8474ff | ||
|
|
6752a69aa5 | ||
|
|
6d777e1bc7 | ||
|
|
1b82969559 | ||
|
|
75a7fa1919 | ||
|
|
3abd8470ca | ||
|
|
8dd17cbe80 | ||
|
|
4aac55f684 | ||
|
|
40e6250fc3 | ||
|
|
2ca55a3ae4 | ||
|
|
55aef139ff | ||
|
|
b8861b0c25 | ||
|
|
9a415fb1e2 | ||
|
|
ccc7b0ffea | ||
|
|
5e0a178df2 |
143
CMakeLists.txt
143
CMakeLists.txt
@@ -1,4 +1,4 @@
|
||||
project(Eigen)
|
||||
project(Eigen3)
|
||||
|
||||
cmake_minimum_required(VERSION 2.8.5)
|
||||
|
||||
@@ -8,6 +8,11 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
|
||||
message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. You may need to remove CMakeCache.txt. ")
|
||||
endif()
|
||||
|
||||
# Alias Eigen_*_DIR to Eigen3_*_DIR:
|
||||
|
||||
set(Eigen_SOURCE_DIR ${Eigen3_SOURCE_DIR})
|
||||
set(Eigen_BINARY_DIR ${Eigen3_BINARY_DIR})
|
||||
|
||||
# guard against bad build-type strings
|
||||
|
||||
if (NOT CMAKE_BUILD_TYPE)
|
||||
@@ -93,9 +98,11 @@ else()
|
||||
endif()
|
||||
|
||||
option(EIGEN_BUILD_BTL "Build benchmark suite" OFF)
|
||||
if(NOT WIN32)
|
||||
|
||||
# Disable pkgconfig only for native Windows builds
|
||||
if(NOT WIN32 OR NOT CMAKE_HOST_SYSTEM_NAME MATCHES Windows)
|
||||
option(EIGEN_BUILD_PKGCONFIG "Build pkg-config .pc file for Eigen" ON)
|
||||
endif(NOT WIN32)
|
||||
endif()
|
||||
|
||||
set(CMAKE_INCLUDE_CURRENT_DIR ON)
|
||||
|
||||
@@ -120,13 +127,12 @@ endmacro(ei_add_cxx_compiler_flag)
|
||||
if(NOT MSVC)
|
||||
# We assume that other compilers are partly compatible with GNUCC
|
||||
|
||||
# clang outputs some warnings for unknwon flags that are not caught by check_cxx_compiler_flag
|
||||
# clang outputs some warnings for unknown flags that are not caught by check_cxx_compiler_flag
|
||||
# adding -Werror turns such warnings into errors
|
||||
check_cxx_compiler_flag("-Werror" COMPILER_SUPPORT_WERROR)
|
||||
if(COMPILER_SUPPORT_WERROR)
|
||||
set(CMAKE_REQUIRED_FLAGS "-Werror")
|
||||
endif()
|
||||
|
||||
ei_add_cxx_compiler_flag("-pedantic")
|
||||
ei_add_cxx_compiler_flag("-Wall")
|
||||
ei_add_cxx_compiler_flag("-Wextra")
|
||||
@@ -141,8 +147,11 @@ if(NOT MSVC)
|
||||
ei_add_cxx_compiler_flag("-Wwrite-strings")
|
||||
ei_add_cxx_compiler_flag("-Wformat-security")
|
||||
ei_add_cxx_compiler_flag("-Wshorten-64-to-32")
|
||||
ei_add_cxx_compiler_flag("-Wlogical-op")
|
||||
ei_add_cxx_compiler_flag("-Wenum-conversion")
|
||||
ei_add_cxx_compiler_flag("-Wc++11-extensions")
|
||||
ei_add_cxx_compiler_flag("-Wdouble-promotion")
|
||||
# ei_add_cxx_compiler_flag("-Wconversion")
|
||||
|
||||
# -Wshadow is insanely too strict with gcc, hopefully it will become usable with gcc 6
|
||||
# if(NOT CMAKE_COMPILER_IS_GNUCXX OR (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "5.0.0"))
|
||||
@@ -158,7 +167,7 @@ if(NOT MSVC)
|
||||
ei_add_cxx_compiler_flag("-fno-common")
|
||||
ei_add_cxx_compiler_flag("-fstrict-aliasing")
|
||||
ei_add_cxx_compiler_flag("-wd981") # disable ICC's "operands are evaluated in unspecified order" remark
|
||||
ei_add_cxx_compiler_flag("-wd2304") # disbale ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor
|
||||
ei_add_cxx_compiler_flag("-wd2304") # disable ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor
|
||||
|
||||
|
||||
# The -ansi flag must be added last, otherwise it is also used as a linker flag by check_cxx_compiler_flag making it fails
|
||||
@@ -221,6 +230,18 @@ if(NOT MSVC)
|
||||
message(STATUS "Enabling FMA in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_AVX512 "Enable/Disable AVX512 in tests/examples" OFF)
|
||||
if(EIGEN_TEST_AVX512)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -fabi-version=6 -DEIGEN_ENABLE_AVX512")
|
||||
message(STATUS "Enabling AVX512 in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_F16C "Enable/Disable F16C in tests/examples" OFF)
|
||||
if(EIGEN_TEST_F16C)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c")
|
||||
message(STATUS "Enabling F16C in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_ALTIVEC "Enable/Disable AltiVec in tests/examples" OFF)
|
||||
if(EIGEN_TEST_ALTIVEC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec")
|
||||
@@ -240,7 +261,7 @@ if(NOT MSVC)
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=hard")
|
||||
message(STATUS "Enabling NEON in tests/examples")
|
||||
endif()
|
||||
|
||||
@@ -250,7 +271,11 @@ if(NOT MSVC)
|
||||
message(STATUS "Enabling NEON in tests/examples")
|
||||
endif()
|
||||
|
||||
|
||||
option(EIGEN_TEST_ZVECTOR "Enable/Disable S390X(zEC13) ZVECTOR in tests/examples" OFF)
|
||||
if(EIGEN_TEST_ZVECTOR)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=z13 -mzvector")
|
||||
message(STATUS "Enabling S390X(zEC13) ZVECTOR in tests/examples")
|
||||
endif()
|
||||
|
||||
check_cxx_compiler_flag("-fopenmp" COMPILER_SUPPORT_OPENMP)
|
||||
if(COMPILER_SUPPORT_OPENMP)
|
||||
@@ -336,6 +361,8 @@ endif()
|
||||
|
||||
option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tensor module)." OFF)
|
||||
|
||||
set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture level to target when compiling CUDA code")
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
# Backward compatibility support for EIGEN_INCLUDE_INSTALL_DIR
|
||||
@@ -353,7 +380,7 @@ else()
|
||||
)
|
||||
endif()
|
||||
set(CMAKEPACKAGE_INSTALL_DIR
|
||||
"${CMAKE_INSTALL_LIBDIR}/cmake/eigen3"
|
||||
"${CMAKE_INSTALL_DATADIR}/eigen3/cmake"
|
||||
CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen3Config.cmake is installed"
|
||||
)
|
||||
set(PKGCONFIG_INSTALL_DIR
|
||||
@@ -383,7 +410,7 @@ if(EIGEN_BUILD_PKGCONFIG)
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc
|
||||
DESTINATION ${PKGCONFIG_INSTALL_DIR}
|
||||
)
|
||||
endif(EIGEN_BUILD_PKGCONFIG)
|
||||
endif()
|
||||
|
||||
add_subdirectory(Eigen)
|
||||
|
||||
@@ -409,6 +436,13 @@ else()
|
||||
add_subdirectory(lapack EXCLUDE_FROM_ALL)
|
||||
endif()
|
||||
|
||||
# add SYCL
|
||||
option(EIGEN_TEST_SYCL "Add Sycl support." OFF)
|
||||
if(EIGEN_TEST_SYCL)
|
||||
set (CMAKE_MODULE_PATH "${CMAKE_ROOT}/Modules" "cmake/Modules/" "${CMAKE_MODULE_PATH}")
|
||||
include(FindComputeCpp)
|
||||
endif()
|
||||
|
||||
add_subdirectory(unsupported)
|
||||
|
||||
add_subdirectory(demos EXCLUDE_FROM_ALL)
|
||||
@@ -473,18 +507,89 @@ set ( EIGEN_VERSION_MINOR ${EIGEN_MAJOR_VERSION} )
|
||||
set ( EIGEN_VERSION_PATCH ${EIGEN_MINOR_VERSION} )
|
||||
set ( EIGEN_DEFINITIONS "")
|
||||
set ( EIGEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}" )
|
||||
set ( EIGEN_INCLUDE_DIRS ${EIGEN_INCLUDE_DIR} )
|
||||
set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} )
|
||||
|
||||
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
@ONLY ESCAPE_QUOTES
|
||||
)
|
||||
# Interface libraries require at least CMake 3.0
|
||||
if (NOT CMAKE_VERSION VERSION_LESS 3.0)
|
||||
include (CMakePackageConfigHelpers)
|
||||
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
|
||||
)
|
||||
# Imported target support
|
||||
add_library (eigen INTERFACE)
|
||||
|
||||
target_compile_definitions (eigen INTERFACE ${EIGEN_DEFINITIONS})
|
||||
target_include_directories (eigen INTERFACE
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
|
||||
$<INSTALL_INTERFACE:${INCLUDE_INSTALL_DIR}>
|
||||
)
|
||||
|
||||
# Export as title case Eigen
|
||||
set_target_properties (eigen PROPERTIES EXPORT_NAME Eigen)
|
||||
|
||||
install (TARGETS eigen EXPORT Eigen3Targets)
|
||||
|
||||
configure_package_config_file (
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
PATH_VARS EIGEN_INCLUDE_DIR EIGEN_ROOT_DIR
|
||||
INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
|
||||
NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components
|
||||
)
|
||||
# Remove CMAKE_SIZEOF_VOID_P from Eigen3ConfigVersion.cmake since Eigen does
|
||||
# not depend on architecture specific settings or libraries. More
|
||||
# specifically, an Eigen3Config.cmake generated from a 64 bit target can be
|
||||
# used for 32 bit targets as well (and vice versa).
|
||||
set (_Eigen3_CMAKE_SIZEOF_VOID_P ${CMAKE_SIZEOF_VOID_P})
|
||||
unset (CMAKE_SIZEOF_VOID_P)
|
||||
write_basic_package_version_file (Eigen3ConfigVersion.cmake
|
||||
VERSION ${EIGEN_VERSION_NUMBER} COMPATIBILITY SameMajorVersion)
|
||||
set (CMAKE_SIZEOF_VOID_P ${_Eigen3_CMAKE_SIZEOF_VOID_P})
|
||||
|
||||
# The Eigen target will be located in the Eigen3 namespace. Other CMake
|
||||
# targets can refer to it using Eigen3::Eigen.
|
||||
export (TARGETS eigen NAMESPACE Eigen3:: FILE Eigen3Targets.cmake)
|
||||
# Export Eigen3 package to CMake registry such that it can be easily found by
|
||||
# CMake even if it has not been installed to a standard directory.
|
||||
export (PACKAGE Eigen3)
|
||||
|
||||
install (EXPORT Eigen3Targets NAMESPACE Eigen3:: DESTINATION
|
||||
${CMAKEPACKAGE_INSTALL_DIR})
|
||||
install (FILES
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3ConfigVersion.cmake
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
|
||||
DESTINATION ${CMAKEPACKAGE_INSTALL_DIR})
|
||||
else (NOT CMAKE_VERSION VERSION_LESS 3.0)
|
||||
# Fallback to legacy Eigen3Config.cmake without the imported target
|
||||
|
||||
# If CMakePackageConfigHelpers module is available (CMake >= 2.8.8)
|
||||
# create a relocatable Config file, otherwise leave the hardcoded paths
|
||||
include(CMakePackageConfigHelpers OPTIONAL RESULT_VARIABLE CPCH_PATH)
|
||||
|
||||
if(CPCH_PATH)
|
||||
configure_package_config_file (
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigLegacy.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
PATH_VARS EIGEN_INCLUDE_DIR EIGEN_ROOT_DIR
|
||||
INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
|
||||
NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components
|
||||
)
|
||||
else()
|
||||
# The PACKAGE_* variables are defined by the configure_package_config_file
|
||||
# but without it we define them manually to the hardcoded paths
|
||||
set(PACKAGE_INIT "")
|
||||
set(PACKAGE_EIGEN_INCLUDE_DIR ${EIGEN_INCLUDE_DIR})
|
||||
set(PACKAGE_EIGEN_ROOT_DIR ${EIGEN_ROOT_DIR})
|
||||
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigLegacy.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
@ONLY ESCAPE_QUOTES
|
||||
)
|
||||
endif()
|
||||
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
|
||||
)
|
||||
endif (NOT CMAKE_VERSION VERSION_LESS 3.0)
|
||||
|
||||
# Add uninstall target
|
||||
add_custom_target ( uninstall
|
||||
|
||||
@@ -4,14 +4,10 @@
|
||||
## # The following are required to uses Dart and the Cdash dashboard
|
||||
## ENABLE_TESTING()
|
||||
## INCLUDE(CTest)
|
||||
set(CTEST_PROJECT_NAME "Eigen")
|
||||
set(CTEST_PROJECT_NAME "Eigen3.3")
|
||||
set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC")
|
||||
|
||||
set(CTEST_DROP_METHOD "http")
|
||||
set(CTEST_DROP_SITE "manao.inria.fr")
|
||||
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen")
|
||||
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen3.3")
|
||||
set(CTEST_DROP_SITE_CDASH TRUE)
|
||||
set(CTEST_PROJECT_SUBPROJECTS
|
||||
Official
|
||||
Unsupported
|
||||
)
|
||||
|
||||
@@ -16,4 +16,4 @@ install(FILES
|
||||
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel
|
||||
)
|
||||
|
||||
add_subdirectory(src)
|
||||
install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h")
|
||||
|
||||
@@ -31,7 +31,8 @@
|
||||
#include "src/Cholesky/LLT.h"
|
||||
#include "src/Cholesky/LDLT.h"
|
||||
#ifdef EIGEN_USE_LAPACKE
|
||||
#include "src/Cholesky/LLT_MKL.h"
|
||||
#include "src/misc/lapacke.h"
|
||||
#include "src/Cholesky/LLT_LAPACKE.h"
|
||||
#endif
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
@@ -19,7 +19,7 @@ extern "C" {
|
||||
/** \ingroup Support_modules
|
||||
* \defgroup CholmodSupport_Module CholmodSupport module
|
||||
*
|
||||
* This module provides an interface to the Cholmod library which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
|
||||
* This module provides an interface to the Cholmod library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
|
||||
* It provides the two following main factorization classes:
|
||||
* - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization.
|
||||
* - class CholmodDecomposiiton: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial).
|
||||
|
||||
121
Eigen/Core
121
Eigen/Core
@@ -14,9 +14,9 @@
|
||||
// first thing Eigen does: stop the compiler from committing suicide
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
// Handle NVCC/CUDA
|
||||
#ifdef __CUDACC__
|
||||
// Do not try asserts on CUDA!
|
||||
// Handle NVCC/CUDA/SYCL
|
||||
#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
|
||||
// Do not try asserts on CUDA and SYCL!
|
||||
#ifndef EIGEN_NO_DEBUG
|
||||
#define EIGEN_NO_DEBUG
|
||||
#endif
|
||||
@@ -25,30 +25,40 @@
|
||||
#undef EIGEN_INTERNAL_DEBUGGING
|
||||
#endif
|
||||
|
||||
// Do not try to vectorize on CUDA!
|
||||
#ifndef EIGEN_DONT_VECTORIZE
|
||||
#define EIGEN_DONT_VECTORIZE
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_EXCEPTIONS
|
||||
#undef EIGEN_EXCEPTIONS
|
||||
#endif
|
||||
|
||||
|
||||
// All functions callable from CUDA code must be qualified with __device__
|
||||
#define EIGEN_DEVICE_FUNC __host__ __device__
|
||||
|
||||
#ifdef __CUDACC__
|
||||
// Do not try to vectorize on CUDA and SYCL!
|
||||
#ifndef EIGEN_DONT_VECTORIZE
|
||||
#define EIGEN_DONT_VECTORIZE
|
||||
#endif
|
||||
|
||||
#define EIGEN_DEVICE_FUNC __host__ __device__
|
||||
// We need math_functions.hpp to ensure that that EIGEN_USING_STD_MATH macro
|
||||
// works properly on the device side
|
||||
#include <math_functions.hpp>
|
||||
#else
|
||||
#define EIGEN_DEVICE_FUNC
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define EIGEN_DEVICE_FUNC
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__CUDA_ARCH__)
|
||||
// When compiling CUDA device code with NVCC, pull in math functions from the
|
||||
// global namespace. In host mode, and when device doee with clang, use the
|
||||
// std versions.
|
||||
#if defined(__CUDA_ARCH__) && defined(__NVCC__)
|
||||
#define EIGEN_USING_STD_MATH(FUNC) using ::FUNC;
|
||||
#else
|
||||
#define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
|
||||
#endif
|
||||
|
||||
#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS)
|
||||
#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL)
|
||||
#define EIGEN_EXCEPTIONS
|
||||
#endif
|
||||
|
||||
@@ -137,6 +147,15 @@
|
||||
#ifdef __FMA__
|
||||
#define EIGEN_VECTORIZE_FMA
|
||||
#endif
|
||||
#if defined(__AVX512F__) && defined(EIGEN_ENABLE_AVX512)
|
||||
#define EIGEN_VECTORIZE_AVX512
|
||||
#define EIGEN_VECTORIZE_AVX2
|
||||
#define EIGEN_VECTORIZE_AVX
|
||||
#define EIGEN_VECTORIZE_FMA
|
||||
#ifdef __AVX512DQ__
|
||||
#define EIGEN_VECTORIZE_AVX512DQ
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// include files
|
||||
|
||||
@@ -153,6 +172,7 @@
|
||||
#if EIGEN_COMP_ICC >= 1110
|
||||
#include <immintrin.h>
|
||||
#else
|
||||
#include <mmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <xmmintrin.h>
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
@@ -167,7 +187,7 @@
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_2
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
#ifdef EIGEN_VECTORIZE_AVX
|
||||
#if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#endif
|
||||
@@ -194,12 +214,29 @@
|
||||
#define EIGEN_VECTORIZE
|
||||
#define EIGEN_VECTORIZE_NEON
|
||||
#include <arm_neon.h>
|
||||
#elif (defined __s390x__ && defined __VEC__)
|
||||
#define EIGEN_VECTORIZE
|
||||
#define EIGEN_VECTORIZE_ZVECTOR
|
||||
#include <vecintrin.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__F16C__) && !defined(EIGEN_COMP_CLANG)
|
||||
// We can use the optimized fp16 to float and float to fp16 conversion routines
|
||||
#define EIGEN_HAS_FP16_C
|
||||
#endif
|
||||
|
||||
#if defined __CUDACC__
|
||||
#define EIGEN_VECTORIZE_CUDA
|
||||
#include <vector_types.h>
|
||||
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
|
||||
#define EIGEN_HAS_CUDA_FP16
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined EIGEN_HAS_CUDA_FP16
|
||||
#include <host_defines.h>
|
||||
#include <cuda_fp16.h>
|
||||
#endif
|
||||
|
||||
#if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE)
|
||||
@@ -231,6 +268,11 @@
|
||||
// for min/max:
|
||||
#include <algorithm>
|
||||
|
||||
// for std::is_nothrow_move_assignable
|
||||
#ifdef EIGEN_INCLUDE_TYPE_TRAITS
|
||||
#include <type_traits>
|
||||
#endif
|
||||
|
||||
// for outputting debug info
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
#include <iostream>
|
||||
@@ -245,7 +287,9 @@
|
||||
namespace Eigen {
|
||||
|
||||
inline static const char *SimdInstructionSetsInUse(void) {
|
||||
#if defined(EIGEN_VECTORIZE_AVX)
|
||||
#if defined(EIGEN_VECTORIZE_AVX512)
|
||||
return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
|
||||
#elif defined(EIGEN_VECTORIZE_AVX)
|
||||
return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
|
||||
#elif defined(EIGEN_VECTORIZE_SSE4_2)
|
||||
return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
|
||||
@@ -263,6 +307,8 @@ inline static const char *SimdInstructionSetsInUse(void) {
|
||||
return "VSX";
|
||||
#elif defined(EIGEN_VECTORIZE_NEON)
|
||||
return "ARM NEON";
|
||||
#elif defined(EIGEN_VECTORIZE_ZVECTOR)
|
||||
return "S390X ZVECTOR";
|
||||
#else
|
||||
return "None";
|
||||
#endif
|
||||
@@ -278,7 +324,7 @@ inline static const char *SimdInstructionSetsInUse(void) {
|
||||
// we use size_t frequently and we'll never remember to prepend it with std:: everytime just to
|
||||
// ensure QNX/QCC support
|
||||
using std::size_t;
|
||||
// gcc 4.6.0 wants std:: for ptrdiff_t
|
||||
// gcc 4.6.0 wants std:: for ptrdiff_t
|
||||
using std::ptrdiff_t;
|
||||
|
||||
/** \defgroup Core_Module Core module
|
||||
@@ -300,10 +346,15 @@ using std::ptrdiff_t;
|
||||
|
||||
#include "src/Core/NumTraits.h"
|
||||
#include "src/Core/MathFunctions.h"
|
||||
#include "src/Core/SpecialFunctions.h"
|
||||
#include "src/Core/GenericPacketMath.h"
|
||||
#include "src/Core/MathFunctionsImpl.h"
|
||||
|
||||
#if defined EIGEN_VECTORIZE_AVX
|
||||
#if defined EIGEN_VECTORIZE_AVX512
|
||||
#include "src/Core/arch/SSE/PacketMath.h"
|
||||
#include "src/Core/arch/AVX/PacketMath.h"
|
||||
#include "src/Core/arch/AVX512/PacketMath.h"
|
||||
#include "src/Core/arch/AVX512/MathFunctions.h"
|
||||
#elif defined EIGEN_VECTORIZE_AVX
|
||||
// Use AVX for floats and doubles, SSE for integers
|
||||
#include "src/Core/arch/SSE/PacketMath.h"
|
||||
#include "src/Core/arch/SSE/Complex.h"
|
||||
@@ -325,8 +376,17 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/arch/NEON/PacketMath.h"
|
||||
#include "src/Core/arch/NEON/MathFunctions.h"
|
||||
#include "src/Core/arch/NEON/Complex.h"
|
||||
#elif defined EIGEN_VECTORIZE_ZVECTOR
|
||||
#include "src/Core/arch/ZVector/PacketMath.h"
|
||||
#include "src/Core/arch/ZVector/MathFunctions.h"
|
||||
#include "src/Core/arch/ZVector/Complex.h"
|
||||
#endif
|
||||
|
||||
// Half float support
|
||||
#include "src/Core/arch/CUDA/Half.h"
|
||||
#include "src/Core/arch/CUDA/PacketMathHalf.h"
|
||||
#include "src/Core/arch/CUDA/TypeCasting.h"
|
||||
|
||||
#if defined EIGEN_VECTORIZE_CUDA
|
||||
#include "src/Core/arch/CUDA/PacketMath.h"
|
||||
#include "src/Core/arch/CUDA/MathFunctions.h"
|
||||
@@ -334,12 +394,17 @@ using std::ptrdiff_t;
|
||||
|
||||
#include "src/Core/arch/Default/Settings.h"
|
||||
|
||||
#include "src/Core/functors/TernaryFunctors.h"
|
||||
#include "src/Core/functors/BinaryFunctors.h"
|
||||
#include "src/Core/functors/UnaryFunctors.h"
|
||||
#include "src/Core/functors/NullaryFunctors.h"
|
||||
#include "src/Core/functors/StlFunctors.h"
|
||||
#include "src/Core/functors/AssignmentFunctors.h"
|
||||
|
||||
// Specialized functors to enable the processing of complex numbers
|
||||
// on CUDA devices
|
||||
#include "src/Core/arch/CUDA/Complex.h"
|
||||
|
||||
#include "src/Core/DenseCoeffsBase.h"
|
||||
#include "src/Core/DenseBase.h"
|
||||
#include "src/Core/MatrixBase.h"
|
||||
@@ -366,6 +431,7 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/PlainObjectBase.h"
|
||||
#include "src/Core/Matrix.h"
|
||||
#include "src/Core/Array.h"
|
||||
#include "src/Core/CwiseTernaryOp.h"
|
||||
#include "src/Core/CwiseBinaryOp.h"
|
||||
#include "src/Core/CwiseUnaryOp.h"
|
||||
#include "src/Core/CwiseNullaryOp.h"
|
||||
@@ -414,6 +480,7 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/products/TriangularSolverVector.h"
|
||||
#include "src/Core/BandMatrix.h"
|
||||
#include "src/Core/CoreIterators.h"
|
||||
#include "src/Core/ConditionEstimator.h"
|
||||
|
||||
#include "src/Core/BooleanRedux.h"
|
||||
#include "src/Core/Select.h"
|
||||
@@ -424,14 +491,14 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/ArrayWrapper.h"
|
||||
|
||||
#ifdef EIGEN_USE_BLAS
|
||||
#include "src/Core/products/GeneralMatrixMatrix_MKL.h"
|
||||
#include "src/Core/products/GeneralMatrixVector_MKL.h"
|
||||
#include "src/Core/products/GeneralMatrixMatrixTriangular_MKL.h"
|
||||
#include "src/Core/products/SelfadjointMatrixMatrix_MKL.h"
|
||||
#include "src/Core/products/SelfadjointMatrixVector_MKL.h"
|
||||
#include "src/Core/products/TriangularMatrixMatrix_MKL.h"
|
||||
#include "src/Core/products/TriangularMatrixVector_MKL.h"
|
||||
#include "src/Core/products/TriangularSolverMatrix_MKL.h"
|
||||
#include "src/Core/products/GeneralMatrixMatrix_BLAS.h"
|
||||
#include "src/Core/products/GeneralMatrixVector_BLAS.h"
|
||||
#include "src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h"
|
||||
#include "src/Core/products/SelfadjointMatrixMatrix_BLAS.h"
|
||||
#include "src/Core/products/SelfadjointMatrixVector_BLAS.h"
|
||||
#include "src/Core/products/TriangularMatrixMatrix_BLAS.h"
|
||||
#include "src/Core/products/TriangularMatrixVector_BLAS.h"
|
||||
#include "src/Core/products/TriangularSolverMatrix_BLAS.h"
|
||||
#endif // EIGEN_USE_BLAS
|
||||
|
||||
#ifdef EIGEN_USE_MKL_VML
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
* \endcode
|
||||
*/
|
||||
|
||||
#include "src/misc/RealSvd2x2.h"
|
||||
#include "src/Eigenvalues/Tridiagonalization.h"
|
||||
#include "src/Eigenvalues/RealSchur.h"
|
||||
#include "src/Eigenvalues/EigenSolver.h"
|
||||
@@ -44,9 +45,10 @@
|
||||
#include "src/Eigenvalues/GeneralizedEigenSolver.h"
|
||||
#include "src/Eigenvalues/MatrixBaseEigenvalues.h"
|
||||
#ifdef EIGEN_USE_LAPACKE
|
||||
#include "src/Eigenvalues/RealSchur_MKL.h"
|
||||
#include "src/Eigenvalues/ComplexSchur_MKL.h"
|
||||
#include "src/Eigenvalues/SelfAdjointEigenSolver_MKL.h"
|
||||
#include "src/misc/lapacke.h"
|
||||
#include "src/Eigenvalues/RealSchur_LAPACKE.h"
|
||||
#include "src/Eigenvalues/ComplexSchur_LAPACKE.h"
|
||||
#include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h"
|
||||
#endif
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
@@ -17,16 +17,16 @@
|
||||
#include <limits>
|
||||
|
||||
/** \defgroup Geometry_Module Geometry module
|
||||
*
|
||||
*
|
||||
*
|
||||
* This module provides support for:
|
||||
* - fixed-size homogeneous transformations
|
||||
* - translation, scaling, 2D and 3D rotations
|
||||
* - quaternions
|
||||
* - \ref MatrixBase::cross() "cross product"
|
||||
* - \ref MatrixBase::unitOrthogonal() "orthognal vector generation"
|
||||
* - some linear components: parametrized-lines and hyperplanes
|
||||
* - \link Quaternion quaternions \endlink
|
||||
* - cross products (\ref MatrixBase::cross, \ref MatrixBase::cross3)
|
||||
* - orthognal vector generation (\ref MatrixBase::unitOrthogonal)
|
||||
* - some linear components: \link ParametrizedLine parametrized-lines \endlink and \link Hyperplane hyperplanes \endlink
|
||||
* - \link AlignedBox axis aligned bounding boxes \endlink
|
||||
* - \link umeyama least-square transformation fitting \endlink
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/Geometry>
|
||||
|
||||
3
Eigen/LU
3
Eigen/LU
@@ -28,7 +28,8 @@
|
||||
#include "src/LU/FullPivLU.h"
|
||||
#include "src/LU/PartialPivLU.h"
|
||||
#ifdef EIGEN_USE_LAPACKE
|
||||
#include "src/LU/PartialPivLU_MKL.h"
|
||||
#include "src/misc/lapacke.h"
|
||||
#include "src/LU/PartialPivLU_LAPACKE.h"
|
||||
#endif
|
||||
#include "src/LU/Determinant.h"
|
||||
#include "src/LU/InverseImpl.h"
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
#include <complex.h>
|
||||
extern "C" {
|
||||
#include <pastix_nompi.h>
|
||||
#include <pastix.h>
|
||||
|
||||
6
Eigen/QR
6
Eigen/QR
@@ -34,9 +34,11 @@
|
||||
#include "src/QR/HouseholderQR.h"
|
||||
#include "src/QR/FullPivHouseholderQR.h"
|
||||
#include "src/QR/ColPivHouseholderQR.h"
|
||||
#include "src/QR/CompleteOrthogonalDecomposition.h"
|
||||
#ifdef EIGEN_USE_LAPACKE
|
||||
#include "src/QR/HouseholderQR_MKL.h"
|
||||
#include "src/QR/ColPivHouseholderQR_MKL.h"
|
||||
#include "src/misc/lapacke.h"
|
||||
#include "src/QR/HouseholderQR_LAPACKE.h"
|
||||
#include "src/QR/ColPivHouseholderQR_LAPACKE.h"
|
||||
#endif
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
/** \ingroup Support_modules
|
||||
* \defgroup SPQRSupport_Module SuiteSparseQR module
|
||||
*
|
||||
* This module provides an interface to the SPQR library, which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
|
||||
* This module provides an interface to the SPQR library, which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/SPQRSupport>
|
||||
|
||||
@@ -31,12 +31,14 @@
|
||||
* \endcode
|
||||
*/
|
||||
|
||||
#include "src/misc/RealSvd2x2.h"
|
||||
#include "src/SVD/UpperBidiagonalization.h"
|
||||
#include "src/SVD/SVDBase.h"
|
||||
#include "src/SVD/JacobiSVD.h"
|
||||
#include "src/SVD/BDCSVD.h"
|
||||
#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
|
||||
#include "src/SVD/JacobiSVD_MKL.h"
|
||||
#include "src/misc/lapacke.h"
|
||||
#include "src/SVD/JacobiSVD_LAPACKE.h"
|
||||
#endif
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
@@ -43,7 +43,7 @@ namespace Eigen { struct SluMatrix; }
|
||||
* - class SuperLU: a supernodal sequential LU factorization.
|
||||
* - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods).
|
||||
*
|
||||
* \warning This wrapper is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported.
|
||||
* \warning This wrapper requires at least versions 4.0 of SuperLU. The 3.x versions are not supported.
|
||||
*
|
||||
* \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting.
|
||||
*
|
||||
|
||||
@@ -19,7 +19,7 @@ extern "C" {
|
||||
/** \ingroup Support_modules
|
||||
* \defgroup UmfPackSupport_Module UmfPackSupport module
|
||||
*
|
||||
* This module provides an interface to the UmfPack library which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
|
||||
* This module provides an interface to the UmfPack library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
|
||||
* It provides the following factorization class:
|
||||
* - class UmfPackLU: a multifrontal sequential LU factorization.
|
||||
*
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
file(GLOB Eigen_src_subdirectories "*")
|
||||
escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
foreach(f ${Eigen_src_subdirectories})
|
||||
if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" )
|
||||
add_subdirectory(${f})
|
||||
endif()
|
||||
endforeach()
|
||||
@@ -1,6 +0,0 @@
|
||||
FILE(GLOB Eigen_Cholesky_SRCS "*.h")
|
||||
|
||||
INSTALL(FILES
|
||||
${Eigen_Cholesky_SRCS}
|
||||
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Cholesky COMPONENT Devel
|
||||
)
|
||||
@@ -13,7 +13,7 @@
|
||||
#ifndef EIGEN_LDLT_H
|
||||
#define EIGEN_LDLT_H
|
||||
|
||||
namespace Eigen {
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename MatrixType, int UpLo> struct LDLT_Traits;
|
||||
@@ -28,8 +28,8 @@ namespace internal {
|
||||
*
|
||||
* \brief Robust Cholesky decomposition of a matrix with pivoting
|
||||
*
|
||||
* \param MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
|
||||
* \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
|
||||
* \tparam _MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
|
||||
* \tparam _UpLo the triangular part that will be used for the decomposition: Lower (default) or Upper.
|
||||
* The other triangular part won't be read.
|
||||
*
|
||||
* Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite
|
||||
@@ -43,6 +43,8 @@ namespace internal {
|
||||
* Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky
|
||||
* decomposition to determine whether a system of equations has a solution.
|
||||
*
|
||||
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
|
||||
*
|
||||
* \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo> class LDLT
|
||||
@@ -52,7 +54,6 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
enum {
|
||||
RowsAtCompileTime = MatrixType::RowsAtCompileTime,
|
||||
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
|
||||
Options = MatrixType::Options & ~RowMajorBit, // these are the options for the TmpMatrixType, we need a ColMajor matrix here!
|
||||
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
|
||||
UpLo = _UpLo
|
||||
@@ -61,7 +62,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
|
||||
typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
|
||||
typedef typename MatrixType::StorageIndex StorageIndex;
|
||||
typedef Matrix<Scalar, RowsAtCompileTime, 1, Options, MaxRowsAtCompileTime, 1> TmpMatrixType;
|
||||
typedef Matrix<Scalar, RowsAtCompileTime, 1, 0, MaxRowsAtCompileTime, 1> TmpMatrixType;
|
||||
|
||||
typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
|
||||
typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;
|
||||
@@ -73,11 +74,11 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
* The default constructor is useful in cases in which the user intends to
|
||||
* perform decompositions via LDLT::compute(const MatrixType&).
|
||||
*/
|
||||
LDLT()
|
||||
: m_matrix(),
|
||||
m_transpositions(),
|
||||
LDLT()
|
||||
: m_matrix(),
|
||||
m_transpositions(),
|
||||
m_sign(internal::ZeroSign),
|
||||
m_isInitialized(false)
|
||||
m_isInitialized(false)
|
||||
{}
|
||||
|
||||
/** \brief Default Constructor with memory preallocation
|
||||
@@ -97,6 +98,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
/** \brief Constructor with decomposition
|
||||
*
|
||||
* This calculates the decomposition for the input \a matrix.
|
||||
*
|
||||
* \sa LDLT(Index size)
|
||||
*/
|
||||
template<typename InputType>
|
||||
@@ -110,6 +112,23 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
compute(matrix.derived());
|
||||
}
|
||||
|
||||
/** \brief Constructs a LDLT factorization from a given matrix
|
||||
*
|
||||
* This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref.
|
||||
*
|
||||
* \sa LDLT(const EigenBase&)
|
||||
*/
|
||||
template<typename InputType>
|
||||
explicit LDLT(EigenBase<InputType>& matrix)
|
||||
: m_matrix(matrix.derived()),
|
||||
m_transpositions(matrix.rows()),
|
||||
m_temporary(matrix.rows()),
|
||||
m_sign(internal::ZeroSign),
|
||||
m_isInitialized(false)
|
||||
{
|
||||
compute(matrix.derived());
|
||||
}
|
||||
|
||||
/** Clear any existing decomposition
|
||||
* \sa rankUpdate(w,sigma)
|
||||
*/
|
||||
@@ -168,7 +187,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
* \note_about_checking_solutions
|
||||
*
|
||||
* More precisely, this method solves \f$ A x = b \f$ using the decomposition \f$ A = P^T L D L^* P \f$
|
||||
* by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$,
|
||||
* by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$,
|
||||
* \f$ L^* y_4 = y_3 \f$ and \f$ P x = y_4 \f$ in succession. If the matrix \f$ A \f$ is singular, then
|
||||
* \f$ D \f$ will also be singular (all the other matrices are invertible). In that case, the
|
||||
* least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function
|
||||
@@ -192,6 +211,15 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
template<typename InputType>
|
||||
LDLT& compute(const EigenBase<InputType>& matrix);
|
||||
|
||||
/** \returns an estimate of the reciprocal condition number of the matrix of
|
||||
* which \c *this is the LDLT decomposition.
|
||||
*/
|
||||
RealScalar rcond() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
return internal::rcond_estimate_helper(m_l1_norm, *this);
|
||||
}
|
||||
|
||||
template <typename Derived>
|
||||
LDLT& rankUpdate(const MatrixBase<Derived>& w, const RealScalar& alpha=1);
|
||||
|
||||
@@ -207,6 +235,13 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
|
||||
MatrixType reconstructedMatrix() const;
|
||||
|
||||
/** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint.
|
||||
*
|
||||
* This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as:
|
||||
* \code x = decomposition.adjoint().solve(b) \endcode
|
||||
*/
|
||||
const LDLT& adjoint() const { return *this; };
|
||||
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
|
||||
@@ -218,9 +253,9 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
ComputationInfo info() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
return Success;
|
||||
return m_info;
|
||||
}
|
||||
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename RhsType, typename DstType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -228,7 +263,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
@@ -241,10 +276,12 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
* is not stored), and the diagonal entries correspond to D.
|
||||
*/
|
||||
MatrixType m_matrix;
|
||||
RealScalar m_l1_norm;
|
||||
TranspositionType m_transpositions;
|
||||
TmpMatrixType m_temporary;
|
||||
internal::SignMatrix m_sign;
|
||||
bool m_isInitialized;
|
||||
ComputationInfo m_info;
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
@@ -262,12 +299,14 @@ template<> struct ldlt_inplace<Lower>
|
||||
typedef typename TranspositionType::StorageIndex IndexType;
|
||||
eigen_assert(mat.rows()==mat.cols());
|
||||
const Index size = mat.rows();
|
||||
bool found_zero_pivot = false;
|
||||
bool ret = true;
|
||||
|
||||
if (size <= 1)
|
||||
{
|
||||
transpositions.setIdentity();
|
||||
if (numext::real(mat.coeff(0,0)) > 0) sign = PositiveSemiDef;
|
||||
else if (numext::real(mat.coeff(0,0)) < 0) sign = NegativeSemiDef;
|
||||
if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef;
|
||||
else if (numext::real(mat.coeff(0,0)) < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
|
||||
else sign = ZeroSign;
|
||||
return true;
|
||||
}
|
||||
@@ -314,26 +353,44 @@ template<> struct ldlt_inplace<Lower>
|
||||
if(rs>0)
|
||||
A21.noalias() -= A20 * temp.head(k);
|
||||
}
|
||||
|
||||
|
||||
// In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot
|
||||
// was smaller than the cutoff value. However, since LDLT is not rank-revealing
|
||||
// we should only make sure that we do not introduce INF or NaN values.
|
||||
// Remark that LAPACK also uses 0 as the cutoff value.
|
||||
RealScalar realAkk = numext::real(mat.coeffRef(k,k));
|
||||
if((rs>0) && (abs(realAkk) > RealScalar(0)))
|
||||
bool pivot_is_valid = (abs(realAkk) > RealScalar(0));
|
||||
|
||||
if(k==0 && !pivot_is_valid)
|
||||
{
|
||||
// The entire diagonal is zero, there is nothing more to do
|
||||
// except filling the transpositions, and checking whether the matrix is zero.
|
||||
sign = ZeroSign;
|
||||
for(Index j = 0; j<size; ++j)
|
||||
{
|
||||
transpositions.coeffRef(j) = IndexType(j);
|
||||
ret = ret && (mat.col(j).tail(size-j-1).array()==Scalar(0)).all();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
if((rs>0) && pivot_is_valid)
|
||||
A21 /= realAkk;
|
||||
|
||||
if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed
|
||||
else if(!pivot_is_valid) found_zero_pivot = true;
|
||||
|
||||
if (sign == PositiveSemiDef) {
|
||||
if (realAkk < 0) sign = Indefinite;
|
||||
if (realAkk < static_cast<RealScalar>(0)) sign = Indefinite;
|
||||
} else if (sign == NegativeSemiDef) {
|
||||
if (realAkk > 0) sign = Indefinite;
|
||||
if (realAkk > static_cast<RealScalar>(0)) sign = Indefinite;
|
||||
} else if (sign == ZeroSign) {
|
||||
if (realAkk > 0) sign = PositiveSemiDef;
|
||||
else if (realAkk < 0) sign = NegativeSemiDef;
|
||||
if (realAkk > static_cast<RealScalar>(0)) sign = PositiveSemiDef;
|
||||
else if (realAkk < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Reference for the algorithm: Davis and Hager, "Multiple Rank
|
||||
@@ -433,18 +490,31 @@ template<typename InputType>
|
||||
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>& a)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
|
||||
eigen_assert(a.rows()==a.cols());
|
||||
const Index size = a.rows();
|
||||
|
||||
m_matrix = a.derived();
|
||||
|
||||
// Compute matrix L1 norm = max abs column sum.
|
||||
m_l1_norm = RealScalar(0);
|
||||
// TODO move this code to SelfAdjointView
|
||||
for (Index col = 0; col < size; ++col) {
|
||||
RealScalar abs_col_sum;
|
||||
if (_UpLo == Lower)
|
||||
abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();
|
||||
else
|
||||
abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();
|
||||
if (abs_col_sum > m_l1_norm)
|
||||
m_l1_norm = abs_col_sum;
|
||||
}
|
||||
|
||||
m_transpositions.resize(size);
|
||||
m_isInitialized = false;
|
||||
m_temporary.resize(size);
|
||||
m_sign = internal::ZeroSign;
|
||||
|
||||
internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign);
|
||||
m_info = internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign) ? Success : NumericalIssue;
|
||||
|
||||
m_isInitialized = true;
|
||||
return *this;
|
||||
@@ -466,7 +536,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Deri
|
||||
eigen_assert(m_matrix.rows()==size);
|
||||
}
|
||||
else
|
||||
{
|
||||
{
|
||||
m_matrix.resize(size,size);
|
||||
m_matrix.setZero();
|
||||
m_transpositions.resize(size);
|
||||
@@ -505,7 +575,7 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) cons
|
||||
// diagonal element is not well justified and leads to numerical issues in some cases.
|
||||
// Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
|
||||
RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
|
||||
|
||||
|
||||
for (Index i = 0; i < vecD.size(); ++i)
|
||||
{
|
||||
if(abs(vecD(i)) > tolerance)
|
||||
@@ -572,7 +642,6 @@ MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
|
||||
return res;
|
||||
}
|
||||
|
||||
#ifndef __CUDACC__
|
||||
/** \cholesky_module
|
||||
* \returns the Cholesky decomposition with full pivoting without square root of \c *this
|
||||
* \sa MatrixBase::ldlt()
|
||||
@@ -594,7 +663,6 @@ MatrixBase<Derived>::ldlt() const
|
||||
{
|
||||
return LDLT<PlainObject>(derived());
|
||||
}
|
||||
#endif // __CUDACC__
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
#ifndef EIGEN_LLT_H
|
||||
#define EIGEN_LLT_H
|
||||
|
||||
namespace Eigen {
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal{
|
||||
template<typename MatrixType, int UpLo> struct LLT_Traits;
|
||||
@@ -22,8 +22,8 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
|
||||
*
|
||||
* \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features
|
||||
*
|
||||
* \param MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
|
||||
* \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
|
||||
* \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
|
||||
* \tparam _UpLo the triangular part that will be used for the decomposition: Lower (default) or Upper.
|
||||
* The other triangular part won't be read.
|
||||
*
|
||||
* This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
|
||||
@@ -40,7 +40,9 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
|
||||
*
|
||||
* Example: \include LLT_example.cpp
|
||||
* Output: \verbinclude LLT_example.out
|
||||
*
|
||||
*
|
||||
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
|
||||
*
|
||||
* \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
|
||||
*/
|
||||
/* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
|
||||
@@ -54,7 +56,6 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
enum {
|
||||
RowsAtCompileTime = MatrixType::RowsAtCompileTime,
|
||||
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
|
||||
Options = MatrixType::Options,
|
||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
|
||||
};
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
@@ -95,6 +96,21 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
compute(matrix.derived());
|
||||
}
|
||||
|
||||
/** \brief Constructs a LLT factorization from a given matrix
|
||||
*
|
||||
* This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when
|
||||
* \c MatrixType is a Eigen::Ref.
|
||||
*
|
||||
* \sa LLT(const EigenBase&)
|
||||
*/
|
||||
template<typename InputType>
|
||||
explicit LLT(EigenBase<InputType>& matrix)
|
||||
: m_matrix(matrix.derived()),
|
||||
m_isInitialized(false)
|
||||
{
|
||||
compute(matrix.derived());
|
||||
}
|
||||
|
||||
/** \returns a view of the upper triangular matrix U */
|
||||
inline typename Traits::MatrixU matrixU() const
|
||||
{
|
||||
@@ -135,6 +151,16 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
template<typename InputType>
|
||||
LLT& compute(const EigenBase<InputType>& matrix);
|
||||
|
||||
/** \returns an estimate of the reciprocal condition number of the matrix of
|
||||
* which \c *this is the Cholesky decomposition.
|
||||
*/
|
||||
RealScalar rcond() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LLT is not initialized.");
|
||||
eigen_assert(m_info == Success && "LLT failed because matrix appears to be negative");
|
||||
return internal::rcond_estimate_helper(m_l1_norm, *this);
|
||||
}
|
||||
|
||||
/** \returns the LLT decomposition matrix
|
||||
*
|
||||
* TODO: document the storage layout
|
||||
@@ -159,12 +185,19 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
return m_info;
|
||||
}
|
||||
|
||||
/** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint.
|
||||
*
|
||||
* This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as:
|
||||
* \code x = decomposition.adjoint().solve(b) \endcode
|
||||
*/
|
||||
const LLT& adjoint() const { return *this; };
|
||||
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
|
||||
template<typename VectorType>
|
||||
LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);
|
||||
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename RhsType, typename DstType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -172,17 +205,18 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
|
||||
/** \internal
|
||||
* Used to compute and store L
|
||||
* The strict upper part is not used and even not initialized.
|
||||
*/
|
||||
MatrixType m_matrix;
|
||||
RealScalar m_l1_norm;
|
||||
bool m_isInitialized;
|
||||
ComputationInfo m_info;
|
||||
};
|
||||
@@ -268,7 +302,7 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
|
||||
static Index unblocked(MatrixType& mat)
|
||||
{
|
||||
using std::sqrt;
|
||||
|
||||
|
||||
eigen_assert(mat.rows()==mat.cols());
|
||||
const Index size = mat.rows();
|
||||
for(Index k = 0; k < size; ++k)
|
||||
@@ -317,7 +351,7 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
|
||||
Index ret;
|
||||
if((ret=unblocked(A11))>=0) return k+ret;
|
||||
if(rs>0) A11.adjoint().template triangularView<Upper>().template solveInPlace<OnTheRight>(A21);
|
||||
if(rs>0) A22.template selfadjointView<Lower>().rankUpdate(A21,-1); // bottleneck
|
||||
if(rs>0) A22.template selfadjointView<Lower>().rankUpdate(A21,typename NumTraits<RealScalar>::Literal(-1)); // bottleneck
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
@@ -328,7 +362,7 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
|
||||
return Eigen::internal::llt_rank_update_lower(mat, vec, sigma);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<typename Scalar> struct llt_inplace<Scalar, Upper>
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
@@ -387,12 +421,25 @@ template<typename InputType>
|
||||
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>& a)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
|
||||
eigen_assert(a.rows()==a.cols());
|
||||
const Index size = a.rows();
|
||||
m_matrix.resize(size, size);
|
||||
m_matrix = a.derived();
|
||||
|
||||
// Compute matrix L1 norm = max abs column sum.
|
||||
m_l1_norm = RealScalar(0);
|
||||
// TODO move this code to SelfAdjointView
|
||||
for (Index col = 0; col < size; ++col) {
|
||||
RealScalar abs_col_sum;
|
||||
if (_UpLo == Lower)
|
||||
abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();
|
||||
else
|
||||
abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();
|
||||
if (abs_col_sum > m_l1_norm)
|
||||
m_l1_norm = abs_col_sum;
|
||||
}
|
||||
|
||||
m_isInitialized = true;
|
||||
bool ok = Traits::inplace_decomposition(m_matrix);
|
||||
m_info = ok ? Success : NumericalIssue;
|
||||
@@ -419,7 +466,7 @@ LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, c
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename _MatrixType,int _UpLo>
|
||||
template<typename RhsType, typename DstType>
|
||||
@@ -431,15 +478,12 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
|
||||
#endif
|
||||
|
||||
/** \internal use x = llt_object.solve(x);
|
||||
*
|
||||
*
|
||||
* This is the \em in-place version of solve().
|
||||
*
|
||||
* \param bAndX represents both the right-hand side matrix b and result x.
|
||||
*
|
||||
* \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD.
|
||||
*
|
||||
* This version avoids a copy when the right hand side matrix b is not
|
||||
* needed anymore.
|
||||
* This version avoids a copy when the right hand side matrix b is not needed anymore.
|
||||
*
|
||||
* \sa LLT::solve(), MatrixBase::llt()
|
||||
*/
|
||||
@@ -463,7 +507,6 @@ MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
|
||||
return matrixL() * matrixL().adjoint().toDenseMatrix();
|
||||
}
|
||||
|
||||
#ifndef __CUDACC__
|
||||
/** \cholesky_module
|
||||
* \returns the LLT decomposition of \c *this
|
||||
* \sa SelfAdjointView::llt()
|
||||
@@ -485,8 +528,7 @@ SelfAdjointView<MatrixType, UpLo>::llt() const
|
||||
{
|
||||
return LLT<PlainObject,UpLo>(m_matrix);
|
||||
}
|
||||
#endif // __CUDACC__
|
||||
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_LLT_H
|
||||
|
||||
@@ -25,25 +25,22 @@
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
********************************************************************************
|
||||
* Content : Eigen bindings to Intel(R) MKL
|
||||
* Content : Eigen bindings to LAPACKe
|
||||
* LLt decomposition based on LAPACKE_?potrf function.
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_LLT_MKL_H
|
||||
#define EIGEN_LLT_MKL_H
|
||||
|
||||
#include "Eigen/src/Core/util/MKL_support.h"
|
||||
#include <iostream>
|
||||
#ifndef EIGEN_LLT_LAPACKE_H
|
||||
#define EIGEN_LLT_LAPACKE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Scalar> struct mkl_llt;
|
||||
template<typename Scalar> struct lapacke_llt;
|
||||
|
||||
#define EIGEN_MKL_LLT(EIGTYPE, MKLTYPE, MKLPREFIX) \
|
||||
template<> struct mkl_llt<EIGTYPE> \
|
||||
#define EIGEN_LAPACKE_LLT(EIGTYPE, BLASTYPE, LAPACKE_PREFIX) \
|
||||
template<> struct lapacke_llt<EIGTYPE> \
|
||||
{ \
|
||||
template<typename MatrixType> \
|
||||
static inline Index potrf(MatrixType& m, char uplo) \
|
||||
@@ -53,13 +50,13 @@ template<> struct mkl_llt<EIGTYPE> \
|
||||
EIGTYPE* a; \
|
||||
eigen_assert(m.rows()==m.cols()); \
|
||||
/* Set up parameters for ?potrf */ \
|
||||
size = m.rows(); \
|
||||
size = convert_index<lapack_int>(m.rows()); \
|
||||
StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \
|
||||
matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
|
||||
a = &(m.coeffRef(0,0)); \
|
||||
lda = m.outerStride(); \
|
||||
lda = convert_index<lapack_int>(m.outerStride()); \
|
||||
\
|
||||
info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \
|
||||
info = LAPACKE_##LAPACKE_PREFIX##potrf( matrix_order, uplo, size, (BLASTYPE*)a, lda ); \
|
||||
info = (info==0) ? -1 : info>0 ? info-1 : size; \
|
||||
return info; \
|
||||
} \
|
||||
@@ -69,7 +66,7 @@ template<> struct llt_inplace<EIGTYPE, Lower> \
|
||||
template<typename MatrixType> \
|
||||
static Index blocked(MatrixType& m) \
|
||||
{ \
|
||||
return mkl_llt<EIGTYPE>::potrf(m, 'L'); \
|
||||
return lapacke_llt<EIGTYPE>::potrf(m, 'L'); \
|
||||
} \
|
||||
template<typename MatrixType, typename VectorType> \
|
||||
static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
|
||||
@@ -80,7 +77,7 @@ template<> struct llt_inplace<EIGTYPE, Upper> \
|
||||
template<typename MatrixType> \
|
||||
static Index blocked(MatrixType& m) \
|
||||
{ \
|
||||
return mkl_llt<EIGTYPE>::potrf(m, 'U'); \
|
||||
return lapacke_llt<EIGTYPE>::potrf(m, 'U'); \
|
||||
} \
|
||||
template<typename MatrixType, typename VectorType> \
|
||||
static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
|
||||
@@ -90,13 +87,13 @@ template<> struct llt_inplace<EIGTYPE, Upper> \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_MKL_LLT(double, double, d)
|
||||
EIGEN_MKL_LLT(float, float, s)
|
||||
EIGEN_MKL_LLT(dcomplex, MKL_Complex16, z)
|
||||
EIGEN_MKL_LLT(scomplex, MKL_Complex8, c)
|
||||
EIGEN_LAPACKE_LLT(double, double, d)
|
||||
EIGEN_LAPACKE_LLT(float, float, s)
|
||||
EIGEN_LAPACKE_LLT(dcomplex, lapack_complex_double, z)
|
||||
EIGEN_LAPACKE_LLT(scomplex, lapack_complex_float, c)
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_LLT_MKL_H
|
||||
#endif // EIGEN_LLT_LAPACKE_H
|
||||
@@ -1,6 +0,0 @@
|
||||
FILE(GLOB Eigen_CholmodSupport_SRCS "*.h")
|
||||
|
||||
INSTALL(FILES
|
||||
${Eigen_CholmodSupport_SRCS}
|
||||
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/CholmodSupport COMPONENT Devel
|
||||
)
|
||||
@@ -14,34 +14,40 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Scalar, typename CholmodType>
|
||||
void cholmod_configure_matrix(CholmodType& mat)
|
||||
{
|
||||
if (internal::is_same<Scalar,float>::value)
|
||||
{
|
||||
mat.xtype = CHOLMOD_REAL;
|
||||
mat.dtype = CHOLMOD_SINGLE;
|
||||
}
|
||||
else if (internal::is_same<Scalar,double>::value)
|
||||
{
|
||||
template<typename Scalar> struct cholmod_configure_matrix;
|
||||
|
||||
template<> struct cholmod_configure_matrix<double> {
|
||||
template<typename CholmodType>
|
||||
static void run(CholmodType& mat) {
|
||||
mat.xtype = CHOLMOD_REAL;
|
||||
mat.dtype = CHOLMOD_DOUBLE;
|
||||
}
|
||||
else if (internal::is_same<Scalar,std::complex<float> >::value)
|
||||
{
|
||||
mat.xtype = CHOLMOD_COMPLEX;
|
||||
mat.dtype = CHOLMOD_SINGLE;
|
||||
}
|
||||
else if (internal::is_same<Scalar,std::complex<double> >::value)
|
||||
{
|
||||
};
|
||||
|
||||
template<> struct cholmod_configure_matrix<std::complex<double> > {
|
||||
template<typename CholmodType>
|
||||
static void run(CholmodType& mat) {
|
||||
mat.xtype = CHOLMOD_COMPLEX;
|
||||
mat.dtype = CHOLMOD_DOUBLE;
|
||||
}
|
||||
else
|
||||
{
|
||||
eigen_assert(false && "Scalar type not supported by CHOLMOD");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Other scalar types are not yet supported by Cholmod
|
||||
// template<> struct cholmod_configure_matrix<float> {
|
||||
// template<typename CholmodType>
|
||||
// static void run(CholmodType& mat) {
|
||||
// mat.xtype = CHOLMOD_REAL;
|
||||
// mat.dtype = CHOLMOD_SINGLE;
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// template<> struct cholmod_configure_matrix<std::complex<float> > {
|
||||
// template<typename CholmodType>
|
||||
// static void run(CholmodType& mat) {
|
||||
// mat.xtype = CHOLMOD_COMPLEX;
|
||||
// mat.dtype = CHOLMOD_SINGLE;
|
||||
// }
|
||||
// };
|
||||
|
||||
} // namespace internal
|
||||
|
||||
@@ -49,11 +55,11 @@ void cholmod_configure_matrix(CholmodType& mat)
|
||||
* Note that the data are shared.
|
||||
*/
|
||||
template<typename _Scalar, int _Options, typename _StorageIndex>
|
||||
cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
|
||||
cholmod_sparse viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_StorageIndex> > mat)
|
||||
{
|
||||
cholmod_sparse res;
|
||||
res.nzmax = mat.nonZeros();
|
||||
res.nrow = mat.rows();;
|
||||
res.nrow = mat.rows();
|
||||
res.ncol = mat.cols();
|
||||
res.p = mat.outerIndexPtr();
|
||||
res.i = mat.innerIndexPtr();
|
||||
@@ -78,7 +84,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
|
||||
{
|
||||
res.itype = CHOLMOD_INT;
|
||||
}
|
||||
else if (internal::is_same<_StorageIndex,SuiteSparse_long>::value)
|
||||
else if (internal::is_same<_StorageIndex,long>::value)
|
||||
{
|
||||
res.itype = CHOLMOD_LONG;
|
||||
}
|
||||
@@ -88,7 +94,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
|
||||
}
|
||||
|
||||
// setup res.xtype
|
||||
internal::cholmod_configure_matrix<_Scalar>(res);
|
||||
internal::cholmod_configure_matrix<_Scalar>::run(res);
|
||||
|
||||
res.stype = 0;
|
||||
|
||||
@@ -98,7 +104,14 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
|
||||
template<typename _Scalar, int _Options, typename _Index>
|
||||
const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& mat)
|
||||
{
|
||||
cholmod_sparse res = viewAsCholmod(mat.const_cast_derived());
|
||||
cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.const_cast_derived()));
|
||||
return res;
|
||||
}
|
||||
|
||||
template<typename _Scalar, int _Options, typename _Index>
|
||||
const cholmod_sparse viewAsCholmod(const SparseVector<_Scalar,_Options,_Index>& mat)
|
||||
{
|
||||
cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.const_cast_derived()));
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -107,7 +120,7 @@ const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>&
|
||||
template<typename _Scalar, int _Options, typename _Index, unsigned int UpLo>
|
||||
cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<const SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat)
|
||||
{
|
||||
cholmod_sparse res = viewAsCholmod(mat.matrix().const_cast_derived());
|
||||
cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.matrix().const_cast_derived()));
|
||||
|
||||
if(UpLo==Upper) res.stype = 1;
|
||||
if(UpLo==Lower) res.stype = -1;
|
||||
@@ -131,7 +144,7 @@ cholmod_dense viewAsCholmod(MatrixBase<Derived>& mat)
|
||||
res.x = (void*)(mat.derived().data());
|
||||
res.z = 0;
|
||||
|
||||
internal::cholmod_configure_matrix<Scalar>(res);
|
||||
internal::cholmod_configure_matrix<Scalar>::run(res);
|
||||
|
||||
return res;
|
||||
}
|
||||
@@ -178,16 +191,18 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
public:
|
||||
|
||||
CholmodBase()
|
||||
: m_cholmodFactor(0), m_info(Success)
|
||||
: m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
|
||||
{
|
||||
m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);
|
||||
m_shiftOffset[0] = m_shiftOffset[1] = 0.0;
|
||||
cholmod_start(&m_cholmod);
|
||||
}
|
||||
|
||||
explicit CholmodBase(const MatrixType& matrix)
|
||||
: m_cholmodFactor(0), m_info(Success)
|
||||
: m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
|
||||
{
|
||||
m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);
|
||||
m_shiftOffset[0] = m_shiftOffset[1] = 0.0;
|
||||
cholmod_start(&m_cholmod);
|
||||
compute(matrix);
|
||||
}
|
||||
@@ -254,7 +269,7 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
|
||||
cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>());
|
||||
cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod);
|
||||
|
||||
|
||||
// If the factorization failed, minor is the column at which it did. On success minor == n.
|
||||
this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue);
|
||||
m_factorizationIsOk = true;
|
||||
@@ -273,9 +288,10 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
const Index size = m_cholmodFactor->n;
|
||||
EIGEN_UNUSED_VARIABLE(size);
|
||||
eigen_assert(size==b.rows());
|
||||
|
||||
// Cholmod needs column-major storage without inner-stride, which corresponds to the default behavior of Ref.
|
||||
Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b.derived());
|
||||
|
||||
// note: cd stands for Cholmod Dense
|
||||
Rhs& b_ref(b.const_cast_derived());
|
||||
cholmod_dense b_cd = viewAsCholmod(b_ref);
|
||||
cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod);
|
||||
if(!x_cd)
|
||||
@@ -289,8 +305,8 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
template<typename RhsScalar, int RhsOptions, typename RhsIndex, typename DestScalar, int DestOptions, typename DestIndex>
|
||||
void _solve_impl(const SparseMatrix<RhsScalar,RhsOptions,RhsIndex> &b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
|
||||
template<typename RhsDerived, typename DestDerived>
|
||||
void _solve_impl(const SparseMatrixBase<RhsDerived> &b, SparseMatrixBase<DestDerived> &dest) const
|
||||
{
|
||||
eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
|
||||
const Index size = m_cholmodFactor->n;
|
||||
@@ -298,7 +314,8 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
eigen_assert(size==b.rows());
|
||||
|
||||
// note: cs stands for Cholmod Sparse
|
||||
cholmod_sparse b_cs = viewAsCholmod(b);
|
||||
Ref<SparseMatrix<typename RhsDerived::Scalar,ColMajor,typename RhsDerived::StorageIndex> > b_ref(b.const_cast_derived());
|
||||
cholmod_sparse b_cs = viewAsCholmod(b_ref);
|
||||
cholmod_sparse* x_cs = cholmod_spsolve(CHOLMOD_A, m_cholmodFactor, &b_cs, &m_cholmod);
|
||||
if(!x_cs)
|
||||
{
|
||||
@@ -306,7 +323,7 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
return;
|
||||
}
|
||||
// TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
|
||||
dest = viewAsEigen<DestScalar,DestOptions,DestIndex>(*x_cs);
|
||||
dest.derived() = viewAsEigen<typename DestDerived::Scalar,ColMajor,typename DestDerived::StorageIndex>(*x_cs);
|
||||
cholmod_free_sparse(&x_cs, &m_cholmod);
|
||||
}
|
||||
#endif // EIGEN_PARSED_BY_DOXYGEN
|
||||
@@ -323,10 +340,61 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
*/
|
||||
Derived& setShift(const RealScalar& offset)
|
||||
{
|
||||
m_shiftOffset[0] = offset;
|
||||
m_shiftOffset[0] = double(offset);
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \returns the determinant of the underlying matrix from the current factorization */
|
||||
Scalar determinant() const
|
||||
{
|
||||
using std::exp;
|
||||
return exp(logDeterminant());
|
||||
}
|
||||
|
||||
/** \returns the log determinant of the underlying matrix from the current factorization */
|
||||
Scalar logDeterminant() const
|
||||
{
|
||||
using std::log;
|
||||
using numext::real;
|
||||
eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
|
||||
|
||||
RealScalar logDet = 0;
|
||||
Scalar *x = static_cast<Scalar*>(m_cholmodFactor->x);
|
||||
if (m_cholmodFactor->is_super)
|
||||
{
|
||||
// Supernodal factorization stored as a packed list of dense column-major blocks,
|
||||
// as described by the following structure:
|
||||
|
||||
// super[k] == index of the first column of the k-th super node
|
||||
StorageIndex *super = static_cast<StorageIndex*>(m_cholmodFactor->super);
|
||||
// pi[k] == offset to the description of row indices
|
||||
StorageIndex *pi = static_cast<StorageIndex*>(m_cholmodFactor->pi);
|
||||
// px[k] == offset to the respective dense block
|
||||
StorageIndex *px = static_cast<StorageIndex*>(m_cholmodFactor->px);
|
||||
|
||||
Index nb_super_nodes = m_cholmodFactor->nsuper;
|
||||
for (Index k=0; k < nb_super_nodes; ++k)
|
||||
{
|
||||
StorageIndex ncols = super[k + 1] - super[k];
|
||||
StorageIndex nrows = pi[k + 1] - pi[k];
|
||||
|
||||
Map<const Array<Scalar,1,Dynamic>, 0, InnerStride<> > sk(x + px[k], ncols, InnerStride<>(nrows+1));
|
||||
logDet += sk.real().log().sum();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Simplicial factorization stored as standard CSC matrix.
|
||||
StorageIndex *p = static_cast<StorageIndex*>(m_cholmodFactor->p);
|
||||
Index size = m_cholmodFactor->n;
|
||||
for (Index k=0; k<size; ++k)
|
||||
logDet += log(real( x[p[k]] ));
|
||||
}
|
||||
if (m_cholmodFactor->is_ll)
|
||||
logDet *= 2.0;
|
||||
return logDet;
|
||||
};
|
||||
|
||||
template<typename Stream>
|
||||
void dumpMemory(Stream& /*s*/)
|
||||
{}
|
||||
@@ -334,7 +402,7 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
protected:
|
||||
mutable cholmod_common m_cholmod;
|
||||
cholmod_factor* m_cholmodFactor;
|
||||
RealScalar m_shiftOffset[2];
|
||||
double m_shiftOffset[2];
|
||||
mutable ComputationInfo m_info;
|
||||
int m_factorizationIsOk;
|
||||
int m_analysisIsOk;
|
||||
@@ -358,7 +426,9 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
*
|
||||
* This class supports all kinds of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
|
||||
*
|
||||
* \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLLT
|
||||
* \warning Only double precision real and complex scalar types are supported by Cholmod.
|
||||
*
|
||||
* \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLLT
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo = Lower>
|
||||
class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT<_MatrixType, _UpLo> >
|
||||
@@ -407,7 +477,9 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
|
||||
*
|
||||
* This class supports all kinds of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
|
||||
*
|
||||
* \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLDLT
|
||||
* \warning Only double precision real and complex scalar types are supported by Cholmod.
|
||||
*
|
||||
* \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLDLT
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo = Lower>
|
||||
class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT<_MatrixType, _UpLo> >
|
||||
@@ -454,7 +526,9 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
|
||||
*
|
||||
* This class supports all kinds of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
|
||||
*
|
||||
* \sa \ref TutorialSparseDirectSolvers
|
||||
* \warning Only double precision real and complex scalar types are supported by Cholmod.
|
||||
*
|
||||
* \sa \ref TutorialSparseSolverConcept
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo = Lower>
|
||||
class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT<_MatrixType, _UpLo> >
|
||||
@@ -503,7 +577,9 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
|
||||
*
|
||||
* This class supports all kinds of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
|
||||
*
|
||||
* \sa \ref TutorialSparseDirectSolvers
|
||||
* \warning Only double precision real and complex scalar types are supported by Cholmod.
|
||||
*
|
||||
* \sa \ref TutorialSparseSolverConcept
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo = Lower>
|
||||
class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecomposition<_MatrixType, _UpLo> >
|
||||
|
||||
@@ -12,7 +12,16 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class Array
|
||||
namespace internal {
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
{
|
||||
typedef ArrayXpr XprKind;
|
||||
typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
|
||||
};
|
||||
}
|
||||
|
||||
/** \class Array
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief General-purpose arrays with easy API for coefficient-wise operations
|
||||
@@ -26,21 +35,12 @@ namespace Eigen {
|
||||
*
|
||||
* See documentation of class Matrix for detailed information on the template parameters
|
||||
* and storage layout.
|
||||
*
|
||||
* This class can be extended with the help of the plugin mechanism described on the page
|
||||
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
|
||||
*
|
||||
* \sa \ref TutorialArrayClass, \ref TopicClassHierarchy
|
||||
* This class can be extended with the help of the plugin mechanism described on the page
|
||||
* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
|
||||
*
|
||||
* \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy
|
||||
*/
|
||||
namespace internal {
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
{
|
||||
typedef ArrayXpr XprKind;
|
||||
typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
|
||||
};
|
||||
}
|
||||
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
class Array
|
||||
: public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
@@ -147,9 +147,9 @@ class Array
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
#if EIGEN_HAS_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
Array(Array&& other)
|
||||
Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
|
||||
: Base(std::move(other))
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -157,7 +157,7 @@ class Array
|
||||
Base::_set_noalias(other);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
Array& operator=(Array&& other)
|
||||
Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
|
||||
{
|
||||
other.swap(*this);
|
||||
return *this;
|
||||
|
||||
@@ -32,7 +32,7 @@ template<typename ExpressionType> class MatrixWrapper;
|
||||
* \tparam Derived is the derived type, e.g., an array or an expression type.
|
||||
*
|
||||
* This class can be extended with the help of the plugin mechanism described on the page
|
||||
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN.
|
||||
* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN.
|
||||
*
|
||||
* \sa class MatrixBase, \ref TopicClassHierarchy
|
||||
*/
|
||||
@@ -52,8 +52,6 @@ template<typename Derived> class ArrayBase
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
|
||||
typedef DenseBase<Derived> Base;
|
||||
using Base::operator*;
|
||||
using Base::operator/;
|
||||
using Base::RowsAtCompileTime;
|
||||
using Base::ColsAtCompileTime;
|
||||
using Base::SizeAtCompileTime;
|
||||
@@ -89,6 +87,7 @@ template<typename Derived> class ArrayBase
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase
|
||||
#define EIGEN_DOC_UNARY_ADDONS(X,Y)
|
||||
# include "../plugins/CommonCwiseUnaryOps.h"
|
||||
# include "../plugins/MatrixCwiseUnaryOps.h"
|
||||
# include "../plugins/ArrayCwiseUnaryOps.h"
|
||||
@@ -99,11 +98,12 @@ template<typename Derived> class ArrayBase
|
||||
# include EIGEN_ARRAYBASE_PLUGIN
|
||||
# endif
|
||||
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
|
||||
#undef EIGEN_DOC_UNARY_ADDONS
|
||||
|
||||
/** Special case of the template operator=, in order to prevent the compiler
|
||||
* from generating a default operator= (issue hit with g++ 4.1)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator=(const ArrayBase& other)
|
||||
{
|
||||
internal::call_assignment(derived(), other.derived());
|
||||
@@ -112,28 +112,28 @@ template<typename Derived> class ArrayBase
|
||||
|
||||
/** Set all the entries to \a value.
|
||||
* \sa DenseBase::setConstant(), DenseBase::fill() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator=(const Scalar &value)
|
||||
{ Base::setConstant(value); return derived(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator+=(const Scalar& scalar);
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator-=(const Scalar& scalar);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator+=(const ArrayBase<OtherDerived>& other);
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator-=(const ArrayBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator*=(const ArrayBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator/=(const ArrayBase<OtherDerived>& other);
|
||||
|
||||
public:
|
||||
@@ -178,7 +178,7 @@ template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>());
|
||||
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
@@ -191,7 +191,7 @@ template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>());
|
||||
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
@@ -217,7 +217,7 @@ template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar>());
|
||||
call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
|
||||
@@ -52,7 +52,9 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType;
|
||||
typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
|
||||
|
||||
using Base::coeffRef;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
||||
@@ -67,68 +69,20 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
inline Index innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
|
||||
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return m_expression.data(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.coeff(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_expression.coeff(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(index);
|
||||
return m_expression.coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar packet(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.template packet<LoadMode>(rowId, colId);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
|
||||
{
|
||||
m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar packet(Index index) const
|
||||
{
|
||||
return m_expression.template packet<LoadMode>(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index index, const PacketScalar& val)
|
||||
{
|
||||
m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
|
||||
return m_expression.coeffRef(index);
|
||||
}
|
||||
|
||||
template<typename Dest>
|
||||
@@ -145,11 +99,11 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index) */
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
|
||||
void resize(Index newSize) { m_expression.resize(newSize); }
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index,Index)*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); }
|
||||
void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
|
||||
|
||||
protected:
|
||||
NestedExpressionType m_expression;
|
||||
@@ -195,7 +149,9 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType;
|
||||
typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
|
||||
|
||||
using Base::coeffRef;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
||||
@@ -210,68 +166,20 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
inline Index innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
|
||||
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return m_expression.data(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.coeff(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.derived().coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_expression.coeff(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar packet(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.template packet<LoadMode>(rowId, colId);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
|
||||
{
|
||||
m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar packet(Index index) const
|
||||
{
|
||||
return m_expression.template packet<LoadMode>(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index index, const PacketScalar& val)
|
||||
{
|
||||
m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
|
||||
return m_expression.coeffRef(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -284,11 +192,11 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index) */
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
|
||||
void resize(Index newSize) { m_expression.resize(newSize); }
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index,Index)*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); }
|
||||
void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
|
||||
|
||||
protected:
|
||||
NestedExpressionType m_expression;
|
||||
|
||||
298
Eigen/src/Core/AssignEvaluator.h
Executable file → Normal file
298
Eigen/src/Core/AssignEvaluator.h
Executable file → Normal file
@@ -29,13 +29,10 @@ struct copy_using_evaluator_traits
|
||||
{
|
||||
typedef typename DstEvaluator::XprType Dst;
|
||||
typedef typename Dst::Scalar DstScalar;
|
||||
// TODO distinguish between linear traversal and inner-traversals
|
||||
typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type PacketType;
|
||||
|
||||
enum {
|
||||
DstFlags = DstEvaluator::Flags,
|
||||
SrcFlags = SrcEvaluator::Flags,
|
||||
RequiredAlignment = unpacket_traits<PacketType>::alignment
|
||||
SrcFlags = SrcEvaluator::Flags
|
||||
};
|
||||
|
||||
public:
|
||||
@@ -55,36 +52,53 @@ private:
|
||||
: int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
|
||||
: int(Dst::MaxRowsAtCompileTime),
|
||||
OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
|
||||
MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
|
||||
PacketSize = unpacket_traits<PacketType>::size
|
||||
MaxSizeAtCompileTime = Dst::SizeAtCompileTime
|
||||
};
|
||||
|
||||
// TODO distinguish between linear traversal and inner-traversals
|
||||
typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
|
||||
typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;
|
||||
|
||||
enum {
|
||||
DstIsRowMajor = DstFlags&RowMajorBit,
|
||||
SrcIsRowMajor = SrcFlags&RowMajorBit,
|
||||
StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
|
||||
MightVectorize = StorageOrdersAgree
|
||||
&& (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
|
||||
&& (functor_traits<AssignFunc>::PacketAccess),
|
||||
MayInnerVectorize = MightVectorize
|
||||
&& int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
|
||||
&& int(OuterStride)!=Dynamic && int(OuterStride)%int(PacketSize)==0
|
||||
&& int(JointAlignment)>=int(RequiredAlignment),
|
||||
MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
|
||||
MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
|
||||
&& ((int(DstAlignment)>=int(RequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
|
||||
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
||||
so it's only good for large enough sizes. */
|
||||
MaySliceVectorize = MightVectorize && DstHasDirectAccess
|
||||
&& (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
|
||||
/* slice vectorization can be slow, so we only want it if the slices are big, which is
|
||||
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
|
||||
in a fixed-size matrix */
|
||||
LinearPacketSize = unpacket_traits<LinearPacketType>::size,
|
||||
InnerPacketSize = unpacket_traits<InnerPacketType>::size
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
|
||||
LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
|
||||
InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
|
||||
};
|
||||
|
||||
private:
|
||||
enum {
|
||||
DstIsRowMajor = DstFlags&RowMajorBit,
|
||||
SrcIsRowMajor = SrcFlags&RowMajorBit,
|
||||
StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
|
||||
MightVectorize = bool(StorageOrdersAgree)
|
||||
&& (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
|
||||
&& bool(functor_traits<AssignFunc>::PacketAccess),
|
||||
MayInnerVectorize = MightVectorize
|
||||
&& int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
|
||||
&& int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
|
||||
&& (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
|
||||
MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
|
||||
MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
|
||||
&& (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
|
||||
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
||||
so it's only good for large enough sizes. */
|
||||
MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
|
||||
&& (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
|
||||
/* slice vectorization can be slow, so we only want it if the slices are big, which is
|
||||
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
|
||||
in a fixed-size matrix
|
||||
However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal)
|
||||
: int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
|
||||
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
|
||||
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
|
||||
: int(MayLinearize) ? int(LinearTraversal)
|
||||
@@ -94,13 +108,18 @@ public:
|
||||
|| int(Traversal) == SliceVectorizedTraversal
|
||||
};
|
||||
|
||||
typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;
|
||||
|
||||
private:
|
||||
enum {
|
||||
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
|
||||
ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
|
||||
: Vectorized ? InnerPacketSize
|
||||
: 1,
|
||||
UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
|
||||
MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
|
||||
&& int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit),
|
||||
&& int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
|
||||
MayUnrollInner = int(InnerSize) != Dynamic
|
||||
&& int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit)
|
||||
&& int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
|
||||
};
|
||||
|
||||
public:
|
||||
@@ -112,11 +131,17 @@ public:
|
||||
: int(NoUnrolling)
|
||||
)
|
||||
: int(Traversal) == int(LinearVectorizedTraversal)
|
||||
? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(RequiredAlignment)) ? int(CompleteUnrolling)
|
||||
: int(NoUnrolling) )
|
||||
? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
|
||||
? int(CompleteUnrolling)
|
||||
: int(NoUnrolling) )
|
||||
: int(Traversal) == int(LinearTraversal)
|
||||
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
|
||||
: int(NoUnrolling) )
|
||||
#if EIGEN_UNALIGNED_VECTORIZE
|
||||
: int(Traversal) == int(SliceVectorizedTraversal)
|
||||
? ( bool(MayUnrollInner) ? int(InnerUnrolling)
|
||||
: int(NoUnrolling) )
|
||||
#endif
|
||||
: int(NoUnrolling)
|
||||
};
|
||||
|
||||
@@ -131,11 +156,14 @@ public:
|
||||
std::cerr.unsetf(std::ios::hex);
|
||||
EIGEN_DEBUG_VAR(DstAlignment)
|
||||
EIGEN_DEBUG_VAR(SrcAlignment)
|
||||
EIGEN_DEBUG_VAR(RequiredAlignment)
|
||||
EIGEN_DEBUG_VAR(LinearRequiredAlignment)
|
||||
EIGEN_DEBUG_VAR(InnerRequiredAlignment)
|
||||
EIGEN_DEBUG_VAR(JointAlignment)
|
||||
EIGEN_DEBUG_VAR(InnerSize)
|
||||
EIGEN_DEBUG_VAR(InnerMaxSize)
|
||||
EIGEN_DEBUG_VAR(PacketSize)
|
||||
EIGEN_DEBUG_VAR(LinearPacketSize)
|
||||
EIGEN_DEBUG_VAR(InnerPacketSize)
|
||||
EIGEN_DEBUG_VAR(ActualPacketSize)
|
||||
EIGEN_DEBUG_VAR(StorageOrdersAgree)
|
||||
EIGEN_DEBUG_VAR(MightVectorize)
|
||||
EIGEN_DEBUG_VAR(MayLinearize)
|
||||
@@ -143,6 +171,7 @@ public:
|
||||
EIGEN_DEBUG_VAR(MayLinearVectorize)
|
||||
EIGEN_DEBUG_VAR(MaySliceVectorize)
|
||||
std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
|
||||
EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
|
||||
EIGEN_DEBUG_VAR(UnrollingLimit)
|
||||
EIGEN_DEBUG_VAR(MayUnrollCompletely)
|
||||
EIGEN_DEBUG_VAR(MayUnrollInner)
|
||||
@@ -236,12 +265,13 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
|
||||
enum {
|
||||
outer = Index / DstXprType::InnerSizeAtCompileTime,
|
||||
inner = Index % DstXprType::InnerSizeAtCompileTime,
|
||||
JointAlignment = Kernel::AssignmentTraits::JointAlignment
|
||||
SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
|
||||
DstAlignment = Kernel::AssignmentTraits::DstAlignment
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
kernel.template assignPacketByOuterInner<Aligned, JointAlignment, PacketType>(outer, inner);
|
||||
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
|
||||
enum { NextIndex = Index + unpacket_traits<PacketType>::size };
|
||||
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
|
||||
}
|
||||
@@ -253,20 +283,20 @@ struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
|
||||
};
|
||||
|
||||
template<typename Kernel, int Index_, int Stop>
|
||||
template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
|
||||
struct copy_using_evaluator_innervec_InnerUnrolling
|
||||
{
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
|
||||
{
|
||||
kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, Index_);
|
||||
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
|
||||
enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
|
||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
|
||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel, int Stop>
|
||||
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop>
|
||||
template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
|
||||
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
|
||||
};
|
||||
@@ -370,14 +400,14 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
|
||||
typedef typename Kernel::Scalar Scalar;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
enum {
|
||||
requestedAlignment = Kernel::AssignmentTraits::RequiredAlignment,
|
||||
requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
|
||||
packetSize = unpacket_traits<PacketType>::size,
|
||||
dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
|
||||
dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
|
||||
: int(Kernel::AssignmentTraits::DstAlignment),
|
||||
srcAlignment = Kernel::AssignmentTraits::JointAlignment
|
||||
};
|
||||
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(&kernel.dstEvaluator().coeffRef(0), size);
|
||||
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
|
||||
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
|
||||
|
||||
unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
|
||||
@@ -395,9 +425,10 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
|
||||
enum { size = DstXprType::SizeAtCompileTime,
|
||||
packetSize = packet_traits<typename Kernel::Scalar>::size,
|
||||
packetSize =unpacket_traits<PacketType>::size,
|
||||
alignedSize = (size/packetSize)*packetSize };
|
||||
|
||||
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
|
||||
@@ -413,6 +444,10 @@ template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
enum {
|
||||
SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
|
||||
DstAlignment = Kernel::AssignmentTraits::DstAlignment
|
||||
};
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
const Index innerSize = kernel.innerSize();
|
||||
@@ -420,7 +455,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
|
||||
const Index packetSize = unpacket_traits<PacketType>::size;
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
for(Index inner = 0; inner < innerSize; inner+=packetSize)
|
||||
kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, inner);
|
||||
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -440,9 +475,11 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
typedef typename Kernel::AssignmentTraits Traits;
|
||||
const Index outerSize = kernel.outerSize();
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
|
||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
|
||||
Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -484,14 +521,14 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
enum {
|
||||
packetSize = unpacket_traits<PacketType>::size,
|
||||
requestedAlignment = int(Kernel::AssignmentTraits::RequiredAlignment),
|
||||
requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
|
||||
alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
|
||||
dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
|
||||
dstAlignment = alignable ? int(requestedAlignment)
|
||||
: int(Kernel::AssignmentTraits::DstAlignment)
|
||||
};
|
||||
const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0);
|
||||
if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0)
|
||||
const Scalar *dst_ptr = kernel.dstDataPtr();
|
||||
if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
|
||||
{
|
||||
// the pointer is not aligned on a scalar boundary, so alignment is not possible
|
||||
return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
|
||||
@@ -517,11 +554,34 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
||||
for(Index inner = alignedEnd; inner<innerSize ; ++inner)
|
||||
kernel.assignCoeffByOuterInner(outer, inner);
|
||||
|
||||
alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
|
||||
alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#if EIGEN_UNALIGNED_VECTORIZE
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
|
||||
enum { size = DstXprType::InnerSizeAtCompileTime,
|
||||
packetSize =unpacket_traits<PacketType>::size,
|
||||
vectorizableSize = (size/packetSize)*packetSize };
|
||||
|
||||
for(Index outer = 0; outer < kernel.outerSize(); ++outer)
|
||||
{
|
||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
|
||||
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
|
||||
}
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
* Part 4 : Generic dense assignment kernel
|
||||
***************************************************************************/
|
||||
@@ -623,6 +683,11 @@ public:
|
||||
: int(DstEvaluatorType::Flags)&RowMajorBit ? inner
|
||||
: outer;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
|
||||
{
|
||||
return m_dstExpr.data();
|
||||
}
|
||||
|
||||
protected:
|
||||
DstEvaluatorType& m_dst;
|
||||
@@ -637,26 +702,32 @@ protected:
|
||||
***************************************************************************/
|
||||
|
||||
template<typename DstXprType, typename SrcXprType, typename Functor>
|
||||
EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
|
||||
{
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
|
||||
typedef evaluator<DstXprType> DstEvaluatorType;
|
||||
typedef evaluator<SrcXprType> SrcEvaluatorType;
|
||||
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
SrcEvaluatorType srcEvaluator(src);
|
||||
|
||||
// NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
|
||||
// we need to resize the destination after the source evaluator has been created.
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
|
||||
typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
|
||||
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
|
||||
|
||||
|
||||
dense_assignment_loop<Kernel>::run(kernel);
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
|
||||
{
|
||||
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>());
|
||||
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
|
||||
}
|
||||
|
||||
/***************************************************************************
|
||||
@@ -678,51 +749,57 @@ template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Ki
|
||||
// This is the main assignment class
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor,
|
||||
typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
|
||||
typename Scalar = typename DstXprType::Scalar>
|
||||
typename EnableIf = void>
|
||||
struct Assignment;
|
||||
|
||||
|
||||
// The only purpose of this call_assignment() function is to deal with noalias() / AssumeAliasing and automatic transposition.
|
||||
// Indeed, I (Gael) think that this concept of AssumeAliasing was a mistake, and it makes thing quite complicated.
|
||||
// So this intermediate function removes everything related to AssumeAliasing such that Assignment
|
||||
// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
|
||||
// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite complicated.
|
||||
// So this intermediate function removes everything related to "assume-aliasing" such that Assignment
|
||||
// does not have to bother about these annoying details.
|
||||
|
||||
template<typename Dst, typename Src>
|
||||
EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment(Dst& dst, const Src& src)
|
||||
{
|
||||
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
|
||||
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
|
||||
}
|
||||
template<typename Dst, typename Src>
|
||||
EIGEN_DEVICE_FUNC void call_assignment(const Dst& dst, const Src& src)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment(const Dst& dst, const Src& src)
|
||||
{
|
||||
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
|
||||
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
|
||||
}
|
||||
|
||||
// Deal with AssumeAliasing
|
||||
// Deal with "assume-aliasing"
|
||||
template<typename Dst, typename Src, typename Func>
|
||||
EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==1, void*>::type = 0)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
|
||||
{
|
||||
typename plain_matrix_type<Src>::type tmp(src);
|
||||
call_assignment_no_alias(dst, tmp, func);
|
||||
}
|
||||
|
||||
template<typename Dst, typename Src, typename Func>
|
||||
EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==0, void*>::type = 0)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
|
||||
{
|
||||
call_assignment_no_alias(dst, src, func);
|
||||
}
|
||||
|
||||
// by-pass AssumeAliasing
|
||||
// by-pass "assume-aliasing"
|
||||
// When there is no aliasing, we require that 'dst' has been properly resized
|
||||
template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
|
||||
EIGEN_DEVICE_FUNC void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
|
||||
{
|
||||
call_assignment_no_alias(dst.expression(), src, func);
|
||||
}
|
||||
|
||||
|
||||
template<typename Dst, typename Src, typename Func>
|
||||
EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
|
||||
{
|
||||
enum {
|
||||
NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
|
||||
@@ -730,11 +807,6 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const
|
||||
) && int(Dst::SizeAtCompileTime) != 1
|
||||
};
|
||||
|
||||
Index dstRows = NeedToTranspose ? src.cols() : src.rows();
|
||||
Index dstCols = NeedToTranspose ? src.rows() : src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
|
||||
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
|
||||
ActualDstType actualDst(dst);
|
||||
@@ -747,42 +819,42 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const
|
||||
Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
|
||||
}
|
||||
template<typename Dst, typename Src>
|
||||
EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment_no_alias(Dst& dst, const Src& src)
|
||||
{
|
||||
call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>());
|
||||
call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
|
||||
}
|
||||
|
||||
template<typename Dst, typename Src, typename Func>
|
||||
EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
// TODO check whether this is the right place to perform these checks:
|
||||
EIGEN_STATIC_ASSERT_LVALUE(Dst)
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
|
||||
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
|
||||
|
||||
Assignment<Dst,Src,Func>::run(dst, src, func);
|
||||
}
|
||||
template<typename Dst, typename Src>
|
||||
EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
|
||||
{
|
||||
call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>());
|
||||
call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
|
||||
}
|
||||
|
||||
// forward declaration
|
||||
template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
|
||||
|
||||
// Generic Dense to Dense assignment
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
|
||||
// Note that the last template argument "Weak" is needed to make it possible to perform
|
||||
// both partial specialization+SFINAE without ambiguous specialization
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
|
||||
{
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
|
||||
#ifndef EIGEN_NO_DEBUG
|
||||
internal::check_for_aliasing(dst, src);
|
||||
#endif
|
||||
@@ -793,14 +865,50 @@ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
|
||||
|
||||
// Generic assignment through evalTo.
|
||||
// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar>
|
||||
// Note that the last template argument "Weak" is needed to make it possible to perform
|
||||
// both partial specialization+SFINAE without ambiguous specialization
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
src.evalTo(dst);
|
||||
}
|
||||
|
||||
// NOTE The following two functions are templated to avoid their instantiation if not needed
|
||||
// This is needed because some expressions support evalTo only and/or have 'void' as scalar type.
|
||||
template<typename SrcScalarType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
src.addTo(dst);
|
||||
}
|
||||
|
||||
template<typename SrcScalarType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
src.subTo(dst);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
@@ -81,10 +81,10 @@ class vml_assign_traits
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
|
||||
template< typename DstXprType, typename SrcXprNested> \
|
||||
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE>, \
|
||||
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml,EIGENTYPE>::type> { \
|
||||
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \
|
||||
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
|
||||
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE> &/*func*/) { \
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
|
||||
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
|
||||
VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
|
||||
@@ -138,22 +138,24 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor, _)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
|
||||
template< typename DstXprType, typename SrcXprNested> \
|
||||
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE>, \
|
||||
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml,EIGENTYPE>::type> { \
|
||||
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE> &/*func*/) { \
|
||||
template< typename DstXprType, typename SrcXprNested, typename Plain> \
|
||||
struct Assignment<DstXprType, CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
|
||||
const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> >, assign_op<EIGENTYPE,EIGENTYPE>, \
|
||||
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
|
||||
typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
|
||||
const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
|
||||
VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.functor().m_exponent); \
|
||||
VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \
|
||||
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \
|
||||
{ \
|
||||
VMLOP( dst.size(), (const VMLTYPE*)src.nestedExpression().data(), exponent, \
|
||||
VMLOP( dst.size(), (const VMLTYPE*)src.lhs().data(), exponent, \
|
||||
(VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \
|
||||
} else { \
|
||||
const Index outerSize = dst.outerSize(); \
|
||||
for(Index outer = 0; outer < outerSize; ++outer) { \
|
||||
const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \
|
||||
&(src.nestedExpression().coeffRef(0, outer)); \
|
||||
const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.lhs().coeffRef(outer,0)) : \
|
||||
&(src.lhs().coeffRef(0, outer)); \
|
||||
EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
|
||||
VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \
|
||||
(VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \
|
||||
|
||||
@@ -161,15 +161,15 @@ class BandMatrixBase : public EigenBase<Derived>
|
||||
*
|
||||
* \brief Represents a rectangular matrix with a banded storage
|
||||
*
|
||||
* \param _Scalar Numeric type, i.e. float, double, int
|
||||
* \param Rows Number of rows, or \b Dynamic
|
||||
* \param Cols Number of columns, or \b Dynamic
|
||||
* \param Supers Number of super diagonal
|
||||
* \param Subs Number of sub diagonal
|
||||
* \param _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
|
||||
* The former controls \ref TopicStorageOrders "storage order", and defaults to
|
||||
* column-major. The latter controls whether the matrix represents a selfadjoint
|
||||
* matrix in which case either Supers of Subs have to be null.
|
||||
* \tparam _Scalar Numeric type, i.e. float, double, int
|
||||
* \tparam _Rows Number of rows, or \b Dynamic
|
||||
* \tparam _Cols Number of columns, or \b Dynamic
|
||||
* \tparam _Supers Number of super diagonals
|
||||
* \tparam _Subs Number of sub diagonals
|
||||
* \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
|
||||
* The former controls \ref TopicStorageOrders "storage order", and defaults to
|
||||
* column-major. The latter controls whether the matrix represents a selfadjoint
|
||||
* matrix in which case either Supers or Subs has to be null.
|
||||
*
|
||||
* \sa class TridiagonalMatrix
|
||||
*/
|
||||
@@ -302,9 +302,9 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT
|
||||
*
|
||||
* \brief Represents a tridiagonal matrix with a compact banded storage
|
||||
*
|
||||
* \param _Scalar Numeric type, i.e. float, double, int
|
||||
* \param Size Number of rows and cols, or \b Dynamic
|
||||
* \param _Options Can be 0 or \b SelfAdjoint
|
||||
* \tparam Scalar Numeric type, i.e. float, double, int
|
||||
* \tparam Size Number of rows and cols, or \b Dynamic
|
||||
* \tparam Options Can be 0 or \b SelfAdjoint
|
||||
*
|
||||
* \sa class BandMatrix
|
||||
*/
|
||||
|
||||
@@ -13,41 +13,6 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class Block
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of a fixed-size or dynamic-size block
|
||||
*
|
||||
* \param XprType the type of the expression in which we are taking a block
|
||||
* \param BlockRows the number of rows of the block we are taking at compile time (optional)
|
||||
* \param BlockCols the number of columns of the block we are taking at compile time (optional)
|
||||
* \param InnerPanel is true, if the block maps to a set of rows of a row major matrix or
|
||||
* to set of columns of a column major matrix (optional). The parameter allows to determine
|
||||
* at compile time whether aligned access is possible on the block expression.
|
||||
*
|
||||
* This class represents an expression of either a fixed-size or dynamic-size block. It is the return
|
||||
* type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
|
||||
* most of the time this is the only way it is used.
|
||||
*
|
||||
* However, if you want to directly maniputate block expressions,
|
||||
* for instance if you want to write a function returning such an expression, you
|
||||
* will need to use this class.
|
||||
*
|
||||
* Here is an example illustrating the dynamic case:
|
||||
* \include class_Block.cpp
|
||||
* Output: \verbinclude class_Block.out
|
||||
*
|
||||
* \note Even though this expression has dynamic size, in the case where \a XprType
|
||||
* has fixed size, this expression inherits a fixed maximal size which means that evaluating
|
||||
* it does not cause a dynamic memory allocation.
|
||||
*
|
||||
* Here is an example illustrating the fixed-size case:
|
||||
* \include class_FixedBlock.cpp
|
||||
* Output: \verbinclude class_FixedBlock.out
|
||||
*
|
||||
* \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
|
||||
struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>
|
||||
@@ -101,6 +66,40 @@ template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool In
|
||||
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, typename StorageKind> class BlockImpl;
|
||||
|
||||
/** \class Block
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of a fixed-size or dynamic-size block
|
||||
*
|
||||
* \tparam XprType the type of the expression in which we are taking a block
|
||||
* \tparam BlockRows the number of rows of the block we are taking at compile time (optional)
|
||||
* \tparam BlockCols the number of columns of the block we are taking at compile time (optional)
|
||||
* \tparam InnerPanel is true, if the block maps to a set of rows of a row major matrix or
|
||||
* to a set of columns of a column major matrix (optional). The parameter allows one to determine
|
||||
* at compile time whether aligned access is possible on the block expression.
|
||||
*
|
||||
* This class represents an expression of either a fixed-size or dynamic-size block. It is the return
|
||||
* type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
|
||||
* most of the time this is the only way it is used.
|
||||
*
|
||||
* However, if you want to directly manipulate block expressions,
|
||||
* for instance if you want to write a function returning such an expression, you
|
||||
* will need to use this class.
|
||||
*
|
||||
* Here is an example illustrating the dynamic case:
|
||||
* \include class_Block.cpp
|
||||
* Output: \verbinclude class_Block.out
|
||||
*
|
||||
* \note Even though this expression has dynamic size, in the case where \a XprType
|
||||
* has fixed size, this expression inherits a fixed maximal size which means that evaluating
|
||||
* it does not cause a dynamic memory allocation.
|
||||
*
|
||||
* Here is an example illustrating the fixed-size case:
|
||||
* \include class_FixedBlock.cpp
|
||||
* Output: \verbinclude class_FixedBlock.out
|
||||
*
|
||||
* \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
|
||||
*/
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block
|
||||
: public BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind>
|
||||
{
|
||||
@@ -130,8 +129,8 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
|
||||
: Impl(xpr, startRow, startCol)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
|
||||
eigen_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows()
|
||||
&& startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols());
|
||||
eigen_assert(startRow >= 0 && BlockRows >= 0 && startRow + BlockRows <= xpr.rows()
|
||||
&& startCol >= 0 && BlockCols >= 0 && startCol + BlockCols <= xpr.cols());
|
||||
}
|
||||
|
||||
/** Dynamic-size constructor
|
||||
@@ -174,6 +173,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
: public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel> >::type
|
||||
{
|
||||
typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
|
||||
typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
|
||||
public:
|
||||
|
||||
typedef typename internal::dense_xpr_base<BlockType>::type Base;
|
||||
@@ -222,15 +222,13 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
inline Scalar& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(XprType)
|
||||
return m_xpr.const_cast_derived()
|
||||
.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
|
||||
return m_xpr.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return m_xpr.derived()
|
||||
.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
|
||||
return m_xpr.derived().coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -243,39 +241,34 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(XprType)
|
||||
return m_xpr.const_cast_derived()
|
||||
.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
||||
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
|
||||
return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
||||
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_xpr.const_cast_derived()
|
||||
.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
||||
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
|
||||
return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
||||
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_xpr
|
||||
.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
||||
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
|
||||
return m_xpr.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
||||
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline PacketScalar packet(Index rowId, Index colId) const
|
||||
{
|
||||
return m_xpr.template packet<Unaligned>
|
||||
(rowId + m_startRow.value(), colId + m_startCol.value());
|
||||
return m_xpr.template packet<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value());
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
|
||||
{
|
||||
m_xpr.const_cast_derived().template writePacket<Unaligned>
|
||||
(rowId + m_startRow.value(), colId + m_startCol.value(), val);
|
||||
m_xpr.template writePacket<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value(), val);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
@@ -289,7 +282,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index index, const PacketScalar& val)
|
||||
{
|
||||
m_xpr.const_cast_derived().template writePacket<Unaligned>
|
||||
m_xpr.template writePacket<Unaligned>
|
||||
(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
||||
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val);
|
||||
}
|
||||
@@ -302,10 +295,13 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
|
||||
const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
|
||||
{
|
||||
return m_xpr;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
XprType& nestedExpression() { return m_xpr; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
StorageIndex startRow() const
|
||||
@@ -321,9 +317,9 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
|
||||
protected:
|
||||
|
||||
const typename XprType::Nested m_xpr;
|
||||
const internal::variable_if_dynamic<StorageIndex, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
|
||||
const internal::variable_if_dynamic<StorageIndex, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
|
||||
XprTypeNested m_xpr;
|
||||
const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
|
||||
const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
|
||||
const internal::variable_if_dynamic<StorageIndex, RowsAtCompileTime> m_blockRows;
|
||||
const internal::variable_if_dynamic<StorageIndex, ColsAtCompileTime> m_blockCols;
|
||||
};
|
||||
@@ -334,6 +330,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
: public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> >
|
||||
{
|
||||
typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
|
||||
typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
|
||||
enum {
|
||||
XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0
|
||||
};
|
||||
@@ -351,7 +348,9 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
|| ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()),
|
||||
BlockRows==1 ? 1 : xpr.rows(),
|
||||
BlockCols==1 ? 1 : xpr.cols()),
|
||||
m_xpr(xpr)
|
||||
m_xpr(xpr),
|
||||
m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
|
||||
m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)
|
||||
{
|
||||
init();
|
||||
}
|
||||
@@ -361,7 +360,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
|
||||
: Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),
|
||||
m_xpr(xpr)
|
||||
m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
|
||||
{
|
||||
init();
|
||||
}
|
||||
@@ -373,16 +372,19 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
Index startRow, Index startCol,
|
||||
Index blockRows, Index blockCols)
|
||||
: Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols),
|
||||
m_xpr(xpr)
|
||||
m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
|
||||
{
|
||||
init();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
|
||||
const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
|
||||
{
|
||||
return m_xpr;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
XprType& nestedExpression() { return m_xpr; }
|
||||
|
||||
/** \sa MapBase::innerStride() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -400,6 +402,18 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
return m_outerStride;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
StorageIndex startRow() const
|
||||
{
|
||||
return m_startRow.value();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
StorageIndex startCol() const
|
||||
{
|
||||
return m_startCol.value();
|
||||
}
|
||||
|
||||
#ifndef __SUNPRO_CC
|
||||
// FIXME sunstudio is not friendly with the above friend...
|
||||
// META-FIXME there is no 'friend' keyword around here. Is this obsolete?
|
||||
@@ -425,7 +439,9 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
: m_xpr.innerStride();
|
||||
}
|
||||
|
||||
typename XprType::Nested m_xpr;
|
||||
XprTypeNested m_xpr;
|
||||
const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
|
||||
const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
|
||||
Index m_outerStride;
|
||||
};
|
||||
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
FILE(GLOB Eigen_Core_SRCS "*.h")
|
||||
|
||||
INSTALL(FILES
|
||||
${Eigen_Core_SRCS}
|
||||
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core COMPONENT Devel
|
||||
)
|
||||
|
||||
ADD_SUBDIRECTORY(products)
|
||||
ADD_SUBDIRECTORY(util)
|
||||
ADD_SUBDIRECTORY(arch)
|
||||
ADD_SUBDIRECTORY(functors)
|
||||
@@ -22,7 +22,7 @@ namespace Eigen {
|
||||
* the return type of MatrixBase::operator<<, and most of the time this is the only
|
||||
* way it is used.
|
||||
*
|
||||
* \sa \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished()
|
||||
* \sa \blank \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished()
|
||||
*/
|
||||
template<typename XprType>
|
||||
struct CommaInitializer
|
||||
@@ -80,9 +80,7 @@ struct CommaInitializer
|
||||
EIGEN_DEVICE_FUNC
|
||||
CommaInitializer& operator,(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
if(other.cols()==0 || other.rows()==0)
|
||||
return *this;
|
||||
if (m_col==m_xpr.cols())
|
||||
if (m_col==m_xpr.cols() && (other.cols()!=0 || other.rows()!=m_currentBlockRows))
|
||||
{
|
||||
m_row+=m_currentBlockRows;
|
||||
m_col = 0;
|
||||
@@ -90,15 +88,11 @@ struct CommaInitializer
|
||||
eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows()
|
||||
&& "Too many rows passed to comma initializer (operator<<)");
|
||||
}
|
||||
eigen_assert(m_col<m_xpr.cols()
|
||||
eigen_assert((m_col + other.cols() <= m_xpr.cols())
|
||||
&& "Too many coefficients passed to comma initializer (operator<<)");
|
||||
eigen_assert(m_currentBlockRows==other.rows());
|
||||
if (OtherDerived::SizeAtCompileTime != Dynamic)
|
||||
m_xpr.template block<OtherDerived::RowsAtCompileTime != Dynamic ? OtherDerived::RowsAtCompileTime : 1,
|
||||
OtherDerived::ColsAtCompileTime != Dynamic ? OtherDerived::ColsAtCompileTime : 1>
|
||||
(m_row, m_col) = other;
|
||||
else
|
||||
m_xpr.block(m_row, m_col, other.rows(), other.cols()) = other;
|
||||
m_xpr.template block<OtherDerived::RowsAtCompileTime, OtherDerived::ColsAtCompileTime>
|
||||
(m_row, m_col, other.rows(), other.cols()) = other;
|
||||
m_col += other.cols();
|
||||
return *this;
|
||||
}
|
||||
@@ -109,9 +103,7 @@ struct CommaInitializer
|
||||
EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception)
|
||||
#endif
|
||||
{
|
||||
eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows()
|
||||
&& m_col == m_xpr.cols()
|
||||
&& "Too few coefficients passed to comma initializer (operator<<)");
|
||||
finished();
|
||||
}
|
||||
|
||||
/** \returns the built matrix once all its coefficients have been set.
|
||||
@@ -122,7 +114,12 @@ struct CommaInitializer
|
||||
* \endcode
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline XprType& finished() { return m_xpr; }
|
||||
inline XprType& finished() {
|
||||
eigen_assert(((m_row+m_currentBlockRows) == m_xpr.rows() || m_xpr.cols() == 0)
|
||||
&& m_col == m_xpr.cols()
|
||||
&& "Too few coefficients passed to comma initializer (operator<<)");
|
||||
return m_xpr;
|
||||
}
|
||||
|
||||
XprType& m_xpr; // target expression
|
||||
Index m_row; // current row id
|
||||
|
||||
175
Eigen/src/Core/ConditionEstimator.h
Normal file
175
Eigen/src/Core/ConditionEstimator.h
Normal file
@@ -0,0 +1,175 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@google.com)
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_CONDITIONESTIMATOR_H
|
||||
#define EIGEN_CONDITIONESTIMATOR_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Computes the elementwise "sign" of v as v / |v|. Coefficients whose absolute
// value is exactly zero yield 1 instead, which avoids a division by zero.
template <typename Vector, typename RealVector, bool IsComplex>
|
||||
struct rcond_compute_sign {
|
||||
static inline Vector run(const Vector& v) {
|
||||
const RealVector v_abs = v.cwiseAbs();
|
||||
// Where |v_i| == 0 pick 1, otherwise pick v_i / |v_i|.
return (v_abs.array() == static_cast<typename Vector::RealScalar>(0))
|
||||
.select(Vector::Ones(v.size()), v.cwiseQuotient(v_abs));
|
||||
}
|
||||
};
|
||||
|
||||
// Partial specialization to avoid elementwise division for real vectors.
// Yields -1 for strictly negative coefficients and +1 for all others
// (a zero coefficient therefore maps to +1).
|
||||
template <typename Vector>
|
||||
struct rcond_compute_sign<Vector, Vector, false> {
|
||||
static inline Vector run(const Vector& v) {
|
||||
return (v.array() < static_cast<typename Vector::RealScalar>(0))
|
||||
.select(-Vector::Ones(v.size()), Vector::Ones(v.size()));
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* \returns an estimate of ||inv(matrix)||_1 given a decomposition of
|
||||
* \a matrix that implements .solve() and .adjoint().solve() methods.
|
||||
*
|
||||
* This function implements Algorithms 4.1 and 5.1 from
|
||||
* http://www.maths.manchester.ac.uk/~higham/narep/narep135.pdf
|
||||
* which also forms the basis for the condition number estimators in
|
||||
* LAPACK. Since at most 10 calls to the solve method of dec are
|
||||
* performed, the total cost is O(dims^2), as opposed to O(dims^3)
|
||||
* needed to compute the inverse matrix explicitly.
|
||||
*
|
||||
* The most common usage is in estimating the condition number
|
||||
* ||matrix||_1 * ||inv(matrix)||_1. The first term ||matrix||_1 can be
|
||||
* computed directly in O(n^2) operations.
|
||||
*
|
||||
* Supports the following decompositions: FullPivLU, PartialPivLU, LDLT, and
|
||||
* LLT.
|
||||
*
|
||||
* \sa FullPivLU, PartialPivLU, LDLT, LLT.
|
||||
*/
|
||||
template <typename Decomposition>
|
||||
typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomposition& dec)
|
||||
{
|
||||
typedef typename Decomposition::MatrixType MatrixType;
|
||||
typedef typename Decomposition::Scalar Scalar;
|
||||
typedef typename Decomposition::RealScalar RealScalar;
|
||||
typedef typename internal::plain_col_type<MatrixType>::type Vector;
|
||||
typedef typename internal::plain_col_type<MatrixType, RealScalar>::type RealVector;
|
||||
const bool is_complex = (NumTraits<Scalar>::IsComplex != 0);
|
||||
|
||||
eigen_assert(dec.rows() == dec.cols());
|
||||
const Index n = dec.rows();
|
||||
// Empty decomposition: there is nothing to estimate, report 0.
if (n == 0)
|
||||
return 0;
|
||||
|
||||
// Disable Index to float conversion warning
|
||||
#ifdef __INTEL_COMPILER
|
||||
#pragma warning push
|
||||
#pragma warning ( disable : 2259 )
|
||||
#endif
|
||||
// Starting point: apply inv(matrix) to the uniform vector (1/n, ..., 1/n).
Vector v = dec.solve(Vector::Ones(n) / Scalar(n));
|
||||
#ifdef __INTEL_COMPILER
|
||||
#pragma warning pop
|
||||
#endif
|
||||
|
||||
// lower_bound is a lower bound on
|
||||
// ||inv(matrix)||_1 = sup_v ||inv(matrix) v||_1 / ||v||_1
|
||||
// and is the objective maximized by the ("super-") gradient ascent
|
||||
// algorithm below.
|
||||
RealScalar lower_bound = v.template lpNorm<1>();
|
||||
// 1x1 case: no iteration is run, the bound computed above is returned as is.
if (n == 1)
|
||||
return lower_bound;
|
||||
|
||||
// Gradient ascent algorithm follows: We know that the optimum is achieved at
|
||||
// one of the unit vectors v = e_i (the vertices of the unit simplex), so in
// each iteration we follow a
|
||||
// super-gradient to move towards the optimal one.
|
||||
RealScalar old_lower_bound = lower_bound;
|
||||
Vector sign_vector(n);
|
||||
Vector old_sign_vector;
|
||||
Index v_max_abs_index = -1;
|
||||
Index old_v_max_abs_index = v_max_abs_index;
|
||||
// At most 4 ascent iterations are performed.
for (int k = 0; k < 4; ++k)
|
||||
{
|
||||
sign_vector = internal::rcond_compute_sign<Vector, RealVector, is_complex>::run(v);
|
||||
if (k > 0 && !is_complex && sign_vector == old_sign_vector) {
|
||||
// Break if the solution stagnated.
|
||||
break;
|
||||
}
|
||||
// v_max_abs_index = argmax |real( inv(matrix)^T * sign_vector )|
|
||||
v = dec.adjoint().solve(sign_vector);
|
||||
v.real().cwiseAbs().maxCoeff(&v_max_abs_index);
|
||||
if (v_max_abs_index == old_v_max_abs_index) {
|
||||
// Break if the solution stagnated.
|
||||
break;
|
||||
}
|
||||
// Move to the new vertex e_j, where j = v_max_abs_index.
|
||||
v = dec.solve(Vector::Unit(n, v_max_abs_index)); // v = inv(matrix) * e_j.
|
||||
lower_bound = v.template lpNorm<1>();
|
||||
if (lower_bound <= old_lower_bound) {
|
||||
// Break if the gradient step did not increase the lower_bound.
|
||||
break;
|
||||
}
|
||||
// The sign vector is only remembered (and compared above) for real scalars.
if (!is_complex) {
|
||||
old_sign_vector = sign_vector;
|
||||
}
|
||||
old_v_max_abs_index = v_max_abs_index;
|
||||
old_lower_bound = lower_bound;
|
||||
}
|
||||
// The following calculates an independent estimate of ||inv(matrix)||_1 by
|
||||
// applying inv(matrix) (through dec.solve()) to a vector with entries of
// slowly increasing
|
||||
// magnitude and alternating sign:
|
||||
// v_i = (-1)^{i} (1 + (i / (dim-1))), i = 0,...,dim-1.
|
||||
// This improvement to Hager's algorithm above is due to Higham. It was
|
||||
// added to make the algorithm more robust in certain corner cases where
|
||||
// large elements in inv(matrix) might otherwise escape detection due to
|
||||
// exact cancellation (especially when solve() and adjoint().solve() correspond to a
|
||||
// sequence of backsubstitutions and permutations), which could cause
|
||||
// Hager's algorithm to vastly underestimate ||inv(matrix)||_1.
|
||||
Scalar alternating_sign(RealScalar(1));
|
||||
for (Index i = 0; i < n; ++i) {
|
||||
// The static_cast is needed when Scalar is a complex and RealScalar implements expression templates
|
||||
v[i] = alternating_sign * static_cast<RealScalar>(RealScalar(1) + (RealScalar(i) / (RealScalar(n - 1))));
|
||||
alternating_sign = -alternating_sign;
|
||||
}
|
||||
v = dec.solve(v);
|
||||
const RealScalar alternate_lower_bound = (2 * v.template lpNorm<1>()) / (3 * RealScalar(n));
|
||||
// Both values are lower bounds, so the larger of the two is returned.
return numext::maxi(lower_bound, alternate_lower_bound);
|
||||
}
|
||||
|
||||
/** \brief Reciprocal condition number estimator.
|
||||
*
|
||||
* Computing a decomposition of a dense matrix takes O(n^3) operations, while
|
||||
* this method estimates the condition number quickly and reliably in O(n^2)
|
||||
* operations.
|
||||
*
|
||||
* \returns an estimate of the reciprocal condition number
|
||||
* (1 / (||matrix||_1 * ||inv(matrix)||_1)) of matrix, given ||matrix||_1 and
|
||||
* its decomposition. Supports the following decompositions: FullPivLU,
|
||||
* PartialPivLU, LDLT, and LLT.
|
||||
*
|
||||
* \sa FullPivLU, PartialPivLU, LDLT, LLT.
|
||||
*/
|
||||
template <typename Decomposition>
|
||||
typename Decomposition::RealScalar
|
||||
rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Decomposition& dec)
|
||||
{
|
||||
typedef typename Decomposition::RealScalar RealScalar;
|
||||
eigen_assert(dec.rows() == dec.cols());
|
||||
// Empty matrix: reported as perfectly conditioned (1).
if (dec.rows() == 0) return RealScalar(1);
|
||||
// A zero norm is reported as reciprocal condition number 0, without running the estimator.
if (matrix_norm == RealScalar(0)) return RealScalar(0);
|
||||
// 1x1 matrix with non-zero norm: reported as 1 without running the estimator.
if (dec.rows() == 1) return RealScalar(1);
|
||||
const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec);
|
||||
// Guard against dividing by a zero estimate; otherwise return
// 1 / (||inv(matrix)||_1 * ||matrix||_1).
return (inverse_matrix_norm == RealScalar(0) ? RealScalar(0)
|
||||
: (RealScalar(1) / inverse_matrix_norm) / matrix_norm);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#endif
|
||||
@@ -41,10 +41,19 @@ template<> struct storage_kind_to_shape<TranspositionsStorage> { typedef Transp
|
||||
// We currently distinguish the following kind of evaluators:
|
||||
// - unary_evaluator for expressions taking only one argument (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate)
|
||||
// - binary_evaluator for expressions taking two arguments (CwiseBinaryOp)
|
||||
// - ternary_evaluator for expressions taking three arguments (CwiseTernaryOp)
|
||||
// - product_evaluator for linear algebra products (Product); special case of binary_evaluator because it requires additional tags for dispatching.
|
||||
// - mapbase_evaluator for Map, Block, Ref
|
||||
// - block_evaluator for Block (special dispatching to a mapbase_evaluator or unary_evaluator)
|
||||
|
||||
template< typename T,
|
||||
typename Arg1Kind = typename evaluator_traits<typename T::Arg1>::Kind,
|
||||
typename Arg2Kind = typename evaluator_traits<typename T::Arg2>::Kind,
|
||||
typename Arg3Kind = typename evaluator_traits<typename T::Arg3>::Kind,
|
||||
typename Arg1Scalar = typename traits<typename T::Arg1>::Scalar,
|
||||
typename Arg2Scalar = typename traits<typename T::Arg2>::Scalar,
|
||||
typename Arg3Scalar = typename traits<typename T::Arg3>::Scalar> struct ternary_evaluator;
|
||||
|
||||
template< typename T,
|
||||
typename LhsKind = typename evaluator_traits<typename T::Lhs>::Kind,
|
||||
typename RhsKind = typename evaluator_traits<typename T::Rhs>::Kind,
|
||||
@@ -63,10 +72,6 @@ struct evaluator_traits_base
|
||||
// by default, get evaluator kind and shape from storage
|
||||
typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind;
|
||||
typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape;
|
||||
|
||||
// 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a
|
||||
// temporary; 0 if not.
|
||||
static const int AssumeAliasing = 0;
|
||||
};
|
||||
|
||||
// Default evaluator traits
|
||||
@@ -75,6 +80,10 @@ struct evaluator_traits : public evaluator_traits_base<T>
|
||||
{
|
||||
};
|
||||
|
||||
// Compile-time flag attached to an expression type T; the Shape parameter
// defaults to evaluator_traits<T>::Shape. This generic definition sets
// value to false.
// NOTE(review): presumably specialized elsewhere for expressions whose
// assignment must assume aliasing -- confirm against the rest of the file.
template<typename T, typename Shape = typename evaluator_traits<T>::Shape >
|
||||
struct evaluator_assume_aliasing {
|
||||
static const bool value = false;
|
||||
};
|
||||
|
||||
// By default, we assume a unary expression:
|
||||
template<typename T>
|
||||
@@ -148,7 +157,8 @@ struct evaluator<PlainObjectBase<Derived> >
|
||||
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
if (IsRowMajor)
|
||||
return m_data[row * m_outerStride.value() + col];
|
||||
@@ -156,12 +166,14 @@ struct evaluator<PlainObjectBase<Derived> >
|
||||
return m_data[row + col * m_outerStride.value()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_data[index];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
if (IsRowMajor)
|
||||
return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col];
|
||||
@@ -169,12 +181,14 @@ struct evaluator<PlainObjectBase<Derived> >
|
||||
return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index index)
|
||||
{
|
||||
return const_cast<Scalar*>(m_data)[index];
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
if (IsRowMajor)
|
||||
@@ -184,12 +198,14 @@ struct evaluator<PlainObjectBase<Derived> >
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return ploadt<PacketType, LoadMode>(m_data + index);
|
||||
}
|
||||
|
||||
template<int StoreMode,typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
if (IsRowMajor)
|
||||
@@ -201,6 +217,7 @@ struct evaluator<PlainObjectBase<Derived> >
|
||||
}
|
||||
|
||||
template<int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
|
||||
@@ -260,45 +277,53 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased>
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return m_argImpl.coeff(col, row);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_argImpl.coeff(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
return m_argImpl.coeffRef(col, row);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC typename XprType::Scalar& coeffRef(Index index)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
typename XprType::Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_argImpl.coeffRef(index);
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(col, row);
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(index);
|
||||
}
|
||||
|
||||
template<int StoreMode, typename PacketType>
|
||||
template<int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
m_argImpl.template writePacket<StoreMode,PacketType>(col, row, x);
|
||||
}
|
||||
|
||||
template<int StoreMode, typename PacketType>
|
||||
template<int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
m_argImpl.template writePacket<StoreMode,PacketType>(index, x);
|
||||
@@ -312,6 +337,120 @@ protected:
|
||||
// Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator.
|
||||
// Likewise, there is no need for more sophisticated dispatching here.
|
||||
|
||||
// Adapter through which a nullary functor is invoked with either (op, i, j)
// or (op, i). The three bool parameters default to has_nullary_operator,
// has_unary_operator and has_binary_operator of NullaryOp and select one of
// the specializations. This primary template forwards the indices unchanged
// to op(i,j) / op(i) and to the matching packetOp<T> overloads.
template<typename Scalar,typename NullaryOp,
|
||||
bool has_nullary = has_nullary_operator<NullaryOp>::value,
|
||||
bool has_unary = has_unary_operator<NullaryOp>::value,
|
||||
bool has_binary = has_binary_operator<NullaryOp>::value>
|
||||
struct nullary_wrapper
|
||||
{
|
||||
template <typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { return op(i,j); }
|
||||
template <typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }
|
||||
|
||||
template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { return op.template packetOp<T>(i,j); }
|
||||
template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }
|
||||
};
|
||||
|
||||
// Specialization for <true,false,false>: the functor is called without
// indices. Any indices passed to the wrapper are ignored and op() /
// op.packetOp<T>() is invoked.
template<typename Scalar,typename NullaryOp>
|
||||
struct nullary_wrapper<Scalar,NullaryOp,true,false,false>
|
||||
{
|
||||
template <typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType=0, IndexType=0) const { return op(); }
|
||||
template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType=0, IndexType=0) const { return op.template packetOp<T>(); }
|
||||
};
|
||||
|
||||
// Specialization for <false,false,true>: the functor is called with two
// indices. The second index defaults to 0, so a single-index call is
// forwarded as op(i,0).
template<typename Scalar,typename NullaryOp>
|
||||
struct nullary_wrapper<Scalar,NullaryOp,false,false,true>
|
||||
{
|
||||
template <typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j=0) const { return op(i,j); }
|
||||
template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j=0) const { return op.template packetOp<T>(i,j); }
|
||||
};
|
||||
|
||||
// We need the following specialization for vector-only functors assigned to a runtime vector,
|
||||
// for instance, using linspace and assigning a RowVectorXd to a MatrixXd or even a row of a MatrixXd.
|
||||
// In this case, i==0 and j is used for the actual iteration.
// Two-index calls are therefore collapsed to the single index i+j, after
// asserting that at least one of the two indices is zero.
|
||||
template<typename Scalar,typename NullaryOp>
|
||||
struct nullary_wrapper<Scalar,NullaryOp,false,true,false>
|
||||
{
|
||||
template <typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {
|
||||
// One index must be zero so that i+j is the position along the vector.
eigen_assert(i==0 || j==0);
|
||||
return op(i+j);
|
||||
}
|
||||
template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {
|
||||
// Same collapsing rule as the scalar overload above.
eigen_assert(i==0 || j==0);
|
||||
return op.template packetOp<T>(i+j);
|
||||
}
|
||||
|
||||
// Single-index calls are forwarded unchanged.
template <typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }
|
||||
template <typename T, typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }
|
||||
};
|
||||
|
||||
// Specialization for <false,false,false>: none of the three call forms was
// detected on the functor, so the wrapper is deliberately left empty and
// offers no operator() or packetOp.
template<typename Scalar,typename NullaryOp>
|
||||
struct nullary_wrapper<Scalar,NullaryOp,false,false,false> {};
|
||||
|
||||
#if 0 && EIGEN_COMP_MSVC>0
|
||||
// Disable this ugly workaround. This is now handled in traits<Ref>::match,
|
||||
// but this piece of code might still become handy if some other weird compilation
|
||||
// errors pop up again.
|
||||
|
||||
// MSVC exhibits a weird compilation error when
|
||||
// compiling:
|
||||
// Eigen::MatrixXf A = MatrixXf::Random(3,3);
|
||||
// Ref<const MatrixXf> R = 2.f*A;
|
||||
// and that has_*ary_operator<scalar_constant_op<float>> have not been instantiated yet.
|
||||
// The "problem" is that evaluator<2.f*A> is instantiated by traits<Ref>::match<2.f*A>
|
||||
// and at that time has_*ary_operator<T> returns true regardless of T.
|
||||
// Then nullary_wrapper is badly instantiated as nullary_wrapper<.,.,true,true,true>.
|
||||
// The trick is thus to defer the proper instantiation of nullary_wrapper when coeff(),
|
||||
// and packet() are really instantiated as implemented below:
|
||||
|
||||
// This is a simple wrapper around Index to enforce the re-instantiation of
|
||||
// has_*ary_operator when needed.
|
||||
template<typename T> struct nullary_wrapper_workaround_msvc {
|
||||
nullary_wrapper_workaround_msvc(const T&);
|
||||
operator T()const;
|
||||
};
|
||||
|
||||
// Specialization of nullary_wrapper for the case where all three boolean
// parameters are true. Rather than calling the functor itself, every member
// re-evaluates has_nullary_operator / has_unary_operator / has_binary_operator
// with nullary_wrapper_workaround_msvc<IndexType> as index type and forwards
// the call to the nullary_wrapper specialization selected by those values.
// Because the members are templates on IndexType, that selection is made
// when the member is instantiated, not when this struct is.
template<typename Scalar,typename NullaryOp>
|
||||
struct nullary_wrapper<Scalar,NullaryOp,true,true,true>
|
||||
{
|
||||
// Coefficient access with two indices (i,j): forwards (op,i,j) to the
// re-selected nullary_wrapper and returns its Scalar result.
template <typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {
|
||||
return nullary_wrapper<Scalar,NullaryOp,
|
||||
has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
|
||||
has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
|
||||
has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i,j);
|
||||
}
|
||||
// Coefficient access with a single index i: same forwarding as above,
// using the one-index overload of the re-selected nullary_wrapper.
template <typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const {
|
||||
return nullary_wrapper<Scalar,NullaryOp,
|
||||
has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
|
||||
has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
|
||||
has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i);
|
||||
}
|
||||
|
||||
// Packet access with two indices (i,j): T is the packet type to return;
// forwards to packetOp<T> of the re-selected nullary_wrapper.
template <typename T, typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {
|
||||
return nullary_wrapper<Scalar,NullaryOp,
|
||||
has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
|
||||
has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
|
||||
has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i,j);
|
||||
}
|
||||
// Packet access with a single index i: one-index counterpart of the
// overload above.
template <typename T, typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const {
|
||||
return nullary_wrapper<Scalar,NullaryOp,
|
||||
has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
|
||||
has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
|
||||
has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i);
|
||||
}
|
||||
};
|
||||
#endif // MSVC workaround
|
||||
|
||||
template<typename NullaryOp, typename PlainObjectType>
|
||||
struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
|
||||
: evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> >
|
||||
@@ -331,37 +470,44 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n)
|
||||
: m_functor(n.functor())
|
||||
: m_functor(n.functor()), m_wrapper()
|
||||
{
|
||||
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
||||
}
|
||||
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
template <typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(IndexType row, IndexType col) const
|
||||
{
|
||||
return m_functor(row, col);
|
||||
return m_wrapper(m_functor, row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
|
||||
template <typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(IndexType index) const
|
||||
{
|
||||
return m_functor(index);
|
||||
return m_wrapper(m_functor,index);
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType, typename IndexType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(IndexType row, IndexType col) const
|
||||
{
|
||||
return m_functor.template packetOp<Index,PacketType>(row, col);
|
||||
return m_wrapper.template packetOp<PacketType>(m_functor, row, col);
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
template<int LoadMode, typename PacketType, typename IndexType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(IndexType index) const
|
||||
{
|
||||
return m_functor.template packetOp<Index,PacketType>(index);
|
||||
return m_wrapper.template packetOp<PacketType>(m_functor, index);
|
||||
}
|
||||
|
||||
protected:
|
||||
const NullaryOp m_functor;
|
||||
const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
|
||||
};
|
||||
|
||||
// -------------------- CwiseUnaryOp --------------------
|
||||
@@ -380,7 +526,8 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
|
||||
Alignment = evaluator<ArgType>::Alignment
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
explicit unary_evaluator(const XprType& op)
|
||||
: m_functor(op.functor()),
|
||||
m_argImpl(op.nestedExpression())
|
||||
{
|
||||
@@ -390,23 +537,27 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
|
||||
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return m_functor(m_argImpl.coeff(row, col));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_functor(m_argImpl.coeff(index));
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col));
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index));
|
||||
@@ -417,6 +568,96 @@ protected:
|
||||
evaluator<ArgType> m_argImpl;
|
||||
};
|
||||
|
||||
// -------------------- CwiseTernaryOp --------------------
|
||||
|
||||
// this is a ternary expression
|
||||
// evaluator specialization for CwiseTernaryOp expressions. It adds no logic
// of its own: it derives from ternary_evaluator for the same expression type
// and its constructor passes the expression straight to that base.
template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
|
||||
struct evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
|
||||
: public ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
|
||||
{
|
||||
typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
|
||||
typedef ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > Base;
|
||||
|
||||
// Builds the evaluator by forwarding xpr to the ternary_evaluator base.
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
|
||||
};
|
||||
|
||||
// Index-based evaluator for a coefficient-wise ternary expression.
// It stores a copy of the functor and one evaluator per argument; each
// coeff()/packet() call reads the three arguments at the same position and
// passes the three values to the functor.
template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
|
||||
struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased, IndexBased>
|
||||
: evaluator_base<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
|
||||
{
|
||||
typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
|
||||
|
||||
enum {
|
||||
// Cost of one coefficient: the three argument read costs plus the functor cost.
CoeffReadCost = evaluator<Arg1>::CoeffReadCost + evaluator<Arg2>::CoeffReadCost + evaluator<Arg3>::CoeffReadCost + functor_traits<TernaryOp>::Cost,
|
||||
|
||||
Arg1Flags = evaluator<Arg1>::Flags,
|
||||
Arg2Flags = evaluator<Arg2>::Flags,
|
||||
Arg3Flags = evaluator<Arg3>::Flags,
|
||||
// True only when all three arguments have the same Scalar type.
SameType = is_same<typename Arg1::Scalar,typename Arg2::Scalar>::value && is_same<typename Arg1::Scalar,typename Arg3::Scalar>::value,
|
||||
// True only when the RowMajorBit of Arg2 and Arg3 both match that of Arg1.
StorageOrdersAgree = (int(Arg1Flags)&RowMajorBit)==(int(Arg2Flags)&RowMajorBit) && (int(Arg1Flags)&RowMajorBit)==(int(Arg3Flags)&RowMajorBit),
|
||||
// Flags0 keeps, from the union of the argument flags, the HereditaryBits
// plus those bits set in all three arguments that are also enabled below:
//  - LinearAccessBit when the storage orders agree;
//  - PacketAccessBit when the functor declares PacketAccess, the storage
//    orders agree and the scalar types are identical.
Flags0 = (int(Arg1Flags) | int(Arg2Flags) | int(Arg3Flags)) & (
|
||||
HereditaryBits
|
||||
| (int(Arg1Flags) & int(Arg2Flags) & int(Arg3Flags) &
|
||||
( (StorageOrdersAgree ? LinearAccessBit : 0)
|
||||
| (functor_traits<TernaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
|
||||
)
|
||||
)
|
||||
),
|
||||
// Final flags: Flags0 with its RowMajorBit replaced by the one of Arg1.
Flags = (Flags0 & ~RowMajorBit) | (Arg1Flags & RowMajorBit),
|
||||
// Alignment is the minimum of the three argument alignments.
Alignment = EIGEN_PLAIN_ENUM_MIN(
|
||||
EIGEN_PLAIN_ENUM_MIN(evaluator<Arg1>::Alignment, evaluator<Arg2>::Alignment),
|
||||
evaluator<Arg3>::Alignment)
|
||||
};
|
||||
|
||||
// Copies the functor out of xpr and builds one evaluator for each of
// xpr.arg1(), xpr.arg2() and xpr.arg3().
EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr)
|
||||
: m_functor(xpr.functor()),
|
||||
m_arg1Impl(xpr.arg1()),
|
||||
m_arg2Impl(xpr.arg2()),
|
||||
m_arg3Impl(xpr.arg3())
|
||||
{
|
||||
// NOTE(review): presumably compile-time sanity checks on the cost values;
// the macro is defined outside this view - confirm.
EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<TernaryOp>::Cost);
|
||||
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
||||
}
|
||||
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
|
||||
// Returns the functor applied to the (row, col) coefficient of each argument.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col));
|
||||
}
|
||||
|
||||
// Returns the functor applied to the coefficient at linear position index
// of each argument.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
|
||||
}
|
||||
|
||||
// Packet version of coeff(row, col): loads one packet from each argument
// with the same LoadMode and combines them through the functor's packetOp.
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(row, col),
|
||||
m_arg2Impl.template packet<LoadMode,PacketType>(row, col),
|
||||
m_arg3Impl.template packet<LoadMode,PacketType>(row, col));
|
||||
}
|
||||
|
||||
// Packet version of coeff(index): same as above with a single linear index.
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(index),
|
||||
m_arg2Impl.template packet<LoadMode,PacketType>(index),
|
||||
m_arg3Impl.template packet<LoadMode,PacketType>(index));
|
||||
}
|
||||
|
||||
protected:
|
||||
// Functor copy taken from the expression at construction.
const TernaryOp m_functor;
|
||||
// Evaluators of the three arguments, in argument order.
evaluator<Arg1> m_arg1Impl;
|
||||
evaluator<Arg2> m_arg2Impl;
|
||||
evaluator<Arg3> m_arg3Impl;
|
||||
};
|
||||
|
||||
// -------------------- CwiseBinaryOp --------------------
|
||||
|
||||
// this is a binary expression
|
||||
@@ -466,17 +707,20 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
|
||||
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index));
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col),
|
||||
@@ -484,6 +728,7 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index),
|
||||
@@ -523,22 +768,26 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return m_unaryOp(m_argImpl.coeff(row, col));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_unaryOp(m_argImpl.coeff(index));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
return m_unaryOp(m_argImpl.coeffRef(row, col));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_unaryOp(m_argImpl.coeffRef(index));
|
||||
}
|
||||
@@ -568,65 +817,79 @@ struct mapbase_evaluator : evaluator_base<Derived>
|
||||
ColsAtCompileTime = XprType::ColsAtCompileTime,
|
||||
CoeffReadCost = NumTraits<Scalar>::ReadCost
|
||||
};
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map)
|
||||
: m_data(const_cast<PointerType>(map.data())),
|
||||
m_xpr(map)
|
||||
: m_data(const_cast<PointerType>(map.data())),
|
||||
m_innerStride(map.innerStride()),
|
||||
m_outerStride(map.outerStride())
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1),
|
||||
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
|
||||
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_data[index * m_xpr.innerStride()];
|
||||
return m_data[col * colStride() + row * rowStride()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
|
||||
return m_data[index * m_innerStride.value()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
return m_data[index * m_xpr.innerStride()];
|
||||
return m_data[col * colStride() + row * rowStride()];
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index index)
|
||||
{
|
||||
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
|
||||
return m_data[index * m_innerStride.value()];
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
PointerType ptr = m_data + row * rowStride() + col * colStride();
|
||||
return internal::ploadt<PacketType, LoadMode>(ptr);
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return internal::ploadt<PacketType, LoadMode>(m_data + index * m_xpr.innerStride());
|
||||
return internal::ploadt<PacketType, LoadMode>(m_data + index * m_innerStride.value());
|
||||
}
|
||||
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
|
||||
template<int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
|
||||
PointerType ptr = m_data + row * rowStride() + col * colStride();
|
||||
return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
|
||||
}
|
||||
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
|
||||
template<int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_xpr.innerStride(), x);
|
||||
internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x);
|
||||
}
|
||||
|
||||
protected:
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); }
|
||||
|
||||
PointerType m_data;
|
||||
const XprType& m_xpr;
|
||||
const internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_innerStride;
|
||||
const internal::variable_if_dynamic<Index, XprType::OuterStrideAtCompileTime> m_outerStride;
|
||||
};
|
||||
|
||||
template<typename PlainObjectType, int MapOptions, typename StrideType>
|
||||
@@ -714,9 +977,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
|
||||
OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
|
||||
? int(outer_stride_at_compile_time<ArgType>::ret)
|
||||
: int(inner_stride_at_compile_time<ArgType>::ret),
|
||||
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
|
||||
&& (InnerStrideAtCompileTime == 1)
|
||||
? PacketAccessBit : 0,
|
||||
MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0,
|
||||
|
||||
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
|
||||
FlagsRowMajorBit = XprType::Flags&RowMajorBit,
|
||||
@@ -767,48 +1028,56 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
|
||||
RowsAtCompileTime = XprType::RowsAtCompileTime
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index index)
|
||||
{
|
||||
return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col);
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
|
||||
RowsAtCompileTime == 1 ? index : 0);
|
||||
}
|
||||
|
||||
template<int StoreMode, typename PacketType>
|
||||
template<int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
{
|
||||
return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x);
|
||||
}
|
||||
|
||||
template<int StoreMode, typename PacketType>
|
||||
template<int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
{
|
||||
return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
|
||||
RowsAtCompileTime == 1 ? index : 0,
|
||||
x);
|
||||
@@ -816,8 +1085,8 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
|
||||
|
||||
protected:
|
||||
evaluator<ArgType> m_argImpl;
|
||||
const variable_if_dynamic<Index, ArgType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
|
||||
const variable_if_dynamic<Index, ArgType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
|
||||
const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
|
||||
const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
|
||||
};
|
||||
|
||||
// TODO: This evaluator does not actually use the child evaluator;
|
||||
@@ -835,7 +1104,7 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc
|
||||
: mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
|
||||
{
|
||||
// TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime
|
||||
eigen_assert(((size_t(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
|
||||
eigen_assert(((internal::UIntPtr(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
|
||||
}
|
||||
};
|
||||
|
||||
@@ -859,7 +1128,7 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
|
||||
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment)
|
||||
};
|
||||
|
||||
inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
|
||||
: m_conditionImpl(select.conditionMatrix()),
|
||||
m_thenImpl(select.thenMatrix()),
|
||||
m_elseImpl(select.elseMatrix())
|
||||
@@ -869,7 +1138,8 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
|
||||
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
|
||||
inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
if (m_conditionImpl.coeff(row, col))
|
||||
return m_thenImpl.coeff(row, col);
|
||||
@@ -877,7 +1147,8 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
|
||||
return m_elseImpl.coeff(row, col);
|
||||
}
|
||||
|
||||
inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
if (m_conditionImpl.coeff(index))
|
||||
return m_thenImpl.coeff(index);
|
||||
@@ -921,7 +1192,8 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
|
||||
m_cols(replicate.nestedExpression().cols())
|
||||
{}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
// try to avoid using modulo; this is a pure optimization strategy
|
||||
const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
|
||||
@@ -934,7 +1206,8 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
|
||||
return m_argImpl.coeff(actual_row, actual_col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
// try to avoid using modulo; this is a pure optimization strategy
|
||||
const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
|
||||
@@ -945,6 +1218,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
|
||||
@@ -958,6 +1232,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
|
||||
@@ -994,7 +1269,7 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
|
||||
CoeffReadCost = TraversalSize==Dynamic ? HugeCost
|
||||
: TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
|
||||
|
||||
Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))),
|
||||
Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))) | LinearAccessBit,
|
||||
|
||||
Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
|
||||
};
|
||||
@@ -1008,7 +1283,8 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
|
||||
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const Scalar coeff(Index i, Index j) const
|
||||
{
|
||||
if (Direction==Vertical)
|
||||
return m_functor(m_arg.col(j));
|
||||
@@ -1016,7 +1292,8 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
|
||||
return m_functor(m_arg.row(i));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const Scalar coeff(Index index) const
|
||||
{
|
||||
if (Direction==Vertical)
|
||||
return m_functor(m_arg.col(index));
|
||||
@@ -1025,7 +1302,7 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
|
||||
}
|
||||
|
||||
protected:
|
||||
const ArgTypeNested m_arg;
|
||||
typename internal::add_const_on_value_type<ArgTypeNested>::type m_arg;
|
||||
const MemberOp m_functor;
|
||||
};
|
||||
|
||||
@@ -1051,45 +1328,53 @@ struct evaluator_wrapper_base
|
||||
typedef typename ArgType::Scalar Scalar;
|
||||
typedef typename ArgType::CoeffReturnType CoeffReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return m_argImpl.coeff(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_argImpl.coeff(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
return m_argImpl.coeffRef(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_argImpl.coeffRef(index);
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(row, col);
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(index);
|
||||
}
|
||||
|
||||
template<int StoreMode, typename PacketType>
|
||||
template<int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
m_argImpl.template writePacket<StoreMode>(row, col, x);
|
||||
}
|
||||
|
||||
template<int StoreMode, typename PacketType>
|
||||
template<int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
m_argImpl.template writePacket<StoreMode>(index, x);
|
||||
@@ -1164,29 +1449,34 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
|
||||
m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1)
|
||||
{ }
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row,
|
||||
ReverseCol ? m_cols.value() - col - 1 : col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row,
|
||||
ReverseCol ? m_cols.value() - col - 1 : col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1);
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
enum {
|
||||
@@ -1201,6 +1491,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
enum { PacketSize = unpacket_traits<PacketType>::size };
|
||||
@@ -1208,6 +1499,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
// FIXME we could factorize some code with packet(i,j)
|
||||
@@ -1224,6 +1516,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
enum { PacketSize = unpacket_traits<PacketType>::size };
|
||||
@@ -1252,7 +1545,7 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
|
||||
enum {
|
||||
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
|
||||
|
||||
Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit,
|
||||
Flags = (unsigned int)(evaluator<ArgType>::Flags & (HereditaryBits | DirectAccessBit) & ~RowMajorBit) | LinearAccessBit,
|
||||
|
||||
Alignment = 0
|
||||
};
|
||||
@@ -1267,22 +1560,26 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
|
||||
typedef typename internal::conditional<!internal::is_same<typename ArgType::StorageKind,Sparse>::value,
|
||||
typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index row, Index) const
|
||||
{
|
||||
return m_argImpl.coeff(row + rowOffset(), row + colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_argImpl.coeff(index + rowOffset(), index + colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index row, Index)
|
||||
{
|
||||
return m_argImpl.coeffRef(row + rowOffset(), row + colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_argImpl.coeffRef(index + rowOffset(), index + colOffset());
|
||||
}
|
||||
|
||||
@@ -13,26 +13,6 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class CwiseBinaryOp
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
|
||||
*
|
||||
* \param BinaryOp template functor implementing the operator
|
||||
* \param Lhs the type of the left-hand side
|
||||
* \param Rhs the type of the right-hand side
|
||||
*
|
||||
* This class represents an expression where a coefficient-wise binary operator is applied to two expressions.
|
||||
* It is the return type of binary operators, by which we mean only those binary operators where
|
||||
* both the left-hand side and the right-hand side are Eigen expressions.
|
||||
* For example, the return type of matrix1+matrix2 is a CwiseBinaryOp.
|
||||
*
|
||||
* Most of the time, this is the only way that it is used, so you typically don't have to name
|
||||
* CwiseBinaryOp types explicitly.
|
||||
*
|
||||
* \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename BinaryOp, typename Lhs, typename Rhs>
|
||||
struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
|
||||
@@ -52,8 +32,8 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
|
||||
// we still want to handle the case when the result type is different.
|
||||
typedef typename result_of<
|
||||
BinaryOp(
|
||||
typename Lhs::Scalar,
|
||||
typename Rhs::Scalar
|
||||
const typename Lhs::Scalar&,
|
||||
const typename Rhs::Scalar&
|
||||
)
|
||||
>::type Scalar;
|
||||
typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind,
|
||||
@@ -66,7 +46,7 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
|
||||
typedef typename remove_reference<LhsNested>::type _LhsNested;
|
||||
typedef typename remove_reference<RhsNested>::type _RhsNested;
|
||||
enum {
|
||||
Flags = _LhsNested::Flags & RowMajorBit
|
||||
Flags = cwise_promote_storage_order<typename traits<Lhs>::StorageKind,typename traits<Rhs>::StorageKind,_LhsNested::Flags & RowMajorBit,_RhsNested::Flags & RowMajorBit>::value
|
||||
};
|
||||
};
|
||||
} // end namespace internal
|
||||
@@ -74,6 +54,25 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
|
||||
template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
|
||||
class CwiseBinaryOpImpl;
|
||||
|
||||
/** \class CwiseBinaryOp
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
|
||||
*
|
||||
* \tparam BinaryOp template functor implementing the operator
|
||||
* \tparam LhsType the type of the left-hand side
|
||||
* \tparam RhsType the type of the right-hand side
|
||||
*
|
||||
* This class represents an expression where a coefficient-wise binary operator is applied to two expressions.
|
||||
* It is the return type of binary operators, by which we mean only those binary operators where
|
||||
* both the left-hand side and the right-hand side are Eigen expressions.
|
||||
* For example, the return type of matrix1+matrix2 is a CwiseBinaryOp.
|
||||
*
|
||||
* Most of the time, this is the only way that it is used, so you typically don't have to name
|
||||
* CwiseBinaryOp types explicitly.
|
||||
*
|
||||
* \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
|
||||
*/
|
||||
template<typename BinaryOp, typename LhsType, typename RhsType>
|
||||
class CwiseBinaryOp :
|
||||
public CwiseBinaryOpImpl<
|
||||
@@ -85,6 +84,7 @@ class CwiseBinaryOp :
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename internal::remove_all<BinaryOp>::type Functor;
|
||||
typedef typename internal::remove_all<LhsType>::type Lhs;
|
||||
typedef typename internal::remove_all<RhsType>::type Rhs;
|
||||
|
||||
@@ -161,7 +161,7 @@ template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>());
|
||||
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
@@ -174,7 +174,7 @@ template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>());
|
||||
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
|
||||
@@ -12,24 +12,6 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class CwiseNullaryOp
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic expression of a matrix where all coefficients are defined by a functor
|
||||
*
|
||||
* \param NullaryOp template functor implementing the operator
|
||||
* \param PlainObjectType the underlying plain matrix/array type
|
||||
*
|
||||
* This class represents an expression of a generic nullary operator.
|
||||
* It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods,
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* However, if you want to write a function returning such an expression, you
|
||||
* will need to use this class.
|
||||
*
|
||||
* \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr()
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename NullaryOp, typename PlainObjectType>
|
||||
struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
|
||||
@@ -38,8 +20,42 @@ struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectT
|
||||
Flags = traits<PlainObjectType>::Flags & RowMajorBit
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
|
||||
/** \class CwiseNullaryOp
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic expression of a matrix where all coefficients are defined by a functor
|
||||
*
|
||||
* \tparam NullaryOp template functor implementing the operator
|
||||
* \tparam PlainObjectType the underlying plain matrix/array type
|
||||
*
|
||||
* This class represents an expression of a generic nullary operator.
|
||||
* It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods,
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* However, if you want to write a function returning such an expression, you
|
||||
* will need to use this class.
|
||||
*
|
||||
* The functor NullaryOp must expose one of the following methods:
|
||||
<table class="manual">
|
||||
<tr ><td>\c operator()() </td><td>if the procedural generation does not depend on the coefficient entries (e.g., random numbers)</td></tr>
|
||||
<tr class="alt"><td>\c operator()(Index i)</td><td>if the procedural generation makes sense for vectors only and that it depends on the coefficient index \c i (e.g., linspace) </td></tr>
|
||||
<tr ><td>\c operator()(Index i,Index j)</td><td>if the procedural generation depends on the matrix coordinates \c i, \c j (e.g., to generate a checkerboard with 0 and 1)</td></tr>
|
||||
</table>
|
||||
* It is also possible to expose the last two operators if the generation makes sense for matrices but can be optimized for vectors.
|
||||
*
|
||||
* See DenseBase::NullaryExpr(Index,const CustomNullaryOp&) for an example binding
|
||||
* C++11 random number generators.
|
||||
*
|
||||
* A nullary expression can also be used to implement custom sophisticated matrix manipulations
|
||||
* that cannot be covered by the existing set of natively supported matrix manipulations.
|
||||
* See this \ref TopicCustomizing_NullaryExpr "page" for some examples and additional explanations
|
||||
* on the behavior of CwiseNullaryOp.
|
||||
*
|
||||
* \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr
|
||||
*/
|
||||
template<typename NullaryOp, typename PlainObjectType>
|
||||
class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator
|
||||
{
|
||||
@@ -63,30 +79,6 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_functor(rowId, colId);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
|
||||
{
|
||||
return m_functor.packetOp(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
|
||||
{
|
||||
return m_functor(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
|
||||
{
|
||||
return m_functor.packetOp(index);
|
||||
}
|
||||
|
||||
/** \returns the functor representing the nullary operation */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const NullaryOp& functor() const { return m_functor; }
|
||||
@@ -223,46 +215,33 @@ DenseBase<Derived>::Constant(const Scalar& value)
|
||||
return DenseBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op<Scalar>(value));
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Sets a linearly space vector.
|
||||
/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(Index,const Scalar&,const Scalar&)
|
||||
*
|
||||
* The function generates 'size' equally spaced values in the closed interval [low,high].
|
||||
* This particular version of LinSpaced() uses sequential access, i.e. vector access is
|
||||
* assumed to be a(0), a(1), ..., a(size). This assumption allows for better vectorization
|
||||
* and yields faster code than the random access version.
|
||||
*
|
||||
* When size is set to 1, a vector of length 1 containing 'high' is returned.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* Example: \include DenseBase_LinSpaced_seq.cpp
|
||||
* Output: \verbinclude DenseBase_LinSpaced_seq.out
|
||||
*
|
||||
* \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Index,Scalar,Scalar), CwiseNullaryOp
|
||||
* \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,size));
|
||||
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
|
||||
}
|
||||
|
||||
/**
|
||||
* \copydoc DenseBase::LinSpaced(Sequential_t, Index, const Scalar&, const Scalar&)
|
||||
* Special version for fixed size types which does not require the size parameter.
|
||||
/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(const Scalar&,const Scalar&)
|
||||
*
|
||||
* \sa LinSpaced(Scalar,Scalar)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,Derived::SizeAtCompileTime));
|
||||
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Sets a linearly space vector.
|
||||
* \brief Sets a linearly spaced vector.
|
||||
*
|
||||
* The function generates 'size' equally spaced values in the closed interval [low,high].
|
||||
* When size is set to 1, a vector of length 1 containing 'high' is returned.
|
||||
@@ -272,14 +251,24 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
|
||||
* Example: \include DenseBase_LinSpaced.cpp
|
||||
* Output: \verbinclude DenseBase_LinSpaced.out
|
||||
*
|
||||
* \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Sequential_t,Index,const Scalar&,const Scalar&,Index), CwiseNullaryOp
|
||||
* For integer scalar types, an even spacing is possible if and only if the length of the range,
|
||||
* i.e., \c high-low is a scalar multiple of \c size-1, or if \c size is a scalar multiple of the
|
||||
* number of values \c high-low+1 (meaning each value can be repeated the same number of times).
|
||||
* If one of these two conditions is not satisfied, then \c high is lowered to the largest value
|
||||
* satisfying one of these constraints.
|
||||
* Here are some examples:
|
||||
*
|
||||
* Example: \include DenseBase_LinSpacedInt.cpp
|
||||
* Output: \verbinclude DenseBase_LinSpacedInt.out
|
||||
*
|
||||
* \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar,true>(low,high,size));
|
||||
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -292,7 +281,7 @@ DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar,true>(low,high,Derived::SizeAtCompileTime));
|
||||
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
|
||||
}
|
||||
|
||||
/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
|
||||
@@ -328,7 +317,7 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
|
||||
setConstant(val);
|
||||
}
|
||||
|
||||
/** Sets all coefficients in this expression to \a value.
|
||||
/** Sets all coefficients in this expression to value \a val.
|
||||
*
|
||||
* \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes()
|
||||
*/
|
||||
@@ -338,7 +327,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
|
||||
return derived() = Constant(rows(), cols(), val);
|
||||
}
|
||||
|
||||
/** Resizes to the given \a size, and sets all coefficients in this expression to the given \a value.
|
||||
/** Resizes to the given \a size, and sets all coefficients in this expression to the given value \a val.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
@@ -355,7 +344,7 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
|
||||
return setConstant(val);
|
||||
}
|
||||
|
||||
/** Resizes to the given size, and sets all coefficients in this expression to the given \a value.
|
||||
/** Resizes to the given size, and sets all coefficients in this expression to the given value \a val.
|
||||
*
|
||||
* \param rows the new number of rows
|
||||
* \param cols the new number of columns
|
||||
@@ -375,7 +364,7 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Sets a linearly space vector.
|
||||
* \brief Sets a linearly spaced vector.
|
||||
*
|
||||
* The function generates 'size' equally spaced values in the closed interval [low,high].
|
||||
* When size is set to 1, a vector of length 1 containing 'high' is returned.
|
||||
@@ -385,24 +374,30 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
|
||||
* Example: \include DenseBase_setLinSpaced.cpp
|
||||
* Output: \verbinclude DenseBase_setLinSpaced.out
|
||||
*
|
||||
* \sa CwiseNullaryOp
|
||||
* For integer scalar types, do not miss the explanations on the definition
|
||||
* of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink.
|
||||
*
|
||||
* \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,newSize));
|
||||
return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize));
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Sets a linearly space vector.
|
||||
* \brief Sets a linearly spaced vector.
|
||||
*
|
||||
* The function fill *this with equally spaced values in the closed interval [low,high].
|
||||
* The function fills \c *this with equally spaced values in the closed interval [low,high].
|
||||
* When size is set to 1, a vector of length 1 containing 'high' is returned.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* \sa setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
|
||||
* For integer scalar types, do not miss the explanations on the definition
|
||||
* of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink.
|
||||
*
|
||||
* \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
|
||||
@@ -760,7 +755,7 @@ struct setIdentity_impl<Derived, true>
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& m)
|
||||
{
|
||||
m.setZero();
|
||||
const Index size = (std::min)(m.rows(), m.cols());
|
||||
const Index size = numext::mini(m.rows(), m.cols());
|
||||
for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
|
||||
return m;
|
||||
}
|
||||
|
||||
197
Eigen/src/Core/CwiseTernaryOp.h
Normal file
197
Eigen/src/Core/CwiseTernaryOp.h
Normal file
@@ -0,0 +1,197 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2016 Eugene Brevdo <ebrevdo@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_CWISE_TERNARY_OP_H
|
||||
#define EIGEN_CWISE_TERNARY_OP_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
|
||||
struct traits<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > {
|
||||
// we must not inherit from traits<Arg1> since it has
|
||||
// the potential to cause problems with MSVC
|
||||
typedef typename remove_all<Arg1>::type Ancestor;
|
||||
typedef typename traits<Ancestor>::XprKind XprKind;
|
||||
enum {
|
||||
RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,
|
||||
ColsAtCompileTime = traits<Ancestor>::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = traits<Ancestor>::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = traits<Ancestor>::MaxColsAtCompileTime
|
||||
};
|
||||
|
||||
// even though we require Arg1, Arg2, and Arg3 to have the same scalar type
|
||||
// (see CwiseTernaryOp constructor),
|
||||
// we still want to handle the case when the result type is different.
|
||||
typedef typename result_of<TernaryOp(
|
||||
const typename Arg1::Scalar&, const typename Arg2::Scalar&,
|
||||
const typename Arg3::Scalar&)>::type Scalar;
|
||||
|
||||
typedef typename internal::traits<Arg1>::StorageKind StorageKind;
|
||||
typedef typename internal::traits<Arg1>::StorageIndex StorageIndex;
|
||||
|
||||
typedef typename Arg1::Nested Arg1Nested;
|
||||
typedef typename Arg2::Nested Arg2Nested;
|
||||
typedef typename Arg3::Nested Arg3Nested;
|
||||
typedef typename remove_reference<Arg1Nested>::type _Arg1Nested;
|
||||
typedef typename remove_reference<Arg2Nested>::type _Arg2Nested;
|
||||
typedef typename remove_reference<Arg3Nested>::type _Arg3Nested;
|
||||
enum { Flags = _Arg1Nested::Flags & RowMajorBit };
|
||||
};
|
||||
} // end namespace internal
|
||||
|
||||
template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3,
|
||||
typename StorageKind>
|
||||
class CwiseTernaryOpImpl;
|
||||
|
||||
/** \class CwiseTernaryOp
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic expression where a coefficient-wise ternary operator is
|
||||
* applied to three expressions
|
||||
*
|
||||
* \tparam TernaryOp template functor implementing the operator
|
||||
* \tparam Arg1Type the type of the first argument
|
||||
* \tparam Arg2Type the type of the second argument
|
||||
* \tparam Arg3Type the type of the third argument
|
||||
*
|
||||
* This class represents an expression where a coefficient-wise ternary
|
||||
* operator is applied to three expressions.
|
||||
* It is the return type of ternary operators, by which we mean only those
|
||||
* ternary operators where
|
||||
* all three arguments are Eigen expressions.
|
||||
* For example, the return type of betainc(matrix1, matrix2, matrix3) is a
|
||||
* CwiseTernaryOp.
|
||||
*
|
||||
* Most of the time, this is the only way that it is used, so you typically
|
||||
* don't have to name
|
||||
* CwiseTernaryOp types explicitly.
|
||||
*
|
||||
* \sa MatrixBase::ternaryExpr(const MatrixBase<Argument2> &, const
|
||||
* MatrixBase<Argument3> &, const CustomTernaryOp &) const, class CwiseBinaryOp,
|
||||
* class CwiseUnaryOp, class CwiseNullaryOp
|
||||
*/
|
||||
template <typename TernaryOp, typename Arg1Type, typename Arg2Type,
|
||||
typename Arg3Type>
|
||||
class CwiseTernaryOp : public CwiseTernaryOpImpl<
|
||||
TernaryOp, Arg1Type, Arg2Type, Arg3Type,
|
||||
typename internal::traits<Arg1Type>::StorageKind>,
|
||||
internal::no_assignment_operator
|
||||
{
|
||||
public:
|
||||
typedef typename internal::remove_all<Arg1Type>::type Arg1;
|
||||
typedef typename internal::remove_all<Arg2Type>::type Arg2;
|
||||
typedef typename internal::remove_all<Arg3Type>::type Arg3;
|
||||
|
||||
typedef typename CwiseTernaryOpImpl<
|
||||
TernaryOp, Arg1Type, Arg2Type, Arg3Type,
|
||||
typename internal::traits<Arg1Type>::StorageKind>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseTernaryOp)
|
||||
|
||||
typedef typename internal::ref_selector<Arg1Type>::type Arg1Nested;
|
||||
typedef typename internal::ref_selector<Arg2Type>::type Arg2Nested;
|
||||
typedef typename internal::ref_selector<Arg3Type>::type Arg3Nested;
|
||||
typedef typename internal::remove_reference<Arg1Nested>::type _Arg1Nested;
|
||||
typedef typename internal::remove_reference<Arg2Nested>::type _Arg2Nested;
|
||||
typedef typename internal::remove_reference<Arg3Nested>::type _Arg3Nested;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CwiseTernaryOp(const Arg1& a1, const Arg2& a2,
|
||||
const Arg3& a3,
|
||||
const TernaryOp& func = TernaryOp())
|
||||
: m_arg1(a1), m_arg2(a2), m_arg3(a3), m_functor(func) {
|
||||
// require the sizes to match
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2)
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3)
|
||||
|
||||
// The storage kinds should match
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<
|
||||
typename internal::traits<Arg1Type>::StorageKind,
|
||||
typename internal::traits<Arg2Type>::StorageKind>::value),
|
||||
STORAGE_KIND_MUST_MATCH)
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<
|
||||
typename internal::traits<Arg1Type>::StorageKind,
|
||||
typename internal::traits<Arg3Type>::StorageKind>::value),
|
||||
STORAGE_KIND_MUST_MATCH)
|
||||
|
||||
eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() &&
|
||||
a1.rows() == a3.rows() && a1.cols() == a3.cols());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rows() const {
|
||||
// return the fixed size type if available to enable compile time
|
||||
// optimizations
|
||||
if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
|
||||
RowsAtCompileTime == Dynamic &&
|
||||
internal::traits<typename internal::remove_all<Arg2Nested>::type>::
|
||||
RowsAtCompileTime == Dynamic)
|
||||
return m_arg3.rows();
|
||||
else if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
|
||||
RowsAtCompileTime == Dynamic &&
|
||||
internal::traits<typename internal::remove_all<Arg3Nested>::type>::
|
||||
RowsAtCompileTime == Dynamic)
|
||||
return m_arg2.rows();
|
||||
else
|
||||
return m_arg1.rows();
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index cols() const {
|
||||
// return the fixed size type if available to enable compile time
|
||||
// optimizations
|
||||
if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
|
||||
ColsAtCompileTime == Dynamic &&
|
||||
internal::traits<typename internal::remove_all<Arg2Nested>::type>::
|
||||
ColsAtCompileTime == Dynamic)
|
||||
return m_arg3.cols();
|
||||
else if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
|
||||
ColsAtCompileTime == Dynamic &&
|
||||
internal::traits<typename internal::remove_all<Arg3Nested>::type>::
|
||||
ColsAtCompileTime == Dynamic)
|
||||
return m_arg2.cols();
|
||||
else
|
||||
return m_arg1.cols();
|
||||
}
|
||||
|
||||
/** \returns the first argument nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const _Arg1Nested& arg1() const { return m_arg1; }
|
||||
/** \returns the second argument nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const _Arg2Nested& arg2() const { return m_arg2; }
|
||||
/** \returns the third argument nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const _Arg3Nested& arg3() const { return m_arg3; }
|
||||
/** \returns the functor representing the ternary operation */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const TernaryOp& functor() const { return m_functor; }
|
||||
|
||||
protected:
|
||||
Arg1Nested m_arg1;
|
||||
Arg2Nested m_arg2;
|
||||
Arg3Nested m_arg3;
|
||||
const TernaryOp m_functor;
|
||||
};
|
||||
|
||||
// Generic API dispatcher
|
||||
template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3,
|
||||
typename StorageKind>
|
||||
class CwiseTernaryOpImpl
|
||||
: public internal::generic_xpr_base<
|
||||
CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >::type {
|
||||
public:
|
||||
typedef typename internal::generic_xpr_base<
|
||||
CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >::type Base;
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_CWISE_TERNARY_OP_H
|
||||
@@ -13,33 +13,13 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class CwiseUnaryOp
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic expression where a coefficient-wise unary operator is applied to an expression
|
||||
*
|
||||
* \param UnaryOp template functor implementing the operator
|
||||
* \param XprType the type of the expression to which we are applying the unary operator
|
||||
*
|
||||
* This class represents an expression where a unary operator is applied to an expression.
|
||||
* It is the return type of all operations taking exactly 1 input expression, regardless of the
|
||||
* presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix
|
||||
* is considered unary, because only the right-hand side is an expression, and its
|
||||
* return type is a specialization of CwiseUnaryOp.
|
||||
*
|
||||
* Most of the time, this is the only way that it is used, so you typically don't have to name
|
||||
* CwiseUnaryOp types explicitly.
|
||||
*
|
||||
* \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename UnaryOp, typename XprType>
|
||||
struct traits<CwiseUnaryOp<UnaryOp, XprType> >
|
||||
: traits<XprType>
|
||||
{
|
||||
typedef typename result_of<
|
||||
UnaryOp(typename XprType::Scalar)
|
||||
UnaryOp(const typename XprType::Scalar&)
|
||||
>::type Scalar;
|
||||
typedef typename XprType::Nested XprTypeNested;
|
||||
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
|
||||
@@ -52,6 +32,25 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> >
|
||||
template<typename UnaryOp, typename XprType, typename StorageKind>
|
||||
class CwiseUnaryOpImpl;
|
||||
|
||||
/** \class CwiseUnaryOp
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic expression where a coefficient-wise unary operator is applied to an expression
|
||||
*
|
||||
* \tparam UnaryOp template functor implementing the operator
|
||||
* \tparam XprType the type of the expression to which we are applying the unary operator
|
||||
*
|
||||
* This class represents an expression where a unary operator is applied to an expression.
|
||||
* It is the return type of all operations taking exactly 1 input expression, regardless of the
|
||||
* presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix
|
||||
* is considered unary, because only the right-hand side is an expression, and its
|
||||
* return type is a specialization of CwiseUnaryOp.
|
||||
*
|
||||
* Most of the time, this is the only way that it is used, so you typically don't have to name
|
||||
* CwiseUnaryOp types explicitly.
|
||||
*
|
||||
* \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
|
||||
*/
|
||||
template<typename UnaryOp, typename XprType>
|
||||
class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator
|
||||
{
|
||||
@@ -59,33 +58,34 @@ class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal
|
||||
|
||||
typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
|
||||
typedef typename internal::ref_selector<XprType>::type XprTypeNested;
|
||||
typedef typename internal::remove_all<XprType>::type NestedExpression;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
|
||||
: m_xpr(xpr), m_functor(func) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Index rows() const { return m_xpr.rows(); }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Index cols() const { return m_xpr.cols(); }
|
||||
|
||||
/** \returns the functor representing the unary operation */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const UnaryOp& functor() const { return m_functor; }
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const typename internal::remove_all<XprTypeNested>::type&
|
||||
nestedExpression() const { return m_xpr; }
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::remove_all<typename XprType::Nested>::type&
|
||||
nestedExpression() { return m_xpr.const_cast_derived(); }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
typename internal::remove_all<XprTypeNested>::type&
|
||||
nestedExpression() { return m_xpr; }
|
||||
|
||||
protected:
|
||||
typename XprType::Nested m_xpr;
|
||||
XprTypeNested m_xpr;
|
||||
const UnaryOp m_functor;
|
||||
};
|
||||
|
||||
|
||||
@@ -12,27 +12,13 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class CwiseUnaryView
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
|
||||
*
|
||||
* \param ViewOp template functor implementing the view
|
||||
* \param MatrixType the type of the matrix we are applying the unary operator
|
||||
*
|
||||
* This class represents a lvalue expression of a generic unary view operator of a matrix or a vector.
|
||||
* It is the return type of real() and imag(), and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename ViewOp, typename MatrixType>
|
||||
struct traits<CwiseUnaryView<ViewOp, MatrixType> >
|
||||
: traits<MatrixType>
|
||||
{
|
||||
typedef typename result_of<
|
||||
ViewOp(typename traits<MatrixType>::Scalar)
|
||||
ViewOp(const typename traits<MatrixType>::Scalar&)
|
||||
>::type Scalar;
|
||||
typedef typename MatrixType::Nested MatrixTypeNested;
|
||||
typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
|
||||
@@ -55,6 +41,19 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
|
||||
template<typename ViewOp, typename MatrixType, typename StorageKind>
|
||||
class CwiseUnaryViewImpl;
|
||||
|
||||
/** \class CwiseUnaryView
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
|
||||
*
|
||||
* \tparam ViewOp template functor implementing the view
|
||||
* \tparam MatrixType the type of the matrix we are applying the unary operator
|
||||
*
|
||||
* This class represents a lvalue expression of a generic unary view operator of a matrix or a vector.
|
||||
* It is the return type of real() and imag(), and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
|
||||
*/
|
||||
template<typename ViewOp, typename MatrixType>
|
||||
class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind>
|
||||
{
|
||||
@@ -62,6 +61,7 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
|
||||
|
||||
typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
|
||||
typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
|
||||
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
|
||||
|
||||
explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp())
|
||||
@@ -76,15 +76,15 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
|
||||
const ViewOp& functor() const { return m_functor; }
|
||||
|
||||
/** \returns the nested expression */
|
||||
const typename internal::remove_all<typename MatrixType::Nested>::type&
|
||||
const typename internal::remove_all<MatrixTypeNested>::type&
|
||||
nestedExpression() const { return m_matrix; }
|
||||
|
||||
/** \returns the nested expression */
|
||||
typename internal::remove_all<typename MatrixType::Nested>::type&
|
||||
typename internal::remove_reference<MatrixTypeNested>::type&
|
||||
nestedExpression() { return m_matrix.const_cast_derived(); }
|
||||
|
||||
protected:
|
||||
typename internal::ref_selector<MatrixType>::type m_matrix;
|
||||
MatrixTypeNested m_matrix;
|
||||
ViewOp m_functor;
|
||||
};
|
||||
|
||||
|
||||
@@ -34,17 +34,15 @@ static inline void check_DenseIndex_is_signed() {
|
||||
* \tparam Derived is the derived type, e.g., a matrix type or an expression.
|
||||
*
|
||||
* This class can be extended with the help of the plugin mechanism described on the page
|
||||
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN.
|
||||
* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN.
|
||||
*
|
||||
* \sa \ref TopicClassHierarchy
|
||||
* \sa \blank \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived> class DenseBase
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
: public internal::special_scalar_op_base<Derived, typename internal::traits<Derived>::Scalar,
|
||||
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real,
|
||||
DenseCoeffsBase<Derived> >
|
||||
#else
|
||||
: public DenseCoeffsBase<Derived>
|
||||
#else
|
||||
: public DenseCoeffsBase<Derived,DirectWriteAccessors>
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
{
|
||||
public:
|
||||
@@ -60,7 +58,7 @@ template<typename Derived> class DenseBase
|
||||
* \brief The type used to store indices
|
||||
* \details This typedef is relevant for types that store multiple indices such as
|
||||
* PermutationMatrix or Transpositions, otherwise it defaults to Eigen::Index
|
||||
* \sa \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase.
|
||||
* \sa \blank \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase.
|
||||
*/
|
||||
typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
|
||||
|
||||
@@ -73,10 +71,8 @@ template<typename Derived> class DenseBase
|
||||
typedef Scalar value_type;
|
||||
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
typedef internal::special_scalar_op_base<Derived,Scalar,RealScalar, DenseCoeffsBase<Derived> > Base;
|
||||
typedef DenseCoeffsBase<Derived> Base;
|
||||
|
||||
using Base::operator*;
|
||||
using Base::operator/;
|
||||
using Base::derived;
|
||||
using Base::const_cast_derived;
|
||||
using Base::rows;
|
||||
@@ -264,10 +260,10 @@ template<typename Derived> class DenseBase
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \internal Represents a matrix with all coefficients equal to one another*/
|
||||
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
|
||||
/** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */
|
||||
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar,false>,PlainObject> SequentialLinSpacedReturnType;
|
||||
/** \internal \deprecated Represents a vector with linearly spaced coefficients that allows sequential access only. */
|
||||
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> SequentialLinSpacedReturnType;
|
||||
/** \internal Represents a vector with linearly spaced coefficients that allows random access. */
|
||||
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar,true>,PlainObject> RandomAccessLinSpacedReturnType;
|
||||
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> RandomAccessLinSpacedReturnType;
|
||||
/** \internal the return type of MatrixBase::eigenvalues() */
|
||||
typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
|
||||
|
||||
@@ -275,13 +271,13 @@ template<typename Derived> class DenseBase
|
||||
|
||||
/** Copies \a other into *this. \returns a reference to *this. */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator=(const DenseBase<OtherDerived>& other);
|
||||
|
||||
/** Special case of the template operator=, in order to prevent the compiler
|
||||
* from generating a default operator= (issue hit with g++ 4.1)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator=(const DenseBase& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
@@ -388,10 +384,10 @@ template<typename Derived> class DenseBase
|
||||
inline bool hasNaN() const;
|
||||
inline bool allFinite() const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Derived& operator*=(const Scalar& other);
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Derived& operator/=(const Scalar& other);
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator*=(const Scalar& other);
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator/=(const Scalar& other);
|
||||
|
||||
typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
|
||||
/** \returns the matrix or vector obtained by evaluating this expression.
|
||||
@@ -562,12 +558,15 @@ template<typename Derived> class DenseBase
|
||||
EIGEN_DEVICE_FUNC void reverseInPlace();
|
||||
|
||||
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
|
||||
#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
|
||||
#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND)
|
||||
# include "../plugins/BlockMethods.h"
|
||||
# ifdef EIGEN_DENSEBASE_PLUGIN
|
||||
# include EIGEN_DENSEBASE_PLUGIN
|
||||
# endif
|
||||
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
|
||||
|
||||
#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
|
||||
#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF
|
||||
|
||||
// disable the use of evalTo for dense objects with a nice compilation error
|
||||
template<typename Dest>
|
||||
|
||||
@@ -191,19 +191,31 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
y() const { return (*this)[1]; }
|
||||
y() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
|
||||
return (*this)[1];
|
||||
}
|
||||
|
||||
/** equivalent to operator[](2). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
z() const { return (*this)[2]; }
|
||||
z() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
|
||||
return (*this)[2];
|
||||
}
|
||||
|
||||
/** equivalent to operator[](3). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
w() const { return (*this)[3]; }
|
||||
w() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
|
||||
return (*this)[3];
|
||||
}
|
||||
|
||||
/** \internal
|
||||
* \returns the packet of coefficients starting at the given row and column. It is your responsibility
|
||||
@@ -424,19 +436,31 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
y() { return (*this)[1]; }
|
||||
y()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
|
||||
return (*this)[1];
|
||||
}
|
||||
|
||||
/** equivalent to operator[](2). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
z() { return (*this)[2]; }
|
||||
z()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
|
||||
return (*this)[2];
|
||||
}
|
||||
|
||||
/** equivalent to operator[](3). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
w() { return (*this)[3]; }
|
||||
w()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
|
||||
return (*this)[3];
|
||||
}
|
||||
};
|
||||
|
||||
/** \brief Base class providing direct read-only coefficient access to matrices and arrays.
|
||||
@@ -448,7 +472,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
* inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which defines functions to access entries read-only using
|
||||
* \c operator() .
|
||||
*
|
||||
* \sa \ref TopicClassHierarchy
|
||||
* \sa \blank \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived>
|
||||
class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
|
||||
@@ -521,7 +545,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
|
||||
* inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries read/write using
|
||||
* \c operator().
|
||||
*
|
||||
* \sa \ref TopicClassHierarchy
|
||||
* \sa \blank \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived>
|
||||
class DenseCoeffsBase<Derived, DirectWriteAccessors>
|
||||
@@ -600,7 +624,7 @@ struct first_aligned_impl<Alignment, Derived, false>
|
||||
{
|
||||
static inline Index run(const Derived& m)
|
||||
{
|
||||
return internal::first_aligned<Alignment>(&m.const_cast_derived().coeffRef(0,0), m.size());
|
||||
return internal::first_aligned<Alignment>(m.data(), m.size());
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -67,13 +67,13 @@ struct plain_array
|
||||
template<typename PtrType>
|
||||
EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; }
|
||||
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
|
||||
eigen_assert((reinterpret_cast<size_t>(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \
|
||||
eigen_assert((internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \
|
||||
&& "this assertion is explained here: " \
|
||||
"http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
|
||||
" **** READ THIS WEB PAGE !!! ****");
|
||||
#else
|
||||
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
|
||||
eigen_assert((reinterpret_cast<size_t>(array) & (sizemask)) == 0 \
|
||||
eigen_assert((internal::UIntPtr(array) & (sizemask)) == 0 \
|
||||
&& "this assertion is explained here: " \
|
||||
"http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
|
||||
" **** READ THIS WEB PAGE !!! ****");
|
||||
@@ -362,9 +362,9 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
#if EIGEN_HAS_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage(DenseStorage&& other)
|
||||
DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
|
||||
: m_data(std::move(other.m_data))
|
||||
, m_rows(std::move(other.m_rows))
|
||||
, m_cols(std::move(other.m_cols))
|
||||
@@ -374,7 +374,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
other.m_cols = 0;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage& operator=(DenseStorage&& other)
|
||||
DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
|
||||
{
|
||||
using std::swap;
|
||||
swap(m_data, other.m_data);
|
||||
@@ -441,9 +441,9 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
#if EIGEN_HAS_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage(DenseStorage&& other)
|
||||
DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
|
||||
: m_data(std::move(other.m_data))
|
||||
, m_cols(std::move(other.m_cols))
|
||||
{
|
||||
@@ -451,7 +451,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
other.m_cols = 0;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage& operator=(DenseStorage&& other)
|
||||
DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
|
||||
{
|
||||
using std::swap;
|
||||
swap(m_data, other.m_data);
|
||||
@@ -514,9 +514,9 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
#if EIGEN_HAS_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage(DenseStorage&& other)
|
||||
DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
|
||||
: m_data(std::move(other.m_data))
|
||||
, m_rows(std::move(other.m_rows))
|
||||
{
|
||||
@@ -524,7 +524,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
other.m_rows = 0;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage& operator=(DenseStorage&& other)
|
||||
DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
|
||||
{
|
||||
using std::swap;
|
||||
swap(m_data, other.m_data);
|
||||
|
||||
@@ -103,21 +103,21 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
|
||||
inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
|
||||
inline const Scalar* data() const { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index row, Index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
|
||||
return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index row, Index) const
|
||||
{
|
||||
return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
|
||||
return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -130,13 +130,13 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
|
||||
inline Scalar& coeffRef(Index idx)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
|
||||
return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index idx) const
|
||||
{
|
||||
return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
|
||||
return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -159,7 +159,7 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
|
||||
}
|
||||
|
||||
protected:
|
||||
typename MatrixType::Nested m_matrix;
|
||||
typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
|
||||
const internal::variable_if_dynamicindex<Index, DiagIndex> m_index;
|
||||
|
||||
private:
|
||||
|
||||
@@ -71,18 +71,17 @@ class DiagonalBase : public EigenBase<Derived>
|
||||
return InverseReturnType(diagonal().cwiseInverse());
|
||||
}
|
||||
|
||||
typedef DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> > ScalarMultipleReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const ScalarMultipleReturnType
|
||||
inline const DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >
|
||||
operator*(const Scalar& scalar) const
|
||||
{
|
||||
return ScalarMultipleReturnType(diagonal() * scalar);
|
||||
return DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >(diagonal() * scalar);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
friend inline const ScalarMultipleReturnType
|
||||
friend inline const DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >
|
||||
operator*(const Scalar& scalar, const DiagonalBase& other)
|
||||
{
|
||||
return ScalarMultipleReturnType(other.diagonal() * scalar);
|
||||
return DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >(scalar * other.diagonal());
|
||||
}
|
||||
};
|
||||
|
||||
@@ -291,12 +290,11 @@ MatrixBase<Derived>::asDiagonal() const
|
||||
template<typename Derived>
|
||||
bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
|
||||
{
|
||||
using std::abs;
|
||||
if(cols() != rows()) return false;
|
||||
RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1);
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
{
|
||||
RealScalar absOnDiagonal = abs(coeff(j,j));
|
||||
RealScalar absOnDiagonal = numext::abs(coeff(j,j));
|
||||
if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal;
|
||||
}
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
@@ -317,19 +315,24 @@ struct Diagonal2Dense {};
|
||||
template<> struct AssignmentKind<DenseShape,DiagonalShape> { typedef Diagonal2Dense Kind; };
|
||||
|
||||
// Diagonal matrix to Dense assignment
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense, Scalar>
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense>
|
||||
{
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
dst.setZero();
|
||||
dst.diagonal() = src.diagonal();
|
||||
}
|
||||
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar> &/*func*/)
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
|
||||
{ dst.diagonal() += src.diagonal(); }
|
||||
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar> &/*func*/)
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
|
||||
{ dst.diagonal() -= src.diagonal(); }
|
||||
};
|
||||
|
||||
|
||||
@@ -28,28 +28,31 @@ template<typename T, typename U,
|
||||
>
|
||||
struct dot_nocheck
|
||||
{
|
||||
typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
|
||||
typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
|
||||
typedef typename conj_prod::result_type ResScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
|
||||
{
|
||||
return a.template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
|
||||
return a.template binaryExpr<conj_prod>(b).sum();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, typename U>
|
||||
struct dot_nocheck<T, U, true>
|
||||
{
|
||||
typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
|
||||
typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
|
||||
typedef typename conj_prod::result_type ResScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
|
||||
{
|
||||
return a.transpose().template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
|
||||
return a.transpose().template binaryExpr<conj_prod>(b).sum();
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \returns the dot product of *this with other.
|
||||
/** \fn MatrixBase::dot
|
||||
* \returns the dot product of *this with other.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
@@ -62,15 +65,17 @@ struct dot_nocheck<T, U, true>
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
|
||||
typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
|
||||
MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
|
||||
#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG))
|
||||
typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
|
||||
|
||||
#endif
|
||||
|
||||
eigen_assert(size() == other.size());
|
||||
|
||||
return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);
|
||||
@@ -82,7 +87,7 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
|
||||
* In both cases, it consists in the sum of the square of all the matrix entries.
|
||||
* For vectors, this is also equals to the dot product of \c *this with itself.
|
||||
*
|
||||
* \sa dot(), norm()
|
||||
* \sa dot(), norm(), lpNorm()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
|
||||
@@ -94,16 +99,18 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
|
||||
* In both cases, it consists in the square root of the sum of the square of all the matrix entries.
|
||||
* For vectors, this is also equals to the square root of the dot product of \c *this with itself.
|
||||
*
|
||||
* \sa dot(), squaredNorm()
|
||||
* \sa lpNorm(), dot(), squaredNorm()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
|
||||
{
|
||||
EIGEN_USING_STD_MATH(sqrt)
|
||||
return sqrt(squaredNorm());
|
||||
return numext::sqrt(squaredNorm());
|
||||
}
|
||||
|
||||
/** \returns an expression of the quotient of *this by its own norm.
|
||||
/** \returns an expression of the quotient of \c *this by its own norm.
|
||||
*
|
||||
* \warning If the input vector is too small (i.e., this->norm()==0),
|
||||
* then this function returns a copy of the input.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
@@ -115,19 +122,75 @@ MatrixBase<Derived>::normalized() const
|
||||
{
|
||||
typedef typename internal::nested_eval<Derived,2>::type _Nested;
|
||||
_Nested n(derived());
|
||||
return n / n.norm();
|
||||
RealScalar z = n.squaredNorm();
|
||||
// NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
|
||||
if(z>RealScalar(0))
|
||||
return n / numext::sqrt(z);
|
||||
else
|
||||
return n;
|
||||
}
|
||||
|
||||
/** Normalizes the vector, i.e. divides it by its own norm.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
|
||||
*
|
||||
* \sa norm(), normalized()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline void MatrixBase<Derived>::normalize()
|
||||
{
|
||||
*this /= norm();
|
||||
RealScalar z = squaredNorm();
|
||||
// NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
|
||||
if(z>RealScalar(0))
|
||||
derived() /= numext::sqrt(z);
|
||||
}
|
||||
|
||||
/** \returns an expression of the quotient of \c *this by its own norm while avoiding underflow and overflow.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* This method is analogue to the normalized() method, but it reduces the risk of
|
||||
* underflow and overflow when computing the norm.
|
||||
*
|
||||
* \warning If the input vector is too small (i.e., this->norm()==0),
|
||||
* then this function returns a copy of the input.
|
||||
*
|
||||
* \sa stableNorm(), stableNormalize(), normalized()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const typename MatrixBase<Derived>::PlainObject
|
||||
MatrixBase<Derived>::stableNormalized() const
|
||||
{
|
||||
typedef typename internal::nested_eval<Derived,3>::type _Nested;
|
||||
_Nested n(derived());
|
||||
RealScalar w = n.cwiseAbs().maxCoeff();
|
||||
RealScalar z = (n/w).squaredNorm();
|
||||
if(z>RealScalar(0))
|
||||
return n / (numext::sqrt(z)*w);
|
||||
else
|
||||
return n;
|
||||
}
|
||||
|
||||
/** Normalizes the vector while avoid underflow and overflow
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* This method is analogue to the normalize() method, but it reduces the risk of
|
||||
* underflow and overflow when computing the norm.
|
||||
*
|
||||
* \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
|
||||
*
|
||||
* \sa stableNorm(), stableNormalized(), normalize()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline void MatrixBase<Derived>::stableNormalize()
|
||||
{
|
||||
RealScalar w = cwiseAbs().maxCoeff();
|
||||
RealScalar z = (derived()/w).squaredNorm();
|
||||
if(z>RealScalar(0))
|
||||
derived() /= numext::sqrt(z)*w;
|
||||
}
|
||||
|
||||
//---------- implementation of other norms ----------
|
||||
@@ -169,9 +232,12 @@ struct lpNorm_selector<Derived, 2>
|
||||
template<typename Derived>
|
||||
struct lpNorm_selector<Derived, Infinity>
|
||||
{
|
||||
typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
|
||||
static inline RealScalar run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
if(Derived::SizeAtCompileTime==0 || (Derived::SizeAtCompileTime==Dynamic && m.size()==0))
|
||||
return RealScalar(0);
|
||||
return m.cwiseAbs().maxCoeff();
|
||||
}
|
||||
};
|
||||
@@ -182,13 +248,19 @@ struct lpNorm_selector<Derived, Infinity>
|
||||
* of the coefficients of \c *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$
|
||||
* norm, that is the maximum of the absolute values of the coefficients of \c *this.
|
||||
*
|
||||
* In all cases, if \c *this is empty, then the value 0 is returned.
|
||||
*
|
||||
* \note For matrices, this function does not compute the <a href="https://en.wikipedia.org/wiki/Operator_norm">operator-norm</a>. That is, if \c *this is a matrix, then its coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink.
|
||||
*
|
||||
* \sa norm()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<int p>
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
|
||||
#else
|
||||
MatrixBase<Derived>::RealScalar
|
||||
#endif
|
||||
MatrixBase<Derived>::lpNorm() const
|
||||
{
|
||||
return internal::lpNorm_selector<Derived, p>::run(*this);
|
||||
|
||||
@@ -23,7 +23,7 @@ namespace Eigen {
|
||||
*
|
||||
* Notice that this class is trivial, it is only used to disambiguate overloaded functions.
|
||||
*
|
||||
* \sa \ref TopicClassHierarchy
|
||||
* \sa \blank \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived> struct EigenBase
|
||||
{
|
||||
@@ -138,7 +138,7 @@ template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>());
|
||||
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
@@ -146,7 +146,7 @@ template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>());
|
||||
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
|
||||
@@ -25,7 +25,8 @@ template<int Rows, int Cols, int Depth> struct product_type_selector;
|
||||
template<int Size, int MaxSize> struct product_size_category
|
||||
{
|
||||
enum { is_large = MaxSize == Dynamic ||
|
||||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
|
||||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
|
||||
(Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
|
||||
value = is_large ? Large
|
||||
: Size == 1 ? 1
|
||||
: Small
|
||||
@@ -76,37 +77,13 @@ public:
|
||||
#endif
|
||||
};
|
||||
|
||||
// template<typename Lhs, typename Rhs> struct product_tag
|
||||
// {
|
||||
// private:
|
||||
//
|
||||
// typedef typename remove_all<Lhs>::type _Lhs;
|
||||
// typedef typename remove_all<Rhs>::type _Rhs;
|
||||
// enum {
|
||||
// Rows = _Lhs::RowsAtCompileTime,
|
||||
// Cols = _Rhs::ColsAtCompileTime,
|
||||
// Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, _Rhs::RowsAtCompileTime)
|
||||
// };
|
||||
//
|
||||
// enum {
|
||||
// rows_select = Rows==1 ? int(Rows) : int(Large),
|
||||
// cols_select = Cols==1 ? int(Cols) : int(Large),
|
||||
// depth_select = Depth==1 ? int(Depth) : int(Large)
|
||||
// };
|
||||
// typedef product_type_selector<rows_select, cols_select, depth_select> selector;
|
||||
//
|
||||
// public:
|
||||
// enum {
|
||||
// ret = selector::ret
|
||||
// };
|
||||
//
|
||||
// };
|
||||
|
||||
/* The following allows selecting the kind of product at compile time
|
||||
* based on the three dimensions of the product.
|
||||
* This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
|
||||
// FIXME I'm not sure the current mapping is the ideal one.
|
||||
template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
|
||||
template<int M> struct product_type_selector<M, 1, 1> { enum { ret = LazyCoeffBasedProductMode }; };
|
||||
template<int N> struct product_type_selector<1, N, 1> { enum { ret = LazyCoeffBasedProductMode }; };
|
||||
template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
|
||||
template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
|
||||
template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
@@ -125,8 +102,8 @@ template<> struct product_type_selector<Small,Small,Large> { enum
|
||||
template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Large,Small,Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Small,Large,Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
|
||||
|
||||
} // end namespace internal
|
||||
@@ -183,20 +160,20 @@ struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
|
||||
template<typename Scalar,int Size,int MaxSize>
|
||||
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
|
||||
{
|
||||
#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
|
||||
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
|
||||
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
|
||||
#else
|
||||
// Some architectures cannot align on the stack,
|
||||
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
|
||||
enum {
|
||||
ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
|
||||
PacketSize = internal::packet_traits<Scalar>::size
|
||||
};
|
||||
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
|
||||
#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
|
||||
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data;
|
||||
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
|
||||
#else
|
||||
// Some architectures cannot align on the stack,
|
||||
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
|
||||
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;
|
||||
EIGEN_STRONG_INLINE Scalar* data() {
|
||||
return ForceAlignment
|
||||
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
|
||||
? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
|
||||
: m_data.array;
|
||||
}
|
||||
#endif
|
||||
@@ -231,7 +208,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
|
||||
typedef internal::blas_traits<Rhs> RhsBlasTraits;
|
||||
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
|
||||
|
||||
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
|
||||
typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
|
||||
|
||||
ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
|
||||
ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
|
||||
@@ -239,55 +216,73 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
|
||||
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
|
||||
* RhsBlasTraits::extractScalarFactor(rhs);
|
||||
|
||||
// make sure Dest is a compile-time vector type (bug 1166)
|
||||
typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;
|
||||
|
||||
enum {
|
||||
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
|
||||
// on the other hand, it is good for the cache to pack the vector anyway...
|
||||
EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
|
||||
EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
|
||||
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
|
||||
MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
|
||||
MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
|
||||
};
|
||||
|
||||
gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
|
||||
|
||||
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
|
||||
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
||||
|
||||
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
|
||||
evalToDest ? dest.data() : static_dest.data());
|
||||
|
||||
if(!evalToDest)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
Index size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
if(!alphaIsCompatible)
|
||||
{
|
||||
MappedDest(actualDestPtr, dest.size()).setZero();
|
||||
compatibleAlpha = RhsScalar(1);
|
||||
}
|
||||
else
|
||||
MappedDest(actualDestPtr, dest.size()) = dest;
|
||||
}
|
||||
|
||||
typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
|
||||
typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||
actualDestPtr, 1,
|
||||
compatibleAlpha);
|
||||
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
|
||||
|
||||
if (!evalToDest)
|
||||
if(!MightCannotUseDest)
|
||||
{
|
||||
if(!alphaIsCompatible)
|
||||
dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
|
||||
else
|
||||
dest = MappedDest(actualDestPtr, dest.size());
|
||||
// shortcut if we are sure to be able to use dest directly,
|
||||
// this makes it easier for the compiler to generate cleaner and more optimized code for the most common cases
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||
dest.data(), 1,
|
||||
compatibleAlpha);
|
||||
}
|
||||
else
|
||||
{
|
||||
gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
|
||||
|
||||
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
|
||||
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
|
||||
evalToDest ? dest.data() : static_dest.data());
|
||||
|
||||
if(!evalToDest)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
Index size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
if(!alphaIsCompatible)
|
||||
{
|
||||
MappedDest(actualDestPtr, dest.size()).setZero();
|
||||
compatibleAlpha = RhsScalar(1);
|
||||
}
|
||||
else
|
||||
MappedDest(actualDestPtr, dest.size()) = dest;
|
||||
}
|
||||
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||
actualDestPtr, 1,
|
||||
compatibleAlpha);
|
||||
|
||||
if (!evalToDest)
|
||||
{
|
||||
if(!alphaIsCompatible)
|
||||
dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
|
||||
else
|
||||
dest = MappedDest(actualDestPtr, dest.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -340,7 +335,7 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||
RhsMapper(actualRhsPtr, 1),
|
||||
dest.data(), dest.innerStride(),
|
||||
dest.data(), dest.col(0).innerStride(), //NOTE if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166)
|
||||
actualAlpha);
|
||||
}
|
||||
};
|
||||
@@ -350,6 +345,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,false>
|
||||
template<typename Lhs, typename Rhs, typename Dest>
|
||||
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
// TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp
|
||||
typename nested_eval<Rhs,1>::type actual_rhs(rhs);
|
||||
const Index size = rhs.rows();
|
||||
@@ -363,6 +359,7 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
|
||||
template<typename Lhs, typename Rhs, typename Dest>
|
||||
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
|
||||
const Index rows = dest.rows();
|
||||
for(Index i=0; i<rows; ++i)
|
||||
|
||||
@@ -62,7 +62,8 @@ struct default_packet_traits
|
||||
HasRsqrt = 0,
|
||||
HasExp = 0,
|
||||
HasLog = 0,
|
||||
HasLog10 = 0,
|
||||
HasLog1p = 0,
|
||||
HasLog10 = 0,
|
||||
HasPow = 0,
|
||||
|
||||
HasSin = 0,
|
||||
@@ -71,12 +72,18 @@ struct default_packet_traits
|
||||
HasASin = 0,
|
||||
HasACos = 0,
|
||||
HasATan = 0,
|
||||
HasSinh = 0,
|
||||
HasCosh = 0,
|
||||
HasTanh = 0,
|
||||
HasSinh = 0,
|
||||
HasCosh = 0,
|
||||
HasTanh = 0,
|
||||
HasLGamma = 0,
|
||||
HasDiGamma = 0,
|
||||
HasZeta = 0,
|
||||
HasPolygamma = 0,
|
||||
HasErf = 0,
|
||||
HasErfc = 0,
|
||||
HasIGamma = 0,
|
||||
HasIGammac = 0,
|
||||
HasBetaInc = 0,
|
||||
|
||||
HasRound = 0,
|
||||
HasFloor = 0,
|
||||
@@ -133,6 +140,11 @@ pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
|
||||
return static_cast<TgtPacket>(a);
|
||||
}
|
||||
|
||||
template <typename SrcPacket, typename TgtPacket>
|
||||
EIGEN_DEVICE_FUNC inline TgtPacket
|
||||
pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
|
||||
return static_cast<TgtPacket>(a);
|
||||
}
|
||||
|
||||
/** \internal \returns a + b (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
@@ -284,7 +296,7 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
|
||||
{ pstore(to, from); }
|
||||
|
||||
/** \internal tries to do cache prefetching of \a addr */
|
||||
template<typename Scalar> inline void prefetch(const Scalar* addr)
|
||||
template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
|
||||
{
|
||||
#ifdef __CUDA_ARCH__
|
||||
#if defined(__LP64__)
|
||||
@@ -294,7 +306,7 @@ template<typename Scalar> inline void prefetch(const Scalar* addr)
|
||||
// 32-bit pointer operand constraint for inlined asm
|
||||
asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
|
||||
#endif
|
||||
#elif !EIGEN_COMP_MSVC
|
||||
#elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
|
||||
__builtin_prefetch(addr);
|
||||
#endif
|
||||
}
|
||||
@@ -317,7 +329,7 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Pack
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline
|
||||
typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
|
||||
predux4(const Packet& a)
|
||||
predux_downto4(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the product of the elements of \a a*/
|
||||
@@ -336,22 +348,6 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Pack
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
template<size_t offset, typename Packet>
|
||||
struct protate_impl
|
||||
{
|
||||
// Empty so attempts to use this unimplemented path will fail to compile.
|
||||
// Only specializations of this template should be used.
|
||||
};
|
||||
|
||||
/** \internal \returns a packet with the coefficients rotated to the right in little-endian convention,
|
||||
* by the given offset, e.g. for offset == 1:
|
||||
* (packet[3], packet[2], packet[1], packet[0]) becomes (packet[0], packet[3], packet[2], packet[1])
|
||||
*/
|
||||
template<size_t offset, typename Packet> EIGEN_DEVICE_FUNC inline Packet protate(const Packet& a)
|
||||
{
|
||||
return offset ? protate_impl<offset, Packet>::run(a) : a;
|
||||
}
|
||||
|
||||
/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
|
||||
{
|
||||
@@ -409,6 +405,10 @@ Packet pexp(const Packet& a) { using std::exp; return exp(a); }
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet plog(const Packet& a) { using std::log; return log(a); }
|
||||
|
||||
/** \internal \returns the log1p of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet plog1p(const Packet& a) { return numext::log1p(a); }
|
||||
|
||||
/** \internal \returns the log10 of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet plog10(const Packet& a) { using std::log10; return log10(a); }
|
||||
@@ -435,18 +435,6 @@ Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
|
||||
|
||||
/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); }
|
||||
|
||||
/** \internal \returns the erf(\a a) (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet perf(const Packet& a) { using numext::erf; return erf(a); }
|
||||
|
||||
/** \internal \returns the erfc(\a a) (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
|
||||
|
||||
/***************************************************************************
|
||||
* The following functions might not have to be overwritten for vectorized types
|
||||
***************************************************************************/
|
||||
@@ -570,6 +558,34 @@ pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& th
|
||||
return ifPacket.select[0] ? thenPacket : elsePacket;
|
||||
}
|
||||
|
||||
/** \internal \returns \a a with the first coefficient replaced by the scalar b */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
|
||||
{
|
||||
// Default implementation based on pblend.
|
||||
// It must be specialized for higher performance.
|
||||
Selector<unpacket_traits<Packet>::size> mask;
|
||||
mask.select[0] = true;
|
||||
// This for loop should be optimized away by the compiler.
|
||||
for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
|
||||
mask.select[i] = false;
|
||||
return pblend(mask, pset1<Packet>(b), a);
|
||||
}
|
||||
|
||||
/** \internal \returns \a a with the last coefficient replaced by the scalar b */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
|
||||
{
|
||||
// Default implementation based on pblend.
|
||||
// It must be specialized for higher performance.
|
||||
Selector<unpacket_traits<Packet>::size> mask;
|
||||
// This for loop should be optimized away by the compiler.
|
||||
for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
|
||||
mask.select[i] = false;
|
||||
mask.select[unpacket_traits<Packet>::size-1] = true;
|
||||
return pblend(mask, pset1<Packet>(b), a);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010-2012 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2010-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
@@ -11,13 +11,30 @@
|
||||
#ifndef EIGEN_GLOBAL_FUNCTIONS_H
|
||||
#define EIGEN_GLOBAL_FUNCTIONS_H
|
||||
|
||||
#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR) \
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \
|
||||
/** \returns an expression of the coefficient-wise DOC_OP of \a x
|
||||
|
||||
DOC_DETAILS
|
||||
|
||||
\sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_##NAME">Math functions</a>, class CwiseUnaryOp
|
||||
*/ \
|
||||
template<typename Derived> \
|
||||
inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
|
||||
NAME(const Eigen::ArrayBase<Derived>& x);
|
||||
|
||||
#else
|
||||
|
||||
#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \
|
||||
template<typename Derived> \
|
||||
inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
|
||||
(NAME)(const Eigen::ArrayBase<Derived>& x) { \
|
||||
return Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived>(x.derived()); \
|
||||
}
|
||||
|
||||
#endif // EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
#define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \
|
||||
\
|
||||
template<typename Derived> \
|
||||
@@ -36,44 +53,68 @@
|
||||
|
||||
namespace Eigen
|
||||
{
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op,real part,\sa ArrayBase::real)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op,imaginary part,\sa ArrayBase::imag)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op,complex conjugate,\sa ArrayBase::conjugate)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op,inverse,\sa ArrayBase::inverse)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op,sine,\sa ArrayBase::sin)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op,cosine,\sa ArrayBase::cos)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op,tangent,\sa ArrayBase::tan)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op,arc-tangent,\sa ArrayBase::atan)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op,arc-sine,\sa ArrayBase::asin)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op,arc-cosine,\sa ArrayBase::acos)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complementary error function,\sa ArrayBase::erfc)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op,absolute value,\sa ArrayBase::abs DOXCOMMA MatrixBase::cwiseAbs)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op,squared absolute value,\sa ArrayBase::abs2 DOXCOMMA MatrixBase::cwiseAbs2)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op,complex argument,\sa ArrayBase::arg)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op,square root,\sa ArrayBase::sqrt DOXCOMMA MatrixBase::cwiseSqrt)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rsqrt,scalar_rsqrt_op,reciprocal square root,\sa ArrayBase::rsqrt)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op,square (power 2),\sa Eigen::abs2 DOXCOMMA Eigen::pow DOXCOMMA ArrayBase::square)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op,cube (power 3),\sa Eigen::pow DOXCOMMA ArrayBase::cube)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op,nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op,nearest integer not greater than the given value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op,nearest integer not less than the given value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op,not-a-number test,\sa Eigen::isinf DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isnan)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op,infinite value test,\sa Eigen::isnan DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isinf)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op,finite value test,\sa Eigen::isinf DOXCOMMA Eigen::isnan DOXCOMMA ArrayBase::isfinite)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign)
|
||||
|
||||
/** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent.
|
||||
*
|
||||
* \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar).
|
||||
*
|
||||
* \sa ArrayBase::pow()
|
||||
*
|
||||
* \relates ArrayBase
|
||||
*/
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename Derived,typename ScalarExponent>
|
||||
inline const CwiseBinaryOp<internal::scalar_pow_op<Derived::Scalar,ScalarExponent>,Derived,Constant<ScalarExponent> >
|
||||
pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent);
|
||||
#else
|
||||
template<typename Derived,typename ScalarExponent>
|
||||
inline typename internal::enable_if< !(internal::is_same<typename Derived::Scalar,ScalarExponent>::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,typename Derived::Scalar,ScalarExponent),
|
||||
const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,ScalarExponent,pow) >::type
|
||||
pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent) {
|
||||
return x.derived().pow(exponent);
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar>, const Derived>
|
||||
inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename Derived::Scalar,pow)
|
||||
pow(const Eigen::ArrayBase<Derived>& x, const typename Derived::Scalar& exponent) {
|
||||
return x.derived().pow(exponent);
|
||||
}
|
||||
#endif
|
||||
|
||||
/** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents.
|
||||
*
|
||||
@@ -83,12 +124,14 @@ namespace Eigen
|
||||
* Output: \verbinclude Cwise_array_power_array.out
|
||||
*
|
||||
* \sa ArrayBase::pow()
|
||||
*
|
||||
* \relates ArrayBase
|
||||
*/
|
||||
template<typename Derived,typename ExponentDerived>
|
||||
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>
|
||||
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>
|
||||
pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<ExponentDerived>& exponents)
|
||||
{
|
||||
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>(
|
||||
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>(
|
||||
x.derived(),
|
||||
exponents.derived()
|
||||
);
|
||||
@@ -97,36 +140,39 @@ namespace Eigen
|
||||
/** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents.
|
||||
*
|
||||
* This function computes the coefficient-wise power between a scalar and an array of exponents.
|
||||
* Be aware that the scalar type of the input scalar \a x and the exponents \a exponents must be the same.
|
||||
*
|
||||
* \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar).
|
||||
*
|
||||
* Example: \include Cwise_scalar_power_array.cpp
|
||||
* Output: \verbinclude Cwise_scalar_power_array.out
|
||||
*
|
||||
* \sa ArrayBase::pow()
|
||||
*
|
||||
* \relates ArrayBase
|
||||
*/
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename Scalar,typename Derived>
|
||||
inline const CwiseBinaryOp<internal::scalar_pow_op<Scalar,Derived::Scalar>,Constant<Scalar>,Derived>
|
||||
pow(const Scalar& x,const Eigen::ArrayBase<Derived>& x);
|
||||
#else
|
||||
template<typename Scalar, typename Derived>
|
||||
inline typename internal::enable_if< !(internal::is_same<typename Derived::Scalar,Scalar>::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,Scalar,typename Derived::Scalar),
|
||||
const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow) >::type
|
||||
pow(const Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
|
||||
{
|
||||
return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow)(
|
||||
typename internal::plain_constant_type<Derived,Scalar>::type(exponents.rows(), exponents.cols(), x), exponents.derived() );
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const typename Derived::ConstantReturnType, const Derived>
|
||||
pow(const typename Derived::Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
|
||||
inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)
|
||||
pow(const typename Derived::Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
|
||||
{
|
||||
typename Derived::ConstantReturnType constant_x(exponents.rows(), exponents.cols(), x);
|
||||
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const typename Derived::ConstantReturnType, const Derived>(
|
||||
constant_x,
|
||||
exponents.derived()
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Component-wise division of a scalar by array elements.
|
||||
**/
|
||||
template <typename Derived>
|
||||
inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>
|
||||
operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase<Derived>& a)
|
||||
{
|
||||
return Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>(
|
||||
a.derived(),
|
||||
Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>(s)
|
||||
);
|
||||
return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)(
|
||||
typename internal::plain_constant_type<Derived,typename Derived::Scalar>::type(exponents.rows(), exponents.cols(), x), exponents.derived() );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
namespace internal
|
||||
{
|
||||
|
||||
@@ -80,7 +80,7 @@ struct IOFormat
|
||||
*
|
||||
* \brief Pseudo expression providing matrix output with given format
|
||||
*
|
||||
* \param ExpressionType the type of the object on which IO stream operations are performed
|
||||
* \tparam ExpressionType the type of the object on which IO stream operations are performed
|
||||
*
|
||||
* This class represents an expression with stream operators controlled by a given IOFormat.
|
||||
* It is the return type of DenseBase::format()
|
||||
@@ -105,7 +105,7 @@ class WithFormat
|
||||
}
|
||||
|
||||
protected:
|
||||
const typename ExpressionType::Nested m_matrix;
|
||||
typename ExpressionType::Nested m_matrix;
|
||||
IOFormat m_format;
|
||||
};
|
||||
|
||||
@@ -125,31 +125,17 @@ DenseBase<Derived>::format(const IOFormat& fmt) const
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Scalar, bool IsInteger>
|
||||
struct significant_decimals_default_impl
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
static inline int run()
|
||||
{
|
||||
using std::ceil;
|
||||
using std::log;
|
||||
return cast<RealScalar,int>(ceil(-log(NumTraits<RealScalar>::epsilon())/log(RealScalar(10))));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar>
|
||||
struct significant_decimals_default_impl<Scalar, true>
|
||||
{
|
||||
static inline int run()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
// NOTE: This helper is kept for backward compatibility with previous code specializing
|
||||
// this internal::significant_decimals_impl structure. In the future we should directly
|
||||
// call digits10() which has been introduced in July 2016 in 3.3.
|
||||
template<typename Scalar>
|
||||
struct significant_decimals_impl
|
||||
: significant_decimals_default_impl<Scalar, NumTraits<Scalar>::IsInteger>
|
||||
{};
|
||||
{
|
||||
static inline int run()
|
||||
{
|
||||
return NumTraits<Scalar>::digits10();
|
||||
}
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* print the matrix \a _m to the output stream \a s using the output format \a fmt */
|
||||
|
||||
@@ -45,12 +45,13 @@ class Inverse : public InverseImpl<XprType,typename internal::traits<XprType>::S
|
||||
public:
|
||||
typedef typename XprType::StorageIndex StorageIndex;
|
||||
typedef typename XprType::PlainObject PlainObject;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename internal::ref_selector<XprType>::type XprTypeNested;
|
||||
typedef typename internal::remove_all<XprTypeNested>::type XprTypeNestedCleaned;
|
||||
typedef typename internal::ref_selector<Inverse>::type Nested;
|
||||
typedef typename internal::remove_all<XprType>::type NestedExpression;
|
||||
|
||||
explicit Inverse(const XprType &xpr)
|
||||
explicit EIGEN_DEVICE_FUNC Inverse(const XprType &xpr)
|
||||
: m_xpr(xpr)
|
||||
{}
|
||||
|
||||
|
||||
@@ -13,6 +13,28 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename PlainObjectType, int MapOptions, typename StrideType>
|
||||
struct traits<Map<PlainObjectType, MapOptions, StrideType> >
|
||||
: public traits<PlainObjectType>
|
||||
{
|
||||
typedef traits<PlainObjectType> TraitsBase;
|
||||
enum {
|
||||
InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
|
||||
? int(PlainObjectType::InnerStrideAtCompileTime)
|
||||
: int(StrideType::InnerStrideAtCompileTime),
|
||||
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
|
||||
? int(PlainObjectType::OuterStrideAtCompileTime)
|
||||
: int(StrideType::OuterStrideAtCompileTime),
|
||||
Alignment = int(MapOptions)&int(AlignedMask),
|
||||
Flags0 = TraitsBase::Flags & (~NestByRefBit),
|
||||
Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
|
||||
};
|
||||
private:
|
||||
enum { Options }; // Expressions don't have Options
|
||||
};
|
||||
}
|
||||
|
||||
/** \class Map
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
@@ -63,29 +85,6 @@ namespace Eigen {
|
||||
*
|
||||
* \sa PlainObjectBase::Map(), \ref TopicStorageOrders
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename PlainObjectType, int MapOptions, typename StrideType>
|
||||
struct traits<Map<PlainObjectType, MapOptions, StrideType> >
|
||||
: public traits<PlainObjectType>
|
||||
{
|
||||
typedef traits<PlainObjectType> TraitsBase;
|
||||
enum {
|
||||
InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
|
||||
? int(PlainObjectType::InnerStrideAtCompileTime)
|
||||
: int(StrideType::InnerStrideAtCompileTime),
|
||||
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
|
||||
? int(PlainObjectType::OuterStrideAtCompileTime)
|
||||
: int(StrideType::OuterStrideAtCompileTime),
|
||||
Alignment = int(MapOptions)&int(AlignedMask),
|
||||
Flags0 = TraitsBase::Flags & (~NestByRefBit),
|
||||
Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
|
||||
};
|
||||
private:
|
||||
enum { Options }; // Expressions don't have Options
|
||||
};
|
||||
}
|
||||
|
||||
template<typename PlainObjectType, int MapOptions, typename StrideType> class Map
|
||||
: public MapBase<Map<PlainObjectType, MapOptions, StrideType> >
|
||||
{
|
||||
|
||||
@@ -17,10 +17,20 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class MapBase
|
||||
* \ingroup Core_Module
|
||||
/** \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for Map and Block expression with direct access
|
||||
* \brief Base class for dense Map and Block expression with direct access
|
||||
*
|
||||
* This base class provides the const low-level accessors (e.g. coeff, coeffRef) of dense
|
||||
* Map and Block objects with direct access.
|
||||
* Typical users do not have to directly deal with this class.
|
||||
*
|
||||
* This class can be extended by through the macro plugin \c EIGEN_MAPBASE_PLUGIN.
|
||||
* See \link TopicCustomizing_Plugins customizing Eigen \endlink for details.
|
||||
*
|
||||
* The \c Derived class has to provide the following two methods describing the memory layout:
|
||||
* \code Index innerStride() const; \endcode
|
||||
* \code Index outerStride() const; \endcode
|
||||
*
|
||||
* \sa class Map, class Block
|
||||
*/
|
||||
@@ -75,7 +85,9 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
|
||||
typedef typename Base::CoeffReturnType CoeffReturnType;
|
||||
|
||||
/** \copydoc DenseBase::rows() */
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); }
|
||||
/** \copydoc DenseBase::cols() */
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); }
|
||||
|
||||
/** Returns a pointer to the first coefficient of the matrix or vector.
|
||||
@@ -86,12 +98,14 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; }
|
||||
|
||||
/** \copydoc PlainObjectBase::coeff(Index,Index) const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_data[colId * colStride() + rowId * rowStride()];
|
||||
}
|
||||
|
||||
/** \copydoc PlainObjectBase::coeff(Index) const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeff(Index index) const
|
||||
{
|
||||
@@ -99,12 +113,14 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
return m_data[index * innerStride()];
|
||||
}
|
||||
|
||||
/** \copydoc PlainObjectBase::coeffRef(Index,Index) const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return this->m_data[colId * colStride() + rowId * rowStride()];
|
||||
}
|
||||
|
||||
/** \copydoc PlainObjectBase::coeffRef(Index) const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
@@ -112,6 +128,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
return this->m_data[index * innerStride()];
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
template<int LoadMode>
|
||||
inline PacketScalar packet(Index rowId, Index colId) const
|
||||
{
|
||||
@@ -119,6 +136,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
(m_data + (colId * colStride() + rowId * rowStride()));
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
template<int LoadMode>
|
||||
inline PacketScalar packet(Index index) const
|
||||
{
|
||||
@@ -126,13 +144,15 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
|
||||
}
|
||||
|
||||
/** \internal Constructor for fixed size matrices or vectors */
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
checkSanity();
|
||||
checkSanity<Derived>();
|
||||
}
|
||||
|
||||
/** \internal Constructor for dynamically sized vectors */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline MapBase(PointerType dataPtr, Index vecSize)
|
||||
: m_data(dataPtr),
|
||||
@@ -142,9 +162,10 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
eigen_assert(vecSize >= 0);
|
||||
eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize);
|
||||
checkSanity();
|
||||
checkSanity<Derived>();
|
||||
}
|
||||
|
||||
/** \internal Constructor for dynamically sized matrices */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline MapBase(PointerType dataPtr, Index rows, Index cols)
|
||||
: m_data(dataPtr), m_rows(rows), m_cols(cols)
|
||||
@@ -152,7 +173,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
eigen_assert( (dataPtr == 0)
|
||||
|| ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
|
||||
&& cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
|
||||
checkSanity();
|
||||
checkSanity<Derived>();
|
||||
}
|
||||
|
||||
#ifdef EIGEN_MAPBASE_PLUGIN
|
||||
@@ -161,19 +182,36 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
|
||||
protected:
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void checkSanity() const
|
||||
void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
|
||||
{
|
||||
#if EIGEN_MAX_ALIGN_BYTES>0
|
||||
eigen_assert(((size_t(m_data) % EIGEN_PLAIN_ENUM_MAX(1,internal::traits<Derived>::Alignment)) == 0) && "data is not aligned");
|
||||
eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0)
|
||||
|| (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void checkSanity(typename internal::enable_if<internal::traits<T>::Alignment==0,void*>::type = 0) const
|
||||
{}
|
||||
|
||||
PointerType m_data;
|
||||
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
|
||||
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
|
||||
};
|
||||
|
||||
/** \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for non-const dense Map and Block expression with direct access
|
||||
*
|
||||
* This base class provides the non-const low-level accessors (e.g. coeff and coeffRef) of
|
||||
* dense Map and Block objects with direct access.
|
||||
* It inherits MapBase<Derived, ReadOnlyAccessors> which defines the const variant for reading specific entries.
|
||||
*
|
||||
* \sa class Map, class Block
|
||||
*/
|
||||
template<typename Derived> class MapBase<Derived, WriteAccessors>
|
||||
: public MapBase<Derived, ReadOnlyAccessors>
|
||||
{
|
||||
|
||||
@@ -11,7 +11,9 @@
|
||||
#define EIGEN_MATHFUNCTIONS_H
|
||||
|
||||
// source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html
|
||||
#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406
|
||||
// TODO this should better be moved to NumTraits
|
||||
#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L
|
||||
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
@@ -23,10 +25,10 @@ double abs(double x) { return (fabs(x)); }
|
||||
float abs(float x) { return (fabsf(x)); }
|
||||
long double abs(long double x) { return (fabsl(x)); }
|
||||
#endif
|
||||
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \internal \struct global_math_functions_filtering_base
|
||||
/** \internal \class global_math_functions_filtering_base
|
||||
*
|
||||
* What it does:
|
||||
* Defines a typedef 'type' as follows:
|
||||
@@ -95,6 +97,19 @@ struct real_default_impl<Scalar,true>
|
||||
|
||||
template<typename Scalar> struct real_impl : real_default_impl<Scalar> {};
|
||||
|
||||
#ifdef __CUDA_ARCH__
|
||||
template<typename T>
|
||||
struct real_impl<std::complex<T> >
|
||||
{
|
||||
typedef T RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline T run(const std::complex<T>& x)
|
||||
{
|
||||
return x.real();
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
template<typename Scalar>
|
||||
struct real_retval
|
||||
{
|
||||
@@ -130,6 +145,19 @@ struct imag_default_impl<Scalar,true>
|
||||
|
||||
template<typename Scalar> struct imag_impl : imag_default_impl<Scalar> {};
|
||||
|
||||
#ifdef __CUDA_ARCH__
|
||||
template<typename T>
|
||||
struct imag_impl<std::complex<T> >
|
||||
{
|
||||
typedef T RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline T run(const std::complex<T>& x)
|
||||
{
|
||||
return x.imag();
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
template<typename Scalar>
|
||||
struct imag_retval
|
||||
{
|
||||
@@ -457,30 +485,33 @@ struct arg_retval
|
||||
/****************************************************************************
|
||||
* Implementation of log1p *
|
||||
****************************************************************************/
|
||||
template<typename Scalar, bool isComplex = NumTraits<Scalar>::IsComplex >
|
||||
struct log1p_impl
|
||||
{
|
||||
static inline Scalar run(const Scalar& x)
|
||||
{
|
||||
|
||||
namespace std_fallback {
|
||||
// fallback log1p implementation in case there is no log1p(Scalar) function in namespace of Scalar,
|
||||
// or that there is no suitable std::log1p function available
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) {
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_USING_STD_MATH(log);
|
||||
Scalar x1p = RealScalar(1) + x;
|
||||
return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#if EIGEN_HAS_CXX11_MATH
|
||||
template<typename Scalar>
|
||||
struct log1p_impl<Scalar, false> {
|
||||
struct log1p_impl {
|
||||
static inline Scalar run(const Scalar& x)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
|
||||
#if EIGEN_HAS_CXX11_MATH
|
||||
using std::log1p;
|
||||
#endif
|
||||
using std_fallback::log1p;
|
||||
return log1p(x);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
template<typename Scalar>
|
||||
struct log1p_retval
|
||||
@@ -492,24 +523,26 @@ struct log1p_retval
|
||||
* Implementation of pow *
|
||||
****************************************************************************/
|
||||
|
||||
template<typename Scalar, bool IsInteger>
|
||||
struct pow_default_impl
|
||||
template<typename ScalarX,typename ScalarY, bool IsInteger = NumTraits<ScalarX>::IsInteger&&NumTraits<ScalarY>::IsInteger>
|
||||
struct pow_impl
|
||||
{
|
||||
typedef Scalar retval;
|
||||
static inline Scalar run(const Scalar& x, const Scalar& y)
|
||||
//typedef Scalar retval;
|
||||
typedef typename ScalarBinaryOpTraits<ScalarX,ScalarY,internal::scalar_pow_op<ScalarX,ScalarY> >::ReturnType result_type;
|
||||
static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(pow);
|
||||
return pow(x, y);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar>
|
||||
struct pow_default_impl<Scalar, true>
|
||||
template<typename ScalarX,typename ScalarY>
|
||||
struct pow_impl<ScalarX,ScalarY, true>
|
||||
{
|
||||
static inline Scalar run(Scalar x, Scalar y)
|
||||
typedef ScalarX result_type;
|
||||
static EIGEN_DEVICE_FUNC inline ScalarX run(ScalarX x, ScalarY y)
|
||||
{
|
||||
Scalar res(1);
|
||||
eigen_assert(!NumTraits<Scalar>::IsSigned || y >= 0);
|
||||
ScalarX res(1);
|
||||
eigen_assert(!NumTraits<ScalarY>::IsSigned || y >= 0);
|
||||
if(y & 1) res *= x;
|
||||
y >>= 1;
|
||||
while(y)
|
||||
@@ -522,15 +555,6 @@ struct pow_default_impl<Scalar, true>
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar>
|
||||
struct pow_impl : pow_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
|
||||
|
||||
template<typename Scalar>
|
||||
struct pow_retval
|
||||
{
|
||||
typedef Scalar type;
|
||||
};
|
||||
|
||||
/****************************************************************************
|
||||
* Implementation of random *
|
||||
****************************************************************************/
|
||||
@@ -620,16 +644,18 @@ struct random_default_impl<Scalar, false, true>
|
||||
typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
|
||||
if(y<x)
|
||||
return x;
|
||||
// the following difference might overflow on a 32 bits system,
|
||||
// but since y>=x the result converted to an unsigned long is still correct.
|
||||
std::size_t range = ScalarX(y)-ScalarX(x);
|
||||
std::size_t offset = 0;
|
||||
// rejection sampling
|
||||
std::size_t divisor = (range+RAND_MAX-1)/(range+1);
|
||||
std::size_t multiplier = (range+RAND_MAX-1)/std::size_t(RAND_MAX);
|
||||
|
||||
std::size_t divisor = 1;
|
||||
std::size_t multiplier = 1;
|
||||
if(range<RAND_MAX) divisor = (std::size_t(RAND_MAX)+1)/(range+1);
|
||||
else multiplier = 1 + range/(std::size_t(RAND_MAX)+1);
|
||||
do {
|
||||
offset = ( (std::size_t(std::rand()) * multiplier) / divisor );
|
||||
offset = (std::size_t(std::rand()) * multiplier) / divisor;
|
||||
} while (offset > range);
|
||||
|
||||
return Scalar(ScalarX(x) + offset);
|
||||
}
|
||||
|
||||
@@ -704,11 +730,13 @@ EIGEN_DEVICE_FUNC
|
||||
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
|
||||
isfinite_impl(const T& x)
|
||||
{
|
||||
#if EIGEN_USE_STD_FPCLASSIFY
|
||||
#ifdef __CUDA_ARCH__
|
||||
return (::isfinite)(x);
|
||||
#elif EIGEN_USE_STD_FPCLASSIFY
|
||||
using std::isfinite;
|
||||
return isfinite EIGEN_NOT_A_MACRO (x);
|
||||
#else
|
||||
return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
|
||||
return x<=NumTraits<T>::highest() && x>=NumTraits<T>::lowest();
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -717,7 +745,9 @@ EIGEN_DEVICE_FUNC
|
||||
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
|
||||
isinf_impl(const T& x)
|
||||
{
|
||||
#if EIGEN_USE_STD_FPCLASSIFY
|
||||
#ifdef __CUDA_ARCH__
|
||||
return (::isinf)(x);
|
||||
#elif EIGEN_USE_STD_FPCLASSIFY
|
||||
using std::isinf;
|
||||
return isinf EIGEN_NOT_A_MACRO (x);
|
||||
#else
|
||||
@@ -730,7 +760,9 @@ EIGEN_DEVICE_FUNC
|
||||
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
|
||||
isnan_impl(const T& x)
|
||||
{
|
||||
#if EIGEN_USE_STD_FPCLASSIFY
|
||||
#ifdef __CUDA_ARCH__
|
||||
return (::isnan)(x);
|
||||
#elif EIGEN_USE_STD_FPCLASSIFY
|
||||
using std::isnan;
|
||||
return isnan EIGEN_NOT_A_MACRO (x);
|
||||
#else
|
||||
@@ -748,9 +780,9 @@ template<typename T> EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x)
|
||||
}
|
||||
|
||||
//MSVC defines a _isnan builtin function, but for double only
|
||||
EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x); }
|
||||
EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x); }
|
||||
EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x); }
|
||||
EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x)!=0; }
|
||||
EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x)!=0; }
|
||||
EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x)!=0; }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); }
|
||||
EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); }
|
||||
@@ -780,9 +812,11 @@ template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return
|
||||
#endif
|
||||
|
||||
// The following overload are defined at the end of this file
|
||||
template<typename T> bool isfinite_impl(const std::complex<T>& x);
|
||||
template<typename T> bool isnan_impl(const std::complex<T>& x);
|
||||
template<typename T> bool isinf_impl(const std::complex<T>& x);
|
||||
template<typename T> EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x);
|
||||
template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x);
|
||||
template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
|
||||
|
||||
template<typename T> T generic_fast_tanh_float(const T& a_x);
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
@@ -819,7 +853,7 @@ template<>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y)
|
||||
{
|
||||
return fmin(x, y);
|
||||
return fminf(x, y);
|
||||
}
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -831,7 +865,7 @@ template<>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y)
|
||||
{
|
||||
return fmax(x, y);
|
||||
return fmaxf(x, y);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -841,7 +875,7 @@ EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -920,11 +954,19 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x)
|
||||
return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
#ifdef __CUDACC__
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float log1p(const float &x) { return ::log1pf(x); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double log1p(const double &x) { return ::log1p(x); }
|
||||
#endif
|
||||
|
||||
template<typename ScalarX,typename ScalarY>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y)
|
||||
inline typename internal::pow_impl<ScalarX,ScalarY>::result_type pow(const ScalarX& x, const ScalarY& y)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y);
|
||||
return internal::pow_impl<ScalarX,ScalarY>::run(x, y);
|
||||
}
|
||||
|
||||
template<typename T> EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); }
|
||||
@@ -946,6 +988,14 @@ T (floor)(const T& x)
|
||||
return floor(x);
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float floor(const float &x) { return ::floorf(x); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double floor(const double &x) { return ::floor(x); }
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
T (ceil)(const T& x)
|
||||
@@ -954,8 +1004,17 @@ T (ceil)(const T& x)
|
||||
return ceil(x);
|
||||
}
|
||||
|
||||
// Log base 2 for 32 bits positive integers.
|
||||
// Conveniently returns 0 for x==0.
|
||||
#ifdef __CUDACC__
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float ceil(const float &x) { return ::ceilf(x); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double ceil(const double &x) { return ::ceil(x); }
|
||||
#endif
|
||||
|
||||
|
||||
/** Log base 2 for 32 bits positive integers.
|
||||
* Conveniently returns 0 for x==0. */
|
||||
inline int log2(int x)
|
||||
{
|
||||
eigen_assert(x>=0);
|
||||
@@ -969,24 +1028,257 @@ inline int log2(int x)
|
||||
return table[(v * 0x07C4ACDDU) >> 27];
|
||||
}
|
||||
|
||||
/** \returns the square root of \a x.
|
||||
*
|
||||
* It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode,
|
||||
* but slightly faster for float/double and some compilers (e.g., gcc), thanks to
|
||||
* specializations when SSE is enabled.
|
||||
*
|
||||
* It's usage is justified in performance critical functions, like norm/normalize.
|
||||
*/
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T sqrt(const T &x)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(sqrt);
|
||||
return sqrt(x);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T log(const T &x) {
|
||||
EIGEN_USING_STD_MATH(log);
|
||||
return log(x);
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float log(const float &x) { return ::logf(x); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double log(const double &x) { return ::log(x); }
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
typename NumTraits<T>::Real abs(const T &x) {
|
||||
EIGEN_USING_STD_MATH(abs);
|
||||
return abs(x);
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float abs(const float &x) { return ::fabsf(x); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double abs(const double &x) { return ::fabs(x); }
|
||||
|
||||
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float abs(const std::complex<float>& x) {
|
||||
return ::hypotf(x.real(), x.imag());
|
||||
}
|
||||
|
||||
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double abs(const std::complex<double>& x) {
|
||||
return ::hypot(x.real(), x.imag());
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T exp(const T &x) {
|
||||
EIGEN_USING_STD_MATH(exp);
|
||||
return exp(x);
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float exp(const float &x) { return ::expf(x); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double exp(const double &x) { return ::exp(x); }
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T cos(const T &x) {
|
||||
EIGEN_USING_STD_MATH(cos);
|
||||
return cos(x);
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float cos(const float &x) { return ::cosf(x); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double cos(const double &x) { return ::cos(x); }
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T sin(const T &x) {
|
||||
EIGEN_USING_STD_MATH(sin);
|
||||
return sin(x);
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float sin(const float &x) { return ::sinf(x); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double sin(const double &x) { return ::sin(x); }
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T tan(const T &x) {
|
||||
EIGEN_USING_STD_MATH(tan);
|
||||
return tan(x);
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float tan(const float &x) { return ::tanf(x); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double tan(const double &x) { return ::tan(x); }
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T acos(const T &x) {
|
||||
EIGEN_USING_STD_MATH(acos);
|
||||
return acos(x);
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float acos(const float &x) { return ::acosf(x); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double acos(const double &x) { return ::acos(x); }
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T asin(const T &x) {
|
||||
EIGEN_USING_STD_MATH(asin);
|
||||
return asin(x);
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float asin(const float &x) { return ::asinf(x); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double asin(const double &x) { return ::asin(x); }
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T atan(const T &x) {
|
||||
EIGEN_USING_STD_MATH(atan);
|
||||
return atan(x);
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float atan(const float &x) { return ::atanf(x); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double atan(const double &x) { return ::atan(x); }
|
||||
#endif
|
||||
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T cosh(const T &x) {
|
||||
EIGEN_USING_STD_MATH(cosh);
|
||||
return cosh(x);
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float cosh(const float &x) { return ::coshf(x); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double cosh(const double &x) { return ::cosh(x); }
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T sinh(const T &x) {
|
||||
EIGEN_USING_STD_MATH(sinh);
|
||||
return sinh(x);
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float sinh(const float &x) { return ::sinhf(x); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double sinh(const double &x) { return ::sinh(x); }
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T tanh(const T &x) {
|
||||
EIGEN_USING_STD_MATH(tanh);
|
||||
return tanh(x);
|
||||
}
|
||||
|
||||
#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float tanh(float x) { return internal::generic_fast_tanh_float(x); }
|
||||
#endif
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float tanh(const float &x) { return ::tanhf(x); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double tanh(const double &x) { return ::tanh(x); }
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T fmod(const T& a, const T& b) {
|
||||
EIGEN_USING_STD_MATH(fmod);
|
||||
return fmod(a, b);
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float fmod(const float& a, const float& b) {
|
||||
return ::fmodf(a, b);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double fmod(const double& a, const double& b) {
|
||||
return ::fmod(a, b);
|
||||
}
|
||||
#endif
|
||||
|
||||
} // end namespace numext
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename T>
|
||||
bool isfinite_impl(const std::complex<T>& x)
|
||||
EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x)
|
||||
{
|
||||
return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool isnan_impl(const std::complex<T>& x)
|
||||
EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x)
|
||||
{
|
||||
return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool isinf_impl(const std::complex<T>& x)
|
||||
EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x)
|
||||
{
|
||||
return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x));
|
||||
}
|
||||
@@ -1007,14 +1299,12 @@ struct scalar_fuzzy_default_impl<Scalar, false, false>
|
||||
template<typename OtherScalar> EIGEN_DEVICE_FUNC
|
||||
static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(abs);
|
||||
return abs(x) <= abs(y) * prec;
|
||||
return numext::abs(x) <= numext::abs(y) * prec;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(abs);
|
||||
return abs(x - y) <= numext::mini(abs(x), abs(y)) * prec;
|
||||
return numext::abs(x - y) <= numext::mini(numext::abs(x), numext::abs(y)) * prec;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
|
||||
@@ -1048,11 +1338,12 @@ template<typename Scalar>
|
||||
struct scalar_fuzzy_default_impl<Scalar, true, false>
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
template<typename OtherScalar>
|
||||
template<typename OtherScalar> EIGEN_DEVICE_FUNC
|
||||
static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
|
||||
{
|
||||
return numext::abs2(x) <= numext::abs2(y) * prec * prec;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
|
||||
{
|
||||
return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec;
|
||||
@@ -1064,21 +1355,21 @@ struct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::
|
||||
|
||||
template<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC
|
||||
inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
|
||||
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
|
||||
const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
|
||||
{
|
||||
return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision);
|
||||
}
|
||||
|
||||
template<typename Scalar> EIGEN_DEVICE_FUNC
|
||||
inline bool isApprox(const Scalar& x, const Scalar& y,
|
||||
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
|
||||
const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
|
||||
{
|
||||
return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision);
|
||||
}
|
||||
|
||||
template<typename Scalar> EIGEN_DEVICE_FUNC
|
||||
inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y,
|
||||
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
|
||||
const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
|
||||
{
|
||||
return scalar_fuzzy_impl<Scalar>::isApproxOrLessThan(x, y, precision);
|
||||
}
|
||||
|
||||
78
Eigen/src/Core/MathFunctionsImpl.h
Normal file
78
Eigen/src/Core/MathFunctionsImpl.h
Normal file
@@ -0,0 +1,78 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
|
||||
// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_MATHFUNCTIONSIMPL_H
|
||||
#define EIGEN_MATHFUNCTIONSIMPL_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
|
||||
Doesn't do anything fancy, just a 13/6-degree rational interpolant which
|
||||
is accurate up to a couple of ulp in the range [-9, 9], outside of which
|
||||
the tanh(x) = +/-1.
|
||||
|
||||
This implementation works on both scalars and packets.
|
||||
*/
|
||||
template<typename T>
|
||||
T generic_fast_tanh_float(const T& a_x)
|
||||
{
|
||||
// Clamp the inputs to the range [-9, 9] since anything outside
|
||||
// this range is +/-1.0f in single-precision.
|
||||
const T plus_9 = pset1<T>(9.f);
|
||||
const T minus_9 = pset1<T>(-9.f);
|
||||
// NOTE GCC prior to 6.3 might improperly optimize this max/min
|
||||
// step such that if a_x is nan, x will be either 9 or -9,
|
||||
// and tanh will return 1 or -1 instead of nan.
|
||||
// This is supposed to be fixed in gcc6.3,
|
||||
// see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867
|
||||
const T x = pmax(minus_9,pmin(plus_9,a_x));
|
||||
// The monomial coefficients of the numerator polynomial (odd).
|
||||
const T alpha_1 = pset1<T>(4.89352455891786e-03f);
|
||||
const T alpha_3 = pset1<T>(6.37261928875436e-04f);
|
||||
const T alpha_5 = pset1<T>(1.48572235717979e-05f);
|
||||
const T alpha_7 = pset1<T>(5.12229709037114e-08f);
|
||||
const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
|
||||
const T alpha_11 = pset1<T>(2.00018790482477e-13f);
|
||||
const T alpha_13 = pset1<T>(-2.76076847742355e-16f);
|
||||
|
||||
// The monomial coefficients of the denominator polynomial (even).
|
||||
const T beta_0 = pset1<T>(4.89352518554385e-03f);
|
||||
const T beta_2 = pset1<T>(2.26843463243900e-03f);
|
||||
const T beta_4 = pset1<T>(1.18534705686654e-04f);
|
||||
const T beta_6 = pset1<T>(1.19825839466702e-06f);
|
||||
|
||||
// Since the polynomials are odd/even, we need x^2.
|
||||
const T x2 = pmul(x, x);
|
||||
|
||||
// Evaluate the numerator polynomial p.
|
||||
T p = pmadd(x2, alpha_13, alpha_11);
|
||||
p = pmadd(x2, p, alpha_9);
|
||||
p = pmadd(x2, p, alpha_7);
|
||||
p = pmadd(x2, p, alpha_5);
|
||||
p = pmadd(x2, p, alpha_3);
|
||||
p = pmadd(x2, p, alpha_1);
|
||||
p = pmul(x, p);
|
||||
|
||||
// Evaluate the denominator polynomial p.
|
||||
T q = pmadd(x2, beta_6, beta_4);
|
||||
q = pmadd(x2, q, beta_2);
|
||||
q = pmadd(x2, q, beta_0);
|
||||
|
||||
// Divide the numerator by the denominator.
|
||||
return pdiv(p, q);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MATHFUNCTIONSIMPL_H
|
||||
@@ -13,6 +13,45 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
{
|
||||
private:
|
||||
enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
|
||||
typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
|
||||
enum {
|
||||
row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
|
||||
is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
|
||||
max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
|
||||
default_alignment = compute_default_alignment<_Scalar,max_size>::value,
|
||||
actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
|
||||
required_alignment = unpacket_traits<PacketScalar>::alignment,
|
||||
packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0
|
||||
};
|
||||
|
||||
public:
|
||||
typedef _Scalar Scalar;
|
||||
typedef Dense StorageKind;
|
||||
typedef Eigen::Index StorageIndex;
|
||||
typedef MatrixXpr XprKind;
|
||||
enum {
|
||||
RowsAtCompileTime = _Rows,
|
||||
ColsAtCompileTime = _Cols,
|
||||
MaxRowsAtCompileTime = _MaxRows,
|
||||
MaxColsAtCompileTime = _MaxCols,
|
||||
Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
|
||||
Options = _Options,
|
||||
InnerStrideAtCompileTime = 1,
|
||||
OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
|
||||
|
||||
// FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
|
||||
EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
|
||||
Alignment = actual_alignment
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
/** \class Matrix
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
@@ -67,7 +106,7 @@ namespace Eigen {
|
||||
* \endcode
|
||||
*
|
||||
* This class can be extended with the help of the plugin mechanism described on the page
|
||||
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN.
|
||||
* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN.
|
||||
*
|
||||
* <i><b>Some notes:</b></i>
|
||||
*
|
||||
@@ -98,7 +137,7 @@ namespace Eigen {
|
||||
* </dl>
|
||||
*
|
||||
* <i><b>ABI and storage layout</b></i>
|
||||
*
|
||||
*
|
||||
* The table below summarizes the ABI of some possible Matrix instances which is fixed throughout the lifetime of Eigen 3.
|
||||
* <table class="manual">
|
||||
* <tr><th>Matrix type</th><th>Equivalent C structure</th></tr>
|
||||
@@ -130,50 +169,11 @@ namespace Eigen {
|
||||
* </table>
|
||||
* Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined as the largest possible power-of-two
|
||||
* smaller than EIGEN_MAX_STATIC_ALIGN_BYTES.
|
||||
*
|
||||
* \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
|
||||
* \ref TopicStorageOrders
|
||||
*
|
||||
* \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
|
||||
* \ref TopicStorageOrders
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
{
|
||||
private:
|
||||
enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
|
||||
typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
|
||||
enum {
|
||||
row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
|
||||
is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
|
||||
max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
|
||||
default_alignment = compute_default_alignment<_Scalar,max_size>::value,
|
||||
actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
|
||||
required_alignment = unpacket_traits<PacketScalar>::alignment,
|
||||
packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
|
||||
};
|
||||
|
||||
public:
|
||||
typedef _Scalar Scalar;
|
||||
typedef Dense StorageKind;
|
||||
typedef Eigen::Index StorageIndex;
|
||||
typedef MatrixXpr XprKind;
|
||||
enum {
|
||||
RowsAtCompileTime = _Rows,
|
||||
ColsAtCompileTime = _Cols,
|
||||
MaxRowsAtCompileTime = _MaxRows,
|
||||
MaxColsAtCompileTime = _MaxCols,
|
||||
Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
|
||||
Options = _Options,
|
||||
InnerStrideAtCompileTime = 1,
|
||||
OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
|
||||
|
||||
// FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
|
||||
EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
|
||||
Alignment = actual_alignment
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
class Matrix
|
||||
: public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
@@ -268,9 +268,9 @@ class Matrix
|
||||
: Base(internal::constructor_without_unaligned_array_assert())
|
||||
{ Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
|
||||
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
#if EIGEN_HAS_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
Matrix(Matrix&& other)
|
||||
Matrix(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
|
||||
: Base(std::move(other))
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -278,7 +278,7 @@ class Matrix
|
||||
Base::_set_noalias(other);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
Matrix& operator=(Matrix&& other)
|
||||
Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
|
||||
{
|
||||
other.swap(*this);
|
||||
return *this;
|
||||
|
||||
@@ -41,9 +41,9 @@ namespace Eigen {
|
||||
* \endcode
|
||||
*
|
||||
* This class can be extended with the help of the plugin mechanism described on the page
|
||||
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN.
|
||||
* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN.
|
||||
*
|
||||
* \sa \ref TopicClassHierarchy
|
||||
* \sa \blank \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived> class MatrixBase
|
||||
: public DenseBase<Derived>
|
||||
@@ -66,7 +66,7 @@ template<typename Derived> class MatrixBase
|
||||
using Base::MaxSizeAtCompileTime;
|
||||
using Base::IsVectorAtCompileTime;
|
||||
using Base::Flags;
|
||||
|
||||
|
||||
using Base::derived;
|
||||
using Base::const_cast_derived;
|
||||
using Base::rows;
|
||||
@@ -80,8 +80,6 @@ template<typename Derived> class MatrixBase
|
||||
using Base::operator-=;
|
||||
using Base::operator*=;
|
||||
using Base::operator/=;
|
||||
using Base::operator*;
|
||||
using Base::operator/;
|
||||
|
||||
typedef typename Base::CoeffReturnType CoeffReturnType;
|
||||
typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
|
||||
@@ -100,7 +98,7 @@ template<typename Derived> class MatrixBase
|
||||
/** \returns the size of the main diagonal, which is min(rows(),cols()).
|
||||
* \sa rows(), cols(), SizeAtCompileTime. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index diagonalSize() const { return (std::min)(rows(),cols()); }
|
||||
inline Index diagonalSize() const { return (numext::mini)(rows(),cols()); }
|
||||
|
||||
typedef typename Base::PlainObject PlainObject;
|
||||
|
||||
@@ -123,6 +121,7 @@ template<typename Derived> class MatrixBase
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase
|
||||
#define EIGEN_DOC_UNARY_ADDONS(X,Y)
|
||||
# include "../plugins/CommonCwiseUnaryOps.h"
|
||||
# include "../plugins/CommonCwiseBinaryOps.h"
|
||||
# include "../plugins/MatrixCwiseUnaryOps.h"
|
||||
@@ -131,18 +130,19 @@ template<typename Derived> class MatrixBase
|
||||
# include EIGEN_MATRIXBASE_PLUGIN
|
||||
# endif
|
||||
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
|
||||
#undef EIGEN_DOC_UNARY_ADDONS
|
||||
|
||||
/** Special case of the template operator=, in order to prevent the compiler
|
||||
* from generating a default operator= (issue hit with g++ 4.1)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator=(const MatrixBase& other);
|
||||
|
||||
// We cannot inherit here via Base::operator= since it is causing
|
||||
// trouble with MSVC.
|
||||
|
||||
template <typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator=(const DenseBase<OtherDerived>& other);
|
||||
|
||||
template <typename OtherDerived>
|
||||
@@ -154,10 +154,10 @@ template<typename Derived> class MatrixBase
|
||||
Derived& operator=(const ReturnByValue<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator+=(const MatrixBase<OtherDerived>& other);
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator-=(const MatrixBase<OtherDerived>& other);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
@@ -175,7 +175,7 @@ template<typename Derived> class MatrixBase
|
||||
#endif
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<Derived,OtherDerived,LazyProduct>
|
||||
lazyProduct(const MatrixBase<OtherDerived> &other) const;
|
||||
|
||||
@@ -195,7 +195,7 @@ template<typename Derived> class MatrixBase
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
|
||||
typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
|
||||
dot(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;
|
||||
@@ -204,7 +204,9 @@ template<typename Derived> class MatrixBase
|
||||
RealScalar blueNorm() const;
|
||||
RealScalar hypotNorm() const;
|
||||
EIGEN_DEVICE_FUNC const PlainObject normalized() const;
|
||||
EIGEN_DEVICE_FUNC const PlainObject stableNormalized() const;
|
||||
EIGEN_DEVICE_FUNC void normalize();
|
||||
EIGEN_DEVICE_FUNC void stableNormalize();
|
||||
|
||||
EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
|
||||
EIGEN_DEVICE_FUNC void adjointInPlace();
|
||||
@@ -212,7 +214,7 @@ template<typename Derived> class MatrixBase
|
||||
typedef Diagonal<Derived> DiagonalReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
DiagonalReturnType diagonal();
|
||||
|
||||
|
||||
typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
ConstDiagonalReturnType diagonal() const;
|
||||
@@ -220,14 +222,14 @@ template<typename Derived> class MatrixBase
|
||||
template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
|
||||
template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
|
||||
|
||||
template<int Index>
|
||||
template<int Index>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename DiagonalIndexReturnType<Index>::Type diagonal();
|
||||
|
||||
template<int Index>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
|
||||
|
||||
|
||||
typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType;
|
||||
typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType;
|
||||
|
||||
@@ -249,7 +251,7 @@ template<typename Derived> class MatrixBase
|
||||
template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
|
||||
template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };
|
||||
|
||||
template<unsigned int UpLo>
|
||||
template<unsigned int UpLo>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
|
||||
template<unsigned int UpLo>
|
||||
@@ -328,17 +330,13 @@ template<typename Derived> class MatrixBase
|
||||
|
||||
/////////// LU module ///////////
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const FullPivLU<PlainObject> fullPivLu() const;
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const PartialPivLU<PlainObject> partialPivLu() const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const PartialPivLU<PlainObject> lu() const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Inverse<Derived> inverse() const;
|
||||
|
||||
|
||||
template<typename ResultType>
|
||||
inline void computeInverseAndDetWithCheck(
|
||||
ResultType& inverse,
|
||||
@@ -364,6 +362,7 @@ template<typename Derived> class MatrixBase
|
||||
inline const HouseholderQR<PlainObject> householderQr() const;
|
||||
inline const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
|
||||
inline const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
|
||||
inline const CompleteOrthogonalDecomposition<PlainObject> completeOrthogonalDecomposition() const;
|
||||
|
||||
/////////// Eigenvalues module ///////////
|
||||
|
||||
@@ -380,40 +379,44 @@ template<typename Derived> class MatrixBase
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/// \internal helper struct to form the return type of the cross product
|
||||
template<typename OtherDerived> struct cross_product_return_type {
|
||||
typedef typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
|
||||
typedef typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
|
||||
typedef Matrix<Scalar,MatrixBase::RowsAtCompileTime,MatrixBase::ColsAtCompileTime> type;
|
||||
};
|
||||
#endif // EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
inline typename cross_product_return_type<OtherDerived>::type
|
||||
#else
|
||||
inline PlainObject
|
||||
#endif
|
||||
cross(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline PlainObject unitOrthogonal(void) const;
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
|
||||
|
||||
inline ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
|
||||
|
||||
// put this as separate enum value to work around possible GCC 4.3 bug (?)
|
||||
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
|
||||
: ColsAtCompileTime==1 ? Vertical : Horizontal };
|
||||
typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline HomogeneousReturnType homogeneous() const;
|
||||
|
||||
|
||||
enum {
|
||||
SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
|
||||
};
|
||||
typedef Block<const Derived,
|
||||
internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
|
||||
internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;
|
||||
typedef CwiseUnaryOp<internal::scalar_quotient1_op<typename internal::traits<Derived>::Scalar>,
|
||||
const ConstStartMinusOne > HNormalizedReturnType;
|
||||
|
||||
typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const HNormalizedReturnType hnormalized() const;
|
||||
|
||||
////////// Householder module ///////////
|
||||
|
||||
@@ -13,25 +13,24 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class NestByValue
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression which must be nested by value
|
||||
*
|
||||
* \param ExpressionType the type of the object of which we are requiring nesting-by-value
|
||||
*
|
||||
* This class is the return type of MatrixBase::nestByValue()
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::nestByValue()
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename ExpressionType>
|
||||
struct traits<NestByValue<ExpressionType> > : public traits<ExpressionType>
|
||||
{};
|
||||
}
|
||||
|
||||
/** \class NestByValue
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression which must be nested by value
|
||||
*
|
||||
* \tparam ExpressionType the type of the object of which we are requiring nesting-by-value
|
||||
*
|
||||
* This class is the return type of MatrixBase::nestByValue()
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::nestByValue()
|
||||
*/
|
||||
template<typename ExpressionType> class NestByValue
|
||||
: public internal::dense_xpr_base< NestByValue<ExpressionType> >::type
|
||||
{
|
||||
|
||||
@@ -17,7 +17,7 @@ namespace Eigen {
|
||||
*
|
||||
* \brief Pseudo expression providing an operator = assuming no aliasing
|
||||
*
|
||||
* \param ExpressionType the type of the object on which to do the lazy assignment
|
||||
* \tparam ExpressionType the type of the object on which to do the lazy assignment
|
||||
*
|
||||
* This class represents an expression with special assignment operators
|
||||
* assuming no aliasing between the target expression and the source expression.
|
||||
@@ -39,7 +39,7 @@ class NoAlias
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment_no_alias(m_expression, other.derived(), internal::assign_op<Scalar>());
|
||||
call_assignment_no_alias(m_expression, other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return m_expression;
|
||||
}
|
||||
|
||||
@@ -47,7 +47,7 @@ class NoAlias
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op<Scalar>());
|
||||
call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return m_expression;
|
||||
}
|
||||
|
||||
@@ -55,7 +55,7 @@ class NoAlias
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op<Scalar>());
|
||||
call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return m_expression;
|
||||
}
|
||||
|
||||
|
||||
@@ -12,24 +12,57 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// default implementation of digits10(), based on numeric_limits if specialized,
|
||||
// 0 for integer types, and log10(epsilon()) otherwise.
|
||||
template< typename T,
|
||||
bool use_numeric_limits = std::numeric_limits<T>::is_specialized,
|
||||
bool is_integer = NumTraits<T>::IsInteger>
|
||||
struct default_digits10_impl
|
||||
{
|
||||
static int run() { return std::numeric_limits<T>::digits10; }
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct default_digits10_impl<T,false,false> // Floating point
|
||||
{
|
||||
static int run() {
|
||||
using std::log10;
|
||||
using std::ceil;
|
||||
typedef typename NumTraits<T>::Real Real;
|
||||
return int(ceil(-log10(NumTraits<Real>::epsilon())));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct default_digits10_impl<T,false,true> // Integer
|
||||
{
|
||||
static int run() { return 0; }
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \class NumTraits
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
|
||||
*
|
||||
* \param T the numeric type at hand
|
||||
* \tparam T the numeric type at hand
|
||||
*
|
||||
* This class stores enums, typedefs and static methods giving information about a numeric type.
|
||||
*
|
||||
* The provided data consists of:
|
||||
* \li A typedef \a Real, giving the "real part" type of \a T. If \a T is already real,
|
||||
* then \a Real is just a typedef to \a T. If \a T is \c std::complex<U> then \a Real
|
||||
* \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real,
|
||||
* then \c Real is just a typedef to \a T. If \a T is \c std::complex<U> then \c Real
|
||||
* is a typedef to \a U.
|
||||
* \li A typedef \a NonInteger, giving the type that should be used for operations producing non-integral values,
|
||||
* \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values,
|
||||
* such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives
|
||||
* \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to
|
||||
* take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is
|
||||
* only intended as a helper for code that needs to explicitly promote types.
|
||||
* \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for \c std::complex<U>, Literal is defined as \c U.
|
||||
* Of course, this type must be fully compatible with \a T. In doubt, just use \a T here.
|
||||
* \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what
|
||||
* this means, just use \a T here.
|
||||
* \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex
|
||||
@@ -42,10 +75,14 @@ namespace Eigen {
|
||||
* \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned.
|
||||
* \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must
|
||||
* be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise.
|
||||
* \li An epsilon() function which, unlike std::numeric_limits::epsilon(), returns a \a Real instead of a \a T.
|
||||
* \li An epsilon() function which, unlike <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/epsilon">std::numeric_limits::epsilon()</a>,
|
||||
* returns a \a Real instead of a \a T.
|
||||
* \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default
|
||||
* value by the fuzzy comparison operators.
|
||||
* \li highest() and lowest() functions returning the highest and lowest possible values respectively.
|
||||
* \li digits10() function returning the number of decimal digits that can be represented without change. This is
|
||||
* the analogue of <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/digits10">std::numeric_limits<T>::digits10</a>
|
||||
* which is used as the default implementation if specialized.
|
||||
*/
|
||||
|
||||
template<typename T> struct GenericNumTraits
|
||||
@@ -67,16 +104,20 @@ template<typename T> struct GenericNumTraits
|
||||
T
|
||||
>::type NonInteger;
|
||||
typedef T Nested;
|
||||
typedef T Literal;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Real epsilon()
|
||||
{
|
||||
#if defined(__CUDA_ARCH__)
|
||||
return internal::device::numeric_limits<T>::epsilon();
|
||||
#else
|
||||
return std::numeric_limits<T>::epsilon();
|
||||
#endif
|
||||
return numext::numeric_limits<T>::epsilon();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline int digits10()
|
||||
{
|
||||
return internal::default_digits10_impl<T>::run();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Real dummy_precision()
|
||||
{
|
||||
@@ -87,20 +128,22 @@ template<typename T> struct GenericNumTraits
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline T highest() {
|
||||
#if defined(__CUDA_ARCH__)
|
||||
return (internal::device::numeric_limits<T>::max)();
|
||||
#else
|
||||
return (std::numeric_limits<T>::max)();
|
||||
#endif
|
||||
return (numext::numeric_limits<T>::max)();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline T lowest() {
|
||||
#if defined(__CUDA_ARCH__)
|
||||
return IsInteger ? (internal::device::numeric_limits<T>::min)() : (-(internal::device::numeric_limits<T>::max)());
|
||||
#else
|
||||
return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)());
|
||||
#endif
|
||||
return IsInteger ? (numext::numeric_limits<T>::min)() : (-(numext::numeric_limits<T>::max)());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline T infinity() {
|
||||
return numext::numeric_limits<T>::infinity();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline T quiet_NaN() {
|
||||
return numext::numeric_limits<T>::quiet_NaN();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -130,6 +173,7 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
|
||||
: GenericNumTraits<std::complex<_Real> >
|
||||
{
|
||||
typedef _Real Real;
|
||||
typedef typename NumTraits<_Real>::Literal Literal;
|
||||
enum {
|
||||
IsComplex = 1,
|
||||
RequireInitialization = NumTraits<_Real>::RequireInitialization,
|
||||
@@ -138,8 +182,12 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
|
||||
MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline int digits10() { return NumTraits<Real>::digits10(); }
|
||||
};
|
||||
|
||||
template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
|
||||
@@ -151,7 +199,8 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
|
||||
typedef typename NumTraits<Scalar>::NonInteger NonIntegerScalar;
|
||||
typedef Array<NonIntegerScalar, Rows, Cols, Options, MaxRows, MaxCols> NonInteger;
|
||||
typedef ArrayType & Nested;
|
||||
|
||||
typedef typename NumTraits<Scalar>::Literal Literal;
|
||||
|
||||
enum {
|
||||
IsComplex = NumTraits<Scalar>::IsComplex,
|
||||
IsInteger = NumTraits<Scalar>::IsInteger,
|
||||
@@ -161,11 +210,37 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
|
||||
AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
|
||||
MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
|
||||
};
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
|
||||
};
|
||||
|
||||
template<> struct NumTraits<std::string>
|
||||
: GenericNumTraits<std::string>
|
||||
{
|
||||
enum {
|
||||
RequireInitialization = 1,
|
||||
ReadCost = HugeCost,
|
||||
AddCost = HugeCost,
|
||||
MulCost = HugeCost
|
||||
};
|
||||
|
||||
static inline int digits10() { return 0; }
|
||||
|
||||
private:
|
||||
static inline std::string epsilon();
|
||||
static inline std::string dummy_precision();
|
||||
static inline std::string lowest();
|
||||
static inline std::string highest();
|
||||
static inline std::string infinity();
|
||||
static inline std::string quiet_NaN();
|
||||
};
|
||||
|
||||
// Empty specialization for void to allow template specialization based on NumTraits<T>::Real with T==void and SFINAE.
|
||||
template<> struct NumTraits<void> {};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_NUMTRAITS_H
|
||||
|
||||
@@ -13,12 +13,18 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
enum PermPermProduct_t {PermPermProduct};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \class PermutationBase
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for permutations
|
||||
*
|
||||
* \param Derived the derived class
|
||||
* \tparam Derived the derived class
|
||||
*
|
||||
* This class is the base class for all expressions representing a permutation matrix,
|
||||
* internally stored as a vector of integers.
|
||||
@@ -36,13 +42,6 @@ namespace Eigen {
|
||||
*
|
||||
* \sa class PermutationMatrix, class PermutationWrapper
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
|
||||
enum PermPermProduct_t {PermPermProduct};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
template<typename Derived>
|
||||
class PermutationBase : public EigenBase<Derived>
|
||||
{
|
||||
@@ -192,13 +191,13 @@ class PermutationBase : public EigenBase<Derived>
|
||||
|
||||
/** \returns the inverse permutation matrix.
|
||||
*
|
||||
* \note \note_try_to_help_rvo
|
||||
* \note \blank \note_try_to_help_rvo
|
||||
*/
|
||||
inline InverseReturnType inverse() const
|
||||
{ return InverseReturnType(derived()); }
|
||||
/** \returns the transpose permutation matrix.
|
||||
*
|
||||
* \note \note_try_to_help_rvo
|
||||
* \note \blank \note_try_to_help_rvo
|
||||
*/
|
||||
inline InverseReturnType transpose() const
|
||||
{ return InverseReturnType(derived()); }
|
||||
@@ -225,7 +224,7 @@ class PermutationBase : public EigenBase<Derived>
|
||||
|
||||
/** \returns the product permutation matrix.
|
||||
*
|
||||
* \note \note_try_to_help_rvo
|
||||
* \note \blank \note_try_to_help_rvo
|
||||
*/
|
||||
template<typename Other>
|
||||
inline PlainPermutationType operator*(const PermutationBase<Other>& other) const
|
||||
@@ -233,7 +232,7 @@ class PermutationBase : public EigenBase<Derived>
|
||||
|
||||
/** \returns the product of a permutation with another inverse permutation.
|
||||
*
|
||||
* \note \note_try_to_help_rvo
|
||||
* \note \blank \note_try_to_help_rvo
|
||||
*/
|
||||
template<typename Other>
|
||||
inline PlainPermutationType operator*(const InverseImpl<Other,PermutationStorage>& other) const
|
||||
@@ -241,7 +240,7 @@ class PermutationBase : public EigenBase<Derived>
|
||||
|
||||
/** \returns the product of an inverse permutation with another permutation.
|
||||
*
|
||||
* \note \note_try_to_help_rvo
|
||||
* \note \blank \note_try_to_help_rvo
|
||||
*/
|
||||
template<typename Other> friend
|
||||
inline PlainPermutationType operator*(const InverseImpl<Other, PermutationStorage>& other, const PermutationBase& perm)
|
||||
@@ -280,20 +279,6 @@ class PermutationBase : public EigenBase<Derived>
|
||||
|
||||
};
|
||||
|
||||
/** \class PermutationMatrix
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Permutation matrix
|
||||
*
|
||||
* \param SizeAtCompileTime the number of rows/cols, or Dynamic
|
||||
* \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
|
||||
* \param StorageIndex the integer type of the indices
|
||||
*
|
||||
* This class represents a permutation matrix, internally stored as a vector of integers.
|
||||
*
|
||||
* \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
|
||||
struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
|
||||
@@ -306,6 +291,19 @@ struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _Storag
|
||||
};
|
||||
}
|
||||
|
||||
/** \class PermutationMatrix
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Permutation matrix
|
||||
*
|
||||
* \tparam SizeAtCompileTime the number of rows/cols, or Dynamic
|
||||
* \tparam MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
|
||||
* \tparam _StorageIndex the integer type of the indices
|
||||
*
|
||||
* This class represents a permutation matrix, internally stored as a vector of integers.
|
||||
*
|
||||
* \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix
|
||||
*/
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
|
||||
class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
|
||||
{
|
||||
@@ -482,18 +480,6 @@ class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageInd
|
||||
IndicesType m_indices;
|
||||
};
|
||||
|
||||
/** \class PermutationWrapper
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Class to view a vector of integers as a permutation matrix
|
||||
*
|
||||
* \param _IndicesType the type of the vector of integer (can be any compatible expression)
|
||||
*
|
||||
* This class allows viewing any vector expression of integers as a permutation matrix.
|
||||
*
|
||||
* \sa class PermutationBase, class PermutationMatrix
|
||||
*/
|
||||
|
||||
template<typename _IndicesType> class TranspositionsWrapper;
|
||||
namespace internal {
|
||||
template<typename _IndicesType>
|
||||
@@ -513,6 +499,17 @@ struct traits<PermutationWrapper<_IndicesType> >
|
||||
};
|
||||
}
|
||||
|
||||
/** \class PermutationWrapper
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Class to view a vector of integers as a permutation matrix
|
||||
*
|
||||
* \tparam _IndicesType the type of the vector of integer (can be any compatible expression)
|
||||
*
|
||||
* This class allows viewing any vector expression of integers as a permutation matrix.
|
||||
*
|
||||
* \sa class PermutationBase, class PermutationMatrix
|
||||
*/
|
||||
template<typename _IndicesType>
|
||||
class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesType> >
|
||||
{
|
||||
|
||||
@@ -58,34 +58,41 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
namespace doxygen {
|
||||
|
||||
// This is a workaround to doxygen not being able to understand the inheritance logic
|
||||
// when it is hidden by the dense_xpr_base helper struct.
|
||||
// Moreover, doxygen fails to include members that are not documented in the declaration body of
|
||||
// MatrixBase if we inherit MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >,
|
||||
// this is why we simply inherit MatrixBase, though this does not make sense.
|
||||
|
||||
/** This class is just a workaround for Doxygen and it does not actually exist. */
|
||||
template<typename Derived> struct dense_xpr_base_dispatcher;
|
||||
/** This class is just a workaround for Doxygen and it does not actually exist. */
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct dense_xpr_base_dispatcher<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
: public MatrixBase {};
|
||||
/** This class is just a workaround for Doxygen and it does not actually exist. */
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct dense_xpr_base_dispatcher<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
: public ArrayBase {};
|
||||
|
||||
} // namespace doxygen
|
||||
|
||||
/** \class PlainObjectBase
|
||||
* \ingroup Core_Module
|
||||
* \brief %Dense storage base class for matrices and arrays.
|
||||
*
|
||||
* This class can be extended with the help of the plugin mechanism described on the page
|
||||
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN.
|
||||
* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN.
|
||||
*
|
||||
* \tparam Derived is the derived type, e.g., a Matrix or Array
|
||||
*
|
||||
* \sa \ref TopicClassHierarchy
|
||||
*/
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
namespace internal {
|
||||
|
||||
// this is a workaround to doxygen not being able to understand the inheritance logic
|
||||
// when it is hidden by the dense_xpr_base helper struct.
|
||||
/** This class is just a workaround for Doxygen and it does not actually exist. */
|
||||
template<typename Derived> struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase<Derived> {};
|
||||
/** This class is just a workaround for Doxygen and it does not actually exist. */
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct dense_xpr_base_dispatcher_for_doxygen<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
: public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
|
||||
/** This class is just a workaround for Doxygen and it does not actually exist. */
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct dense_xpr_base_dispatcher_for_doxygen<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
: public ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
template<typename Derived>
|
||||
class PlainObjectBase : public internal::dense_xpr_base_dispatcher_for_doxygen<Derived>
|
||||
class PlainObjectBase : public doxygen::dense_xpr_base_dispatcher<Derived>
|
||||
#else
|
||||
template<typename Derived>
|
||||
class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
@@ -145,6 +152,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); }
|
||||
|
||||
/** This is an overloaded version of DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index,Index) const
|
||||
* provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
|
||||
*
|
||||
* See DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index,Index) const for details. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const
|
||||
{
|
||||
@@ -154,12 +165,20 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
return m_storage.data()[rowId + colId * m_storage.rows()];
|
||||
}
|
||||
|
||||
/** This is an overloaded version of DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const
|
||||
* provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
|
||||
*
|
||||
* See DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const for details. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
|
||||
{
|
||||
return m_storage.data()[index];
|
||||
}
|
||||
|
||||
/** This is an overloaded version of DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index,Index) const
|
||||
* provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
|
||||
*
|
||||
* See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index,Index) const for details. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
@@ -169,12 +188,18 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
return m_storage.data()[rowId + colId * m_storage.rows()];
|
||||
}
|
||||
|
||||
/** This is an overloaded version of DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index) const
|
||||
* provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
|
||||
*
|
||||
* See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index) const for details. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_storage.data()[index];
|
||||
}
|
||||
|
||||
/** This is the const version of coeffRef(Index,Index) which is thus a synonym of coeff(Index,Index).
|
||||
* It is provided for convenience. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
@@ -184,6 +209,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
return m_storage.data()[rowId + colId * m_storage.rows()];
|
||||
}
|
||||
|
||||
/** This is the const version of coeffRef(Index) which is thus a synonym of coeff(Index).
|
||||
* It is provided for convenience. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
@@ -471,15 +498,15 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
#if EIGEN_HAS_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
PlainObjectBase(PlainObjectBase&& other)
|
||||
PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT
|
||||
: m_storage( std::move(other.m_storage) )
|
||||
{
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
PlainObjectBase& operator=(PlainObjectBase&& other)
|
||||
PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT
|
||||
{
|
||||
using std::swap;
|
||||
swap(m_storage, other.m_storage);
|
||||
@@ -533,7 +560,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
|
||||
public:
|
||||
|
||||
/** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
|
||||
/** \brief Copies the generic expression \a other into *this.
|
||||
* \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -618,8 +646,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
//@}
|
||||
|
||||
using Base::setConstant;
|
||||
EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& value);
|
||||
EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& value);
|
||||
EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val);
|
||||
EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& val);
|
||||
|
||||
using Base::setZero;
|
||||
EIGEN_DEVICE_FUNC Derived& setZero(Index size);
|
||||
@@ -697,7 +725,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
//_resize_to_match(other);
|
||||
// the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because
|
||||
// it wouldn't allow to copy a row-vector into a column-vector.
|
||||
internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op<Scalar>());
|
||||
internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return this->derived();
|
||||
}
|
||||
|
||||
@@ -713,11 +741,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
|
||||
template<typename T0, typename T1>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init2(const Scalar& val0, const Scalar& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
|
||||
EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
|
||||
m_storage.data()[0] = val0;
|
||||
m_storage.data()[1] = val1;
|
||||
m_storage.data()[0] = Scalar(val0);
|
||||
m_storage.data()[1] = Scalar(val1);
|
||||
}
|
||||
|
||||
template<typename T0, typename T1>
|
||||
@@ -742,6 +770,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
{
|
||||
// NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
|
||||
const bool is_integer = NumTraits<T>::IsInteger;
|
||||
EIGEN_UNUSED_VARIABLE(is_integer);
|
||||
EIGEN_STATIC_ASSERT(is_integer,
|
||||
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
|
||||
resize(size);
|
||||
@@ -895,8 +924,8 @@ struct conservative_resize_like_impl
|
||||
{
|
||||
// The storage order does not allow us to use reallocation.
|
||||
typename Derived::PlainObject tmp(rows,cols);
|
||||
const Index common_rows = (std::min)(rows, _this.rows());
|
||||
const Index common_cols = (std::min)(cols, _this.cols());
|
||||
const Index common_rows = numext::mini(rows, _this.rows());
|
||||
const Index common_cols = numext::mini(cols, _this.cols());
|
||||
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
|
||||
_this.derived().swap(tmp);
|
||||
}
|
||||
@@ -929,8 +958,8 @@ struct conservative_resize_like_impl
|
||||
{
|
||||
// The storage order does not allow us to use reallocation.
|
||||
typename Derived::PlainObject tmp(other);
|
||||
const Index common_rows = (std::min)(tmp.rows(), _this.rows());
|
||||
const Index common_cols = (std::min)(tmp.cols(), _this.cols());
|
||||
const Index common_rows = numext::mini(tmp.rows(), _this.rows());
|
||||
const Index common_cols = numext::mini(tmp.cols(), _this.cols());
|
||||
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
|
||||
_this.derived().swap(tmp);
|
||||
}
|
||||
|
||||
@@ -14,57 +14,8 @@ namespace Eigen {
|
||||
|
||||
template<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl;
|
||||
|
||||
/** \class Product
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the product of two arbitrary matrices or vectors
|
||||
*
|
||||
* \param Lhs the type of the left-hand side expression
|
||||
* \param Rhs the type of the right-hand side expression
|
||||
*
|
||||
* This class represents an expression of the product of two arbitrary matrices.
|
||||
*
|
||||
* The other template parameters are:
|
||||
* \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Determine the scalar of Product<Lhs, Rhs>. This is normally the same as Lhs::Scalar times
|
||||
// Rhs::Scalar, but product with permutation matrices inherit the scalar of the other factor.
|
||||
template<typename Lhs, typename Rhs, typename LhsShape = typename evaluator_traits<Lhs>::Shape,
|
||||
typename RhsShape = typename evaluator_traits<Rhs>::Shape >
|
||||
struct product_result_scalar
|
||||
{
|
||||
typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename RhsShape>
|
||||
struct product_result_scalar<Lhs, Rhs, PermutationShape, RhsShape>
|
||||
{
|
||||
typedef typename Rhs::Scalar Scalar;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename LhsShape>
|
||||
struct product_result_scalar<Lhs, Rhs, LhsShape, PermutationShape>
|
||||
{
|
||||
typedef typename Lhs::Scalar Scalar;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename RhsShape>
|
||||
struct product_result_scalar<Lhs, Rhs, TranspositionsShape, RhsShape>
|
||||
{
|
||||
typedef typename Rhs::Scalar Scalar;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename LhsShape>
|
||||
struct product_result_scalar<Lhs, Rhs, LhsShape, TranspositionsShape>
|
||||
{
|
||||
typedef typename Lhs::Scalar Scalar;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int Option>
|
||||
struct traits<Product<Lhs, Rhs, Option> >
|
||||
{
|
||||
@@ -75,7 +26,7 @@ struct traits<Product<Lhs, Rhs, Option> >
|
||||
|
||||
typedef MatrixXpr XprKind;
|
||||
|
||||
typedef typename product_result_scalar<LhsCleaned,RhsCleaned>::Scalar Scalar;
|
||||
typedef typename ScalarBinaryOpTraits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;
|
||||
typedef typename product_promote_storage_type<typename LhsTraits::StorageKind,
|
||||
typename RhsTraits::StorageKind,
|
||||
internal::product_type<Lhs,Rhs>::ret>::ret StorageKind;
|
||||
@@ -102,7 +53,20 @@ struct traits<Product<Lhs, Rhs, Option> >
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
/** \class Product
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the product of two arbitrary matrices or vectors
|
||||
*
|
||||
* \tparam _Lhs the type of the left-hand side expression
|
||||
* \tparam _Rhs the type of the right-hand side expression
|
||||
*
|
||||
* This class represents an expression of the product of two arbitrary matrices.
|
||||
*
|
||||
* The other template parameters are:
|
||||
* \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct
|
||||
*
|
||||
*/
|
||||
template<typename _Lhs, typename _Rhs, int Option>
|
||||
class Product : public ProductImpl<_Lhs,_Rhs,Option,
|
||||
typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind,
|
||||
|
||||
268
Eigen/src/Core/ProductEvaluators.h
Executable file → Normal file
268
Eigen/src/Core/ProductEvaluators.h
Executable file → Normal file
@@ -35,23 +35,28 @@ struct evaluator<Product<Lhs, Rhs, Options> >
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
|
||||
};
|
||||
|
||||
// Catch scalar * ( A * B ) and transform it to (A*scalar) * B
|
||||
// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
|
||||
// TODO we should apply that rule only if that's really helpful
|
||||
template<typename Lhs, typename Rhs, typename Scalar>
|
||||
struct evaluator_traits<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
|
||||
: evaluator_traits_base<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
|
||||
template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
|
||||
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
|
||||
const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
|
||||
const Product<Lhs, Rhs, DefaultProduct> > >
|
||||
{
|
||||
enum { AssumeAliasing = 1 };
|
||||
static const bool value = true;
|
||||
};
|
||||
template<typename Lhs, typename Rhs, typename Scalar>
|
||||
struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
|
||||
: public evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> >
|
||||
template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
|
||||
struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
|
||||
const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
|
||||
const Product<Lhs, Rhs, DefaultProduct> > >
|
||||
: public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> >
|
||||
{
|
||||
typedef CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > XprType;
|
||||
typedef evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> > Base;
|
||||
|
||||
typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
|
||||
const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
|
||||
const Product<Lhs, Rhs, DefaultProduct> > XprType;
|
||||
typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
|
||||
: Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs())
|
||||
: Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
|
||||
{}
|
||||
};
|
||||
|
||||
@@ -81,17 +86,8 @@ template< typename Lhs, typename Rhs,
|
||||
struct generic_product_impl;
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct evaluator_traits<Product<Lhs, Rhs, DefaultProduct> >
|
||||
: evaluator_traits_base<Product<Lhs, Rhs, DefaultProduct> >
|
||||
{
|
||||
enum { AssumeAliasing = 1 };
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct evaluator_traits<Product<Lhs, Rhs, AliasFreeProduct> >
|
||||
: evaluator_traits_base<Product<Lhs, Rhs, AliasFreeProduct> >
|
||||
{
|
||||
enum { AssumeAliasing = 0 };
|
||||
struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct> > {
|
||||
static const bool value = true;
|
||||
};
|
||||
|
||||
// This is the default evaluator implementation for products:
|
||||
@@ -107,7 +103,8 @@ struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsSh
|
||||
Flags = Base::Flags | EvalBeforeNestingBit
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
explicit product_evaluator(const XprType& xpr)
|
||||
: m_result(xpr.rows(), xpr.cols())
|
||||
{
|
||||
::new (static_cast<Base*>(this)) Base(m_result);
|
||||
@@ -131,14 +128,22 @@ protected:
|
||||
PlainObject m_result;
|
||||
};
|
||||
|
||||
// The following three shortcuts are enabled only if the scalar types match exactly.
|
||||
// TODO: we could enable them for different scalar types when the product is not vectorized.
|
||||
|
||||
// Dense = Product
|
||||
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar>, Dense2Dense,
|
||||
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense,
|
||||
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
||||
{
|
||||
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
|
||||
static EIGEN_STRONG_INLINE
|
||||
void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
// FIXME shall we handle nested_eval here?
|
||||
generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
|
||||
}
|
||||
@@ -146,12 +151,14 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
|
||||
|
||||
// Dense += Product
|
||||
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar>, Dense2Dense,
|
||||
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense,
|
||||
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
||||
{
|
||||
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &)
|
||||
static EIGEN_STRONG_INLINE
|
||||
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
// FIXME shall we handle nested_eval here?
|
||||
generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
|
||||
}
|
||||
@@ -159,12 +166,14 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
|
||||
|
||||
// Dense -= Product
|
||||
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar>, Dense2Dense,
|
||||
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense,
|
||||
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
||||
{
|
||||
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &)
|
||||
static EIGEN_STRONG_INLINE
|
||||
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
// FIXME shall we handle nested_eval here?
|
||||
generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
|
||||
}
|
||||
@@ -174,15 +183,17 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
|
||||
// Dense ?= scalar * Product
|
||||
// TODO we should apply that rule if that's really helpful
|
||||
// for instance, this is not good for inner products
|
||||
template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis>
|
||||
struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>,
|
||||
const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense, Scalar>
|
||||
template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain>
|
||||
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
|
||||
const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense>
|
||||
{
|
||||
typedef CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>,
|
||||
const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
|
||||
typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
|
||||
const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
|
||||
const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
|
||||
static EIGEN_STRONG_INLINE
|
||||
void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
|
||||
{
|
||||
call_assignment_no_alias(dst, (src.functor().m_other * src.nestedExpression().lhs())*src.nestedExpression().rhs(), func);
|
||||
call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -190,32 +201,39 @@ struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBi
|
||||
// Catch "Dense ?= xpr + Product<>" expression to save one temporary
|
||||
// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
|
||||
|
||||
template<typename DstXprType, typename OtherXpr, typename ProductType, typename Scalar, typename Func1, typename Func2>
|
||||
struct assignment_from_xpr_plus_product
|
||||
template<typename OtherXpr, typename Lhs, typename Rhs>
|
||||
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
|
||||
const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
|
||||
static const bool value = true;
|
||||
};
|
||||
|
||||
template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
|
||||
struct assignment_from_xpr_op_product
|
||||
{
|
||||
typedef CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr, const ProductType> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const Func1& func)
|
||||
template<typename SrcXprType, typename InitialFunc>
|
||||
static EIGEN_STRONG_INLINE
|
||||
void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
|
||||
{
|
||||
call_assignment_no_alias(dst, src.lhs(), func);
|
||||
call_assignment_no_alias(dst, src.lhs(), Func1());
|
||||
call_assignment_no_alias(dst, src.rhs(), Func2());
|
||||
}
|
||||
};
|
||||
|
||||
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
|
||||
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
|
||||
const Product<Lhs,Rhs,DefaultProduct> >, internal::assign_op<Scalar>, Dense2Dense>
|
||||
: assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::assign_op<Scalar>, internal::add_assign_op<Scalar> >
|
||||
{};
|
||||
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
|
||||
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
|
||||
const Product<Lhs,Rhs,DefaultProduct> >, internal::add_assign_op<Scalar>, Dense2Dense>
|
||||
: assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::add_assign_op<Scalar>, internal::add_assign_op<Scalar> >
|
||||
{};
|
||||
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
|
||||
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
|
||||
const Product<Lhs,Rhs,DefaultProduct> >, internal::sub_assign_op<Scalar>, Dense2Dense>
|
||||
: assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::sub_assign_op<Scalar>, internal::sub_assign_op<Scalar> >
|
||||
{};
|
||||
#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \
|
||||
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \
|
||||
struct Assignment<DstXprType, CwiseBinaryOp<internal::BINOP<OtherScalar,ProdScalar>, const OtherXpr, \
|
||||
const Product<Lhs,Rhs,DefaultProduct> >, internal::ASSIGN_OP<DstScalar,SrcScalar>, Dense2Dense> \
|
||||
: assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::ASSIGN_OP<DstScalar,OtherScalar>, internal::ASSIGN_OP2<DstScalar,ProdScalar> > \
|
||||
{}
|
||||
|
||||
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op,add_assign_op);
|
||||
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op);
|
||||
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op);
|
||||
|
||||
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op,sub_assign_op);
|
||||
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op);
|
||||
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op);
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
@@ -245,7 +263,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
|
||||
|
||||
// Column major result
|
||||
template<typename Dst, typename Lhs, typename Rhs, typename Func>
|
||||
EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
|
||||
void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
|
||||
{
|
||||
evaluator<Rhs> rhsEval(rhs);
|
||||
typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
|
||||
@@ -253,12 +271,12 @@ EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, cons
|
||||
// FIXME not very good if rhs is real and lhs complex while alpha is real too
|
||||
const Index cols = dst.cols();
|
||||
for (Index j=0; j<cols; ++j)
|
||||
func(dst.col(j), rhsEval.coeff(0,j) * actual_lhs);
|
||||
func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);
|
||||
}
|
||||
|
||||
// Row major result
|
||||
template<typename Dst, typename Lhs, typename Rhs, typename Func>
|
||||
EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
|
||||
void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
|
||||
{
|
||||
evaluator<Lhs> lhsEval(lhs);
|
||||
typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
|
||||
@@ -266,7 +284,7 @@ EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, cons
|
||||
// FIXME not very good if lhs is real and rhs complex while alpha is real too
|
||||
const Index rows = dst.rows();
|
||||
for (Index i=0; i<rows; ++i)
|
||||
func(dst.row(i), lhsEval.coeff(i,0) * actual_rhs);
|
||||
func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs);
|
||||
}
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
@@ -321,19 +339,19 @@ struct generic_product_impl_base
|
||||
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
||||
|
||||
template<typename Dst>
|
||||
static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{ dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
|
||||
|
||||
template<typename Dst>
|
||||
static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{ scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
|
||||
|
||||
template<typename Dst>
|
||||
static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{ scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
|
||||
|
||||
template<typename Dst>
|
||||
static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
||||
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
||||
{ Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
|
||||
|
||||
};
|
||||
@@ -342,17 +360,21 @@ template<typename Lhs, typename Rhs>
|
||||
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
|
||||
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
|
||||
{
|
||||
typedef typename nested_eval<Lhs,1>::type LhsNested;
|
||||
typedef typename nested_eval<Rhs,1>::type RhsNested;
|
||||
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
||||
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
|
||||
typedef typename internal::conditional<int(Side)==OnTheRight,Lhs,Rhs>::type MatrixType;
|
||||
typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
|
||||
|
||||
template<typename Dest>
|
||||
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
||||
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
||||
{
|
||||
LhsNested actual_lhs(lhs);
|
||||
RhsNested actual_rhs(rhs);
|
||||
internal::gemv_dense_selector<Side,
|
||||
(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
|
||||
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
|
||||
>::run(lhs, rhs, dst, alpha);
|
||||
>::run(actual_lhs, actual_rhs, dst, alpha);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -362,25 +384,25 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
|
||||
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
||||
|
||||
template<typename Dst>
|
||||
static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
// Same as: dst.noalias() = lhs.lazyProduct(rhs);
|
||||
// but easier on the compiler side
|
||||
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<Scalar>());
|
||||
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());
|
||||
}
|
||||
|
||||
template<typename Dst>
|
||||
static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
// dst.noalias() += lhs.lazyProduct(rhs);
|
||||
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<Scalar>());
|
||||
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
|
||||
}
|
||||
|
||||
template<typename Dst>
|
||||
static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
// dst.noalias() -= lhs.lazyProduct(rhs);
|
||||
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<Scalar>());
|
||||
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
|
||||
}
|
||||
|
||||
// template<typename Dst>
|
||||
@@ -412,10 +434,9 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
||||
typedef Product<Lhs, Rhs, LazyProduct> XprType;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
explicit product_evaluator(const XprType& xpr)
|
||||
: m_lhs(xpr.lhs()),
|
||||
m_rhs(xpr.rhs()),
|
||||
m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that!
|
||||
@@ -426,6 +447,18 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
||||
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
|
||||
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
|
||||
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
||||
#if 0
|
||||
std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n";
|
||||
std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n";
|
||||
std::cerr << "LhsAlignment= " << LhsAlignment << "\n";
|
||||
std::cerr << "RhsAlignment= " << RhsAlignment << "\n";
|
||||
std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n";
|
||||
std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n";
|
||||
std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n";
|
||||
std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n";
|
||||
std::cerr << "Alignment= " << Alignment << "\n";
|
||||
std::cerr << "Flags= " << Flags << "\n";
|
||||
#endif
|
||||
}
|
||||
|
||||
// Everything below here is taken from CoeffBasedProduct.h
|
||||
@@ -438,16 +471,20 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
||||
|
||||
typedef evaluator<LhsNestedCleaned> LhsEtorType;
|
||||
typedef evaluator<RhsNestedCleaned> RhsEtorType;
|
||||
|
||||
|
||||
enum {
|
||||
RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
|
||||
ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
|
||||
InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
|
||||
MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime,
|
||||
|
||||
PacketSize = packet_traits<Scalar>::size,
|
||||
MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime
|
||||
};
|
||||
|
||||
typedef typename find_best_packet<Scalar,RowsAtCompileTime>::type LhsVecPacketType;
|
||||
typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
|
||||
|
||||
enum {
|
||||
|
||||
LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
|
||||
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
|
||||
CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
|
||||
@@ -460,23 +497,24 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
||||
LhsFlags = LhsEtorType::Flags,
|
||||
RhsFlags = RhsEtorType::Flags,
|
||||
|
||||
LhsAlignment = LhsEtorType::Alignment,
|
||||
RhsAlignment = RhsEtorType::Alignment,
|
||||
|
||||
LhsRowMajor = LhsFlags & RowMajorBit,
|
||||
RhsRowMajor = RhsFlags & RowMajorBit,
|
||||
|
||||
LhsVecPacketSize = unpacket_traits<LhsVecPacketType>::size,
|
||||
RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size,
|
||||
|
||||
// Here, we don't care about alignment larger than the usable packet size.
|
||||
LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
|
||||
RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
|
||||
|
||||
SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
|
||||
|
||||
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
|
||||
&& (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % PacketSize) == 0) ),
|
||||
|
||||
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
|
||||
&& (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % PacketSize) == 0) ),
|
||||
CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
|
||||
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1),
|
||||
|
||||
EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
|
||||
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
|
||||
: (RhsRowMajor && !CanVectorizeLhs),
|
||||
: (bool(RhsRowMajor) && !CanVectorizeLhs),
|
||||
|
||||
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
|
||||
| (EvalToRowMajor ? RowMajorBit : 0)
|
||||
@@ -487,15 +525,15 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
||||
LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
|
||||
RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
|
||||
|
||||
Alignment = CanVectorizeLhs ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
|
||||
: CanVectorizeRhs ? (RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
|
||||
Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
|
||||
: bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
|
||||
: 0,
|
||||
|
||||
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
|
||||
* of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
|
||||
* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
|
||||
* the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
|
||||
*/
|
||||
* of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
|
||||
* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
|
||||
* the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
|
||||
*/
|
||||
CanVectorizeInner = SameType
|
||||
&& LhsRowMajor
|
||||
&& (!RhsRowMajor)
|
||||
@@ -514,8 +552,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
const Index row = RowsAtCompileTime == 1 ? 0 : index;
|
||||
const Index col = RowsAtCompileTime == 1 ? index : 0;
|
||||
const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
|
||||
const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
|
||||
return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
|
||||
}
|
||||
|
||||
@@ -533,14 +571,14 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
||||
template<int LoadMode, typename PacketType>
|
||||
const PacketType packet(Index index) const
|
||||
{
|
||||
const Index row = RowsAtCompileTime == 1 ? 0 : index;
|
||||
const Index col = RowsAtCompileTime == 1 ? index : 0;
|
||||
const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
|
||||
const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
|
||||
return packet<LoadMode,PacketType>(row,col);
|
||||
}
|
||||
|
||||
protected:
|
||||
const LhsNested m_lhs;
|
||||
const RhsNested m_rhs;
|
||||
typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
|
||||
typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
|
||||
|
||||
LhsEtorType m_lhsImpl;
|
||||
RhsEtorType m_rhsImpl;
|
||||
@@ -574,7 +612,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
||||
{
|
||||
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode,Packet>(UnrollingIndex-1, col), res);
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -584,7 +622,7 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
||||
{
|
||||
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
||||
res = pmadd(lhs.template packet<LoadMode,Packet>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
|
||||
res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -593,7 +631,7 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode,Packet>(0, col));
|
||||
res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -602,7 +640,7 @@ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pmul(lhs.template packet<LoadMode,Packet>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
|
||||
res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -611,7 +649,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pset1<Packet>(0);
|
||||
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -620,7 +658,7 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pset1<Packet>(0);
|
||||
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -629,7 +667,7 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
||||
{
|
||||
res = pset1<Packet>(0);
|
||||
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
||||
for(Index i = 0; i < innerDim; ++i)
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
|
||||
}
|
||||
@@ -640,7 +678,7 @@ struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
||||
{
|
||||
res = pset1<Packet>(0);
|
||||
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
||||
for(Index i = 0; i < innerDim; ++i)
|
||||
res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
|
||||
}
|
||||
@@ -725,7 +763,7 @@ template<typename MatrixType, typename DiagonalType, typename Derived, int Produ
|
||||
struct diagonal_product_evaluator_base
|
||||
: evaluator_base<Derived>
|
||||
{
|
||||
typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
|
||||
typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
|
||||
public:
|
||||
enum {
|
||||
CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
|
||||
@@ -1001,7 +1039,7 @@ struct transposition_matrix_product
|
||||
const Index size = tr.size();
|
||||
StorageIndex j = 0;
|
||||
|
||||
if(!(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat)))
|
||||
if(!is_same_dense(dst,mat))
|
||||
dst = mat;
|
||||
|
||||
for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
|
||||
|
||||
@@ -16,8 +16,7 @@ namespace internal {
|
||||
|
||||
template<typename Scalar> struct scalar_random_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op)
|
||||
template<typename Index>
|
||||
inline const Scalar operator() (Index, Index = 0) const { return random<Scalar>(); }
|
||||
inline const Scalar operator() () const { return random<Scalar>(); }
|
||||
};
|
||||
|
||||
template<typename Scalar>
|
||||
|
||||
@@ -27,8 +27,9 @@ template<typename Func, typename Derived>
|
||||
struct redux_traits
|
||||
{
|
||||
public:
|
||||
typedef typename find_best_packet<typename Derived::Scalar,Derived::SizeAtCompileTime>::type PacketType;
|
||||
enum {
|
||||
PacketSize = packet_traits<typename Derived::Scalar>::size,
|
||||
PacketSize = unpacket_traits<PacketType>::size,
|
||||
InnerMaxSize = int(Derived::IsRowMajor)
|
||||
? Derived::MaxColsAtCompileTime
|
||||
: Derived::MaxRowsAtCompileTime
|
||||
@@ -37,8 +38,8 @@ public:
|
||||
enum {
|
||||
MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
|
||||
&& (functor_traits<Func>::PacketAccess),
|
||||
MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
|
||||
MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
|
||||
MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit),
|
||||
MaySliceVectorize = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize
|
||||
};
|
||||
|
||||
public:
|
||||
@@ -137,12 +138,12 @@ template<typename Func, typename Derived, int Start, int Length>
|
||||
struct redux_vec_unroller
|
||||
{
|
||||
enum {
|
||||
PacketSize = packet_traits<typename Derived::Scalar>::size,
|
||||
PacketSize = redux_traits<Func, Derived>::PacketSize,
|
||||
HalfLength = Length/2
|
||||
};
|
||||
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
|
||||
|
||||
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
|
||||
{
|
||||
@@ -156,14 +157,14 @@ template<typename Func, typename Derived, int Start>
|
||||
struct redux_vec_unroller<Func, Derived, Start, 1>
|
||||
{
|
||||
enum {
|
||||
index = Start * packet_traits<typename Derived::Scalar>::size,
|
||||
index = Start * redux_traits<Func, Derived>::PacketSize,
|
||||
outer = index / int(Derived::InnerSizeAtCompileTime),
|
||||
inner = index % int(Derived::InnerSizeAtCompileTime),
|
||||
alignment = Derived::Alignment
|
||||
};
|
||||
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
|
||||
|
||||
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
|
||||
{
|
||||
@@ -209,13 +210,13 @@ template<typename Func, typename Derived>
|
||||
struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
|
||||
|
||||
static Scalar run(const Derived &mat, const Func& func)
|
||||
{
|
||||
const Index size = mat.size();
|
||||
|
||||
const Index packetSize = packet_traits<Scalar>::size;
|
||||
const Index packetSize = redux_traits<Func, Derived>::PacketSize;
|
||||
const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
|
||||
enum {
|
||||
alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
|
||||
@@ -268,7 +269,7 @@ template<typename Func, typename Derived, int Unrolling>
|
||||
struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename packet_traits<Scalar>::type PacketType;
|
||||
typedef typename redux_traits<Func, Derived>::PacketType PacketType;
|
||||
|
||||
EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
|
||||
{
|
||||
@@ -276,7 +277,7 @@ struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
|
||||
const Index innerSize = mat.innerSize();
|
||||
const Index outerSize = mat.outerSize();
|
||||
enum {
|
||||
packetSize = packet_traits<Scalar>::size
|
||||
packetSize = redux_traits<Func, Derived>::PacketSize
|
||||
};
|
||||
const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
|
||||
Scalar res;
|
||||
@@ -306,9 +307,10 @@ template<typename Func, typename Derived>
|
||||
struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename packet_traits<Scalar>::type PacketScalar;
|
||||
|
||||
typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
|
||||
enum {
|
||||
PacketSize = packet_traits<Scalar>::size,
|
||||
PacketSize = redux_traits<Func, Derived>::PacketSize,
|
||||
Size = Derived::SizeAtCompileTime,
|
||||
VectorizedSize = (Size / PacketSize) * PacketSize
|
||||
};
|
||||
@@ -367,11 +369,11 @@ public:
|
||||
{ return m_evaluator.coeff(index); }
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketReturnType packet(Index row, Index col) const
|
||||
PacketType packet(Index row, Index col) const
|
||||
{ return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketReturnType packet(Index index) const
|
||||
PacketType packet(Index index) const
|
||||
{ return m_evaluator.template packet<LoadMode,PacketType>(index); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -379,7 +381,7 @@ public:
|
||||
{ return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketReturnType packetByOuterInner(Index outer, Index inner) const
|
||||
PacketType packetByOuterInner(Index outer, Index inner) const
|
||||
{ return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
||||
|
||||
const XprType & nestedExpression() const { return m_xpr; }
|
||||
@@ -423,7 +425,7 @@ template<typename Derived>
|
||||
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::minCoeff() const
|
||||
{
|
||||
return derived().redux(Eigen::internal::scalar_min_op<Scalar>());
|
||||
return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
|
||||
}
|
||||
|
||||
/** \returns the maximum of all coefficients of \c *this.
|
||||
@@ -433,10 +435,12 @@ template<typename Derived>
|
||||
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::maxCoeff() const
|
||||
{
|
||||
return derived().redux(Eigen::internal::scalar_max_op<Scalar>());
|
||||
return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
|
||||
}
|
||||
|
||||
/** \returns the sum of all coefficients of *this
|
||||
/** \returns the sum of all coefficients of \c *this
|
||||
*
|
||||
* If \c *this is empty, then the value 0 is returned.
|
||||
*
|
||||
* \sa trace(), prod(), mean()
|
||||
*/
|
||||
@@ -446,7 +450,7 @@ DenseBase<Derived>::sum() const
|
||||
{
|
||||
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
|
||||
return Scalar(0);
|
||||
return derived().redux(Eigen::internal::scalar_sum_op<Scalar>());
|
||||
return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
|
||||
}
|
||||
|
||||
/** \returns the mean of all coefficients of *this
|
||||
@@ -457,7 +461,14 @@ template<typename Derived>
|
||||
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::mean() const
|
||||
{
|
||||
return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
|
||||
#ifdef __INTEL_COMPILER
|
||||
#pragma warning push
|
||||
#pragma warning ( disable : 2259 )
|
||||
#endif
|
||||
return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
|
||||
#ifdef __INTEL_COMPILER
|
||||
#pragma warning pop
|
||||
#endif
|
||||
}
|
||||
|
||||
/** \returns the product of all coefficients of *this
|
||||
|
||||
@@ -12,76 +12,6 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class Ref
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief A matrix or vector expression mapping an existing expression
|
||||
*
|
||||
* \tparam PlainObjectType the equivalent matrix type of the mapped data
|
||||
* \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
|
||||
* The default is \c #Unaligned.
|
||||
* \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
|
||||
* but accepts a variable outer stride (leading dimension).
|
||||
* This can be overridden by specifying strides.
|
||||
* The type passed here must be a specialization of the Stride template, see examples below.
|
||||
*
|
||||
* This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
|
||||
* A Ref<> object can represent either a const expression or a l-value:
|
||||
* \code
|
||||
* // in-out argument:
|
||||
* void foo1(Ref<VectorXf> x);
|
||||
*
|
||||
* // read-only const argument:
|
||||
* void foo2(const Ref<const VectorXf>& x);
|
||||
* \endcode
|
||||
*
|
||||
* In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
|
||||
* By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
|
||||
* Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with
|
||||
* the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)
|
||||
* can be greater than the number of rows.
|
||||
*
|
||||
* In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
|
||||
* Here are some examples:
|
||||
* \code
|
||||
* MatrixXf A;
|
||||
* VectorXf a;
|
||||
* foo1(a.head()); // OK
|
||||
* foo1(A.col()); // OK
|
||||
* foo1(A.row()); // Compilation error because here innerstride!=1
|
||||
* foo2(A.row()); // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object
|
||||
* foo2(A.row().transpose()); // The row is copied into a contiguous temporary
|
||||
* foo2(2*a); // The expression is evaluated into a temporary
|
||||
* foo2(A.col().segment(2,4)); // No temporary
|
||||
* \endcode
|
||||
*
|
||||
* The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters.
|
||||
* Here is an example accepting an innerstride!=1:
|
||||
* \code
|
||||
* // in-out argument:
|
||||
* void foo3(Ref<VectorXf,0,InnerStride<> > x);
|
||||
* foo3(A.row()); // OK
|
||||
* \endcode
|
||||
* The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
|
||||
* expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a
|
||||
* template function, e.g.:
|
||||
* \code
|
||||
* // in the .h:
|
||||
* void foo(const Ref<MatrixXf>& A);
|
||||
* void foo(const Ref<MatrixXf,0,Stride<> >& A);
|
||||
*
|
||||
* // in the .cpp:
|
||||
* template<typename TypeOfA> void foo_impl(const TypeOfA& A) {
|
||||
* ... // crazy code goes here
|
||||
* }
|
||||
* void foo(const Ref<MatrixXf>& A) { foo_impl(A); }
|
||||
* void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); }
|
||||
* \endcode
|
||||
*
|
||||
*
|
||||
* \sa PlainObjectBase::Map(), \ref TopicStorageOrders
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename _PlainObjectType, int _Options, typename _StrideType>
|
||||
@@ -105,7 +35,13 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
|
||||
|| (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1),
|
||||
OuterStrideMatch = Derived::IsVectorAtCompileTime
|
||||
|| int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
|
||||
AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (int(evaluator<Derived>::Alignment) >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment
|
||||
// NOTE, this indirection of evaluator<Derived>::Alignment is needed
|
||||
// to work around a very strange bug in MSVC related to the instantiation
|
||||
// of has_*ary_operator in evaluator<CwiseNullaryOp>.
|
||||
// This line is surprisingly very sensitive. For instance, simply adding parentheses
|
||||
// as "DerivedAlignment = (int(evaluator<Derived>::Alignment))," will make MSVC fail...
|
||||
DerivedAlignment = int(evaluator<Derived>::Alignment),
|
||||
AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (DerivedAlignment >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment
|
||||
ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
|
||||
MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
|
||||
};
|
||||
@@ -182,7 +118,75 @@ protected:
|
||||
StrideBase m_stride;
|
||||
};
|
||||
|
||||
|
||||
/** \class Ref
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief A matrix or vector expression mapping an existing expression
|
||||
*
|
||||
* \tparam PlainObjectType the equivalent matrix type of the mapped data
|
||||
* \tparam Options specifies the pointer alignment in bytes. It can be: \c #Aligned128, \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
|
||||
* The default is \c #Unaligned.
|
||||
* \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
|
||||
* but accepts a variable outer stride (leading dimension).
|
||||
* This can be overridden by specifying strides.
|
||||
* The type passed here must be a specialization of the Stride template, see examples below.
|
||||
*
|
||||
* This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
|
||||
* A Ref<> object can represent either a const expression or a l-value:
|
||||
* \code
|
||||
* // in-out argument:
|
||||
* void foo1(Ref<VectorXf> x);
|
||||
*
|
||||
* // read-only const argument:
|
||||
* void foo2(const Ref<const VectorXf>& x);
|
||||
* \endcode
|
||||
*
|
||||
* In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
|
||||
* By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
|
||||
* Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with
|
||||
* the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)
|
||||
* can be greater than the number of rows.
|
||||
*
|
||||
* In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
|
||||
* Here are some examples:
|
||||
* \code
|
||||
* MatrixXf A;
|
||||
* VectorXf a;
|
||||
* foo1(a.head()); // OK
|
||||
* foo1(A.col()); // OK
|
||||
* foo1(A.row()); // Compilation error because here innerstride!=1
|
||||
* foo2(A.row()); // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object
|
||||
* foo2(A.row().transpose()); // The row is copied into a contiguous temporary
|
||||
* foo2(2*a); // The expression is evaluated into a temporary
|
||||
* foo2(A.col().segment(2,4)); // No temporary
|
||||
* \endcode
|
||||
*
|
||||
* The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters.
|
||||
* Here is an example accepting an innerstride!=1:
|
||||
* \code
|
||||
* // in-out argument:
|
||||
* void foo3(Ref<VectorXf,0,InnerStride<> > x);
|
||||
* foo3(A.row()); // OK
|
||||
* \endcode
|
||||
* The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
|
||||
* expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a
|
||||
* template function, e.g.:
|
||||
* \code
|
||||
* // in the .h:
|
||||
* void foo(const Ref<MatrixXf>& A);
|
||||
* void foo(const Ref<MatrixXf,0,Stride<> >& A);
|
||||
*
|
||||
* // in the .cpp:
|
||||
* template<typename TypeOfA> void foo_impl(const TypeOfA& A) {
|
||||
* ... // crazy code goes here
|
||||
* }
|
||||
* void foo(const Ref<MatrixXf>& A) { foo_impl(A); }
|
||||
* void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); }
|
||||
* \endcode
|
||||
*
|
||||
*
|
||||
* \sa PlainObjectBase::Map(), \ref TopicStorageOrders
|
||||
*/
|
||||
template<typename PlainObjectType, int Options, typename StrideType> class Ref
|
||||
: public RefBase<Ref<PlainObjectType, Options, StrideType> >
|
||||
{
|
||||
@@ -209,6 +213,7 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
|
||||
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
|
||||
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
|
||||
#else
|
||||
/** Implicit constructor from any dense expression */
|
||||
template<typename Derived>
|
||||
inline Ref(DenseBase<Derived>& expr)
|
||||
#endif
|
||||
@@ -263,7 +268,7 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<
|
||||
template<typename Expression>
|
||||
EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type)
|
||||
{
|
||||
internal::call_assignment_no_alias(m_object,expr,internal::assign_op<Scalar>());
|
||||
internal::call_assignment_no_alias(m_object,expr,internal::assign_op<Scalar,Scalar>());
|
||||
Base::construct(m_object);
|
||||
}
|
||||
|
||||
|
||||
@@ -12,21 +12,6 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/**
|
||||
* \class Replicate
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the multiple replication of a matrix or vector
|
||||
*
|
||||
* \param MatrixType the type of the object we are replicating
|
||||
*
|
||||
* This class represents an expression of the multiple replication of a matrix or vector.
|
||||
* It is the return type of DenseBase::replicate() and most of the time
|
||||
* this is the only way it is used.
|
||||
*
|
||||
* \sa DenseBase::replicate()
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename MatrixType,int RowFactor,int ColFactor>
|
||||
struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
|
||||
@@ -57,6 +42,22 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* \class Replicate
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the multiple replication of a matrix or vector
|
||||
*
|
||||
* \tparam MatrixType the type of the object we are replicating
|
||||
* \tparam RowFactor number of repetitions at compile time along the vertical direction, can be Dynamic.
|
||||
* \tparam ColFactor number of repetitions at compile time along the horizontal direction, can be Dynamic.
|
||||
*
|
||||
* This class represents an expression of the multiple replication of a matrix or vector.
|
||||
* It is the return type of DenseBase::replicate() and most of the time
|
||||
* this is the only way it is used.
|
||||
*
|
||||
* \sa DenseBase::replicate()
|
||||
*/
|
||||
template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
|
||||
: public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type
|
||||
{
|
||||
|
||||
@@ -13,11 +13,6 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class ReturnByValue
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Derived>
|
||||
@@ -48,6 +43,10 @@ struct nested_eval<ReturnByValue<Derived>, n, PlainObject>
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \class ReturnByValue
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
*/
|
||||
template<typename Derived> class ReturnByValue
|
||||
: public internal::dense_xpr_base< ReturnByValue<Derived> >::type, internal::no_assignment_operator
|
||||
{
|
||||
|
||||
@@ -14,20 +14,6 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class Reverse
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the reverse of a vector or matrix
|
||||
*
|
||||
* \param MatrixType the type of the object of which we are taking the reverse
|
||||
*
|
||||
* This class represents an expression of the reverse of a vector.
|
||||
* It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::reverse(), VectorwiseOp::reverse()
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename MatrixType, int Direction>
|
||||
@@ -60,6 +46,20 @@ template<typename PacketType> struct reverse_packet_cond<PacketType,false>
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \class Reverse
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the reverse of a vector or matrix
|
||||
*
|
||||
* \tparam MatrixType the type of the object of which we are taking the reverse
|
||||
* \tparam Direction defines the direction of the reverse operation, can be Vertical, Horizontal, or BothDirections
|
||||
*
|
||||
* This class represents an expression of the reverse of a vector.
|
||||
* It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::reverse(), VectorwiseOp::reverse()
|
||||
*/
|
||||
template<typename MatrixType, int Direction> class Reverse
|
||||
: public internal::dense_xpr_base< Reverse<MatrixType, Direction> >::type
|
||||
{
|
||||
|
||||
@@ -32,7 +32,7 @@ namespace internal {
|
||||
template<typename MatrixType, unsigned int UpLo>
|
||||
struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
|
||||
{
|
||||
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
|
||||
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
|
||||
typedef MatrixType ExpressionType;
|
||||
typedef typename MatrixType::PlainObject FullMatrixType;
|
||||
@@ -45,7 +45,7 @@ struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
|
||||
};
|
||||
}
|
||||
|
||||
// FIXME could also be called SelfAdjointWrapper to be consistent with DiagonalWrapper ??
|
||||
|
||||
template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
: public TriangularBase<SelfAdjointView<_MatrixType, UpLo> >
|
||||
{
|
||||
@@ -55,14 +55,17 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
typedef TriangularBase<SelfAdjointView> Base;
|
||||
typedef typename internal::traits<SelfAdjointView>::MatrixTypeNested MatrixTypeNested;
|
||||
typedef typename internal::traits<SelfAdjointView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
|
||||
typedef MatrixTypeNestedCleaned NestedExpression;
|
||||
|
||||
/** \brief The type of coefficients in this matrix */
|
||||
typedef typename internal::traits<SelfAdjointView>::Scalar Scalar;
|
||||
typedef typename MatrixType::StorageIndex StorageIndex;
|
||||
typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
|
||||
|
||||
enum {
|
||||
Mode = internal::traits<SelfAdjointView>::Mode,
|
||||
Flags = internal::traits<SelfAdjointView>::Flags
|
||||
Flags = internal::traits<SelfAdjointView>::Flags,
|
||||
TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0)
|
||||
};
|
||||
typedef typename MatrixType::PlainObject PlainObject;
|
||||
|
||||
@@ -97,7 +100,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(SelfAdjointView);
|
||||
Base::check_coordinates_internal(row, col);
|
||||
return m_matrix.const_cast_derived().coeffRef(row, col);
|
||||
return m_matrix.coeffRef(row, col);
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
@@ -107,7 +110,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
|
||||
MatrixTypeNestedCleaned& nestedExpression() { return m_matrix; }
|
||||
|
||||
/** Efficient triangular matrix times vector/matrix product */
|
||||
template<typename OtherDerived>
|
||||
@@ -128,7 +131,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
}
|
||||
|
||||
friend EIGEN_DEVICE_FUNC
|
||||
const SelfAdjointView<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,MatrixType>,UpLo>
|
||||
const SelfAdjointView<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,MatrixType,product),UpLo>
|
||||
operator*(const Scalar& s, const SelfAdjointView& mat)
|
||||
{
|
||||
return (s*mat.nestedExpression()).template selfadjointView<UpLo>();
|
||||
@@ -162,6 +165,71 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
EIGEN_DEVICE_FUNC
|
||||
SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
|
||||
|
||||
/** \returns an expression of a triangular view extracted from the current selfadjoint view of a given triangular part
|
||||
*
|
||||
* The parameter \a TriMode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
|
||||
* \c #Lower, \c #StrictlyLower, \c #UnitLower.
|
||||
*
|
||||
* If \c TriMode references the same triangular part as \c *this, then this method simply returns a \c TriangularView of the nested expression,
|
||||
* otherwise, the adjoint of the nested expression is taken first, thus returning a \c TriangularView<typename MatrixType::AdjointReturnType,TriMode> object.
|
||||
*
|
||||
* \sa MatrixBase::triangularView(), class TriangularView
|
||||
*/
|
||||
template<unsigned int TriMode>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)),
|
||||
TriangularView<MatrixType,TriMode>,
|
||||
TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type
|
||||
triangularView() const
|
||||
{
|
||||
typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::ConstTransposeReturnType>::type tmp1(m_matrix);
|
||||
typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::AdjointReturnType>::type tmp2(tmp1);
|
||||
return typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)),
|
||||
TriangularView<MatrixType,TriMode>,
|
||||
TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2);
|
||||
}
|
||||
|
||||
typedef SelfAdjointView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
|
||||
/** \sa MatrixBase::conjugate() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const ConjugateReturnType conjugate() const
|
||||
{ return ConjugateReturnType(m_matrix.conjugate()); }
|
||||
|
||||
typedef SelfAdjointView<const typename MatrixType::AdjointReturnType,TransposeMode> AdjointReturnType;
|
||||
/** \sa MatrixBase::adjoint() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const AdjointReturnType adjoint() const
|
||||
{ return AdjointReturnType(m_matrix.adjoint()); }
|
||||
|
||||
typedef SelfAdjointView<typename MatrixType::TransposeReturnType,TransposeMode> TransposeReturnType;
|
||||
/** \sa MatrixBase::transpose() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline TransposeReturnType transpose()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
typename MatrixType::TransposeReturnType tmp(m_matrix);
|
||||
return TransposeReturnType(tmp);
|
||||
}
|
||||
|
||||
typedef SelfAdjointView<const typename MatrixType::ConstTransposeReturnType,TransposeMode> ConstTransposeReturnType;
|
||||
/** \sa MatrixBase::transpose() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const ConstTransposeReturnType transpose() const
|
||||
{
|
||||
return ConstTransposeReturnType(m_matrix.transpose());
|
||||
}
|
||||
|
||||
/** \returns a const expression of the main diagonal of the matrix \c *this
|
||||
*
|
||||
* This method simply returns the diagonal of the nested expression, thus by-passing the SelfAdjointView decorator.
|
||||
*
|
||||
* \sa MatrixBase::diagonal(), class Diagonal */
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename MatrixType::ConstDiagonalReturnType diagonal() const
|
||||
{
|
||||
return typename MatrixType::ConstDiagonalReturnType(m_matrix);
|
||||
}
|
||||
|
||||
/////////// Cholesky module ///////////
|
||||
|
||||
const LLT<PlainObject, UpLo> llt() const;
|
||||
@@ -203,8 +271,6 @@ struct evaluator_traits<SelfAdjointView<MatrixType,Mode> >
|
||||
{
|
||||
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
|
||||
typedef SelfAdjointShape Shape;
|
||||
|
||||
static const int AssumeAliasing = 0;
|
||||
};
|
||||
|
||||
template<int UpLo, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version>
|
||||
@@ -253,6 +319,7 @@ public:
|
||||
* Implementation of MatrixBase methods
|
||||
***************************************************************************/
|
||||
|
||||
/** This is the const version of MatrixBase::selfadjointView() */
|
||||
template<typename Derived>
|
||||
template<unsigned int UpLo>
|
||||
typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type
|
||||
@@ -261,6 +328,15 @@ MatrixBase<Derived>::selfadjointView() const
|
||||
return typename ConstSelfAdjointViewReturnType<UpLo>::Type(derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of a symmetric/self-adjoint view extracted from the upper or lower triangular part of the current matrix
|
||||
*
|
||||
* The parameter \a UpLo can be either \c #Upper or \c #Lower
|
||||
*
|
||||
* Example: \include MatrixBase_selfadjointView.cpp
|
||||
* Output: \verbinclude MatrixBase_selfadjointView.out
|
||||
*
|
||||
* \sa class SelfAdjointView
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<unsigned int UpLo>
|
||||
typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type
|
||||
|
||||
@@ -12,35 +12,37 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
// TODO generalize the scalar type of 'other'
|
||||
|
||||
template<typename Derived>
|
||||
inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
|
||||
{
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar>());
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
inline Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
|
||||
EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
|
||||
{
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar>());
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
inline Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
|
||||
EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
|
||||
{
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar>());
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
|
||||
{
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar>());
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
|
||||
@@ -134,34 +134,49 @@ protected:
|
||||
// Specialization for "dst = dec.solve(rhs)"
|
||||
// NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere
|
||||
template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
|
||||
struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
|
||||
struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>
|
||||
{
|
||||
typedef Solve<DecType,RhsType> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
// FIXME shall we resize dst here?
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
src.dec()._solve_impl(src.rhs(), dst);
|
||||
}
|
||||
};
|
||||
|
||||
// Specialization for "dst = dec.transpose().solve(rhs)"
|
||||
template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
|
||||
struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
|
||||
struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>
|
||||
{
|
||||
typedef Solve<Transpose<const DecType>,RhsType> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
src.dec().nestedExpression().template _solve_impl_transposed<false>(src.rhs(), dst);
|
||||
}
|
||||
};
|
||||
|
||||
// Specialization for "dst = dec.adjoint().solve(rhs)"
|
||||
template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
|
||||
struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
|
||||
struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType>,
|
||||
internal::assign_op<Scalar,Scalar>, Dense2Dense>
|
||||
{
|
||||
typedef Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed<true>(src.rhs(), dst);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -161,6 +161,7 @@ struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
|
||||
* TriangularView methods
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
template<int Side, typename OtherDerived>
|
||||
void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
|
||||
@@ -169,7 +170,7 @@ void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<Ot
|
||||
eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );
|
||||
eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
|
||||
|
||||
enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit && OtherDerived::IsVectorAtCompileTime };
|
||||
enum { copy = (internal::traits<OtherDerived>::Flags & RowMajorBit) && OtherDerived::IsVectorAtCompileTime && OtherDerived::SizeAtCompileTime!=1};
|
||||
typedef typename internal::conditional<copy,
|
||||
typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy;
|
||||
OtherCopy otherCopy(other);
|
||||
@@ -188,6 +189,7 @@ TriangularViewImpl<Derived,Mode,Dense>::solve(const MatrixBase<Other>& other) co
|
||||
{
|
||||
return internal::triangular_solve_retval<Side,TriangularViewType,Other>(derived(), other.derived());
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace internal {
|
||||
|
||||
@@ -213,7 +215,7 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv
|
||||
|
||||
template<typename Dest> inline void evalTo(Dest& dst) const
|
||||
{
|
||||
if(!(is_same<RhsNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_rhs)))
|
||||
if(!is_same_dense(dst,m_rhs))
|
||||
dst = m_rhs;
|
||||
m_triangularMatrix.template solveInPlace<Side>(dst);
|
||||
}
|
||||
|
||||
@@ -1,160 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2015 Eugene Brevdo <ebrevdo@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_SPECIAL_FUNCTIONS_H
|
||||
#define EIGEN_SPECIAL_FUNCTIONS_H
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
/****************************************************************************
|
||||
* Implementation of lgamma *
|
||||
****************************************************************************/
|
||||
|
||||
template<typename Scalar>
|
||||
struct lgamma_impl
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Scalar&)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
|
||||
THIS_TYPE_IS_NOT_SUPPORTED);
|
||||
return Scalar(0);
|
||||
}
|
||||
};
|
||||
|
||||
/** \internal
  * Type trait attached to lgamma: exposes \c Scalar itself as the nested
  * \c type.
  */
template<typename Scalar>
struct lgamma_retval { typedef Scalar type; };
|
||||
|
||||
#ifdef EIGEN_HAS_C99_MATH
|
||||
template<>
|
||||
struct lgamma_impl<float>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE double run(const float& x) { return ::lgammaf(x); }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct lgamma_impl<double>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE double run(const double& x) { return ::lgamma(x); }
|
||||
};
|
||||
#endif
|
||||
|
||||
/****************************************************************************
|
||||
* Implementation of erf *
|
||||
****************************************************************************/
|
||||
|
||||
template<typename Scalar>
|
||||
struct erf_impl
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Scalar&)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
|
||||
THIS_TYPE_IS_NOT_SUPPORTED);
|
||||
return Scalar(0);
|
||||
}
|
||||
};
|
||||
|
||||
/** \internal
  * Type trait attached to erf: exposes \c Scalar itself as the nested
  * \c type.
  */
template<typename Scalar>
struct erf_retval { typedef Scalar type; };
|
||||
|
||||
#ifdef EIGEN_HAS_C99_MATH
|
||||
template<>
|
||||
struct erf_impl<float>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE float run(const float& x) { return ::erff(x); }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct erf_impl<double>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE double run(const double& x) { return ::erf(x); }
|
||||
};
|
||||
#endif // EIGEN_HAS_C99_MATH
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of erfc *
|
||||
****************************************************************************/
|
||||
|
||||
template<typename Scalar>
|
||||
struct erfc_impl
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Scalar&)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
|
||||
THIS_TYPE_IS_NOT_SUPPORTED);
|
||||
return Scalar(0);
|
||||
}
|
||||
};
|
||||
|
||||
/** \internal
  * Type trait attached to erfc: exposes \c Scalar itself as the nested
  * \c type.
  */
template<typename Scalar>
struct erfc_retval { typedef Scalar type; };
|
||||
|
||||
#ifdef EIGEN_HAS_C99_MATH
|
||||
template<>
|
||||
struct erfc_impl<float>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct erfc_impl<double>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); }
|
||||
};
|
||||
#endif // EIGEN_HAS_C99_MATH
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
namespace numext {
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar) lgamma(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) erf(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) erfc(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x);
|
||||
}
|
||||
|
||||
} // end namespace numext
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_SPECIAL_FUNCTIONS_H
|
||||
@@ -168,11 +168,12 @@ MatrixBase<Derived>::stableNorm() const
|
||||
DerivedCopy copy(derived());
|
||||
|
||||
enum {
|
||||
CanAlign = (int(Flags)&DirectAccessBit) || (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME
|
||||
CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit)
|
||||
|| (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME Alignment)>0 might not be enough
|
||||
) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT) // if we cannot allocate on the stack, then let's not bother about this optimization
|
||||
};
|
||||
typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
|
||||
typename DerivedCopyClean
|
||||
::ConstSegmentReturnType>::type SegmentWrapper;
|
||||
typename DerivedCopyClean::ConstSegmentReturnType>::type SegmentWrapper;
|
||||
Index n = size();
|
||||
|
||||
if(n==1)
|
||||
|
||||
@@ -31,8 +31,8 @@ namespace Eigen {
|
||||
* arguments to the constructor.
|
||||
*
|
||||
* Indeed, this class takes two template parameters:
|
||||
* \param _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.
|
||||
* \param _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.
|
||||
* \tparam _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.
|
||||
* \tparam _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.
|
||||
*
|
||||
* Here is an example:
|
||||
* \include Map_general_stride.cpp
|
||||
|
||||
@@ -13,20 +13,6 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class Transpose
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the transpose of a matrix
|
||||
*
|
||||
* \param MatrixType the type of the object of which we are taking the transpose
|
||||
*
|
||||
* This class represents an expression of the transpose of a matrix.
|
||||
* It is the return type of MatrixBase::transpose() and MatrixBase::adjoint()
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::transpose(), MatrixBase::adjoint()
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename MatrixType>
|
||||
struct traits<Transpose<MatrixType> > : public traits<MatrixType>
|
||||
@@ -50,11 +36,26 @@ struct traits<Transpose<MatrixType> > : public traits<MatrixType>
|
||||
|
||||
template<typename MatrixType, typename StorageKind> class TransposeImpl;
|
||||
|
||||
/** \class Transpose
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the transpose of a matrix
|
||||
*
|
||||
* \tparam MatrixType the type of the object of which we are taking the transpose
|
||||
*
|
||||
* This class represents an expression of the transpose of a matrix.
|
||||
* It is the return type of MatrixBase::transpose() and MatrixBase::adjoint()
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::transpose(), MatrixBase::adjoint()
|
||||
*/
|
||||
template<typename MatrixType> class Transpose
|
||||
: public TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
|
||||
|
||||
typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
|
||||
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
|
||||
@@ -69,16 +70,21 @@ template<typename MatrixType> class Transpose
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename MatrixType::Nested>::type&
|
||||
const typename internal::remove_all<MatrixTypeNested>::type&
|
||||
nestedExpression() const { return m_matrix; }
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::remove_all<typename MatrixType::Nested>::type&
|
||||
nestedExpression() { return m_matrix.const_cast_derived(); }
|
||||
typename internal::remove_reference<MatrixTypeNested>::type&
|
||||
nestedExpression() { return m_matrix; }
|
||||
|
||||
/** \internal */
|
||||
void resize(Index nrows, Index ncols) {
|
||||
m_matrix.resize(ncols,nrows);
|
||||
}
|
||||
|
||||
protected:
|
||||
typename MatrixType::Nested m_matrix;
|
||||
typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
@@ -12,35 +12,6 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class Transpositions
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Represents a sequence of transpositions (row/column interchange)
|
||||
*
|
||||
* \param SizeAtCompileTime the number of transpositions, or Dynamic
|
||||
* \param MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
|
||||
*
|
||||
* This class represents a permutation transformation as a sequence of \em n transpositions
|
||||
* \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices.
|
||||
* Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges
|
||||
* the rows \c i and \c indices[i] of the matrix \c M.
|
||||
* A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange.
|
||||
*
|
||||
* Compared to the class PermutationMatrix, such a sequence of transpositions is what is
|
||||
* computed during a decomposition with pivoting, and it is faster when applying the permutation in-place.
|
||||
*
|
||||
* To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example:
|
||||
* \code
|
||||
* Transpositions tr;
|
||||
* MatrixXf mat;
|
||||
* mat = tr * mat;
|
||||
* \endcode
|
||||
* In this example, we detect that the matrix appears on both sides, and so the transpositions
|
||||
* are applied in-place without any temporary or extra copy.
|
||||
*
|
||||
* \sa class PermutationMatrix
|
||||
*/
|
||||
|
||||
template<typename Derived>
|
||||
class TranspositionsBase
|
||||
{
|
||||
@@ -154,6 +125,35 @@ struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageInde
|
||||
};
|
||||
}
|
||||
|
||||
/** \class Transpositions
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Represents a sequence of transpositions (row/column interchange)
|
||||
*
|
||||
* \tparam SizeAtCompileTime the number of transpositions, or Dynamic
|
||||
* \tparam MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
|
||||
*
|
||||
* This class represents a permutation transformation as a sequence of \em n transpositions
|
||||
* \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices.
|
||||
* Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges
|
||||
* the rows \c i and \c indices[i] of the matrix \c M.
|
||||
* A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange.
|
||||
*
|
||||
* Compared to the class PermutationMatrix, such a sequence of transpositions is what is
|
||||
* computed during a decomposition with pivoting, and it is faster when applying the permutation in-place.
|
||||
*
|
||||
* To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example:
|
||||
* \code
|
||||
* Transpositions tr;
|
||||
* MatrixXf mat;
|
||||
* mat = tr * mat;
|
||||
* \endcode
|
||||
* In this example, we detect that the matrix appears on both sides, and so the transpositions
|
||||
* are applied in-place without any temporary or extra copy.
|
||||
*
|
||||
* \sa class PermutationMatrix
|
||||
*/
|
||||
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
|
||||
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
|
||||
{
|
||||
@@ -325,7 +325,7 @@ class TranspositionsWrapper
|
||||
|
||||
protected:
|
||||
|
||||
const typename IndicesType::Nested m_indices;
|
||||
typename IndicesType::Nested m_indices;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -168,7 +168,7 @@ namespace internal {
|
||||
template<typename MatrixType, unsigned int _Mode>
|
||||
struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType>
|
||||
{
|
||||
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
|
||||
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
|
||||
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
|
||||
typedef typename MatrixType::PlainObject FullMatrixType;
|
||||
@@ -213,7 +213,6 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
IsVectorAtCompileTime = false
|
||||
};
|
||||
|
||||
// FIXME This, combined with const_cast_derived in transpose() leads to a const-correctness loophole
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix)
|
||||
{}
|
||||
@@ -235,7 +234,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
|
||||
/** \returns a reference to the nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
NestedExpression& nestedExpression() { return *const_cast<NestedExpression*>(&m_matrix); }
|
||||
NestedExpression& nestedExpression() { return m_matrix; }
|
||||
|
||||
typedef TriangularView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
|
||||
/** \sa MatrixBase::conjugate() const */
|
||||
@@ -255,7 +254,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
inline TransposeReturnType transpose()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
typename MatrixType::TransposeReturnType tmp(m_matrix.const_cast_derived());
|
||||
typename MatrixType::TransposeReturnType tmp(m_matrix);
|
||||
return TransposeReturnType(tmp);
|
||||
}
|
||||
|
||||
@@ -368,14 +367,14 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
|
||||
template<typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularViewType& operator+=(const DenseBase<Other>& other) {
|
||||
internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar>());
|
||||
internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar,typename Other::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
/** \sa MatrixBase::operator-=() */
|
||||
template<typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularViewType& operator-=(const DenseBase<Other>& other) {
|
||||
internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar>());
|
||||
internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar,typename Other::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
@@ -418,7 +417,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(TriangularViewType);
|
||||
Base::check_coordinates_internal(row, col);
|
||||
return derived().nestedExpression().const_cast_derived().coeffRef(row, col);
|
||||
return derived().nestedExpression().coeffRef(row, col);
|
||||
}
|
||||
|
||||
/** Assigns a triangular matrix to a triangular part of a dense matrix */
|
||||
@@ -471,6 +470,8 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
|
||||
* \a Side==OnTheLeft (the default), or the right-inverse-multiply \a other * inverse(\c *this) if
|
||||
* \a Side==OnTheRight.
|
||||
*
|
||||
* Note that the template parameter \c Side can be omitted, in which case \c Side==OnTheLeft
|
||||
*
|
||||
* The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the
|
||||
* diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this
|
||||
* is an upper (resp. lower) triangular matrix.
|
||||
@@ -496,6 +497,8 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
|
||||
* \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
|
||||
* This function will const_cast it, so constness isn't honored here.
|
||||
*
|
||||
* Note that the template parameter \c Side can be omitted, in which case \c Side==OnTheLeft
|
||||
*
|
||||
* See TriangularView::solve() for the details.
|
||||
*/
|
||||
template<int Side, typename OtherDerived>
|
||||
@@ -533,27 +536,28 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
|
||||
template<typename RhsType, typename DstType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _solve_impl(const RhsType &rhs, DstType &dst) const {
|
||||
if(!(internal::is_same<RhsType,DstType>::value && internal::extract_data(dst) == internal::extract_data(rhs)))
|
||||
if(!internal::is_same_dense(dst,rhs))
|
||||
dst = rhs;
|
||||
this->solveInPlace(dst);
|
||||
}
|
||||
|
||||
template<typename ProductType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha);
|
||||
EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha, bool beta);
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of triangular evaluation/assignment
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
// FIXME should we keep that possibility
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
template<typename OtherDerived>
|
||||
inline TriangularView<MatrixType, Mode>&
|
||||
TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDerived>& other)
|
||||
{
|
||||
internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar>());
|
||||
internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
@@ -584,6 +588,7 @@ void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBas
|
||||
eigen_assert(Mode == int(OtherDerived::Mode));
|
||||
internal::call_assignment_no_alias(derived(), other.derived());
|
||||
}
|
||||
#endif
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of TriangularBase methods
|
||||
@@ -595,14 +600,7 @@ template<typename Derived>
|
||||
template<typename DenseDerived>
|
||||
void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
|
||||
{
|
||||
if(internal::traits<Derived>::Flags & EvalBeforeAssigningBit)
|
||||
{
|
||||
typename internal::plain_matrix_type<Derived>::type other_evaluated(rows(), cols());
|
||||
evalToLazy(other_evaluated);
|
||||
other.derived().swap(other_evaluated);
|
||||
}
|
||||
else
|
||||
evalToLazy(other.derived());
|
||||
evalToLazy(other.derived());
|
||||
}
|
||||
|
||||
/***************************************************************************
|
||||
@@ -649,21 +647,20 @@ MatrixBase<Derived>::triangularView() const
|
||||
template<typename Derived>
|
||||
bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
|
||||
{
|
||||
using std::abs;
|
||||
RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
{
|
||||
Index maxi = (std::min)(j, rows()-1);
|
||||
Index maxi = numext::mini(j, rows()-1);
|
||||
for(Index i = 0; i <= maxi; ++i)
|
||||
{
|
||||
RealScalar absValue = abs(coeff(i,j));
|
||||
RealScalar absValue = numext::abs(coeff(i,j));
|
||||
if(absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue;
|
||||
}
|
||||
}
|
||||
RealScalar threshold = maxAbsOnUpperPart * prec;
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
for(Index i = j+1; i < rows(); ++i)
|
||||
if(abs(coeff(i, j)) > threshold) return false;
|
||||
if(numext::abs(coeff(i, j)) > threshold) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -675,20 +672,19 @@ bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
|
||||
template<typename Derived>
|
||||
bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const
|
||||
{
|
||||
using std::abs;
|
||||
RealScalar maxAbsOnLowerPart = static_cast<RealScalar>(-1);
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
for(Index i = j; i < rows(); ++i)
|
||||
{
|
||||
RealScalar absValue = abs(coeff(i,j));
|
||||
RealScalar absValue = numext::abs(coeff(i,j));
|
||||
if(absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue;
|
||||
}
|
||||
RealScalar threshold = maxAbsOnLowerPart * prec;
|
||||
for(Index j = 1; j < cols(); ++j)
|
||||
{
|
||||
Index maxi = (std::min)(j, rows()-1);
|
||||
Index maxi = numext::mini(j, rows()-1);
|
||||
for(Index i = 0; i < maxi; ++i)
|
||||
if(abs(coeff(i, j)) > threshold) return false;
|
||||
if(numext::abs(coeff(i, j)) > threshold) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@@ -711,10 +707,6 @@ struct evaluator_traits<TriangularView<MatrixType,Mode> >
|
||||
{
|
||||
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
|
||||
typedef typename glue_shapes<typename evaluator_traits<MatrixType>::Shape, TriangularShape>::type Shape;
|
||||
|
||||
// 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a
|
||||
// temporary; 0 if not.
|
||||
static const int AssumeAliasing = 0;
|
||||
};
|
||||
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
@@ -788,15 +780,19 @@ public:
|
||||
};
|
||||
|
||||
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType, typename Functor>
|
||||
EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
|
||||
{
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
|
||||
typedef evaluator<DstXprType> DstEvaluatorType;
|
||||
typedef evaluator<SrcXprType> SrcEvaluatorType;
|
||||
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
SrcEvaluatorType srcEvaluator(src);
|
||||
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
|
||||
typedef triangular_dense_assignment_kernel< Mode&(Lower|Upper),Mode&(UnitDiag|ZeroDiag|SelfAdjoint),SetOpposite,
|
||||
DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
|
||||
@@ -805,16 +801,17 @@ EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, co
|
||||
enum {
|
||||
unroll = DstXprType::SizeAtCompileTime != Dynamic
|
||||
&& SrcEvaluatorType::CoeffReadCost < HugeCost
|
||||
&& DstXprType::SizeAtCompileTime * SrcEvaluatorType::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT
|
||||
&& DstXprType::SizeAtCompileTime * (DstEvaluatorType::CoeffReadCost+SrcEvaluatorType::CoeffReadCost) / 2 <= EIGEN_UNROLLING_LIMIT
|
||||
};
|
||||
|
||||
triangular_assignment_loop<Kernel, Mode, unroll ? int(DstXprType::SizeAtCompileTime) : Dynamic, SetOpposite>::run(kernel);
|
||||
}
|
||||
|
||||
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType>
|
||||
EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src)
|
||||
{
|
||||
call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar>());
|
||||
call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
|
||||
}
|
||||
|
||||
template<> struct AssignmentKind<TriangularShape,TriangularShape> { typedef Triangular2Triangular Kind; };
|
||||
@@ -822,8 +819,8 @@ template<> struct AssignmentKind<DenseShape,TriangularShape> { typedef Tria
|
||||
template<> struct AssignmentKind<TriangularShape,DenseShape> { typedef Dense2Triangular Kind; };
|
||||
|
||||
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Triangular, Scalar>
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Triangular>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
|
||||
{
|
||||
@@ -833,8 +830,8 @@ struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Triangular, Scalar
|
||||
}
|
||||
};
|
||||
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense, Scalar>
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
|
||||
{
|
||||
@@ -842,8 +839,8 @@ struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense, Scalar>
|
||||
}
|
||||
};
|
||||
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Triangular, Scalar>
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Triangular>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
|
||||
{
|
||||
@@ -903,7 +900,7 @@ struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>
|
||||
{
|
||||
for(Index j = 0; j < kernel.cols(); ++j)
|
||||
{
|
||||
Index maxi = (std::min)(j, kernel.rows());
|
||||
Index maxi = numext::mini(j, kernel.rows());
|
||||
Index i = 0;
|
||||
if (((Mode&Lower) && SetOpposite) || (Mode&Upper))
|
||||
{
|
||||
@@ -943,35 +940,39 @@ namespace internal {
|
||||
|
||||
// Triangular = Product
|
||||
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar>, Dense2Triangular, Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
|
||||
{
|
||||
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename SrcXprType::Scalar> &)
|
||||
{
|
||||
dst.setZero();
|
||||
dst._assignProduct(src, 1);
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
dst._assignProduct(src, 1, 0);
|
||||
}
|
||||
};
|
||||
|
||||
// Triangular += Product
|
||||
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar>, Dense2Triangular, Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
|
||||
{
|
||||
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &)
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,typename SrcXprType::Scalar> &)
|
||||
{
|
||||
dst._assignProduct(src, 1);
|
||||
dst._assignProduct(src, 1, 1);
|
||||
}
|
||||
};
|
||||
|
||||
// Triangular -= Product
|
||||
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar>, Dense2Triangular, Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
|
||||
{
|
||||
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &)
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,typename SrcXprType::Scalar> &)
|
||||
{
|
||||
dst._assignProduct(src, -1);
|
||||
dst._assignProduct(src, -1, 1);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -13,13 +13,23 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
// Traits of VectorBlock<VectorType, Size>: inherited unchanged from the traits
// of the equivalent Block expression, which is 1 x Size when VectorType has
// the RowMajorBit flag set and Size x 1 otherwise.
template<typename VectorType, int Size>
|
||||
struct traits<VectorBlock<VectorType, Size> >
|
||||
: public traits<Block<VectorType,
|
||||
traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
|
||||
traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
|
||||
{
|
||||
};
|
||||
}
|
||||
|
||||
/** \class VectorBlock
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of a fixed-size or dynamic-size sub-vector
|
||||
*
|
||||
* \param VectorType the type of the object in which we are taking a sub-vector
|
||||
* \param Size size of the sub-vector we are taking at compile time (optional)
|
||||
* \tparam VectorType the type of the object in which we are taking a sub-vector
|
||||
* \tparam Size size of the sub-vector we are taking at compile time (optional)
|
||||
*
|
||||
* This class represents an expression of either a fixed-size or dynamic-size sub-vector.
|
||||
* It is the return type of DenseBase::segment(Index,Index) and DenseBase::segment<int>(Index) and
|
||||
@@ -43,17 +53,6 @@ namespace Eigen {
|
||||
*
|
||||
* \sa class Block, DenseBase::segment(Index,Index,Index,Index), DenseBase::segment(Index,Index)
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename VectorType, int Size>
|
||||
struct traits<VectorBlock<VectorType, Size> >
|
||||
: public traits<Block<VectorType,
|
||||
traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
|
||||
traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
|
||||
{
|
||||
};
|
||||
}
|
||||
|
||||
template<typename VectorType, int Size> class VectorBlock
|
||||
: public Block<VectorType,
|
||||
internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
|
||||
|
||||
29
Eigen/src/Core/VectorwiseOp.h
Executable file → Normal file
29
Eigen/src/Core/VectorwiseOp.h
Executable file → Normal file
@@ -124,7 +124,7 @@ struct member_lpnorm {
|
||||
template <typename BinaryOp, typename Scalar>
|
||||
struct member_redux {
|
||||
typedef typename result_of<
|
||||
BinaryOp(Scalar,Scalar)
|
||||
BinaryOp(const Scalar&,const Scalar&)
|
||||
>::type result_type;
|
||||
template<typename _Scalar, int Size> struct Cost
|
||||
{ enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };
|
||||
@@ -141,8 +141,8 @@ struct member_redux {
|
||||
*
|
||||
* \brief Pseudo expression providing partial reduction operations
|
||||
*
|
||||
* \param ExpressionType the type of the object on which to do partial reductions
|
||||
* \param Direction indicates the direction of the redux (#Vertical or #Horizontal)
|
||||
* \tparam ExpressionType the type of the object on which to do partial reductions
|
||||
* \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal)
|
||||
*
|
||||
* This class represents a pseudo expression with partial reduction features.
|
||||
* It is the return type of DenseBase::colwise() and DenseBase::rowwise()
|
||||
@@ -187,11 +187,11 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
protected:
|
||||
|
||||
/** \internal
|
||||
* \returns the i-th subvector according to the \c Direction */
|
||||
typedef typename internal::conditional<isVertical,
|
||||
typename ExpressionType::ColXpr,
|
||||
typename ExpressionType::RowXpr>::type SubVector;
|
||||
/** \internal
|
||||
* \returns the i-th subvector according to the \c Direction */
|
||||
EIGEN_DEVICE_FUNC
|
||||
SubVector subVector(Index i)
|
||||
{
|
||||
@@ -284,6 +284,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
typedef typename ReturnType<internal::member_any>::Type AnyReturnType;
|
||||
typedef PartialReduxExpr<ExpressionType, internal::member_count<Index>, Direction> CountReturnType;
|
||||
typedef typename ReturnType<internal::member_prod>::Type ProdReturnType;
|
||||
typedef Reverse<const ExpressionType, Direction> ConstReverseReturnType;
|
||||
typedef Reverse<ExpressionType, Direction> ReverseReturnType;
|
||||
|
||||
template<int p> struct LpNormReturnType {
|
||||
@@ -456,7 +457,15 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
*
|
||||
* \sa DenseBase::reverse() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const ReverseReturnType reverse() const
|
||||
const ConstReverseReturnType reverse() const
|
||||
{ return ConstReverseReturnType( _expression() ); }
|
||||
|
||||
/** \returns a writable matrix expression
|
||||
* where each column (or row) is reversed.
|
||||
*
|
||||
* \sa reverse() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
ReverseReturnType reverse()
|
||||
{ return ReverseReturnType( _expression() ); }
|
||||
|
||||
typedef Replicate<ExpressionType,(isVertical?Dynamic:1),(isHorizontal?Dynamic:1)> ReplicateReturnType;
|
||||
@@ -540,7 +549,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** Returns the expression of the sum of the vector \a other to each subvector of \c *this */
|
||||
template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
|
||||
CwiseBinaryOp<internal::scalar_sum_op<Scalar>,
|
||||
CwiseBinaryOp<internal::scalar_sum_op<Scalar,typename OtherDerived::Scalar>,
|
||||
const ExpressionTypeNestedCleaned,
|
||||
const typename ExtendedType<OtherDerived>::Type>
|
||||
operator+(const DenseBase<OtherDerived>& other) const
|
||||
@@ -553,7 +562,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
/** Returns the expression of the difference between each subvector of \c *this and the vector \a other */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
CwiseBinaryOp<internal::scalar_difference_op<Scalar>,
|
||||
CwiseBinaryOp<internal::scalar_difference_op<Scalar,typename OtherDerived::Scalar>,
|
||||
const ExpressionTypeNestedCleaned,
|
||||
const typename ExtendedType<OtherDerived>::Type>
|
||||
operator-(const DenseBase<OtherDerived>& other) const
|
||||
@@ -593,7 +602,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
return m_matrix / extendedTo(other.derived());
|
||||
}
|
||||
|
||||
/** \returns an expression where each column of row of the referenced matrix are normalized.
|
||||
/** \returns an expression where each column (or row) of the referenced matrix is normalized.
|
||||
* The referenced matrix is \b not modified.
|
||||
* \sa MatrixBase::normalized(), normalize()
|
||||
*/
|
||||
@@ -616,6 +625,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
/////////// Geometry module ///////////
|
||||
|
||||
typedef Homogeneous<ExpressionType,Direction> HomogeneousReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
HomogeneousReturnType homogeneous() const;
|
||||
|
||||
typedef typename ExpressionType::PlainObject CrossReturnType;
|
||||
@@ -645,6 +655,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >
|
||||
HNormalizedReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const HNormalizedReturnType hnormalized() const;
|
||||
|
||||
protected:
|
||||
|
||||
@@ -194,10 +194,11 @@ struct functor_traits<max_coeff_visitor<Scalar> > {
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \returns the minimum of all coefficients of *this and puts in *row and *col its location.
|
||||
/** \fn DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
|
||||
* \returns the minimum of all coefficients of *this and puts in *rowId and *colId its location.
|
||||
* \warning the result is undefined if \c *this contains NaN.
|
||||
*
|
||||
* \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::minCoeff()
|
||||
* \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename IndexType>
|
||||
@@ -215,7 +216,7 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
|
||||
/** \returns the minimum of all coefficients of *this and puts in *index its location.
|
||||
* \warning the result is undefined if \c *this contains NaN.
|
||||
*
|
||||
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::minCoeff()
|
||||
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::minCoeff()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename IndexType>
|
||||
@@ -230,10 +231,11 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
|
||||
return minVisitor.res;
|
||||
}
|
||||
|
||||
/** \returns the maximum of all coefficients of *this and puts in *row and *col its location.
|
||||
/** \fn DenseBase<Derived>::maxCoeff(IndexType* rowId, IndexType* colId) const
|
||||
* \returns the maximum of all coefficients of *this and puts in *rowId and *colId its location.
|
||||
* \warning the result is undefined if \c *this contains NaN.
|
||||
*
|
||||
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
|
||||
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename IndexType>
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
FILE(GLOB Eigen_Core_arch_AVX_SRCS "*.h")
|
||||
|
||||
INSTALL(FILES
|
||||
${Eigen_Core_arch_AVX_SRCS}
|
||||
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AVX COMPONENT Devel
|
||||
)
|
||||
@@ -456,6 +456,26 @@ ptranspose(PacketBlock<Packet2cd,2>& kernel) {
|
||||
kernel.packet[0].v = tmp;
|
||||
}
|
||||
|
||||
// Returns a copy of `a` whose first complex<float> coefficient is replaced by `b`.
// `b` is broadcast with pset1; blend mask 1|2 then takes the two lowest float
// lanes (the first complex coefficient) from the broadcast and all other lanes from `a`.
template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex<float> b)
|
||||
{
|
||||
return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,1|2));
|
||||
}
|
||||
|
||||
// Returns a copy of `a` whose first complex<double> coefficient is replaced by `b`.
// `b` is broadcast with pset1; blend mask 1|2 then takes the two lowest double
// lanes (the first complex coefficient) from the broadcast and the rest from `a`.
template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex<double> b)
|
||||
{
|
||||
return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,1|2));
|
||||
}
|
||||
|
||||
// Returns a copy of `a` whose last complex<float> coefficient is replaced by `b`.
// Blend mask (1<<7)|(1<<6) takes the two highest float lanes (the last complex
// coefficient) from the broadcast of `b` and all other lanes from `a`.
template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex<float> b)
|
||||
{
|
||||
return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,(1<<7)|(1<<6)));
|
||||
}
|
||||
|
||||
// Returns a copy of `a` whose last complex<double> coefficient is replaced by `b`.
// Blend mask (1<<3)|(1<<2) takes the two highest double lanes (the last complex
// coefficient) from the broadcast of `b` and the rest from `a`.
template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex<double> b)
|
||||
{
|
||||
return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,(1<<3)|(1<<2)));
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -10,11 +10,6 @@
|
||||
#ifndef EIGEN_MATH_FUNCTIONS_AVX_H
|
||||
#define EIGEN_MATH_FUNCTIONS_AVX_H
|
||||
|
||||
// For some reason, this function didn't make it into the avxintrin.h
|
||||
// used by the compiler, so we'll just wrap it.
|
||||
#define _mm256_setr_m128(lo, hi) \
|
||||
_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1)
|
||||
|
||||
/* The sin, cos, exp, and log functions of this file are loosely derived from
|
||||
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
|
||||
*/
|
||||
@@ -23,6 +18,28 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Shifts every 32-bit integer lane of `v` left by `n` bits.
// With EIGEN_VECTORIZE_AVX2 a single 256-bit shift is issued; otherwise the
// two 128-bit halves are extracted, shifted separately and recombined.
inline Packet8i pshiftleft(Packet8i v, int n)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_AVX2
|
||||
return _mm256_slli_epi32(v, n);
|
||||
#else
|
||||
__m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(v, 0), n);
|
||||
__m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(v, 1), n);
|
||||
// Reassemble the 256-bit result: lo in the lower half, hi in the upper half.
return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Reinterprets each float lane of `v` as a 32-bit integer, shifts it right by
// `n` bits (logical shift) and returns the resulting integers CONVERTED to
// float via _mm256_cvtepi32_ps, i.e. the result is a numeric value, not a
// shifted bit pattern.
// With EIGEN_VECTORIZE_AVX2 a single 256-bit shift is issued; otherwise the
// two 128-bit halves are shifted separately and recombined before conversion.
inline Packet8f pshiftright(Packet8f v, int n)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_AVX2
|
||||
return _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(v), n));
|
||||
#else
|
||||
__m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 0), n);
|
||||
__m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 1), n);
|
||||
return _mm256_cvtepi32_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1));
|
||||
#endif
|
||||
}
|
||||
|
||||
// Sine function
|
||||
// Computes sin(x) by wrapping x to the interval [-Pi/4,3*Pi/4] and
|
||||
// evaluating interpolants in [-Pi/4,Pi/4] or [Pi/4,3*Pi/4]. The interpolants
|
||||
@@ -54,17 +71,8 @@ psin<Packet8f>(const Packet8f& _x) {
|
||||
// Make a mask for the entries that need flipping, i.e. wherever the shift
|
||||
// is odd.
|
||||
Packet8i shift_ints = _mm256_cvtps_epi32(shift);
|
||||
Packet8i shift_isodd =
|
||||
_mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one)));
|
||||
#ifdef EIGEN_VECTORIZE_AVX2
|
||||
Packet8i sign_flip_mask = _mm256_slli_epi32(shift_isodd, 31);
|
||||
#else
|
||||
__m128i lo =
|
||||
_mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 0), 31);
|
||||
__m128i hi =
|
||||
_mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 1), 31);
|
||||
Packet8i sign_flip_mask = _mm256_setr_m128(lo, hi);
|
||||
#endif
|
||||
Packet8i shift_isodd = _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one)));
|
||||
Packet8i sign_flip_mask = pshiftleft(shift_isodd, 31);
|
||||
|
||||
// Create a mask for which interpolant to use, i.e. if z > 1, then the mask
|
||||
// is set to ones for that entry.
|
||||
@@ -142,15 +150,7 @@ plog<Packet8f>(const Packet8f& _x) {
|
||||
// Truncate input values to the minimum positive normal.
|
||||
x = pmax(x, p8f_min_norm_pos);
|
||||
|
||||
// Extract the shifted exponents (No bitwise shifting in regular AVX, so
|
||||
// convert to SSE and do it there).
|
||||
#ifdef EIGEN_VECTORIZE_AVX2
|
||||
Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(x), 23));
|
||||
#else
|
||||
__m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 0), 23);
|
||||
__m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 1), 23);
|
||||
Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_setr_m128(lo, hi));
|
||||
#endif
|
||||
Packet8f emm0 = pshiftright(x,23);
|
||||
Packet8f e = _mm256_sub_ps(emm0, p8f_126f);
|
||||
|
||||
// Set the exponents to -1, i.e. x are in the range [0.5,1).
|
||||
@@ -259,18 +259,19 @@ pexp<Packet8f>(const Packet8f& _x) {
|
||||
|
||||
// Build emm0 = 2^m.
|
||||
Packet8i emm0 = _mm256_cvttps_epi32(padd(m, p8f_127));
|
||||
#ifdef EIGEN_VECTORIZE_AVX2
|
||||
emm0 = _mm256_slli_epi32(emm0, 23);
|
||||
#else
|
||||
__m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(emm0, 0), 23);
|
||||
__m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(emm0, 1), 23);
|
||||
emm0 = _mm256_setr_m128(lo, hi);
|
||||
#endif
|
||||
emm0 = pshiftleft(emm0, 23);
|
||||
|
||||
// Return 2^m * exp(r).
|
||||
return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x);
|
||||
}
|
||||
|
||||
// Hyperbolic tangent of 8 packed floats; simply forwards to the shared
// generic_fast_tanh_float implementation.
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||
ptanh<Packet8f>(const Packet8f& x) {
|
||||
return internal::generic_fast_tanh_float(x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
|
||||
pexp<Packet4d>(const Packet4d& _x) {
|
||||
@@ -354,30 +355,27 @@ pexp<Packet4d>(const Packet4d& _x) {
|
||||
// Functions for sqrt.
|
||||
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
|
||||
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
|
||||
// exact solution. The main advantage of this approach is not just speed, but
|
||||
// also the fact that it can be inlined and pipelined with other computations,
|
||||
// further reducing its effective latency.
|
||||
// exact solution. It does not handle +inf, or denormalized numbers correctly.
|
||||
// The main advantage of this approach is not just speed, but also the fact that
|
||||
// it can be inlined and pipelined with other computations, further reducing its
|
||||
// effective latency. This is similar to Quake3's fast inverse square root.
|
||||
// For detail see here: http://www.beyond3d.com/content/articles/8/
|
||||
#if EIGEN_FAST_MATH
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||
psqrt<Packet8f>(const Packet8f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
|
||||
|
||||
Packet8f neg_half = pmul(_x, p8f_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
Packet8f non_zero_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_GE_OQ);
|
||||
Packet8f x = _mm256_and_ps(non_zero_mask, _mm256_rsqrt_ps(_x));
|
||||
Packet8f half = pmul(_x, pset1<Packet8f>(.5f));
|
||||
Packet8f denormal_mask = _mm256_and_ps(
|
||||
_mm256_cmp_ps(_x, pset1<Packet8f>((std::numeric_limits<float>::min)()),
|
||||
_CMP_LT_OQ),
|
||||
_mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_GE_OQ));
|
||||
|
||||
// Compute approximate reciprocal sqrt.
|
||||
Packet8f x = _mm256_rsqrt_ps(_x);
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8f_one_point_five));
|
||||
|
||||
// Multiply the original _x by its reciprocal square root to extract the
|
||||
// square root.
|
||||
return pmul(_x, x);
|
||||
x = pmul(x, psub(pset1<Packet8f>(1.5f), pmul(half, pmul(x,x))));
|
||||
// Flush results for denormals to zero.
|
||||
return _mm256_andnot_ps(denormal_mask, pmul(_x,x));
|
||||
}
|
||||
#else
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
|
||||
@@ -48,7 +48,9 @@ template<> struct is_arithmetic<__m256d> { enum { value = true }; };
|
||||
#define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
|
||||
const Packet8i p8i_##NAME = pset1<Packet8i>(X)
|
||||
|
||||
|
||||
// Use the packet_traits defined in AVX512/PacketMath.h instead if we're going
|
||||
// to leverage AVX512 instructions.
|
||||
#ifndef EIGEN_VECTORIZE_AVX512
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet8f type;
|
||||
@@ -66,6 +68,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasTanh = EIGEN_FAST_MATH,
|
||||
HasBlend = 1,
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
@@ -92,6 +95,10 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasCeil = 1
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
// Division cost constants for float (14) and double (16) when the second
// template argument is true.
// NOTE(review): the second argument presumably means "vectorized" — confirm
// against the primary scalar_div_cost template.
template<> struct scalar_div_cost<float,true> { enum { value = 14 }; };
|
||||
template<> struct scalar_div_cost<double,true> { enum { value = 16 }; };
|
||||
|
||||
/* Proper support for integers is only provided by AVX2. In the meantime, we'll
|
||||
use SSE instructions and packets to deal with integers.
|
||||
@@ -152,7 +159,7 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co
|
||||
|
||||
#ifdef __FMA__
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
|
||||
#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
|
||||
#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
|
||||
// clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
|
||||
// and gcc stupidly generates a vfmadd132ps instruction,
|
||||
// so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
|
||||
@@ -165,7 +172,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f&
|
||||
#endif
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
|
||||
#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
|
||||
#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
|
||||
// see above
|
||||
Packet4d res = c;
|
||||
__asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
|
||||
@@ -300,9 +307,11 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
|
||||
pstore(to, pa);
|
||||
}
|
||||
|
||||
// prefetch specializations for float, double and int: each issues
// _mm_prefetch on `addr` with the _MM_HINT_T0 hint.
// They are compiled out when EIGEN_VECTORIZE_AVX512 is defined.
// NOTE(review): presumably AVX512/PacketMath.h provides them in that case — confirm.
#ifndef EIGEN_VECTORIZE_AVX512
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
|
||||
return _mm_cvtss_f32(_mm256_castps256_ps128(a));
|
||||
@@ -386,17 +395,14 @@ template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
|
||||
{
|
||||
Packet8f tmp0 = _mm256_hadd_ps(a,_mm256_permute2f128_ps(a,a,1));
|
||||
tmp0 = _mm256_hadd_ps(tmp0,tmp0);
|
||||
return pfirst(_mm256_hadd_ps(tmp0, tmp0));
|
||||
return predux(Packet4f(_mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1))));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
|
||||
{
|
||||
Packet4d tmp0 = _mm256_hadd_pd(a,_mm256_permute2f128_pd(a,a,1));
|
||||
return pfirst(_mm256_hadd_pd(tmp0,tmp0));
|
||||
return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f predux4<Packet8f>(const Packet8f& a)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f predux_downto4<Packet8f>(const Packet8f& a)
|
||||
{
|
||||
return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
|
||||
}
|
||||
@@ -600,6 +606,26 @@ template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, cons
|
||||
return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
|
||||
}
|
||||
|
||||
// Returns a copy of `a` whose first (lowest) float coefficient is replaced by `b`.
// Blend mask 1 takes lane 0 from the broadcast of `b` and lanes 1..7 from `a`.
template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b)
|
||||
{
|
||||
return _mm256_blend_ps(a,pset1<Packet8f>(b),1);
|
||||
}
|
||||
|
||||
// Returns a copy of `a` whose first (lowest) double coefficient is replaced by `b`.
// Blend mask 1 takes lane 0 from the broadcast of `b` and lanes 1..3 from `a`.
template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b)
|
||||
{
|
||||
return _mm256_blend_pd(a,pset1<Packet4d>(b),1);
|
||||
}
|
||||
|
||||
// Returns a copy of `a` whose last (highest) float coefficient is replaced by `b`.
// Blend mask (1<<7) takes lane 7 from the broadcast of `b` and lanes 0..6 from `a`.
template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
|
||||
{
|
||||
return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
|
||||
}
|
||||
|
||||
// Returns a copy of `a` whose last (highest) double coefficient is replaced by `b`.
// Blend mask (1<<3) takes lane 3 from the broadcast of `b` and lanes 0..2 from `a`.
template<> EIGEN_STRONG_INLINE Packet4d pinsertlast(const Packet4d& a, double b)
|
||||
{
|
||||
return _mm256_blend_pd(a,pset1<Packet4d>(b),(1<<3));
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
396
Eigen/src/Core/arch/AVX512/MathFunctions.h
Normal file
396
Eigen/src/Core/arch/AVX512/MathFunctions.h
Normal file
@@ -0,0 +1,396 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2016 Pedro Gonnet (pedro.gonnet@gmail.com)
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
|
||||
#define THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Disable the code for older versions of gcc that don't support many of the required avx512 intrinsics.
|
||||
#if EIGEN_GNUC_AT_LEAST(5, 3)
|
||||
|
||||
// Helper macros declaring local packet constants for the functions below.
//
// Declares `const Packet16f p16f_NAME` with every lane set to the float X.
#define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
|
||||
const Packet16f p16f_##NAME = pset1<Packet16f>(X)
|
||||
|
||||
// Declares `const Packet16f p16f_NAME` from the integer X: X is broadcast as
// a Packet16i and the vector is then cast to __m512.
#define _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
|
||||
const Packet16f p16f_##NAME = (__m512)pset1<Packet16i>(X)
|
||||
|
||||
// Declares `const Packet8d p8d_NAME` with every lane set to the double X.
#define _EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
|
||||
const Packet8d p8d_##NAME = pset1<Packet8d>(X)
|
||||
|
||||
// Declares `const Packet8d p8d_NAME` whose lanes hold the bit pattern of the
// 64-bit integer X (bit cast via _mm512_castsi512_pd, no numeric conversion).
#define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
|
||||
const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
|
||||
|
||||
// Natural logarithm
|
||||
// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
|
||||
// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
|
||||
// be easily approximated by a polynomial centered on m=1 for stability.
|
||||
#if defined(EIGEN_VECTORIZE_AVX512DQ)
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||
plog<Packet16f>(const Packet16f& _x) {
|
||||
Packet16f x = _x;
|
||||
_EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(126f, 126.0f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inv_mant_mask, ~0x7f800000);
|
||||
|
||||
// The smallest non denormalized float number.
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(min_norm_pos, 0x00800000);
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(minus_inf, 0xff800000);
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
|
||||
|
||||
// Polynomial coefficients.
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_SQRTHF, 0.707106781186547524f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p0, 7.0376836292E-2f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p1, -1.1514610310E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p2, 1.1676998740E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p3, -1.2420140846E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p4, +1.4249322787E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p5, -1.6668057665E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p6, +2.0000714765E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p7, -2.4999993993E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p8, +3.3333331174E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_q1, -2.12194440e-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_q2, 0.693359375f);
|
||||
|
||||
// invalid_mask is set to true when x is NaN
|
||||
__mmask16 invalid_mask =
|
||||
_mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_NGE_UQ);
|
||||
__mmask16 iszero_mask =
|
||||
_mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_EQ_UQ);
|
||||
|
||||
// Truncate input values to the minimum positive normal.
|
||||
x = pmax(x, p16f_min_norm_pos);
|
||||
|
||||
// Extract the shifted exponents.
|
||||
Packet16f emm0 = _mm512_cvtepi32_ps(_mm512_srli_epi32((__m512i)x, 23));
|
||||
Packet16f e = _mm512_sub_ps(emm0, p16f_126f);
|
||||
|
||||
// Set the exponents to -1, i.e. x are in the range [0.5,1).
|
||||
x = _mm512_and_ps(x, p16f_inv_mant_mask);
|
||||
x = _mm512_or_ps(x, p16f_half);
|
||||
|
||||
// part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
|
||||
// and shift by -1. The values are then centered around 0, which improves
|
||||
// the stability of the polynomial evaluation.
|
||||
// if( x < SQRTHF ) {
|
||||
// e -= 1;
|
||||
// x = x + x - 1.0;
|
||||
// } else { x = x - 1.0; }
|
||||
__mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ);
|
||||
Packet16f tmp = _mm512_mask_blend_ps(mask, x, _mm512_setzero_ps());
|
||||
x = psub(x, p16f_1);
|
||||
e = psub(e, _mm512_mask_blend_ps(mask, p16f_1, _mm512_setzero_ps()));
|
||||
x = padd(x, tmp);
|
||||
|
||||
Packet16f x2 = pmul(x, x);
|
||||
Packet16f x3 = pmul(x2, x);
|
||||
|
||||
// Evaluate the polynomial approximant of degree 8 in three parts, probably
|
||||
// to improve instruction-level parallelism.
|
||||
Packet16f y, y1, y2;
|
||||
y = pmadd(p16f_cephes_log_p0, x, p16f_cephes_log_p1);
|
||||
y1 = pmadd(p16f_cephes_log_p3, x, p16f_cephes_log_p4);
|
||||
y2 = pmadd(p16f_cephes_log_p6, x, p16f_cephes_log_p7);
|
||||
y = pmadd(y, x, p16f_cephes_log_p2);
|
||||
y1 = pmadd(y1, x, p16f_cephes_log_p5);
|
||||
y2 = pmadd(y2, x, p16f_cephes_log_p8);
|
||||
y = pmadd(y, x3, y1);
|
||||
y = pmadd(y, x3, y2);
|
||||
y = pmul(y, x3);
|
||||
|
||||
// Add the logarithm of the exponent back to the result of the interpolation.
|
||||
y1 = pmul(e, p16f_cephes_log_q1);
|
||||
tmp = pmul(x2, p16f_half);
|
||||
y = padd(y, y1);
|
||||
x = psub(x, tmp);
|
||||
y2 = pmul(e, p16f_cephes_log_q2);
|
||||
x = padd(x, y);
|
||||
x = padd(x, y2);
|
||||
|
||||
// Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
|
||||
return _mm512_mask_blend_ps(iszero_mask, p16f_minus_inf,
|
||||
_mm512_mask_blend_ps(invalid_mask, p16f_nan, x));
|
||||
}
|
||||
#endif
|
||||
|
||||
// Exponential function. Works by writing "x = m*log(2) + r" where
|
||||
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
|
||||
// "exp(x) = 2^m*exp(r)" where "r" is in the range [-log(2)/2,log(2)/2).
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||
pexp<Packet16f>(const Packet16f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
|
||||
|
||||
// Bounds used below to clamp the input before the range reduction.
_EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
|
||||
|
||||
// 1/log(2), used to compute x/log(2) with a multiplication.
_EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
|
||||
|
||||
// Polynomial coefficients, highest degree first (see the Horner scheme below).
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
|
||||
|
||||
// Clamp x.
|
||||
Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo);
|
||||
|
||||
// Express exp(x) as exp(m*ln(2) + r), start by extracting
|
||||
// m = floor(x/ln(2) + 0.5).
|
||||
Packet16f m = _mm512_floor_ps(pmadd(x, p16f_cephes_LOG2EF, p16f_half));
|
||||
|
||||
// Get r = x - m*ln(2). Note that we can do this without losing more than one
|
||||
// ulp precision due to the FMA instruction.
|
||||
_EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
|
||||
Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
|
||||
Packet16f r2 = pmul(r, r);
|
||||
|
||||
// TODO(gonnet): Split into odd/even polynomials and try to exploit
|
||||
// instruction-level parallelism.
|
||||
// Horner evaluation of p(r) with coefficients p0..p5; the last two steps
// form y = p(r)*r^2 + r + 1, the approximation of exp(r).
Packet16f y = p16f_cephes_exp_p0;
|
||||
y = pmadd(y, r, p16f_cephes_exp_p1);
|
||||
y = pmadd(y, r, p16f_cephes_exp_p2);
|
||||
y = pmadd(y, r, p16f_cephes_exp_p3);
|
||||
y = pmadd(y, r, p16f_cephes_exp_p4);
|
||||
y = pmadd(y, r, p16f_cephes_exp_p5);
|
||||
y = pmadd(y, r2, r);
|
||||
y = padd(y, p16f_1);
|
||||
|
||||
// Build emm0 = 2^m.
|
||||
// (m + 127) is converted to an integer and shifted left by 23 bits; the
// result is reinterpreted as a float in the return statement.
Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127));
|
||||
emm0 = _mm512_slli_epi32(emm0, 23);
|
||||
|
||||
// Return 2^m * exp(r).
|
||||
// NOTE(review): the result is additionally combined with the unclamped
// input through pmax; the reason is not visible here — confirm intent.
return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x);
|
||||
}
|
||||
|
||||
/*template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
|
||||
pexp<Packet8d>(const Packet8d& _x) {
|
||||
Packet8d x = _x;
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(1, 1.0);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(2, 2.0);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(exp_hi, 709.437);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(exp_lo, -709.436139303);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_LOG2EF, 1.4426950408889634073599);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p0, 1.26177193074810590878e-4);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p1, 3.02994407707441961300e-2);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p2, 9.99999999999999999910e-1);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q0, 3.00198505138664455042e-6);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q1, 2.52448340349684104192e-3);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q2, 2.27265548208155028766e-1);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_C1, 0.693145751953125);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
|
||||
// clamp x
|
||||
x = pmax(pmin(x, p8d_exp_hi), p8d_exp_lo);
|
||||
|
||||
// Express exp(x) as exp(g + n*log(2)).
|
||||
const Packet8d n =
|
||||
_mm512_mul_round_pd(p8d_cephes_LOG2EF, x, _MM_FROUND_TO_NEAREST_INT);
|
||||
|
||||
// Get the remainder modulo log(2), i.e. the "g" described above. Subtract
|
||||
// n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
|
||||
// digits right.
|
||||
const Packet8d nC1 = pmul(n, p8d_cephes_exp_C1);
|
||||
const Packet8d nC2 = pmul(n, p8d_cephes_exp_C2);
|
||||
x = psub(x, nC1);
|
||||
x = psub(x, nC2);
|
||||
|
||||
const Packet8d x2 = pmul(x, x);
|
||||
|
||||
// Evaluate the numerator polynomial of the rational interpolant.
|
||||
Packet8d px = p8d_cephes_exp_p0;
|
||||
px = pmadd(px, x2, p8d_cephes_exp_p1);
|
||||
px = pmadd(px, x2, p8d_cephes_exp_p2);
|
||||
px = pmul(px, x);
|
||||
|
||||
// Evaluate the denominator polynomial of the rational interpolant.
|
||||
Packet8d qx = p8d_cephes_exp_q0;
|
||||
qx = pmadd(qx, x2, p8d_cephes_exp_q1);
|
||||
qx = pmadd(qx, x2, p8d_cephes_exp_q2);
|
||||
qx = pmadd(qx, x2, p8d_cephes_exp_q3);
|
||||
|
||||
// I don't really get this bit, copied from the SSE2 routines, so...
|
||||
// TODO(gonnet): Figure out what is going on here, perhaps find a better
|
||||
// rational interpolant?
|
||||
x = _mm512_div_pd(px, psub(qx, px));
|
||||
x = pmadd(p8d_2, x, p8d_1);
|
||||
|
||||
// Build e=2^n.
|
||||
const Packet8d e = _mm512_castsi512_pd(_mm512_slli_epi64(
|
||||
_mm512_add_epi64(_mm512_cvtpd_epi64(n), _mm512_set1_epi64(1023)), 52));
|
||||
|
||||
// Construct the result 2^n * exp(g) = e * x. The max is used to catch
|
||||
// non-finite values in the input.
|
||||
return pmax(pmul(x, e), _x);
|
||||
}*/
|
||||
|
||||
// Functions for sqrt.
|
||||
// The EIGEN_FAST_MATH version uses the _mm512_rsqrt14 approximation and one step
|
||||
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
|
||||
// exact solution. The main advantage of this approach is not just speed, but
|
||||
// also the fact that it can be inlined and pipelined with other computations,
|
||||
// further reducing its effective latency.
|
||||
#if EIGEN_FAST_MATH
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||
psqrt<Packet16f>(const Packet16f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
|
||||
|
||||
Packet16f neg_half = pmul(_x, p16f_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
__mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ);
|
||||
Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_rsqrt14_ps(_x),
|
||||
_mm512_setzero_ps());
|
||||
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
|
||||
|
||||
// Multiply the original _x by it's reciprocal square root to extract the
|
||||
// square root.
|
||||
return pmul(_x, x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
|
||||
psqrt<Packet8d>(const Packet8d& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
|
||||
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
|
||||
|
||||
Packet8d neg_half = pmul(_x, p8d_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
__mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ);
|
||||
Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_rsqrt14_pd(_x),
|
||||
_mm512_setzero_pd());
|
||||
|
||||
// Do a first step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
|
||||
|
||||
// Do a second step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
|
||||
|
||||
// Multiply the original _x by it's reciprocal square root to extract the
|
||||
// square root.
|
||||
return pmul(_x, x);
|
||||
}
|
||||
#else
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16f psqrt<Packet16f>(const Packet16f& x) {
|
||||
return _mm512_sqrt_ps(x);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8d psqrt<Packet8d>(const Packet8d& x) {
|
||||
return _mm512_sqrt_pd(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Functions for rsqrt.
|
||||
// Almost identical to the sqrt routine, just leave out the last multiplication
|
||||
// and fill in NaN/Inf where needed. Note that this function only exists as an
|
||||
// iterative version for doubles since there is no instruction for directly
|
||||
// computing the reciprocal square root in AVX-512.
|
||||
#ifdef EIGEN_FAST_MATH
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||
prsqrt<Packet16f>(const Packet16f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
|
||||
|
||||
Packet16f neg_half = pmul(_x, p16f_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
__mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ);
|
||||
Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(),
|
||||
_mm512_rsqrt14_ps(_x));
|
||||
|
||||
// Fill in NaNs and Infs for the negative/zero entries.
|
||||
__mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ);
|
||||
Packet16f infs_and_nans = _mm512_mask_blend_ps(
|
||||
neg_mask, p16f_nan,
|
||||
_mm512_mask_blend_ps(le_zero_mask, p16f_inf, _mm512_setzero_ps()));
|
||||
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
|
||||
|
||||
// Insert NaNs and Infs in all the right places.
|
||||
return _mm512_mask_blend_ps(le_zero_mask, infs_and_nans, x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
|
||||
prsqrt<Packet8d>(const Packet8d& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);
|
||||
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(nan, 0x7ff1000000000000LL);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
|
||||
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
|
||||
|
||||
Packet8d neg_half = pmul(_x, p8d_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
__mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ);
|
||||
Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(),
|
||||
_mm512_rsqrt14_pd(_x));
|
||||
|
||||
// Fill in NaNs and Infs for the negative/zero entries.
|
||||
__mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ);
|
||||
Packet8d infs_and_nans = _mm512_mask_blend_pd(
|
||||
neg_mask, p8d_nan,
|
||||
_mm512_mask_blend_pd(le_zero_mask, p8d_inf, _mm512_setzero_pd()));
|
||||
|
||||
// Do a first step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
|
||||
|
||||
// Do a second step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
|
||||
|
||||
// Insert NaNs and Infs in all the right places.
|
||||
return _mm512_mask_blend_pd(le_zero_mask, infs_and_nans, x);
|
||||
}
|
||||
#else
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
|
||||
return _mm512_rsqrt28_ps(x);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
|
||||
1316
Eigen/src/Core/arch/AVX512/PacketMath.h
Normal file
1316
Eigen/src/Core/arch/AVX512/PacketMath.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,6 +0,0 @@
|
||||
FILE(GLOB Eigen_Core_arch_AltiVec_SRCS "*.h")
|
||||
|
||||
INSTALL(FILES
|
||||
${Eigen_Core_arch_AltiVec_SRCS}
|
||||
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AltiVec COMPONENT Devel
|
||||
)
|
||||
@@ -2,6 +2,7 @@
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2010-2016 Konstantinos Margaritis <markos@freevec.org>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
@@ -14,19 +15,21 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
||||
#ifdef _BIG_ENDIAN
|
||||
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
||||
#ifdef __VSX__
|
||||
#if defined(_BIG_ENDIAN)
|
||||
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
#else
|
||||
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//---------- float ----------
|
||||
struct Packet2cf
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet2cf() : v(p4f_ZERO) {}
|
||||
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
||||
Packet4f v;
|
||||
};
|
||||
@@ -39,6 +42,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 2,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
@@ -49,6 +53,9 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
#ifdef __VSX__
|
||||
HasBlend = 1,
|
||||
#endif
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
@@ -58,7 +65,6 @@ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type;
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
Packet2cf res;
|
||||
/* On AltiVec we cannot load 64-bit registers, so we have to take care of alignment */
|
||||
if((ptrdiff_t(&from) % 16) == 0)
|
||||
res.v = pload<Packet4f>((const float *)&from);
|
||||
else
|
||||
@@ -67,26 +73,32 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { return Packet2cf(pload<Packet4f>((const float *) from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { return Packet2cf(ploadu<Packet4f>((const float*) from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstore((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstoreu((float*)to, from.v); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||
af[0] = from[0*stride];
|
||||
af[1] = from[1*stride];
|
||||
return Packet2cf(vec_ld(0, (const float*)af));
|
||||
return pload<Packet2cf>(af);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||
vec_st(from.v, 0, (float*)af);
|
||||
pstore<std::complex<float> >((std::complex<float> *) af, from);
|
||||
to[0*stride] = af[0];
|
||||
to[1*stride] = af[1];
|
||||
}
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_add(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_sub(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v + b.v); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v - b.v); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf((Packet4f)vec_xor((Packet4ui)a.v, p4ui_CONJ_XOR)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
@@ -100,30 +112,19 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con
|
||||
v1 = vec_madd(v1, b.v, p4f_ZERO);
|
||||
// multiply a_im * b and get the conjugate result
|
||||
v2 = vec_madd(v2, b.v, p4f_ZERO);
|
||||
v2 = (Packet4f) vec_xor((Packet4ui)v2, p4ui_CONJ_XOR);
|
||||
v2 = reinterpret_cast<Packet4f>(pxor(v2, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR)));
|
||||
// permute back to a proper order
|
||||
v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
|
||||
|
||||
return Packet2cf(vec_add(v1, v2));
|
||||
return Packet2cf(padd<Packet4f>(v1, v2));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_or(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_xor(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v, vec_nor(b.v,b.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v, b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v, b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v, b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v, b.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)
|
||||
{
|
||||
return pset1<Packet2cf>(*from);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { vec_dstt((float *)addr, DST_CTRL(2,2,32), DST_CHAN); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_PPC_PREFETCH(addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
@@ -143,23 +144,23 @@ template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
Packet4f b;
|
||||
b = (Packet4f) vec_sld(a.v, a.v, 8);
|
||||
b = padd(a.v, b);
|
||||
return pfirst(Packet2cf(b));
|
||||
b = vec_sld(a.v, a.v, 8);
|
||||
b = padd<Packet4f>(a.v, b);
|
||||
return pfirst<Packet2cf>(Packet2cf(b));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
|
||||
{
|
||||
Packet4f b1, b2;
|
||||
#ifdef _BIG_ENDIAN
|
||||
b1 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8);
|
||||
b2 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8);
|
||||
b1 = vec_sld(vecs[0].v, vecs[1].v, 8);
|
||||
b2 = vec_sld(vecs[1].v, vecs[0].v, 8);
|
||||
#else
|
||||
b1 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8);
|
||||
b2 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8);
|
||||
b1 = vec_sld(vecs[1].v, vecs[0].v, 8);
|
||||
b2 = vec_sld(vecs[0].v, vecs[1].v, 8);
|
||||
#endif
|
||||
b2 = (Packet4f) vec_sld(b2, b2, 8);
|
||||
b2 = padd(b1, b2);
|
||||
b2 = vec_sld(b2, b2, 8);
|
||||
b2 = padd<Packet4f>(b1, b2);
|
||||
|
||||
return Packet2cf(b2);
|
||||
}
|
||||
@@ -168,10 +169,10 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
|
||||
{
|
||||
Packet4f b;
|
||||
Packet2cf prod;
|
||||
b = (Packet4f) vec_sld(a.v, a.v, 8);
|
||||
prod = pmul(a, Packet2cf(b));
|
||||
b = vec_sld(a.v, a.v, 8);
|
||||
prod = pmul<Packet2cf>(a, Packet2cf(b));
|
||||
|
||||
return pfirst(prod);
|
||||
return pfirst<Packet2cf>(prod);
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
@@ -223,12 +224,30 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4f, Packet2cf, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
|
||||
{ return Packet2cf(internal::pmul<Packet4f>(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet4f, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
|
||||
{ return Packet2cf(internal::pmul<Packet4f>(x.v, y)); }
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
// TODO optimize it for AltiVec
|
||||
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
|
||||
Packet4f s = vec_madd(b.v, b.v, p4f_ZERO);
|
||||
return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX32_REV))));
|
||||
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a, b);
|
||||
Packet4f s = pmul<Packet4f>(b.v, b.v);
|
||||
return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
|
||||
@@ -243,6 +262,14 @@ EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
|
||||
kernel.packet[0].v = tmp;
|
||||
}
|
||||
|
||||
#ifdef __VSX__
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
|
||||
Packet2cf result;
|
||||
result.v = reinterpret_cast<Packet4f>(pblend<Packet2d>(ifPacket, reinterpret_cast<Packet2d>(thenPacket.v), reinterpret_cast<Packet2d>(elsePacket.v)));
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
//---------- double ----------
|
||||
#ifdef __VSX__
|
||||
struct Packet1cd
|
||||
@@ -277,10 +304,10 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstore((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstoreu((double*)to, from.v); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
|
||||
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
|
||||
@@ -300,10 +327,10 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1c
|
||||
to[1*stride] = af[1];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_add(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_sub(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR2))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
@@ -317,23 +344,20 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
|
||||
v1 = vec_madd(a_re, b.v, p2d_ZERO);
|
||||
// multiply a_im * b and get the conjugate result
|
||||
v2 = vec_madd(a_im, b.v, p2d_ZERO);
|
||||
v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8);
|
||||
v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1);
|
||||
v2 = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v2), reinterpret_cast<Packet4ui>(v2), 8));
|
||||
v2 = pxor(v2, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR1));
|
||||
|
||||
return Packet1cd(vec_add(v1, v2));
|
||||
return Packet1cd(padd<Packet2d>(v1, v2));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(por(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pxor(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pandnot(a.v, b.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)
|
||||
{
|
||||
return pset1<Packet1cd>(*from);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { vec_dstt((long *)addr, DST_CTRL(2,2,32), DST_CHAN); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_PPC_PREFETCH(addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
@@ -345,20 +369,10 @@ template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Pac
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
return pfirst(a);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
|
||||
{
|
||||
return vecs[0];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
return pfirst(a);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet1cd>
|
||||
@@ -402,13 +416,30 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
|
||||
return pconj(internal::pmul(a, b));
|
||||
}
|
||||
};
|
||||
template<> struct conj_helper<Packet2d, Packet1cd, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
|
||||
{ return Packet1cd(internal::pmul<Packet2d>(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet2d, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
|
||||
{ return Packet1cd(internal::pmul<Packet2d>(x.v, y)); }
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
// TODO optimize it for AltiVec
|
||||
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
|
||||
Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
|
||||
return Packet1cd(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_REVERSE64))));
|
||||
Packet2d s = pmul<Packet2d>(b.v, b.v);
|
||||
return Packet1cd(pdiv(res.v, padd<Packet2d>(s, vec_perm(s, s, p16uc_REVERSE64))));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
//
|
||||
// Copyright (C) 2007 Julien Pommier
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
@@ -19,38 +20,81 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4i(23, 23);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
|
||||
|
||||
/* the smallest non denormalized float number */
|
||||
static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000); // -1.f/0.f
|
||||
static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan, 0xffffffff);
|
||||
|
||||
/* natural logarithm computed for 4 simultaneous float
|
||||
return NaN for x <= 0
|
||||
*/
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
|
||||
|
||||
#ifdef __VSX__
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
|
||||
#ifdef __POWER8_VECTOR__
|
||||
static Packet2l p2l_1023 = { 1023, 1023 };
|
||||
static Packet2ul p2ul_52 = { 52, 52 };
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f plog<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
Packet4f x = _x;
|
||||
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(23, 23);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
|
||||
|
||||
/* the smallest non denormalized float number */
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000); // -1.f/0.f
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan, 0xffffffff);
|
||||
|
||||
/* natural logarithm computed for 4 simultaneous float
|
||||
return NaN for x <= 0
|
||||
*/
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
|
||||
|
||||
|
||||
Packet4i emm0;
|
||||
|
||||
@@ -112,36 +156,17 @@ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f pexp<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
Packet4f x = _x;
|
||||
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(23, 23);
|
||||
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
|
||||
|
||||
Packet4f tmp, fx;
|
||||
Packet4i emm0;
|
||||
|
||||
// clamp x
|
||||
x = vec_max(vec_min(x, p4f_exp_hi), p4f_exp_lo);
|
||||
x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
|
||||
|
||||
/* express exp(x) as exp(g + n*log(2)) */
|
||||
// express exp(x) as exp(g + n*log(2))
|
||||
fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
|
||||
|
||||
fx = vec_floor(fx);
|
||||
fx = pfloor(fx);
|
||||
|
||||
tmp = pmul(fx, p4f_cephes_exp_C1);
|
||||
Packet4f z = pmul(fx, p4f_cephes_exp_C2);
|
||||
@@ -171,14 +196,44 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
|
||||
isnumber_mask);
|
||||
}
|
||||
|
||||
#ifndef EIGEN_COMP_CLANG
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f prsqrt<Packet4f>(const Packet4f& x)
|
||||
{
|
||||
return vec_rsqrt(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __VSX__
|
||||
#ifndef EIGEN_COMP_CLANG
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d prsqrt<Packet2d>(const Packet2d& x)
|
||||
{
|
||||
return vec_rsqrt(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f psqrt<Packet4f>(const Packet4f& x)
|
||||
{
|
||||
return vec_sqrt(x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d psqrt<Packet2d>(const Packet2d& x)
|
||||
{
|
||||
return vec_sqrt(x);
|
||||
}
|
||||
|
||||
// VSX support varies between different compilers and even different
|
||||
// versions of the same compiler. For gcc version >= 4.9.3, we can use
|
||||
// vec_cts to efficiently convert Packet2d to Packet2l. Otherwise, use
|
||||
// a slow version that works with older compilers.
|
||||
// Update: apparently vec_cts/vec_ctf intrinsics for 64-bit doubles
|
||||
// are buggy, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70963
|
||||
static inline Packet2l ConvertToPacket2l(const Packet2d& x) {
|
||||
#if EIGEN_GNUC_AT_LEAST(5, 0) || \
|
||||
(EIGEN_GNUC_AT(4, 9) && __GNUC_PATCHLEVEL__ >= 3)
|
||||
#if EIGEN_GNUC_AT_LEAST(5, 4) || \
|
||||
(EIGEN_GNUC_AT(6, 1) && __GNUC_PATCHLEVEL__ >= 1)
|
||||
return vec_cts(x, 0); // TODO: check clang version.
|
||||
#else
|
||||
double tmp[2];
|
||||
@@ -194,36 +249,16 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
{
|
||||
Packet2d x = _x;
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
|
||||
Packet2d tmp, fx;
|
||||
Packet2l emm0;
|
||||
|
||||
// clamp x
|
||||
x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
|
||||
/* express exp(x) as exp(g + n*log(2)) */
|
||||
fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
|
||||
|
||||
fx = vec_floor(fx);
|
||||
/* express exp(x) as exp(g + n*log(2)) */
|
||||
fx = pmadd(x, p2d_cephes_LOG2EF, p2d_half);
|
||||
|
||||
fx = pfloor(fx);
|
||||
|
||||
tmp = pmul(fx, p2d_cephes_exp_C1);
|
||||
Packet2d z = pmul(fx, p2d_cephes_exp_C2);
|
||||
@@ -249,9 +284,6 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
emm0 = ConvertToPacket2l(fx);
|
||||
|
||||
#ifdef __POWER8_VECTOR__
|
||||
static const Packet2l p2l_1023 = { 1023, 1023 };
|
||||
static const Packet2ul p2ul_52 = { 52, 52 };
|
||||
|
||||
emm0 = vec_add(emm0, p2l_1023);
|
||||
emm0 = vec_sl(emm0, p2ul_52);
|
||||
#else
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2014 Konstantinos Margaritis <markos@freevec.org>
|
||||
// Copyright (C) 2008-2016 Konstantinos Margaritis <markos@freevec.org>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
@@ -42,7 +42,7 @@ typedef __vector unsigned char Packet16uc;
|
||||
// and it doesn't really work to declare them global, so we define macros instead
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
|
||||
Packet4f p4f_##NAME = (Packet4f) vec_splat_s32(X)
|
||||
Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(vec_splat_s32(X))
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
|
||||
Packet4i p4i_##NAME = vec_splat_s32(X)
|
||||
@@ -69,13 +69,13 @@ typedef __vector unsigned char Packet16uc;
|
||||
// These constants are endian-agnostic
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
|
||||
#ifndef __VSX__
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1}
|
||||
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}
|
||||
#endif
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}
|
||||
static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000}
|
||||
static Packet4f p4f_MZERO = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000}
|
||||
#ifndef __VSX__
|
||||
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}
|
||||
#endif
|
||||
|
||||
static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
|
||||
static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
|
||||
@@ -95,8 +95,10 @@ static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
|
||||
// Handle endianness properly while loading constants
|
||||
// Define global static constants:
|
||||
#ifdef _BIG_ENDIAN
|
||||
static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
|
||||
static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
|
||||
#ifdef __VSX__
|
||||
static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
|
||||
#endif
|
||||
static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
|
||||
static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
|
||||
static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
|
||||
@@ -110,8 +112,8 @@ static Packet16uc p16uc_HALF64_0_16 = vec_sld(vec_splat((Packet16uc) vec_abs(p4i
|
||||
|
||||
static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
|
||||
static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
|
||||
static Packet16uc p16uc_TRANSPOSE64_HI = vec_add(p16uc_PSET64_HI, p16uc_HALF64_0_16); //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
|
||||
static Packet16uc p16uc_TRANSPOSE64_LO = vec_add(p16uc_PSET64_LO, p16uc_HALF64_0_16); //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};
|
||||
static Packet16uc p16uc_TRANSPOSE64_HI = p16uc_PSET64_HI + p16uc_HALF64_0_16; //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
|
||||
static Packet16uc p16uc_TRANSPOSE64_LO = p16uc_PSET64_LO + p16uc_HALF64_0_16; //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};
|
||||
|
||||
static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8); //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
|
||||
|
||||
@@ -121,6 +123,12 @@ static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8
|
||||
static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_PSET64_HI, p16uc_PSET64_LO, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
|
||||
#endif // _BIG_ENDIAN
|
||||
|
||||
#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
|
||||
#define EIGEN_PPC_PREFETCH(ADDR) __builtin_prefetch(ADDR);
|
||||
#else
|
||||
#define EIGEN_PPC_PREFETCH(ADDR) asm( " dcbt [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
|
||||
#endif
|
||||
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet4f type;
|
||||
@@ -129,15 +137,35 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
HasHalfPacket=0,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
// FIXME check the Has*
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasMin = 1,
|
||||
HasMax = 1,
|
||||
HasAbs = 1,
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 1,
|
||||
HasLog = 0,
|
||||
HasExp = 1,
|
||||
HasSqrt = 0
|
||||
#ifdef __VSX__
|
||||
HasSqrt = 1,
|
||||
#if !EIGEN_COMP_CLANG
|
||||
HasRsqrt = 1,
|
||||
#else
|
||||
HasRsqrt = 0,
|
||||
#endif
|
||||
#else
|
||||
HasSqrt = 0,
|
||||
HasRsqrt = 0,
|
||||
#endif
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1,
|
||||
HasNegate = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
template<> struct packet_traits<int> : default_packet_traits
|
||||
@@ -145,10 +173,16 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
typedef Packet4i type;
|
||||
typedef Packet4i half;
|
||||
enum {
|
||||
// FIXME check the Has*
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4
|
||||
size = 4,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 0,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
|
||||
@@ -200,41 +234,56 @@ inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
|
||||
s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
|
||||
return s;
|
||||
}
|
||||
/*
|
||||
inline std::ostream & operator <<(std::ostream & s, const Packetbi & v)
|
||||
{
|
||||
union {
|
||||
Packet4bi v;
|
||||
unsigned int n[4];
|
||||
} vt;
|
||||
vt.v = v;
|
||||
s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
|
||||
return s;
|
||||
}*/
|
||||
|
||||
|
||||
// Need to define them first or we get specialization after instantiation errors
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
|
||||
{
|
||||
EIGEN_DEBUG_ALIGNED_LOAD
|
||||
#ifdef __VSX__
|
||||
return vec_vsx_ld(0, from);
|
||||
#else
|
||||
return vec_ld(0, from);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
|
||||
{
|
||||
EIGEN_DEBUG_ALIGNED_LOAD
|
||||
#ifdef __VSX__
|
||||
return vec_vsx_ld(0, from);
|
||||
#else
|
||||
return vec_ld(0, from);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
|
||||
{
|
||||
EIGEN_DEBUG_ALIGNED_STORE
|
||||
#ifdef __VSX__
|
||||
vec_vsx_st(from, 0, to);
|
||||
#else
|
||||
vec_st(from, 0, to);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
|
||||
{
|
||||
EIGEN_DEBUG_ALIGNED_STORE
|
||||
#ifdef __VSX__
|
||||
vec_vsx_st(from, 0, to);
|
||||
#else
|
||||
vec_st(from, 0, to);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
|
||||
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
|
||||
float EIGEN_ALIGN16 af[4];
|
||||
af[0] = from;
|
||||
Packet4f vc = pload<Packet4f>(af);
|
||||
vc = vec_splat(vc, 0);
|
||||
return vc;
|
||||
Packet4f v = {from, from, from, from};
|
||||
return v;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
|
||||
int EIGEN_ALIGN16 ai[4];
|
||||
ai[0] = from;
|
||||
Packet4i vc = pload<Packet4i>(ai);
|
||||
vc = vec_splat(vc, 0);
|
||||
return vc;
|
||||
Packet4i v = {from, from, from, from};
|
||||
return v;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet4f>(const float *a,
|
||||
@@ -294,58 +343,24 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const
|
||||
to[3*stride] = ai[3];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return vec_add(pset1<Packet4f>(a), p4f_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return vec_add(pset1<Packet4i>(a), p4i_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return pset1<Packet4f>(a) + p4f_COUNTDOWN; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return pset1<Packet4i>(a) + p4i_COUNTDOWN; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return a + b; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return a + b; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_sub(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_sub(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return a - b; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return a - b; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return psub<Packet4f>(p4f_ZERO, a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return psub<Packet4i>(p4i_ZERO, a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return p4f_ZERO - a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return p4i_ZERO - a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b,p4f_ZERO); }
|
||||
/* Commented out: it's actually slower than processing it scalar
|
||||
*
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
|
||||
{
|
||||
// Detailed in: http://freevec.org/content/32bit_signed_integer_multiplication_altivec
|
||||
//Set up constants, variables
|
||||
Packet4i a1, b1, bswap, low_prod, high_prod, prod, prod_, v1sel;
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b, p4f_MZERO); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return a * b; }
|
||||
|
||||
// Get the absolute values
|
||||
a1 = vec_abs(a);
|
||||
b1 = vec_abs(b);
|
||||
|
||||
// Get the signs using xor
|
||||
Packet4bi sgn = (Packet4bi) vec_cmplt(vec_xor(a, b), p4i_ZERO);
|
||||
|
||||
// Do the multiplication for the asbolute values.
|
||||
bswap = (Packet4i) vec_rl((Packet4ui) b1, (Packet4ui) p4i_MINUS16 );
|
||||
low_prod = vec_mulo((Packet8i) a1, (Packet8i)b1);
|
||||
high_prod = vec_msum((Packet8i) a1, (Packet8i) bswap, p4i_ZERO);
|
||||
high_prod = (Packet4i) vec_sl((Packet4ui) high_prod, (Packet4ui) p4i_MINUS16);
|
||||
prod = vec_add( low_prod, high_prod );
|
||||
|
||||
// NOR the product and select only the negative elements according to the sign mask
|
||||
prod_ = vec_nor(prod, prod);
|
||||
prod_ = vec_sel(p4i_ZERO, prod_, sgn);
|
||||
|
||||
// Add 1 to the result to get the negative numbers
|
||||
v1sel = vec_sel(p4i_ZERO, p4i_ONE, sgn);
|
||||
prod_ = vec_add(prod_, v1sel);
|
||||
|
||||
// Merge the results back to the final vector.
|
||||
prod = vec_sel(prod, prod_, sgn);
|
||||
|
||||
return prod;
|
||||
}
|
||||
*/
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
#ifndef __VSX__ // VSX actually provides a div instruction
|
||||
@@ -358,7 +373,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const
|
||||
t = vec_nmsub(y_0, b, p4f_ONE);
|
||||
y_1 = vec_madd(y_0, t, y_0);
|
||||
|
||||
return vec_madd(a, y_1, p4f_ZERO);
|
||||
return vec_madd(a, y_1, p4f_MZERO);
|
||||
#else
|
||||
return vec_div(a, b);
|
||||
#endif
|
||||
@@ -370,8 +385,8 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
|
||||
}
|
||||
|
||||
// for some weird raisons, it has to be overloaded for packet of integers
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a, b, c); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
|
||||
@@ -391,6 +406,10 @@ template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return vec_round(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return vec_ceil(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); }
|
||||
|
||||
#ifdef _BIG_ENDIAN
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
|
||||
{
|
||||
@@ -418,12 +437,12 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
|
||||
// We also need ot redefine little endian loading of Packet4i/Packet4f using VSX
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
|
||||
{
|
||||
EIGEN_DEBUG_ALIGNED_LOAD
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD
|
||||
return (Packet4i) vec_vsx_ld((long)from & 15, (const int*) _EIGEN_ALIGNED_PTR(from));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
|
||||
{
|
||||
EIGEN_DEBUG_ALIGNED_LOAD
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD
|
||||
return (Packet4f) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from));
|
||||
}
|
||||
#endif
|
||||
@@ -494,16 +513,19 @@ template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f&
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef __VSX__
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
|
||||
#endif
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_PPC_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_PPC_PREFETCH(addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; }
|
||||
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE32); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE32); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
|
||||
{
|
||||
return reinterpret_cast<Packet4f>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
|
||||
{
|
||||
return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
|
||||
@@ -511,10 +533,10 @@ template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f b, sum;
|
||||
b = (Packet4f) vec_sld(a, a, 8);
|
||||
sum = vec_add(a, b);
|
||||
b = (Packet4f) vec_sld(sum, sum, 4);
|
||||
sum = vec_add(sum, b);
|
||||
b = vec_sld(a, a, 8);
|
||||
sum = a + b;
|
||||
b = vec_sld(sum, sum, 4);
|
||||
sum += b;
|
||||
return pfirst(sum);
|
||||
}
|
||||
|
||||
@@ -537,11 +559,11 @@ template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
|
||||
|
||||
// Now do the summation:
|
||||
// Lines 0+1
|
||||
sum[0] = vec_add(sum[0], sum[1]);
|
||||
sum[0] = sum[0] + sum[1];
|
||||
// Lines 2+3
|
||||
sum[1] = vec_add(sum[2], sum[3]);
|
||||
sum[1] = sum[2] + sum[3];
|
||||
// Add the results
|
||||
sum[0] = vec_add(sum[0], sum[1]);
|
||||
sum[0] = sum[0] + sum[1];
|
||||
|
||||
return sum[0];
|
||||
}
|
||||
@@ -577,11 +599,11 @@ template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
|
||||
|
||||
// Now do the summation:
|
||||
// Lines 0+1
|
||||
sum[0] = vec_add(sum[0], sum[1]);
|
||||
sum[0] = sum[0] + sum[1];
|
||||
// Lines 2+3
|
||||
sum[1] = vec_add(sum[2], sum[3]);
|
||||
sum[1] = sum[2] + sum[3];
|
||||
// Add the results
|
||||
sum[0] = vec_add(sum[0], sum[1]);
|
||||
sum[0] = sum[0] + sum[1];
|
||||
|
||||
return sum[0];
|
||||
}
|
||||
@@ -591,8 +613,8 @@ template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
|
||||
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f prod;
|
||||
prod = pmul(a, (Packet4f)vec_sld(a, a, 8));
|
||||
return pfirst(pmul(prod, (Packet4f)vec_sld(prod, prod, 4)));
|
||||
prod = pmul(a, vec_sld(a, a, 8));
|
||||
return pfirst(pmul(prod, vec_sld(prod, prod, 4)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
|
||||
@@ -716,33 +738,52 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) {
|
||||
kernel.packet[3] = vec_mergel(t1, t3);
|
||||
}
|
||||
|
||||
// Lane-wise select for Packet4i: the four selector entries are compared with 1
// and the comparison result is used as the vec_sel mask between elsePacket and
// thenPacket.
template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket)
{
  Packet4ui lanes = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
  const Packet4ui ones = reinterpret_cast<Packet4ui>(p4i_ONE);
  // Lanes whose selector equals 1 are taken from thenPacket.
  const Packet4ui take_then = reinterpret_cast<Packet4ui>(vec_cmpeq(lanes, ones));
  return vec_sel(elsePacket, thenPacket, take_then);
}
|
||||
|
||||
// Lane-wise select for Packet4f: the four selector entries are compared with 1
// and the comparison result is used as the vec_sel mask between elsePacket and
// thenPacket.
template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket)
{
  Packet4ui lanes = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
  const Packet4ui ones = reinterpret_cast<Packet4ui>(p4i_ONE);
  // Lanes whose selector equals 1 are taken from thenPacket.
  const Packet4ui take_then = reinterpret_cast<Packet4ui>(vec_cmpeq(lanes, ones));
  return vec_sel(elsePacket, thenPacket, take_then);
}
|
||||
|
||||
|
||||
//---------- double ----------
|
||||
#ifdef __VSX__
|
||||
typedef __vector double Packet2d;
|
||||
typedef __vector unsigned long long Packet2ul;
|
||||
typedef __vector long long Packet2l;
|
||||
|
||||
static Packet2l p2l_ZERO = (Packet2l) p4i_ZERO;
|
||||
static Packet2d p2d_ONE = { 1.0, 1.0 };
|
||||
static Packet2d p2d_ZERO = (Packet2d) p4f_ZERO;
|
||||
static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
|
||||
|
||||
#ifdef _BIG_ENDIAN
|
||||
static Packet2d p2d_COUNTDOWN = (Packet2d) vec_sld((Packet16uc) p2d_ZERO, (Packet16uc) p2d_ONE, 8);
|
||||
#if EIGEN_COMP_CLANG
|
||||
typedef Packet2ul Packet2bl;
|
||||
#else
|
||||
static Packet2d p2d_COUNTDOWN = (Packet2d) vec_sld((Packet16uc) p2d_ONE, (Packet16uc) p2d_ZERO, 8);
|
||||
typedef __vector __bool long Packet2bl;
|
||||
#endif
|
||||
|
||||
static EIGEN_STRONG_INLINE Packet2d vec_splat_dbl(Packet2d& a, int index)
|
||||
static Packet2l p2l_ONE = { 1, 1 };
|
||||
static Packet2l p2l_ZERO = reinterpret_cast<Packet2l>(p4i_ZERO);
|
||||
static Packet2d p2d_ONE = { 1.0, 1.0 };
|
||||
static Packet2d p2d_ZERO = reinterpret_cast<Packet2d>(p4f_ZERO);
|
||||
static Packet2d p2d_MZERO = { -0.0, -0.0 };
|
||||
|
||||
#ifdef _BIG_ENDIAN
|
||||
static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ZERO), reinterpret_cast<Packet4f>(p2d_ONE), 8));
|
||||
#else
|
||||
static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ONE), reinterpret_cast<Packet4f>(p2d_ZERO), 8));
|
||||
#endif
|
||||
|
||||
template<int index> Packet2d vec_splat_dbl(Packet2d& a);
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<0>(Packet2d& a)
|
||||
{
|
||||
switch (index) {
|
||||
case 0:
|
||||
return (Packet2d) vec_perm(a, a, p16uc_PSET64_HI);
|
||||
case 1:
|
||||
return (Packet2d) vec_perm(a, a, p16uc_PSET64_LO);
|
||||
}
|
||||
return a;
|
||||
return reinterpret_cast<Packet2d>(vec_perm(a, a, p16uc_PSET64_HI));
|
||||
}
|
||||
|
||||
// Specialization for index 1: permutes `a` with itself using the
// p16uc_PSET64_LO byte pattern and returns the result as a Packet2d.
template<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<1>(Packet2d& a)
{
  Packet2d splatted = reinterpret_cast<Packet2d>(vec_perm(a, a, p16uc_PSET64_LO));
  return splatted;
}
|
||||
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
@@ -753,16 +794,41 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=2,
|
||||
HasHalfPacket = 0,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasMin = 1,
|
||||
HasMax = 1,
|
||||
HasAbs = 1,
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
HasExp = 1,
|
||||
HasSqrt = 0
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1,
|
||||
HasNegate = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
|
||||
|
||||
inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
|
||||
{
|
||||
union {
|
||||
Packet2l v;
|
||||
int64_t n[2];
|
||||
} vt;
|
||||
vt.v = v;
|
||||
s << vt.n[0] << ", " << vt.n[1];
|
||||
return s;
|
||||
}
|
||||
|
||||
inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
|
||||
{
|
||||
@@ -776,28 +842,43 @@ inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
|
||||
}
|
||||
|
||||
// Need to define them first or we get specialization after instantiation errors
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return (Packet2d) vec_ld(0, (const float *) from); } //FIXME
|
||||
// Aligned load of a Packet2d from `from`. Uses vec_vsx_ld when __VSX__ is
// defined and vec_ld otherwise.
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
#ifndef __VSX__
  return vec_ld(0, from);
#else
  return vec_vsx_ld(0, from);
#endif
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st((Packet4f)from, 0, (float *)to); }
|
||||
// Aligned store of a Packet2d to `to`. Uses vec_vsx_st when __VSX__ is
// defined and vec_st otherwise.
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
{
  EIGEN_DEBUG_ALIGNED_STORE
#ifndef __VSX__
  vec_st(from, 0, to);
#else
  vec_vsx_st(from, 0, to);
#endif
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
|
||||
double EIGEN_ALIGN16 af[2];
|
||||
af[0] = from;
|
||||
Packet2d vc = pload<Packet2d>(af);
|
||||
vc = vec_splat_dbl(vc, 0);
|
||||
return vc;
|
||||
Packet2d v = {from, from};
|
||||
return v;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet2d>(const double *a,
|
||||
Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
|
||||
{
|
||||
a1 = pload<Packet2d>(a);
|
||||
a0 = vec_splat_dbl(a1, 0);
|
||||
a1 = vec_splat_dbl(a1, 1);
|
||||
a0 = vec_splat_dbl<0>(a1);
|
||||
a1 = vec_splat_dbl<1>(a1);
|
||||
a3 = pload<Packet2d>(a+2);
|
||||
a2 = vec_splat_dbl(a3, 0);
|
||||
a3 = vec_splat_dbl(a3, 1);
|
||||
a2 = vec_splat_dbl<0>(a3);
|
||||
a3 = vec_splat_dbl<1>(a3);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
|
||||
{
|
||||
double EIGEN_ALIGN16 af[2];
|
||||
@@ -812,17 +893,18 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to,
|
||||
to[0*stride] = af[0];
|
||||
to[1*stride] = af[1];
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return vec_add(pset1<Packet2d>(a), p2d_COUNTDOWN); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_add(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return pset1<Packet2d>(a) + p2d_COUNTDOWN; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_sub(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return a + b; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return psub<Packet2d>(p2d_ZERO, a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return a - b; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return p2d_ZERO - a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_ZERO); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_MZERO); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_div(a,b); }
|
||||
|
||||
// for some weird reasons, it has to be overloaded for packets of integers
|
||||
@@ -840,17 +922,22 @@ template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
|
||||
|
||||
// Rounding primitives for Packet2d; each forwards to the matching vec_* call.
template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a)
{
  return vec_round(a);
}
template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a)
{
  return vec_ceil(a);
}
template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a)
{
  return vec_floor(a);
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
|
||||
{
|
||||
EIGEN_DEBUG_ALIGNED_LOAD
|
||||
return (Packet2d) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from));
|
||||
return (Packet2d) vec_vsx_ld((long)from & 15, (const double*) _EIGEN_ALIGNED_PTR(from));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
|
||||
{
|
||||
Packet2d p;
|
||||
if((ptrdiff_t(from) % 16) == 0) p = pload<Packet2d>(from);
|
||||
else p = ploadu<Packet2d>(from);
|
||||
return vec_perm(p, p, p16uc_PSET64_HI);
|
||||
return vec_splat_dbl<0>(p);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from)
|
||||
@@ -859,32 +946,34 @@ template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d&
|
||||
vec_vsx_st((Packet4f)from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { vec_dstt((const float *) addr, DST_CTRL(2,2,32), DST_CHAN); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_PPC_PREFETCH(addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return (Packet2d)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE64); }
|
||||
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore<double>(x, a); return x[0]; }
|
||||
|
||||
// Permutes the bytes of `a` with the p16uc_REVERSE64 pattern and returns the
// result as a Packet2d.
template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
{
  const Packet16uc bytes = reinterpret_cast<Packet16uc>(a);
  return reinterpret_cast<Packet2d>(vec_perm(bytes, bytes, p16uc_REVERSE64));
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
Packet2d b, sum;
|
||||
b = (Packet2d) vec_sld((Packet4ui) a, (Packet4ui)a, 8);
|
||||
sum = vec_add(a, b);
|
||||
return pfirst(sum);
|
||||
b = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(a), reinterpret_cast<Packet4f>(a), 8));
|
||||
sum = a + b;
|
||||
return pfirst<Packet2d>(sum);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
{
|
||||
Packet2d v[2], sum;
|
||||
v[0] = vec_add(vecs[0], (Packet2d) vec_sld((Packet4ui) vecs[0], (Packet4ui) vecs[0], 8));
|
||||
v[1] = vec_add(vecs[1], (Packet2d) vec_sld((Packet4ui) vecs[1], (Packet4ui) vecs[1], 8));
|
||||
v[0] = vecs[0] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[0]), reinterpret_cast<Packet4f>(vecs[0]), 8));
|
||||
v[1] = vecs[1] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[1]), reinterpret_cast<Packet4f>(vecs[1]), 8));
|
||||
|
||||
#ifdef _BIG_ENDIAN
|
||||
sum = (Packet2d) vec_sld((Packet4ui) v[0], (Packet4ui) v[1], 8);
|
||||
sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[0]), reinterpret_cast<Packet4f>(v[1]), 8));
|
||||
#else
|
||||
sum = (Packet2d) vec_sld((Packet4ui) v[1], (Packet4ui) v[0], 8);
|
||||
sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[1]), reinterpret_cast<Packet4f>(v[0]), 8));
|
||||
#endif
|
||||
|
||||
return sum;
|
||||
@@ -893,19 +982,19 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
// mul
|
||||
template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
return pfirst(pmul(a, (Packet2d)vec_sld((Packet4ui) a, (Packet4ui) a, 8)));
|
||||
return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
|
||||
}
|
||||
|
||||
// min
|
||||
template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
return pfirst(vec_min(a, (Packet2d) vec_sld((Packet4ui) a, (Packet4ui) a, 8)));
|
||||
return pfirst(pmin(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
|
||||
}
|
||||
|
||||
// max
|
||||
template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
return pfirst(vec_max(a, (Packet2d) vec_sld((Packet4ui) a, (Packet4ui) a, 8)));
|
||||
return pfirst(pmax(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
@@ -915,9 +1004,9 @@ struct palign_impl<Offset,Packet2d>
|
||||
{
|
||||
if (Offset == 1)
|
||||
#ifdef _BIG_ENDIAN
|
||||
first = (Packet2d) vec_sld((Packet4ui) first, (Packet4ui) second, 8);
|
||||
first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(first), reinterpret_cast<Packet4ui>(second), 8));
|
||||
#else
|
||||
first = (Packet2d) vec_sld((Packet4ui) second, (Packet4ui) first, 8);
|
||||
first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(second), reinterpret_cast<Packet4ui>(first), 8));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
@@ -931,6 +1020,11 @@ ptranspose(PacketBlock<Packet2d,2>& kernel) {
|
||||
kernel.packet[1] = t1;
|
||||
}
|
||||
|
||||
// Lane-wise select for Packet2d: the two selector entries are compared with
// p2l_ONE (both sides reinterpreted as Packet2d) and the comparison result is
// used as the vec_sel mask between elsePacket and thenPacket.
template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket)
{
  Packet2l lanes = { ifPacket.select[0], ifPacket.select[1] };
  const Packet2d lanes_as_dbl = reinterpret_cast<Packet2d>(lanes);
  const Packet2d ones_as_dbl = reinterpret_cast<Packet2d>(p2l_ONE);
  Packet2bl take_then = vec_cmpeq(lanes_as_dbl, ones_as_dbl);
  return vec_sel(elsePacket, thenPacket, take_then);
}
|
||||
#endif // __VSX__
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
ADD_SUBDIRECTORY(AltiVec)
|
||||
ADD_SUBDIRECTORY(AVX)
|
||||
ADD_SUBDIRECTORY(CUDA)
|
||||
ADD_SUBDIRECTORY(Default)
|
||||
ADD_SUBDIRECTORY(NEON)
|
||||
ADD_SUBDIRECTORY(SSE)
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
FILE(GLOB Eigen_Core_arch_CUDA_SRCS "*.h")
|
||||
|
||||
INSTALL(FILES
|
||||
${Eigen_Core_arch_CUDA_SRCS}
|
||||
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/CUDA COMPONENT Devel
|
||||
)
|
||||
103
Eigen/src/Core/arch/CUDA/Complex.h
Normal file
103
Eigen/src/Core/arch/CUDA/Complex.h
Normal file
@@ -0,0 +1,103 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_COMPLEX_CUDA_H
|
||||
#define EIGEN_COMPLEX_CUDA_H
|
||||
|
||||
// clang-format off
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
|
||||
|
||||
// Many std::complex methods such as operator+, operator-, operator* and
|
||||
// operator/ are not constexpr. Due to this, clang does not treat them as device
|
||||
// functions and thus Eigen functors making use of these operators fail to
|
||||
// compile. Here, we manually specialize these functors for complex types when
|
||||
// building for CUDA to avoid non-constexpr methods.
|
||||
|
||||
// Sum
|
||||
template<typename T> struct scalar_sum_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
|
||||
typedef typename std::complex<T> result_type;
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||
return std::complex<T>(numext::real(a) + numext::real(b),
|
||||
numext::imag(a) + numext::imag(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T> struct scalar_sum_op<std::complex<T>, std::complex<T> > : scalar_sum_op<const std::complex<T>, const std::complex<T> > {};
|
||||
|
||||
|
||||
// Difference
|
||||
template<typename T> struct scalar_difference_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
|
||||
typedef typename std::complex<T> result_type;
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||
return std::complex<T>(numext::real(a) - numext::real(b),
|
||||
numext::imag(a) - numext::imag(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T> struct scalar_difference_op<std::complex<T>, std::complex<T> > : scalar_difference_op<const std::complex<T>, const std::complex<T> > {};
|
||||
|
||||
|
||||
// Product
|
||||
template<typename T> struct scalar_product_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
|
||||
enum {
|
||||
Vectorizable = packet_traits<std::complex<T>>::HasMul
|
||||
};
|
||||
typedef typename std::complex<T> result_type;
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||
const T a_real = numext::real(a);
|
||||
const T a_imag = numext::imag(a);
|
||||
const T b_real = numext::real(b);
|
||||
const T b_imag = numext::imag(b);
|
||||
return std::complex<T>(a_real * b_real - a_imag * b_imag,
|
||||
a_real * b_imag + a_imag * b_real);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T> > : scalar_product_op<const std::complex<T>, const std::complex<T> > {};
|
||||
|
||||
|
||||
// Quotient
|
||||
template<typename T> struct scalar_quotient_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
|
||||
enum {
|
||||
Vectorizable = packet_traits<std::complex<T>>::HasDiv
|
||||
};
|
||||
typedef typename std::complex<T> result_type;
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||
const T a_real = numext::real(a);
|
||||
const T a_imag = numext::imag(a);
|
||||
const T b_real = numext::real(b);
|
||||
const T b_imag = numext::imag(b);
|
||||
const T norm = T(1) / (b_real * b_real + b_imag * b_imag);
|
||||
return std::complex<T>((a_real * b_real + a_imag * b_imag) * norm,
|
||||
(a_imag * b_real - a_real * b_imag) * norm);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T> struct scalar_quotient_op<std::complex<T>, std::complex<T> > : scalar_quotient_op<const std::complex<T>, const std::complex<T> > {};
|
||||
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_COMPLEX_CUDA_H
|
||||
585
Eigen/src/Core/arch/CUDA/Half.h
Normal file
585
Eigen/src/Core/arch/CUDA/Half.h
Normal file
@@ -0,0 +1,585 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
//
|
||||
// The conversion routines are Copyright (c) Fabian Giesen, 2016.
|
||||
// The original license follows:
|
||||
//
|
||||
// Copyright (c) Fabian Giesen, 2016
|
||||
// All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted.
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
// Standard 16-bit float type, mostly useful for GPUs. Defines a new
|
||||
// type Eigen::half (inheriting from CUDA's __half struct) with
|
||||
// operator overloads such that it behaves basically as an arithmetic
|
||||
// type. It will be quite slow on CPUs (so it is recommended to stay
|
||||
// in fp32 for CPUs, except for simple parameter conversions, I/O
|
||||
// to disk and the like), but fast on GPUs.
|
||||
|
||||
|
||||
#ifndef EIGEN_HALF_CUDA_H
|
||||
#define EIGEN_HALF_CUDA_H
|
||||
|
||||
#if __cplusplus > 199711L
|
||||
#define EIGEN_EXPLICIT_CAST(tgt_type) explicit operator tgt_type()
|
||||
#else
|
||||
#define EIGEN_EXPLICIT_CAST(tgt_type) operator tgt_type()
|
||||
#endif
|
||||
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
struct half;
|
||||
|
||||
namespace half_impl {
|
||||
|
||||
#if !defined(EIGEN_HAS_CUDA_FP16)
|
||||
|
||||
// Make our own __half definition that is similar to CUDA's.
|
||||
struct __half {
|
||||
EIGEN_DEVICE_FUNC __half() {}
|
||||
explicit EIGEN_DEVICE_FUNC __half(unsigned short raw) : x(raw) {}
|
||||
unsigned short x;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x);
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff);
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h);
|
||||
|
||||
struct half_base : public __half {
|
||||
EIGEN_DEVICE_FUNC half_base() {}
|
||||
EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half(h) {}
|
||||
EIGEN_DEVICE_FUNC half_base(const __half& h) : __half(h) {}
|
||||
};
|
||||
|
||||
} // namespace half_impl
|
||||
|
||||
// Class definition.
|
||||
struct half : public half_impl::half_base {
|
||||
#if !defined(EIGEN_HAS_CUDA_FP16)
|
||||
typedef half_impl::__half __half;
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC half() {}
|
||||
|
||||
EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
|
||||
EIGEN_DEVICE_FUNC half(const half& h) : half_impl::half_base(h) {}
|
||||
|
||||
explicit EIGEN_DEVICE_FUNC half(bool b)
|
||||
: half_impl::half_base(half_impl::raw_uint16_to_half(b ? 0x3c00 : 0)) {}
|
||||
template<class T>
|
||||
explicit EIGEN_DEVICE_FUNC half(const T& val)
|
||||
: half_impl::half_base(half_impl::float_to_half_rtne(static_cast<float>(val))) {}
|
||||
explicit EIGEN_DEVICE_FUNC half(float f)
|
||||
: half_impl::half_base(half_impl::float_to_half_rtne(f)) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(bool) const {
|
||||
// +0.0 and -0.0 become false, everything else becomes true.
|
||||
return (x & 0x7fff) != 0;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(signed char) const {
|
||||
return static_cast<signed char>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned char) const {
|
||||
return static_cast<unsigned char>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(short) const {
|
||||
return static_cast<short>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned short) const {
|
||||
return static_cast<unsigned short>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(int) const {
|
||||
return static_cast<int>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned int) const {
|
||||
return static_cast<unsigned int>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long) const {
|
||||
return static_cast<long>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long) const {
|
||||
return static_cast<unsigned long>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long long) const {
|
||||
return static_cast<long long>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const {
|
||||
return static_cast<unsigned long long>(half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const {
|
||||
return half_impl::half_to_float(*this);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const {
|
||||
return static_cast<double>(half_impl::half_to_float(*this));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC half& operator=(const half& other) {
|
||||
x = other.x;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
namespace half_impl {
|
||||
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
|
||||
|
||||
// Intrinsics for native fp16 support. Note that on current hardware,
|
||||
// these are no faster than fp32 arithmetic (you need to use the half2
|
||||
// versions to get the ALU speed increased), but you do save the
|
||||
// conversion steps back and forth.
|
||||
|
||||
__device__ half operator + (const half& a, const half& b) {
|
||||
return __hadd(a, b);
|
||||
}
|
||||
__device__ half operator * (const half& a, const half& b) {
|
||||
return __hmul(a, b);
|
||||
}
|
||||
__device__ half operator - (const half& a, const half& b) {
|
||||
return __hsub(a, b);
|
||||
}
|
||||
__device__ half operator / (const half& a, const half& b) {
|
||||
float num = __half2float(a);
|
||||
float denom = __half2float(b);
|
||||
return __float2half(num / denom);
|
||||
}
|
||||
__device__ half operator - (const half& a) {
|
||||
return __hneg(a);
|
||||
}
|
||||
__device__ half& operator += (half& a, const half& b) {
|
||||
a = a + b;
|
||||
return a;
|
||||
}
|
||||
__device__ half& operator *= (half& a, const half& b) {
|
||||
a = a * b;
|
||||
return a;
|
||||
}
|
||||
__device__ half& operator -= (half& a, const half& b) {
|
||||
a = a - b;
|
||||
return a;
|
||||
}
|
||||
__device__ half& operator /= (half& a, const half& b) {
|
||||
a = a / b;
|
||||
return a;
|
||||
}
|
||||
__device__ bool operator == (const half& a, const half& b) {
|
||||
return __heq(a, b);
|
||||
}
|
||||
__device__ bool operator != (const half& a, const half& b) {
|
||||
return __hne(a, b);
|
||||
}
|
||||
__device__ bool operator < (const half& a, const half& b) {
|
||||
return __hlt(a, b);
|
||||
}
|
||||
__device__ bool operator <= (const half& a, const half& b) {
|
||||
return __hle(a, b);
|
||||
}
|
||||
__device__ bool operator > (const half& a, const half& b) {
|
||||
return __hgt(a, b);
|
||||
}
|
||||
__device__ bool operator >= (const half& a, const half& b) {
|
||||
return __hge(a, b);
|
||||
}
|
||||
|
||||
#else // Emulate support for half floats
|
||||
|
||||
// Definitions for CPUs and older CUDA, mostly working through conversion
|
||||
// to/from fp32.
|
||||
|
||||
// Emulated arithmetic: both operands are widened to float, the operation is
// carried out in float, and the result is converted back to half.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return half(lhs + rhs);
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return half(lhs * rhs);
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return half(lhs - rhs);
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return half(lhs / rhs);
}
// Negation flips bit 15 of the raw representation; no float round trip.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) {
  half negated;
  negated.x = a.x ^ 0x8000;
  return negated;
}
|
||||
// Emulated compound assignment: compute in float, store the half result back
// into `a`, and return `a`.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) {
  const float updated = static_cast<float>(a) + static_cast<float>(b);
  a = half(updated);
  return a;
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a, const half& b) {
  const float updated = static_cast<float>(a) * static_cast<float>(b);
  a = half(updated);
  return a;
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) {
  const float updated = static_cast<float>(a) - static_cast<float>(b);
  a = half(updated);
  return a;
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) {
  const float updated = static_cast<float>(a) / static_cast<float>(b);
  a = half(updated);
  return a;
}
|
||||
// Emulated comparisons: both operands are widened to float and compared there.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
  return static_cast<float>(a) == static_cast<float>(b);
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
  return static_cast<float>(a) != static_cast<float>(b);
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
  return static_cast<float>(a) < static_cast<float>(b);
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) {
  return static_cast<float>(a) <= static_cast<float>(b);
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) {
  return static_cast<float>(a) > static_cast<float>(b);
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) {
  return static_cast<float>(a) >= static_cast<float>(b);
}
|
||||
|
||||
#endif // Emulate support for half floats
|
||||
|
||||
// Division by an index. Do it in full float precision to avoid accuracy
|
||||
// issues in converting the denominator to half.
|
||||
// Divides a half by an integer index. The index is converted straight to
// float (never to half) so the denominator keeps float precision.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) {
  const float numerator = static_cast<float>(a);
  const float denominator = static_cast<float>(b);
  return half(numerator / denominator);
}
|
||||
|
||||
// Conversion routines, including fallbacks for the host or older CUDA.
|
||||
// Note that newer Intel CPUs (Haswell or newer) have vectorized versions of
|
||||
// these in hardware. If we need more performance on older/other CPUs, they are
|
||||
// also possible to vectorize directly.
|
||||
|
||||
// Builds a __half whose raw storage field `x` is set to the given 16-bit
// pattern verbatim; no numeric conversion takes place.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x) {
  __half wrapped;
  wrapped.x = x;
  return wrapped;
}
|
||||
|
||||
// Overlay of a float and an unsigned int, used by the software conversion
// routines to read and modify the bits of a float. `u` must stay the first
// member: callers brace-initialize FP32 constants from integer bit patterns.
union FP32 {
  unsigned int u;  // raw bit pattern
  float f;         // the same storage viewed as a float
};
|
||||
|
||||
// Converts a float to a __half.
//   * CUDA device code (arch >= 300 with EIGEN_HAS_CUDA_FP16): __float2half.
//   * Hosts with EIGEN_HAS_FP16_C: the _cvtss_sh intrinsic with immediate 0
//     (presumably round-to-nearest-even, as the function name says -- confirm
//     against the intrinsic documentation).
//   * Otherwise: a bit-manipulation fallback on the float's raw bits.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
  return __float2half(ff);

#elif defined(EIGEN_HAS_FP16_C)
  __half h;
  h.x = _cvtss_sh(ff, 0);
  return h;

#else
  FP32 in;
  in.f = ff;

  const FP32 f32infty = { 255 << 23 };
  const FP32 f16max = { (127 + 16) << 23 };
  const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };

  __half out;
  out.x = static_cast<unsigned short>(0x0u);

  // Split off the sign; everything below works on the magnitude only.
  const unsigned int sign = in.u & 0x80000000u;
  in.u ^= sign;

  // NOTE all the integer compares in this function can be safely
  // compiled into signed compares since all operands are below
  // 0x80000000. Important if you want fast straight SSE2 code
  // (since there's no unsigned PCMPGTD).

  if (in.u >= f16max.u) {
    // Magnitude at or above the f16max threshold, Inf, or NaN:
    // NaN -> qNaN (0x7e00), everything else -> Inf (0x7c00).
    out.x = (in.u > f32infty.u) ? 0x7e00 : 0x7c00;
  } else if (in.u < (113 << 23)) {
    // Resulting FP16 is subnormal or zero.
    // Use a magic value to align our 10 mantissa bits at the bottom of
    // the float. As long as FP addition is round-to-nearest-even this
    // just works.
    in.f += denorm_magic.f;

    // One integer subtract of the bias later, we have the final bits.
    out.x = static_cast<unsigned short>(in.u - denorm_magic.u);
  } else {
    // Normalized result.
    const unsigned int mant_odd = (in.u >> 13) & 1;  // resulting mantissa is odd

    // Update exponent, rounding bias part 1.
    in.u += (static_cast<unsigned int>(15 - 127) << 23) + 0xfff;
    // Rounding bias part 2.
    in.u += mant_odd;
    // Take the bits!
    out.x = static_cast<unsigned short>(in.u >> 13);
  }

  // Re-attach the sign in the top bit of the 16-bit result.
  out.x |= static_cast<unsigned short>(sign >> 16);
  return out;
#endif
}
|
||||
|
||||
// Converts a __half to float.
//   * CUDA device code (arch >= 300 with EIGEN_HAS_CUDA_FP16): __half2float.
//   * Hosts with EIGEN_HAS_FP16_C: the _cvtsh_ss intrinsic.
//   * Otherwise: a bit-manipulation fallback on the raw 16-bit pattern.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
  return __half2float(h);

#elif defined(EIGEN_HAS_FP16_C)
  return _cvtsh_ss(h.x);

#else
  const FP32 magic = { 113 << 23 };
  const unsigned int shifted_exp = 0x7c00 << 13;  // exponent mask after shift
  FP32 o;

  // h.x promotes to (signed) int, so the masked value is converted to
  // unsigned before shifting: all bit shuffling stays in unsigned arithmetic.
  o.u = static_cast<unsigned int>(h.x & 0x7fff) << 13;  // exponent/mantissa bits
  const unsigned int exp = shifted_exp & o.u;           // just the exponent
  o.u += (127 - 15) << 23;                              // exponent adjust

  // handle exponent special cases
  if (exp == shifted_exp) {        // Inf/NaN?
    o.u += (128 - 16) << 23;       // extra exp adjust
  } else if (exp == 0) {           // Zero/Denormal?
    o.u += 1 << 23;                // extra exp adjust
    o.f -= magic.f;                // renormalize
  }

  // Sign bit. Shifting 0x8000 left by 16 as a signed int would move a 1 into
  // the sign bit, which is not portable before C++20; shift an unsigned value.
  o.u |= static_cast<unsigned int>(h.x & 0x8000) << 16;
  return o.f;
#endif
}
|
||||
|
||||
// --- standard functions ---
|
||||
|
||||
// True when, ignoring the sign bit, the raw pattern equals 0x7c00.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) {
  const unsigned short magnitude = a.x & 0x7fff;
  return magnitude == 0x7c00;
}
|
||||
// NaN test. CUDA device code for arch >= 530 uses the __hisnan intrinsic;
// everywhere else a value is NaN when its sign-stripped raw pattern is
// strictly greater than 0x7c00.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hisnan(a);
#else
  const unsigned short magnitude = a.x & 0x7fff;
  return magnitude > 0x7c00;
#endif
}
|
||||
// A half is finite when it is neither infinite nor NaN.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const half& a) {
  return !((isinf EIGEN_NOT_A_MACRO (a)) || (isnan EIGEN_NOT_A_MACRO (a)));
}
|
||||
|
||||
// Absolute value: clears bit 0x8000 of the raw storage and keeps the rest.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
  half magnitude;
  magnitude.x = a.x & 0x7FFF;
  return magnitude;
}
|
||||
// exp for half: evaluated with ::expf on the float value, result converted
// back to half.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
  const float value = float(a);
  return half(::expf(value));
}
|
||||
// Natural logarithm for half. With CUDA fp16 support, nvcc version >= 80000
// and device arch >= 530 the ::hlog intrinsic is used; otherwise the value is
// widened to float and ::logf is applied.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return Eigen::half(::hlog(a));
#else
  const float value = float(a);
  return half(::logf(value));
#endif
}
|
||||
// log1p for half: forwards the float value to numext::log1p and converts the
// result back to half.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log1p(const half& a) {
  const float value = float(a);
  return half(numext::log1p(value));
}
|
||||
// Base-10 logarithm for half: ::log10f on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
  const float value = float(a);
  return half(::log10f(value));
}
|
||||
// Square root for half: ::sqrtf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
  const float value = float(a);
  return half(::sqrtf(value));
}
|
||||
// Power for half: ::powf(a, b) on the float values, converted back to half.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) {
  const float base = float(a);
  const float exponent = float(b);
  return half(::powf(base, exponent));
}
|
||||
// Sine for half: ::sinf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sin(const half& a) {
  const float value = float(a);
  return half(::sinf(value));
}
|
||||
// Cosine for half: ::cosf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half cos(const half& a) {
  const float value = float(a);
  return half(::cosf(value));
}
|
||||
// Tangent for half: ::tanf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tan(const half& a) {
  const float value = float(a);
  return half(::tanf(value));
}
|
||||
// Hyperbolic tangent for half: ::tanhf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
  const float value = float(a);
  return half(::tanhf(value));
}
|
||||
// Floor for half: ::floorf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
  const float value = float(a);
  return half(::floorf(value));
}
|
||||
// Ceiling for half: ::ceilf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
  const float value = float(a);
  return half(::ceilf(value));
}
|
||||
|
||||
// Minimum of two halves: returns `b` only when b < a, otherwise `a`.
// CUDA device code for arch >= 530 compares with __hlt; other builds compare
// the float values.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  if (__hlt(b, a)) {
    return b;
  }
  return a;
#else
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  if (rhs < lhs) {
    return b;
  }
  return a;
#endif
}
|
||||
// Maximum of two halves: returns `b` only when a < b, otherwise `a`.
// CUDA device code for arch >= 530 compares with __hlt; other builds compare
// the float values.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  if (__hlt(a, b)) {
    return b;
  }
  return a;
#else
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  if (lhs < rhs) {
    return b;
  }
  return a;
#endif
}
|
||||
|
||||
// Stream insertion: a half is printed as its float value.
EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const half& v) {
  const float printable = static_cast<float>(v);
  os << printable;
  return os;
}
|
||||
|
||||
} // end namespace half_impl
|
||||
|
||||
// import Eigen::half_impl::half into Eigen namespace
|
||||
// using half_impl::half;
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<>
|
||||
struct random_default_impl<half, false, false>
|
||||
{
|
||||
static inline half run(const half& x, const half& y)
|
||||
{
|
||||
return x + (y-x) * half(float(std::rand()) / float(RAND_MAX));
|
||||
}
|
||||
static inline half run()
|
||||
{
|
||||
return run(half(-1.f), half(1.f));
|
||||
}
|
||||
};
|
||||
|
||||
// Flags half as an arithmetic type for Eigen's internal type traits.
template<>
struct is_arithmetic<half> {
  enum { value = true };
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
template<> struct NumTraits<Eigen::half>
|
||||
: GenericNumTraits<Eigen::half>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
|
||||
return half_impl::raw_uint16_to_half(0x0800);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { return Eigen::half(1e-2f); }
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half highest() {
|
||||
return half_impl::raw_uint16_to_half(0x7bff);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half lowest() {
|
||||
return half_impl::raw_uint16_to_half(0xfbff);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half infinity() {
|
||||
return half_impl::raw_uint16_to_half(0x7c00);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() {
|
||||
return half_impl::raw_uint16_to_half(0x7c01);
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
// C-like standard mathematical functions and transcendentals.
|
||||
// C-style absolute value for half: clears bit 0x8000 of the raw storage.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half fabsh(const Eigen::half& a) {
  Eigen::half magnitude;
  magnitude.x = a.x & 0x7FFF;
  return magnitude;
}
|
||||
// C-style exp for half: ::expf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
  const float value = float(a);
  return Eigen::half(::expf(value));
}
|
||||
// C-style natural logarithm for half. The ::hlog intrinsic is used only when
// CUDA fp16 support is enabled (EIGEN_HAS_CUDA_FP16), nvcc is >= 80000 and
// the device arch is >= 530 -- the same guard as Eigen::half_impl::log.
// Otherwise the value is widened to float and ::logf is applied.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return Eigen::half(::hlog(a));
#else
  return Eigen::half(::logf(float(a)));
#endif
}
|
||||
// C-style square root for half: ::sqrtf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) {
  const float value = float(a);
  return Eigen::half(::sqrtf(value));
}
|
||||
// C-style power for half: ::powf(a, b) on the float values, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) {
  const float base = float(a);
  const float exponent = float(b);
  return Eigen::half(::powf(base, exponent));
}
|
||||
// C-style floor for half: ::floorf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) {
  const float value = float(a);
  return Eigen::half(::floorf(value));
}
|
||||
// C-style ceiling for half: ::ceilf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceilh(const Eigen::half& a) {
  const float value = float(a);
  return Eigen::half(::ceilf(value));
}
|
||||
|
||||
namespace std {
|
||||
|
||||
#if __cplusplus > 199711L
|
||||
template <>
|
||||
struct hash<Eigen::half> {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::half& a) const {
|
||||
return static_cast<std::size_t>(a.x);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
} // end namespace std
|
||||
|
||||
|
||||
// Add the missing shfl_xor intrinsic
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
// __shfl_xor overload for Eigen::half: the value is widened to float, passed
// to the float __shfl_xor with the same lane mask and width, and the result
// is converted back to half.
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
  const float shuffled = __shfl_xor(static_cast<float>(var), laneMask, width);
  return static_cast<Eigen::half>(shuffled);
}
#endif
|
||||
|
||||
// ldg() has an overload for __half, but we also need one for Eigen::half.
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
// __ldg overload for Eigen::half: loads the 16-bit storage through the
// unsigned short overload of __ldg and rewraps the raw bits as a half.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
  const unsigned short* raw = reinterpret_cast<const unsigned short*>(ptr);
  return Eigen::half_impl::raw_uint16_to_half(__ldg(raw));
}
#endif
|
||||
|
||||
|
||||
#if defined(__CUDA_ARCH__)
// Device-only specializations of the numext classification helpers for
// Eigen::half; each one forwards to its half_impl counterpart.
namespace Eigen {
namespace numext {

template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool (isnan)(const Eigen::half& h) {
  return (half_impl::isnan)(h);
}

template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool (isinf)(const Eigen::half& h) {
  return (half_impl::isinf)(h);
}

template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool (isfinite)(const Eigen::half& h) {
  return (half_impl::isfinite)(h);
}

} // namespace numext
} // namespace Eigen
#endif
|
||||
|
||||
#endif // EIGEN_HALF_CUDA_H
|
||||
@@ -27,9 +27,22 @@ float4 plog<float4>(const float4& a)
|
||||
// Packet log for double2: applies the global-namespace log to each lane.
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 plog<double2>(const double2& a)
{
  using ::log;
  const double lane_x = log(a.x);
  const double lane_y = log(a.y);
  return make_double2(lane_x, lane_y);
}
|
||||
|
||||
// Packet log1p for float4: applies log1pf to each of the four lanes.
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 plog1p<float4>(const float4& a)
{
  const float lane_x = log1pf(a.x);
  const float lane_y = log1pf(a.y);
  const float lane_z = log1pf(a.z);
  const float lane_w = log1pf(a.w);
  return make_float4(lane_x, lane_y, lane_z, lane_w);
}
|
||||
|
||||
// Packet log1p for double2: applies log1p to each lane.
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 plog1p<double2>(const double2& a)
{
  const double lane_x = log1p(a.x);
  const double lane_y = log1p(a.y);
  return make_double2(lane_x, lane_y);
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
float4 pexp<float4>(const float4& a)
|
||||
{
|
||||
@@ -39,6 +52,7 @@ float4 pexp<float4>(const float4& a)
|
||||
// Packet exp for double2: applies the global-namespace exp to each lane.
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 pexp<double2>(const double2& a)
{
  using ::exp;
  const double lane_x = exp(a.x);
  const double lane_y = exp(a.y);
  return make_double2(lane_x, lane_y);
}
|
||||
|
||||
@@ -51,6 +65,7 @@ float4 psqrt<float4>(const float4& a)
|
||||
// Packet sqrt for double2: applies the global-namespace sqrt to each lane.
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 psqrt<double2>(const double2& a)
{
  using ::sqrt;
  const double lane_x = sqrt(a.x);
  const double lane_y = sqrt(a.y);
  return make_double2(lane_x, lane_y);
}
|
||||
|
||||
@@ -66,42 +81,6 @@ double2 prsqrt<double2>(const double2& a)
|
||||
return make_double2(rsqrt(a.x), rsqrt(a.y));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
float4 plgamma<float4>(const float4& a)
|
||||
{
|
||||
return make_float4(lgammaf(a.x), lgammaf(a.y), lgammaf(a.z), lgammaf(a.w));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
double2 plgamma<double2>(const double2& a)
|
||||
{
|
||||
return make_double2(lgamma(a.x), lgamma(a.y));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
float4 perf<float4>(const float4& a)
|
||||
{
|
||||
return make_float4(erf(a.x), erf(a.y), erf(a.z), erf(a.w));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
double2 perf<double2>(const double2& a)
|
||||
{
|
||||
return make_double2(erf(a.x), erf(a.y));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
float4 perfc<float4>(const float4& a)
|
||||
{
|
||||
return make_float4(erfc(a.x), erfc(a.y), erfc(a.z), erfc(a.w));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
double2 perfc<double2>(const double2& a)
|
||||
{
|
||||
return make_double2(erfc(a.x), erfc(a.y));
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@ namespace internal {
|
||||
template<> struct is_arithmetic<float4> { enum { value = true }; };
|
||||
template<> struct is_arithmetic<double2> { enum { value = true }; };
|
||||
|
||||
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef float4 type;
|
||||
@@ -40,8 +39,14 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasLGamma = 1,
|
||||
HasDiGamma = 1,
|
||||
HasZeta = 1,
|
||||
HasPolygamma = 1,
|
||||
HasErf = 1,
|
||||
HasErfc = 1,
|
||||
HasIGamma = 1,
|
||||
HasIGammac = 1,
|
||||
HasBetaInc = 1,
|
||||
|
||||
HasBlend = 0,
|
||||
};
|
||||
@@ -63,8 +68,14 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasLGamma = 1,
|
||||
HasDiGamma = 1,
|
||||
HasZeta = 1,
|
||||
HasPolygamma = 1,
|
||||
HasErf = 1,
|
||||
HasErfc = 1,
|
||||
HasIGamma = 1,
|
||||
HasIGammac = 1,
|
||||
HasBetaInc = 1,
|
||||
|
||||
HasBlend = 0,
|
||||
};
|
||||
@@ -183,25 +194,39 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to
|
||||
to[1] = from.y;
|
||||
}
|
||||
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
||||
return __ldg((const float4*)from);
|
||||
#else
|
||||
return make_float4(from[0], from[1], from[2], from[3]);
|
||||
#endif
|
||||
}
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
||||
return __ldg((const double2*)from);
|
||||
#else
|
||||
return make_double2(from[0], from[1]);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
||||
return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
|
||||
#else
|
||||
return make_float4(from[0], from[1], from[2], from[3]);
|
||||
#endif
|
||||
}
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
||||
return make_double2(__ldg(from+0), __ldg(from+1));
|
||||
}
|
||||
#else
|
||||
return make_double2(from[0], from[1]);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
|
||||
return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
|
||||
@@ -264,7 +289,6 @@ template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
|
||||
return make_double2(fabs(a.x), fabs(a.y));
|
||||
}
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<float4,4>& kernel) {
|
||||
double tmp = kernel.packet[0].y;
|
||||
|
||||
1123
Eigen/src/Core/arch/CUDA/PacketMathHalf.h
Normal file
1123
Eigen/src/Core/arch/CUDA/PacketMathHalf.h
Normal file
File diff suppressed because it is too large
Load Diff
212
Eigen/src/Core/arch/CUDA/TypeCasting.h
Normal file
212
Eigen/src/Core/arch/CUDA/TypeCasting.h
Normal file
@@ -0,0 +1,212 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_TYPE_CASTING_CUDA_H
|
||||
#define EIGEN_TYPE_CASTING_CUDA_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<>
|
||||
struct scalar_cast_op<float, Eigen::half> {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
||||
typedef Eigen::half result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||
return __float2half(a);
|
||||
#else
|
||||
return Eigen::half(a);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct functor_traits<scalar_cast_op<float, Eigen::half> >
|
||||
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
||||
|
||||
|
||||
template<>
|
||||
struct scalar_cast_op<int, Eigen::half> {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
||||
typedef Eigen::half result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||
return __float2half(static_cast<float>(a));
|
||||
#else
|
||||
return Eigen::half(static_cast<float>(a));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct functor_traits<scalar_cast_op<int, Eigen::half> >
|
||||
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
||||
|
||||
|
||||
template<>
|
||||
struct scalar_cast_op<Eigen::half, float> {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
||||
typedef float result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||
return __half2float(a);
|
||||
#else
|
||||
return static_cast<float>(a);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct functor_traits<scalar_cast_op<Eigen::half, float> >
|
||||
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
||||
|
||||
|
||||
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<Eigen::half, float> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 2,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
// Converts two half2 packets into one float4: the two lanes of `a` become
// x and y, the two lanes of `b` become z and w.
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
  const float2 low = __half22float2(a);
  const float2 high = __half22float2(b);
  return make_float4(low.x, low.y, high.x, high.y);
}
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<float, Eigen::half> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 2
|
||||
};
|
||||
};
|
||||
|
||||
// Converts a float4 into a half2 with __floats2half2_rn. Only a.x and a.y
// are converted; a.z and a.w are discarded.
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
  const float first = a.x;
  const float second = a.y;
  return __floats2half2_rn(first, second);
}
|
||||
|
||||
#elif defined EIGEN_VECTORIZE_AVX512
|
||||
template <>
|
||||
struct type_casting_traits<half, float> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
// AVX512: casts a Packet16h to a Packet16f by delegating to half2float.
template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16h, Packet16f>(const Packet16h& a) {
  const Packet16f widened = half2float(a);
  return widened;
}
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<float, half> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
// AVX512: casts a Packet16f to a Packet16h by delegating to float2half.
template<> EIGEN_STRONG_INLINE Packet16h pcast<Packet16f, Packet16h>(const Packet16f& a) {
  const Packet16h narrowed = float2half(a);
  return narrowed;
}
|
||||
|
||||
#elif defined EIGEN_VECTORIZE_AVX
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<Eigen::half, float> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
// AVX: casts a Packet8h to a Packet8f by delegating to half2float.
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) {
  const Packet8f widened = half2float(a);
  return widened;
}
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<float, Eigen::half> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
// AVX: casts a Packet8f to a Packet8h by delegating to float2half.
template<> EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f& a) {
  const Packet8h narrowed = float2half(a);
  return narrowed;
}
|
||||
|
||||
// Disable the following code since it's broken on too many platforms / compilers.
|
||||
//#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)
|
||||
#elif 0
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<Eigen::half, float> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
// SSE variant (currently compiled out by the enclosing `#elif 0`): unpacks
// the four 16-bit lanes of a Packet4h from a 64-bit integer, converts each
// lane to float one at a time, and assembles a Packet4f with lane 0 lowest.
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet4f>(const Packet4h& a) {
  __int64_t packed = _mm_cvtm64_si64(a.x);

  Eigen::half lane = raw_uint16_to_half(static_cast<unsigned short>(packed));
  float lane0 = static_cast<float>(lane);

  lane = raw_uint16_to_half(static_cast<unsigned short>(packed >> 16));
  float lane1 = static_cast<float>(lane);

  lane = raw_uint16_to_half(static_cast<unsigned short>(packed >> 32));
  float lane2 = static_cast<float>(lane);

  lane = raw_uint16_to_half(static_cast<unsigned short>(packed >> 48));
  float lane3 = static_cast<float>(lane);

  // _mm_set_ps takes its arguments from the highest lane to the lowest.
  return _mm_set_ps(lane3, lane2, lane1, lane0);
}
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<float, Eigen::half> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
// SSE variant (currently compiled out by the enclosing `#elif 0`): spills
// the Packet4f into an aligned float array, converts each element to half,
// and packs the four raw 16-bit patterns into a Packet4h with lane 0 lowest.
template<> EIGEN_STRONG_INLINE Packet4h pcast<Packet4f, Packet4h>(const Packet4f& a) {
  EIGEN_ALIGN16 float lanes[4];
  pstore(lanes, a);

  Eigen::half lane0(lanes[0]);
  Eigen::half lane1(lanes[1]);
  Eigen::half lane2(lanes[2]);
  Eigen::half lane3(lanes[3]);

  Packet4h packed;
  // _mm_set_pi16 takes its arguments from the highest lane to the lowest.
  packed.x = _mm_set_pi16(lane3.x, lane2.x, lane1.x, lane0.x);
  return packed;
}
|
||||
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_TYPE_CASTING_CUDA_H
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user