mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Compare commits
921 Commits
before-eva
...
3.2.7
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b9827c495e | ||
|
|
6056f4404c | ||
|
|
efd484546e | ||
|
|
a92681e0d2 | ||
|
|
47592d31ea | ||
|
|
1a9dda6bfd | ||
|
|
4c1a2b5614 | ||
|
|
c308cb1b24 | ||
|
|
85e9e6e780 | ||
|
|
c030925a66 | ||
|
|
fd074be1a0 | ||
|
|
e685bd7f46 | ||
|
|
e82f507747 | ||
|
|
1eea595550 | ||
|
|
d0980c7706 | ||
|
|
9055400f3d | ||
|
|
acb3c60295 | ||
|
|
f8b88d21a6 | ||
|
|
89a222ce50 | ||
|
|
960ec7aef2 | ||
|
|
e8bd2d49b3 | ||
|
|
f444996a7a | ||
|
|
a7c2e62a52 | ||
|
|
9ff967199a | ||
|
|
dc0ef2cbed | ||
|
|
7aa90a3b0f | ||
|
|
56488ddc0f | ||
|
|
165b69ca74 | ||
|
|
7abf6d02db | ||
|
|
73cb54835c | ||
|
|
cfe315476f | ||
|
|
f1583e86f6 | ||
|
|
4bd69750ed | ||
|
|
d40e32c94e | ||
|
|
a0bf1b4242 | ||
|
|
cf645db95b | ||
|
|
13135a82bd | ||
|
|
769cb99845 | ||
|
|
ba9add3c59 | ||
|
|
ddfb72a92f | ||
|
|
8c7e281c9e | ||
|
|
66c092e44e | ||
|
|
3ec6d38f35 | ||
|
|
96f64441f7 | ||
|
|
5af4d77511 | ||
|
|
88ac8ffad5 | ||
|
|
edb0183e0c | ||
|
|
befa141699 | ||
|
|
5c70b43abd | ||
|
|
6a3797f46f | ||
|
|
c4432aad15 | ||
|
|
ea0168c5a5 | ||
|
|
05fad4959a | ||
|
|
98eedb0c9a | ||
|
|
71424c4bf8 | ||
|
|
e59b246b08 | ||
|
|
4aa7038074 | ||
|
|
d9c80169e0 | ||
|
|
b514c943c7 | ||
|
|
8ba643a903 | ||
|
|
595c00157c | ||
|
|
1c6b224fb3 | ||
|
|
2361ec9c0e | ||
|
|
fcd213a297 | ||
|
|
37ed0d991a | ||
|
|
62b08cf9f9 | ||
|
|
46f011466b | ||
|
|
f600bdd76b | ||
|
|
421aa4f358 | ||
|
|
554356b034 | ||
|
|
97119f854f | ||
|
|
51ab034f63 | ||
|
|
0ebce69424 | ||
|
|
a748673bbb | ||
|
|
8597ee502b | ||
|
|
ac66f1c73d | ||
|
|
b392e6b21c | ||
|
|
e88aaae5f4 | ||
|
|
2d217a60a7 | ||
|
|
ef1439252c | ||
|
|
847bb317cd | ||
|
|
62d334c7d3 | ||
|
|
7713b29084 | ||
|
|
a08df3ff34 | ||
|
|
5bb9459124 | ||
|
|
80fd8fab87 | ||
|
|
84eeabd223 | ||
|
|
058fa781d7 | ||
|
|
b03209a7a6 | ||
|
|
71590d0ac7 | ||
|
|
1e1b4b6678 | ||
|
|
2e3353634f | ||
|
|
2461531e5a | ||
|
|
a68917594b | ||
|
|
3b93b1afb3 | ||
|
|
0fb74c1f8b | ||
|
|
bf650a3686 | ||
|
|
8fa951e31d | ||
|
|
1b64edbfd4 | ||
|
|
c74284ed81 | ||
|
|
b09316fbea | ||
|
|
c5fc8e6bdc | ||
|
|
88c844ae2f | ||
|
|
500c36de61 | ||
|
|
26234720bd | ||
|
|
0e38796e1c | ||
|
|
a2d9a4806a | ||
|
|
a72bf09e6d | ||
|
|
bb3e5c29cc | ||
|
|
81b3d29b26 | ||
|
|
f0b1b1df9b | ||
|
|
e061b7a538 | ||
|
|
8768ff3c31 | ||
|
|
77af14fb62 | ||
|
|
64b29e06b9 | ||
|
|
1c0e8bcf09 | ||
|
|
69fa405096 | ||
|
|
0f464d9d87 | ||
|
|
470d26d580 | ||
|
|
4dded73227 | ||
|
|
953d5ccfd5 | ||
|
|
98604576d1 | ||
|
|
45cbb0bbb1 | ||
|
|
cc641aabb7 | ||
|
|
aa6c516ec1 | ||
|
|
b39413794e | ||
|
|
bd511dde9d | ||
|
|
e2cfddf75f | ||
|
|
0927801a84 | ||
|
|
e972b55ec4 | ||
|
|
fc202bab39 | ||
|
|
fe51319980 | ||
|
|
0918c51e60 | ||
|
|
409547a0c8 | ||
|
|
4470c99975 | ||
|
|
6620aaa4b3 | ||
|
|
f669f5656a | ||
|
|
029d236ceb | ||
|
|
fe25f3b8e3 | ||
|
|
ceb4c9c10b | ||
|
|
cc5d7ff523 | ||
|
|
d771295554 | ||
|
|
fefec723aa | ||
|
|
780b2422e2 | ||
|
|
c21e45fbc5 | ||
|
|
057cfd2f02 | ||
|
|
114e863f08 | ||
|
|
410895a7e4 | ||
|
|
4716c2c666 | ||
|
|
91fe3a3004 | ||
|
|
84bba80916 | ||
|
|
91953d2d37 | ||
|
|
7b35b4cacc | ||
|
|
c3f3580b8f | ||
|
|
deecff97ed | ||
|
|
c6e8caf090 | ||
|
|
d10d6a40dd | ||
|
|
87629cd639 | ||
|
|
bde98df03f | ||
|
|
d4ec48575e | ||
|
|
554aa9b31d | ||
|
|
3af29caae8 | ||
|
|
f2ff8c091e | ||
|
|
f3be317614 | ||
|
|
08081f8293 | ||
|
|
7838fda82c | ||
|
|
3ba6647398 | ||
|
|
01f7918788 | ||
|
|
b50ffaddf2 | ||
|
|
74e460b995 | ||
|
|
c03c73c9b7 | ||
|
|
668518aed6 | ||
|
|
c739102ef9 | ||
|
|
2559fa9b0f | ||
|
|
dcb2a8b184 | ||
|
|
a8f2c6eec7 | ||
|
|
b1eca55328 | ||
|
|
f9931a0392 | ||
|
|
ebdf6a2dbb | ||
|
|
f64045a060 | ||
|
|
590f4b0aa3 | ||
|
|
5ef95fabee | ||
|
|
0f21613698 | ||
|
|
340b8afb14 | ||
|
|
9f99f61e69 | ||
|
|
759bd92a85 | ||
|
|
f89ba2a58b | ||
|
|
f1092d2f73 | ||
|
|
8296c4aaed | ||
|
|
9d82f7e30d | ||
|
|
e896c0ade7 | ||
|
|
5a6ea4edf6 | ||
|
|
b613173350 | ||
|
|
a727a2c4ed | ||
|
|
9dfdbd7e56 | ||
|
|
46fc881e4a | ||
|
|
c6eb84aabc | ||
|
|
e1f1091fde | ||
|
|
638c6948d7 | ||
|
|
e6952a51ba | ||
|
|
b9d314ae19 | ||
|
|
0039cd9cf9 | ||
|
|
14f537c296 | ||
|
|
641e824c56 | ||
|
|
b5124e7cfd | ||
|
|
54e3633b43 | ||
|
|
f697df7237 | ||
|
|
6559d09c60 | ||
|
|
8a382aa119 | ||
|
|
703c526355 | ||
|
|
4cdf3fe427 | ||
|
|
0feff6e987 | ||
|
|
cd679f2c47 | ||
|
|
f074d43f4b | ||
|
|
1ac8600126 | ||
|
|
378bdfb7f0 | ||
|
|
0526dc1bb4 | ||
|
|
1a36590e84 | ||
|
|
7e0b6c56b4 | ||
|
|
b12dd1ae3c | ||
|
|
71676eaddd | ||
|
|
0a0ab6dd15 | ||
|
|
5692723c58 | ||
|
|
8f4b8d204b | ||
|
|
3bd2b41b2e | ||
|
|
4928ea1212 | ||
|
|
b00fe1590d | ||
|
|
c94174b4fe | ||
|
|
91dd53e54d | ||
|
|
279786e987 | ||
|
|
699c80e404 | ||
|
|
ae4644cc68 | ||
|
|
36f7c1337f | ||
|
|
5023afc0af | ||
|
|
63974bcb88 | ||
|
|
79f4a59ed9 | ||
|
|
9f98650d0a | ||
|
|
8638dbb809 | ||
|
|
db5b0741b5 | ||
|
|
8efa5bb439 | ||
|
|
f5f6e2c6f4 | ||
|
|
a5a3a994c8 | ||
|
|
25c7d9164f | ||
|
|
ba44761435 | ||
|
|
1a96594607 | ||
|
|
61db9a0e89 | ||
|
|
bc129ad79c | ||
|
|
f5328be65a | ||
|
|
735f1fda39 | ||
|
|
57ab550a17 | ||
|
|
e887c61b3d | ||
|
|
b8d9eaa19b | ||
|
|
f806c23012 | ||
|
|
99501a2c4c | ||
|
|
7dad5f797e | ||
|
|
dcad508986 | ||
|
|
26977e281e | ||
|
|
1e109e1757 | ||
|
|
e469ac55c3 | ||
|
|
874f345562 | ||
|
|
608733415a | ||
|
|
57ec399ec9 | ||
|
|
d85abc89c5 | ||
|
|
309620ee1f | ||
|
|
56ca44ad1a | ||
|
|
e8cdbedefb | ||
|
|
6ccf97f3e6 | ||
|
|
433bce5c3a | ||
|
|
775f7e5fbb | ||
|
|
a819fa148d | ||
|
|
1a8dc85142 | ||
|
|
4974d1d2b4 | ||
|
|
e2f3e4e4aa | ||
|
|
b26e697182 | ||
|
|
b1f9f603a0 | ||
|
|
5384e89147 | ||
|
|
8518ba0bbc | ||
|
|
80cae358b0 | ||
|
|
0efaff9b3b | ||
|
|
41a20994cc | ||
|
|
a910a7466e | ||
|
|
4371911861 | ||
|
|
5fc4ce6449 | ||
|
|
77294047d6 | ||
|
|
bea36925db | ||
|
|
7f7a712062 | ||
|
|
30c849669d | ||
|
|
e0a8615b94 | ||
|
|
8efd9142b3 | ||
|
|
80ed5bd90c | ||
|
|
eb3695d2fc | ||
|
|
48db34a7b9 | ||
|
|
da584912b6 | ||
|
|
509e4ddc02 | ||
|
|
b33cf92878 | ||
|
|
1d3c8306f8 | ||
|
|
ec785b0180 | ||
|
|
eeabf7975e | ||
|
|
c2d1074932 | ||
|
|
722916e19d | ||
|
|
cb37f818ca | ||
|
|
9a06a71627 | ||
|
|
4577bafb91 | ||
|
|
739ed32222 | ||
|
|
58f0647f96 | ||
|
|
d0c3fcd382 | ||
|
|
19e16fe15f | ||
|
|
8f87be9e03 | ||
|
|
58725ff08c | ||
|
|
15bff016d1 | ||
|
|
c6fefe5d8e | ||
|
|
ee06f78679 | ||
|
|
9ea09179b5 | ||
|
|
b1789c112b | ||
|
|
2dde63499c | ||
|
|
7f2c6ed2fa | ||
|
|
c5a3777666 | ||
|
|
0833b82efd | ||
|
|
85c3389b28 | ||
|
|
67fcf47ecb | ||
|
|
fcecafde3a | ||
|
|
d62bfe73a9 | ||
|
|
bc99c5f7db | ||
|
|
1946cc4478 | ||
|
|
5e62427e22 | ||
|
|
4ec2f07a5b | ||
|
|
883168ed94 | ||
|
|
e5f134006b | ||
|
|
d2fc597d5b | ||
|
|
3d25b1f5b8 | ||
|
|
acecb7b09f | ||
|
|
21c0a2ce0c | ||
|
|
debc97821c | ||
|
|
e2e7ba9f85 | ||
|
|
bd2d330b25 | ||
|
|
79225db0b6 | ||
|
|
c426054767 | ||
|
|
1fa793cb97 | ||
|
|
04ffb9956e | ||
|
|
94ed7c81e6 | ||
|
|
fcb3573d17 | ||
|
|
fae4fd7a26 | ||
|
|
cf09c5f687 | ||
|
|
b508619392 | ||
|
|
0f65f2762d | ||
|
|
87524922dc | ||
|
|
a303b6a733 | ||
|
|
fe57b2f963 | ||
|
|
c12b7896d0 | ||
|
|
973e6a035f | ||
|
|
84aaa03182 | ||
|
|
aa5f79206f | ||
|
|
b4a9b3f496 | ||
|
|
d04f23260d | ||
|
|
8838b0a1ff | ||
|
|
f786897e4b | ||
|
|
7acd38d19e | ||
|
|
b50e5bc816 | ||
|
|
a370b1f2e2 | ||
|
|
a13bc22204 | ||
|
|
4b7c3abbea | ||
|
|
feacfa5f83 | ||
|
|
8472e697ca | ||
|
|
65af852b54 | ||
|
|
ae697b471c | ||
|
|
94e47798f4 | ||
|
|
d853adffdb | ||
|
|
880e72c130 | ||
|
|
bfdd9f3ac9 | ||
|
|
c566cfe2ba | ||
|
|
dba55041ab | ||
|
|
fd1aaf4772 | ||
|
|
c806009453 | ||
|
|
2cc41dbe83 | ||
|
|
c26e8a1af3 | ||
|
|
0ec1fc9e11 | ||
|
|
99d75235a9 | ||
|
|
4c70b0a762 | ||
|
|
d3f52debc6 | ||
|
|
0219f8aed4 | ||
|
|
2ed1838aeb | ||
|
|
4b36c3591f | ||
|
|
a991f94c0e | ||
|
|
498b7eed25 | ||
|
|
767424af18 | ||
|
|
a80e17cfe8 | ||
|
|
349c2c9235 | ||
|
|
48d537f59f | ||
|
|
538c059aa4 | ||
|
|
a48b82eece | ||
|
|
ccd70ba123 | ||
|
|
44beee9d68 | ||
|
|
0a07ac574e | ||
|
|
6c047d398d | ||
|
|
4b886e6b39 | ||
|
|
5741349294 | ||
|
|
118b1113d9 | ||
|
|
503c176d8e | ||
|
|
dbdd8b0883 | ||
|
|
d44d432baa | ||
|
|
893bfcf95f | ||
|
|
fb53ff1eda | ||
|
|
7a17639953 | ||
|
|
bbce6fa65d | ||
|
|
95a430a2ca | ||
|
|
152f3218ac | ||
|
|
af2e5995e2 | ||
|
|
1269392822 | ||
|
|
b7271dffb5 | ||
|
|
8b2afe33a1 | ||
|
|
5cc23199be | ||
|
|
7caaf6453b | ||
|
|
1c236f4c9a | ||
|
|
1fa6fe2abd | ||
|
|
9d3c69952b | ||
|
|
5180bb5e47 | ||
|
|
0187504912 | ||
|
|
6d26deb894 | ||
|
|
81517eebc1 | ||
|
|
12d59465cb | ||
|
|
e404841235 | ||
|
|
15c946338f | ||
|
|
56a0bbbbee | ||
|
|
842e31cf5c | ||
|
|
abd3502e9e | ||
|
|
76c3cf6949 | ||
|
|
bc34ee3365 | ||
|
|
9a04cd307c | ||
|
|
f0a62c90bc | ||
|
|
2946992ad4 | ||
|
|
821ff0ecfb | ||
|
|
2c4cace56c | ||
|
|
8a502233d8 | ||
|
|
b2755edcdd | ||
|
|
d1ef3c3546 | ||
|
|
edaefeb978 | ||
|
|
3bd31e21b5 | ||
|
|
75e269c77b | ||
|
|
74cde0c925 | ||
|
|
ce2035af86 | ||
|
|
6d0f0b8cec | ||
|
|
4ba8aa1482 | ||
|
|
27d6b4daf9 | ||
|
|
446001ef51 | ||
|
|
13cbc751c9 | ||
|
|
421feea3b2 | ||
|
|
7817bc19a4 | ||
|
|
eb13ada3aa | ||
|
|
36448c9e28 | ||
|
|
de0d8a010e | ||
|
|
72569f17ec | ||
|
|
3878e6f170 | ||
|
|
ff46ec0f24 | ||
|
|
ae514ddfe5 | ||
|
|
f9d6d3780f | ||
|
|
abba11bdcf | ||
|
|
d9e0336a78 | ||
|
|
333905b0c2 | ||
|
|
5fa69422a2 | ||
|
|
de38ff2499 | ||
|
|
60e093a9dc | ||
|
|
56408504e4 | ||
|
|
974fe38ca3 | ||
|
|
c0205ca4af | ||
|
|
10f8aabb61 | ||
|
|
80de35b6c5 | ||
|
|
60663a510a | ||
|
|
03dd4dd91a | ||
|
|
0a18eecab3 | ||
|
|
7b044c0ead | ||
|
|
755e77266f | ||
|
|
07c5500d70 | ||
|
|
e70506dd8f | ||
|
|
2ae20d558b | ||
|
|
62bce6e5e6 | ||
|
|
060e835ee9 | ||
|
|
0ca43f7e9a | ||
|
|
8b3be4907d | ||
|
|
0bf5894861 | ||
|
|
e44d78dab3 | ||
|
|
c2f66c65aa | ||
|
|
125619146b | ||
|
|
341ae8665d | ||
|
|
fc23e93707 | ||
|
|
0f0580b97c | ||
|
|
486ca277a0 | ||
|
|
10a79ca3a3 | ||
|
|
466d6d41c6 | ||
|
|
8514179aa3 | ||
|
|
0403d49006 | ||
|
|
c83e01f2d6 | ||
|
|
26db954776 | ||
|
|
fda680f9cf | ||
|
|
dfc54e1bbf | ||
|
|
749b56f6af | ||
|
|
af9c9f7706 | ||
|
|
470aa15c35 | ||
|
|
188a13f9fe | ||
|
|
efdff15749 | ||
|
|
dacd39ea76 | ||
|
|
74db22455a | ||
|
|
b23556bbbd | ||
|
|
1abe4ed14c | ||
|
|
d43f737b4a | ||
|
|
f50548e86a | ||
|
|
15bad3670b | ||
|
|
8846aa6d1b | ||
|
|
80993b95d3 | ||
|
|
b24fe22b1a | ||
|
|
c82dc227f1 | ||
|
|
a96f3d629c | ||
|
|
47829e2d16 | ||
|
|
1f398dfc82 | ||
|
|
b3a0365429 | ||
|
|
72c4f8ca8f | ||
|
|
8754341848 | ||
|
|
daad9585a3 | ||
|
|
b051bbd64f | ||
|
|
b3d63b4db2 | ||
|
|
1c4b69c5fb | ||
|
|
8a74ce922c | ||
|
|
863b7362bc | ||
|
|
1bf3b34849 | ||
|
|
f9580a3473 | ||
|
|
fbb53b6cbb | ||
|
|
85c7659574 | ||
|
|
bc065c75d2 | ||
|
|
e6cc24cbd6 | ||
|
|
7ff266e3ce | ||
|
|
b4a709520d | ||
|
|
c1d0f15bde | ||
|
|
124d12a915 | ||
|
|
f29dbec321 | ||
|
|
2959045f2f | ||
|
|
36fffe48f7 | ||
|
|
fb5c1e9097 | ||
|
|
3d298da269 | ||
|
|
9ac3c821ea | ||
|
|
33c702c79f | ||
|
|
756292f8aa | ||
|
|
8c8db49331 | ||
|
|
eeb43f9e2b | ||
|
|
16047c8d4a | ||
|
|
916ef48846 | ||
|
|
f1d8c13dbc | ||
|
|
9faad2932f | ||
|
|
f8fad09301 | ||
|
|
72e7529708 | ||
|
|
1aa2bf8274 | ||
|
|
b1892ab14d | ||
|
|
439feca139 | ||
|
|
4dd55a2958 | ||
|
|
f25338f4d7 | ||
|
|
51357a6622 | ||
|
|
107bb308c3 | ||
|
|
c2ff44cbf3 | ||
|
|
2a3c3c49a1 | ||
|
|
fc13b37c55 | ||
|
|
647622281e | ||
|
|
26d2cdefd4 | ||
|
|
db183ca7b3 | ||
|
|
702a3c17db | ||
|
|
5f5a8d97c0 | ||
|
|
bae2e3327b | ||
|
|
cd0ff253ec | ||
|
|
929e77192c | ||
|
|
2116e261fb | ||
|
|
1f371e78e6 | ||
|
|
f7bb7ee3f3 | ||
|
|
baa77ffe38 | ||
|
|
4aac87251f | ||
|
|
6daa6a0d16 | ||
|
|
2a251ffab0 | ||
|
|
9b729f93a1 | ||
|
|
946b99dd5c | ||
|
|
50eef6dfc3 | ||
|
|
62f332fc04 | ||
|
|
3eba5e1101 | ||
|
|
36e6c9064f | ||
|
|
a325d1cb1e | ||
|
|
2bdb3b1afd | ||
|
|
3c7686630d | ||
|
|
296cb40161 | ||
|
|
40bb98e76a | ||
|
|
9b7a6f0122 | ||
|
|
ffd3654f67 | ||
|
|
25b2f6624d | ||
|
|
547d660f1d | ||
|
|
5633cde9ad | ||
|
|
fe8757a576 | ||
|
|
ff29221951 | ||
|
|
7fbc9d8409 | ||
|
|
79c3cfabe3 | ||
|
|
e0f390793c | ||
|
|
97812ad0d3 | ||
|
|
d66b5a1d91 | ||
|
|
b0152fdb1d | ||
|
|
e9c5418249 | ||
|
|
b25b517817 | ||
|
|
ce0fb1bca1 | ||
|
|
92fce631ed | ||
|
|
238308e0f7 | ||
|
|
719ac0d6b0 | ||
|
|
8e61a7aab6 | ||
|
|
09e992ce9f | ||
|
|
cdd401f743 | ||
|
|
59b7615d31 | ||
|
|
a8cb0dfcf5 | ||
|
|
0e7a26c19f | ||
|
|
13c636d864 | ||
|
|
00ec1629ca | ||
|
|
a72eabec9b | ||
|
|
235c97ba92 | ||
|
|
4126cb6369 | ||
|
|
8ea2ab4829 | ||
|
|
9b79607579 | ||
|
|
aadbfe78c2 | ||
|
|
7d5e16c733 | ||
|
|
e395a8042a | ||
|
|
91f1a161ca | ||
|
|
16bca3bfe2 | ||
|
|
e0ab58d815 | ||
|
|
c67a7148c4 | ||
|
|
38dc683901 | ||
|
|
cad0fa5d77 | ||
|
|
5daebe0a27 | ||
|
|
05fb735d1d | ||
|
|
7443d8b4e9 | ||
|
|
36506511a1 | ||
|
|
3afdc6d95a | ||
|
|
c14c03490f | ||
|
|
c880590d27 | ||
|
|
54294e2293 | ||
|
|
c7331ebb06 | ||
|
|
0321449944 | ||
|
|
44c390a370 | ||
|
|
bbaf01712c | ||
|
|
8e875d3c38 | ||
|
|
8d69b87c53 | ||
|
|
49cbaf3856 | ||
|
|
9b00035438 | ||
|
|
e215740e8e | ||
|
|
0cc67589d3 | ||
|
|
51e2e93019 | ||
|
|
9d3e0da385 | ||
|
|
6ff72f40cf | ||
|
|
ea0906dfd8 | ||
|
|
cc1bacea5b | ||
|
|
c285fda7f4 | ||
|
|
160034bba1 | ||
|
|
6eb16aae2d | ||
|
|
4777ca1afb | ||
|
|
0e0ae40084 | ||
|
|
b73908000c | ||
|
|
08b0c08e5e | ||
|
|
7d53633e05 | ||
|
|
bc072c5cba | ||
|
|
47981c5925 | ||
|
|
bbe9e22d60 | ||
|
|
b18a7ff6be | ||
|
|
61b88d2feb | ||
|
|
e84bdbb445 | ||
|
|
8f4cdbbc8f | ||
|
|
7390af91b6 | ||
|
|
1e6f53e070 | ||
|
|
6f846ef9c6 | ||
|
|
3c63446507 | ||
|
|
746d2db6ed | ||
|
|
441f97b2df | ||
|
|
0ad7a644df | ||
|
|
7ffd55c980 | ||
|
|
065344a06b | ||
|
|
c401167712 | ||
|
|
73e686c6a4 | ||
|
|
ae039dde13 | ||
|
|
e1f1f66a52 | ||
|
|
f0648f8860 | ||
|
|
54607665ab | ||
|
|
a7bd4c455a | ||
|
|
b868bfb84a | ||
|
|
3b19b466a7 | ||
|
|
199ac3f2e7 | ||
|
|
e3ba5329ff | ||
|
|
17f119689e | ||
|
|
3849cc65ee | ||
|
|
ec0a8b2e6d | ||
|
|
7fa87a8b12 | ||
|
|
b29b81a1f4 | ||
|
|
47585c8ab2 | ||
|
|
c415b627a7 | ||
|
|
78bb808337 | ||
|
|
caf4936661 | ||
|
|
774c3c1e0a | ||
|
|
f80c8e17eb | ||
|
|
38ab7e6ed0 | ||
|
|
aa664eabb9 | ||
|
|
4304c73542 | ||
|
|
925fb6b937 | ||
|
|
a77458a8ff | ||
|
|
a669052f12 | ||
|
|
36a2b2e9dc | ||
|
|
2859a31ac8 | ||
|
|
d13711a363 | ||
|
|
fe102248ac | ||
|
|
8c8ae2d819 | ||
|
|
29aebf96e6 | ||
|
|
79085e08e9 | ||
|
|
a961d72e65 | ||
|
|
8998f4099e | ||
|
|
6fa6cdd2b9 | ||
|
|
736267cf6b | ||
|
|
7402fea0a8 | ||
|
|
0320f7e3a7 | ||
|
|
c0f2cb016e | ||
|
|
ffc995c9e4 | ||
|
|
b30706bd5c | ||
|
|
59f5f155c2 | ||
|
|
0a6c472335 | ||
|
|
aceae8314b | ||
|
|
16d4c7a5e8 | ||
|
|
a395024d44 | ||
|
|
f74ed34539 | ||
|
|
5e26b7cf9d | ||
|
|
74b1d79d77 | ||
|
|
0b362e0c9a | ||
|
|
a6be1952f4 | ||
|
|
0bd5671b9e | ||
|
|
8dd3b716e3 | ||
|
|
7eefdb948c | ||
|
|
082f7ddc37 | ||
|
|
9be72cda2a | ||
|
|
ae40583965 | ||
|
|
5806e73800 | ||
|
|
2bf63c6b4a | ||
|
|
da6ec81282 | ||
|
|
354bd8a428 | ||
|
|
5c0f294098 | ||
|
|
cbc572caf7 | ||
|
|
1e0c2f6ddb | ||
|
|
c98881e130 | ||
|
|
d67548f345 | ||
|
|
6c7ab50811 | ||
|
|
728c3d2cb9 | ||
|
|
af31b6c37a | ||
|
|
93125e372d | ||
|
|
b2e1453e1e | ||
|
|
9621333545 | ||
|
|
5f6ec95291 | ||
|
|
ecd2c8f37b | ||
|
|
2eee6eaf3c | ||
|
|
8af02d19b2 | ||
|
|
95b0a6707b | ||
|
|
b1ab6a8e0b | ||
|
|
61cff28618 | ||
|
|
68e8ddaf94 | ||
|
|
3a735a6cf1 | ||
|
|
ccc41128fb | ||
|
|
b3a07eecc5 | ||
|
|
c16b80746a | ||
|
|
5b78780def | ||
|
|
8169c6ac59 | ||
|
|
463554c254 | ||
|
|
82c066b3c4 | ||
|
|
0543cb51b5 | ||
|
|
99e27916cf | ||
|
|
06545058bb | ||
|
|
7002aa858f | ||
|
|
8cfb138e73 | ||
|
|
1b5de5a37b | ||
|
|
a08cba6b5f | ||
|
|
573c587e3d | ||
|
|
551bf5c66a | ||
|
|
2d136d3d7f | ||
|
|
873401032b | ||
|
|
d595fd31f5 | ||
|
|
bffa15142c | ||
|
|
94acccc126 | ||
|
|
34694d8828 | ||
|
|
ee1c55f923 | ||
|
|
f54e62e4a9 | ||
|
|
5fa7262e4c | ||
|
|
fef534f52e | ||
|
|
d357bbd9c0 | ||
|
|
27c068e9d6 | ||
|
|
e94fe4cc3e | ||
|
|
2ca0ccd2f2 | ||
|
|
8d8acc3ab4 | ||
|
|
6c5e915e9a | ||
|
|
f0b82c3ab9 | ||
|
|
6f1a0479b3 | ||
|
|
b5fd774775 | ||
|
|
34ca81b1bf | ||
|
|
7f917807c6 | ||
|
|
8af1ba5346 | ||
|
|
c6f7337032 | ||
|
|
626821b0e3 | ||
|
|
27ca9437a1 | ||
|
|
d0261bd26c | ||
|
|
c15c65990f | ||
|
|
fb6e32a62f | ||
|
|
d331def6cc | ||
|
|
5584275325 | ||
|
|
cc6dd878ee | ||
|
|
fc6ecebc69 | ||
|
|
230f5c3aa9 | ||
|
|
0c4fc69d62 | ||
|
|
e16e52d493 | ||
|
|
c49421a82b | ||
|
|
ccd7beba90 | ||
|
|
84a99f3a93 | ||
|
|
43c2747e92 | ||
|
|
3c5e82ee0b | ||
|
|
d132159ba3 | ||
|
|
075b1168b4 | ||
|
|
be027bede8 | ||
|
|
f1ed1b7d11 | ||
|
|
20b0747bdb | ||
|
|
11462c1a29 | ||
|
|
e667819055 | ||
|
|
35c9f8779d | ||
|
|
da81e863e2 | ||
|
|
c5c4269961 | ||
|
|
b734863536 | ||
|
|
1046ea7a89 | ||
|
|
8b10081dea | ||
|
|
042bd9cbe2 | ||
|
|
93e867b63c | ||
|
|
e702934dfa | ||
|
|
eef44fb2a5 | ||
|
|
eb9c8cffd6 | ||
|
|
240e2f4162 | ||
|
|
b0702dca05 | ||
|
|
7191f31961 | ||
|
|
6d7bd066e0 | ||
|
|
66078fbd58 | ||
|
|
4e80704c53 | ||
|
|
043ece9730 | ||
|
|
48db2b8799 | ||
|
|
593a82202f | ||
|
|
f24ba33c2d | ||
|
|
ef807ea020 | ||
|
|
da19c48d61 | ||
|
|
cef49d21f0 | ||
|
|
53726663c7 | ||
|
|
2ad3dac422 | ||
|
|
e3d34064bf | ||
|
|
3f5591981f | ||
|
|
6def9fd52b | ||
|
|
76ee39485f | ||
|
|
0c6b931cbc | ||
|
|
fd96ff166d | ||
|
|
9a09b75df3 | ||
|
|
52dc1d7ffd | ||
|
|
24e33a0d86 | ||
|
|
b5333b6760 | ||
|
|
6a4489c523 | ||
|
|
7958d92c23 | ||
|
|
044f27546f | ||
|
|
cd4ea5151f | ||
|
|
f9276f9f90 | ||
|
|
88ec3fdef4 | ||
|
|
5b93c59198 | ||
|
|
fd5be2f9cc | ||
|
|
598776b088 | ||
|
|
cdedc9e90d | ||
|
|
7c1fc0ee7c | ||
|
|
baf2b13589 | ||
|
|
12504a79d1 | ||
|
|
ae360a9ec0 | ||
|
|
516304cd90 | ||
|
|
4c5da3b03a | ||
|
|
b8020d11de | ||
|
|
6b931b3e47 | ||
|
|
d21708172a | ||
|
|
8946e0cb80 | ||
|
|
bf9747b9ff | ||
|
|
a5522a1381 | ||
|
|
d646cc95ad | ||
|
|
8ea9e762d6 | ||
|
|
0a44b5249c | ||
|
|
fbc5beadc8 | ||
|
|
b2368b3408 | ||
|
|
965ee4e853 | ||
|
|
d51c9f1e93 | ||
|
|
56f4144035 | ||
|
|
609ef90213 | ||
|
|
f407a86a3f | ||
|
|
0257cf1cef | ||
|
|
941319a198 | ||
|
|
273a952099 | ||
|
|
551d20a824 | ||
|
|
f5ed3421e9 | ||
|
|
945b0802c9 | ||
|
|
2a0ca0131d | ||
|
|
6f7f0ab6c2 | ||
|
|
68069af969 | ||
|
|
af74b16b0f | ||
|
|
f707f15842 | ||
|
|
a443b3d98d | ||
|
|
811ec5bfcb | ||
|
|
31d40ebc9d | ||
|
|
0c5f4fd8da | ||
|
|
2b50ade6ca | ||
|
|
f9149f9ba0 | ||
|
|
76d05e8236 | ||
|
|
fa81676d64 | ||
|
|
b56348046f | ||
|
|
47a7de7b53 | ||
|
|
8607779757 | ||
|
|
be71c46a3c | ||
|
|
4219db123e | ||
|
|
f003a6df38 | ||
|
|
56f9b810ab | ||
|
|
12815309a6 | ||
|
|
207747a518 | ||
|
|
5ecfdf2c00 | ||
|
|
e788869cf5 | ||
|
|
9df04bcede | ||
|
|
c31606c88a | ||
|
|
2872d964f4 | ||
|
|
2c288b3949 |
3
.hgeol
3
.hgeol
@@ -1,9 +1,6 @@
|
||||
[patterns]
|
||||
*.sh = LF
|
||||
*.MINPACK = CRLF
|
||||
scripts/*.in = LF
|
||||
debug/msvc/*.dat = CRLF
|
||||
debug/msvc/*.natvis = CRLF
|
||||
unsupported/test/mpreal/*.* = CRLF
|
||||
** = native
|
||||
|
||||
|
||||
@@ -108,8 +108,7 @@ endif()
|
||||
set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320")
|
||||
|
||||
macro(ei_add_cxx_compiler_flag FLAG)
|
||||
string(REGEX REPLACE "-" "" SFLAG1 ${FLAG})
|
||||
string(REGEX REPLACE "\\+" "p" SFLAG ${SFLAG1})
|
||||
string(REGEX REPLACE "-" "" SFLAG ${FLAG})
|
||||
check_cxx_compiler_flag(${FLAG} COMPILER_SUPPORT_${SFLAG})
|
||||
if(COMPILER_SUPPORT_${SFLAG})
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
|
||||
@@ -119,7 +118,7 @@ endmacro(ei_add_cxx_compiler_flag)
|
||||
if(NOT MSVC)
|
||||
# We assume that other compilers are partly compatible with GNUCC
|
||||
|
||||
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "-g3")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "-g0 -O2")
|
||||
|
||||
@@ -143,9 +142,6 @@ if(NOT MSVC)
|
||||
ei_add_cxx_compiler_flag("-Wpointer-arith")
|
||||
ei_add_cxx_compiler_flag("-Wwrite-strings")
|
||||
ei_add_cxx_compiler_flag("-Wformat-security")
|
||||
ei_add_cxx_compiler_flag("-Wshorten-64-to-32")
|
||||
ei_add_cxx_compiler_flag("-Wenum-conversion")
|
||||
ei_add_cxx_compiler_flag("-Wc++11-extensions")
|
||||
|
||||
ei_add_cxx_compiler_flag("-Wno-psabi")
|
||||
ei_add_cxx_compiler_flag("-Wno-variadic-macros")
|
||||
@@ -157,7 +153,6 @@ if(NOT MSVC)
|
||||
ei_add_cxx_compiler_flag("-wd981") # disable ICC's "operands are evaluated in unspecified order" remark
|
||||
ei_add_cxx_compiler_flag("-wd2304") # disbale ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor
|
||||
|
||||
|
||||
# The -ansi flag must be added last, otherwise it is also used as a linker flag by check_cxx_compiler_flag making it fails
|
||||
# Moreover we should not set both -strict-ansi and -ansi
|
||||
check_cxx_compiler_flag("-strict-ansi" COMPILER_SUPPORT_STRICTANSI)
|
||||
@@ -201,18 +196,6 @@ if(NOT MSVC)
|
||||
message(STATUS "Enabling SSE4.2 in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_AVX "Enable/Disable AVX in tests/examples" OFF)
|
||||
if(EIGEN_TEST_AVX)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
|
||||
message(STATUS "Enabling AVX in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_FMA "Enable/Disable FMA in tests/examples" OFF)
|
||||
if(EIGEN_TEST_FMA)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma")
|
||||
message(STATUS "Enabling FMA in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_ALTIVEC "Enable/Disable AltiVec in tests/examples" OFF)
|
||||
if(EIGEN_TEST_ALTIVEC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec")
|
||||
@@ -301,12 +284,6 @@ if(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT)
|
||||
message(STATUS "Disabling alignment in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_NO_EXCEPTIONS "Disables C++ exceptions" OFF)
|
||||
if(EIGEN_TEST_NO_EXCEPTIONS)
|
||||
ei_add_cxx_compiler_flag("-fno-exceptions")
|
||||
message(STATUS "Disabling exceptions in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_C++0x "Enables all C++0x features." OFF)
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
|
||||
@@ -324,7 +301,7 @@ if(EIGEN_INCLUDE_INSTALL_DIR)
|
||||
)
|
||||
else()
|
||||
set(INCLUDE_INSTALL_DIR
|
||||
"${CMAKE_INSTALL_PREFIX}/include/eigen3"
|
||||
"include/eigen3"
|
||||
CACHE INTERNAL
|
||||
"The directory where we install the header files (internal)"
|
||||
)
|
||||
@@ -427,7 +404,7 @@ if(cmake_generator_tolower MATCHES "makefile")
|
||||
message(STATUS "make install | Install to ${CMAKE_INSTALL_PREFIX}. To change that:")
|
||||
message(STATUS " | cmake . -DCMAKE_INSTALL_PREFIX=yourpath")
|
||||
message(STATUS " | Eigen headers will then be installed to:")
|
||||
message(STATUS " | ${INCLUDE_INSTALL_DIR}")
|
||||
message(STATUS " | ${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}")
|
||||
message(STATUS " | To install Eigen headers to a separate location, do:")
|
||||
message(STATUS " | cmake . -DEIGEN_INCLUDE_INSTALL_DIR=yourpath")
|
||||
message(STATUS "make doc | Generate the API documentation, requires Doxygen & LaTeX")
|
||||
@@ -441,31 +418,3 @@ else()
|
||||
endif()
|
||||
|
||||
message(STATUS "")
|
||||
|
||||
set ( EIGEN_CONFIG_CMAKE_PATH
|
||||
lib${LIB_SUFFIX}/cmake/eigen3
|
||||
CACHE PATH "The directory where the CMake files are installed"
|
||||
)
|
||||
if ( NOT IS_ABSOLUTE EIGEN_CONFIG_CMAKE_PATH )
|
||||
set ( EIGEN_CONFIG_CMAKE_PATH ${CMAKE_INSTALL_PREFIX}/${EIGEN_CONFIG_CMAKE_PATH} )
|
||||
endif ()
|
||||
|
||||
set ( EIGEN_USE_FILE ${EIGEN_CONFIG_CMAKE_PATH}/UseEigen3.cmake )
|
||||
set ( EIGEN_VERSION_STRING ${EIGEN_VERSION_NUMBER} )
|
||||
set ( EIGEN_VERSION_MAJOR ${EIGEN_WORLD_VERSION} )
|
||||
set ( EIGEN_VERSION_MINOR ${EIGEN_MAJOR_VERSION} )
|
||||
set ( EIGEN_VERSION_PATCH ${EIGEN_MINOR_VERSION} )
|
||||
set ( EIGEN_DEFINITIONS "")
|
||||
set ( EIGEN_INCLUDE_DIR ${INCLUDE_INSTALL_DIR} )
|
||||
set ( EIGEN_INCLUDE_DIRS ${EIGEN_INCLUDE_DIR} )
|
||||
set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} )
|
||||
|
||||
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
@ONLY ESCAPE_QUOTES
|
||||
)
|
||||
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
DESTINATION ${EIGEN_CONFIG_CMAKE_PATH}
|
||||
)
|
||||
|
||||
@@ -4,14 +4,10 @@
|
||||
## # The following are required to uses Dart and the Cdash dashboard
|
||||
## ENABLE_TESTING()
|
||||
## INCLUDE(CTest)
|
||||
set(CTEST_PROJECT_NAME "Eigen")
|
||||
set(CTEST_PROJECT_NAME "Eigen3.2")
|
||||
set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC")
|
||||
|
||||
set(CTEST_DROP_METHOD "http")
|
||||
set(CTEST_DROP_SITE "manao.inria.fr")
|
||||
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen")
|
||||
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen3.2")
|
||||
set(CTEST_DROP_SITE_CDASH TRUE)
|
||||
set(CTEST_PROJECT_SUBPROJECTS
|
||||
Official
|
||||
Unsupported
|
||||
)
|
||||
|
||||
11
Eigen/Array
Normal file
11
Eigen/Array
Normal file
@@ -0,0 +1,11 @@
|
||||
#ifndef EIGEN_ARRAY_MODULE_H
|
||||
#define EIGEN_ARRAY_MODULE_H
|
||||
|
||||
// include Core first to handle Eigen2 support macros
|
||||
#include "Core"
|
||||
|
||||
#ifndef EIGEN2_SUPPORT
|
||||
#error The Eigen/Array header does no longer exist in Eigen3. All that functionality has moved to Eigen/Core.
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_ARRAY_MODULE_H
|
||||
@@ -10,11 +10,9 @@
|
||||
*
|
||||
*
|
||||
* This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices.
|
||||
* Those decompositions are also accessible via the following methods:
|
||||
* - MatrixBase::llt()
|
||||
* Those decompositions are accessible via the following MatrixBase methods:
|
||||
* - MatrixBase::llt(),
|
||||
* - MatrixBase::ldlt()
|
||||
* - SelfAdjointView::llt()
|
||||
* - SelfAdjointView::ldlt()
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/Cholesky>
|
||||
|
||||
123
Eigen/Core
123
Eigen/Core
@@ -14,42 +14,6 @@
|
||||
// first thing Eigen does: stop the compiler from committing suicide
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
// Handle NVCC/CUDA
|
||||
#ifdef __CUDACC__
|
||||
// Do not try asserts on CUDA!
|
||||
#ifndef EIGEN_NO_DEBUG
|
||||
#define EIGEN_NO_DEBUG
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_INTERNAL_DEBUGGING
|
||||
#undef EIGEN_INTERNAL_DEBUGGING
|
||||
#endif
|
||||
|
||||
// Do not try to vectorize on CUDA!
|
||||
#define EIGEN_DONT_VECTORIZE
|
||||
|
||||
// All functions callable from CUDA code must be qualified with __device__
|
||||
#define EIGEN_DEVICE_FUNC __host__ __device__
|
||||
|
||||
#else
|
||||
#define EIGEN_DEVICE_FUNC
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__CUDA_ARCH__)
|
||||
#define EIGEN_USING_STD_MATH(FUNC) using ::FUNC;
|
||||
#else
|
||||
#define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
|
||||
#endif
|
||||
|
||||
#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS)
|
||||
#define EIGEN_EXCEPTIONS
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_EXCEPTIONS
|
||||
#include <new>
|
||||
#endif
|
||||
|
||||
// then include this file where all our macros are defined. It's really important to do it first because
|
||||
// it's where we do all the alignment settings (platform detection and honoring the user's will if he
|
||||
// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
|
||||
@@ -118,16 +82,7 @@
|
||||
#ifdef __SSE4_2__
|
||||
#define EIGEN_VECTORIZE_SSE4_2
|
||||
#endif
|
||||
#ifdef __AVX__
|
||||
#define EIGEN_VECTORIZE_AVX
|
||||
#define EIGEN_VECTORIZE_SSE3
|
||||
#define EIGEN_VECTORIZE_SSSE3
|
||||
#define EIGEN_VECTORIZE_SSE4_1
|
||||
#define EIGEN_VECTORIZE_SSE4_2
|
||||
#endif
|
||||
#ifdef __FMA__
|
||||
#define EIGEN_VECTORIZE_FMA
|
||||
#endif
|
||||
|
||||
// include files
|
||||
|
||||
// This extern "C" works around a MINGW-w64 compilation issue
|
||||
@@ -157,9 +112,6 @@
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_2
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
#ifdef EIGEN_VECTORIZE_AVX
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#endif
|
||||
} // end extern "C"
|
||||
#elif defined __ALTIVEC__
|
||||
@@ -171,7 +123,7 @@
|
||||
#undef bool
|
||||
#undef vector
|
||||
#undef pixel
|
||||
#elif defined __ARM_NEON__
|
||||
#elif defined __ARM_NEON
|
||||
#define EIGEN_VECTORIZE
|
||||
#define EIGEN_VECTORIZE_NEON
|
||||
#include <arm_neon.h>
|
||||
@@ -217,13 +169,19 @@
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#if defined(_CPPUNWIND) || defined(__EXCEPTIONS)
|
||||
#define EIGEN_EXCEPTIONS
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_EXCEPTIONS
|
||||
#include <new>
|
||||
#endif
|
||||
|
||||
/** \brief Namespace containing all symbols from the %Eigen library. */
|
||||
namespace Eigen {
|
||||
|
||||
inline static const char *SimdInstructionSetsInUse(void) {
|
||||
#if defined(EIGEN_VECTORIZE_AVX)
|
||||
return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
|
||||
#elif defined(EIGEN_VECTORIZE_SSE4_2)
|
||||
#if defined(EIGEN_VECTORIZE_SSE4_2)
|
||||
return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
|
||||
#elif defined(EIGEN_VECTORIZE_SSE4_1)
|
||||
return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
|
||||
@@ -244,9 +202,34 @@ inline static const char *SimdInstructionSetsInUse(void) {
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT
|
||||
// This will generate an error message:
|
||||
#error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information
|
||||
#define STAGE10_FULL_EIGEN2_API 10
|
||||
#define STAGE20_RESOLVE_API_CONFLICTS 20
|
||||
#define STAGE30_FULL_EIGEN3_API 30
|
||||
#define STAGE40_FULL_EIGEN3_STRICTNESS 40
|
||||
#define STAGE99_NO_EIGEN2_SUPPORT 99
|
||||
|
||||
#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS
|
||||
#define EIGEN2_SUPPORT
|
||||
#define EIGEN2_SUPPORT_STAGE STAGE40_FULL_EIGEN3_STRICTNESS
|
||||
#elif defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API
|
||||
#define EIGEN2_SUPPORT
|
||||
#define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API
|
||||
#elif defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS
|
||||
#define EIGEN2_SUPPORT
|
||||
#define EIGEN2_SUPPORT_STAGE STAGE20_RESOLVE_API_CONFLICTS
|
||||
#elif defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API
|
||||
#define EIGEN2_SUPPORT
|
||||
#define EIGEN2_SUPPORT_STAGE STAGE10_FULL_EIGEN2_API
|
||||
#elif defined EIGEN2_SUPPORT
|
||||
// default to stage 3, that's what it's always meant
|
||||
#define EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API
|
||||
#define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API
|
||||
#else
|
||||
#define EIGEN2_SUPPORT_STAGE STAGE99_NO_EIGEN2_SUPPORT
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
#undef minor
|
||||
#endif
|
||||
|
||||
// we use size_t frequently and we'll never remember to prepend it with std:: everytime just to
|
||||
@@ -276,13 +259,7 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/MathFunctions.h"
|
||||
#include "src/Core/GenericPacketMath.h"
|
||||
|
||||
#if defined EIGEN_VECTORIZE_AVX
|
||||
// Use AVX for floats and doubles, SSE for integers
|
||||
#include "src/Core/arch/SSE/PacketMath.h"
|
||||
#include "src/Core/arch/SSE/Complex.h"
|
||||
#include "src/Core/arch/AVX/PacketMath.h"
|
||||
#include "src/Core/arch/AVX/Complex.h"
|
||||
#elif defined EIGEN_VECTORIZE_SSE
|
||||
#if defined EIGEN_VECTORIZE_SSE
|
||||
#include "src/Core/arch/SSE/PacketMath.h"
|
||||
#include "src/Core/arch/SSE/MathFunctions.h"
|
||||
#include "src/Core/arch/SSE/Complex.h"
|
||||
@@ -296,30 +273,17 @@ using std::ptrdiff_t;
|
||||
|
||||
#include "src/Core/arch/Default/Settings.h"
|
||||
|
||||
#include "src/Core/functors/BinaryFunctors.h"
|
||||
#include "src/Core/functors/UnaryFunctors.h"
|
||||
#include "src/Core/functors/NullaryFunctors.h"
|
||||
#include "src/Core/functors/StlFunctors.h"
|
||||
|
||||
#include "src/Core/Functors.h"
|
||||
#include "src/Core/DenseCoeffsBase.h"
|
||||
#include "src/Core/DenseBase.h"
|
||||
#include "src/Core/MatrixBase.h"
|
||||
#include "src/Core/EigenBase.h"
|
||||
|
||||
#ifdef EIGEN_ENABLE_EVALUATORS
|
||||
#include "src/Core/functors/AssignmentFunctors.h"
|
||||
#include "src/Core/Product.h"
|
||||
#include "src/Core/CoreEvaluators.h"
|
||||
#include "src/Core/AssignEvaluator.h"
|
||||
#include "src/Core/ProductEvaluators.h"
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874
|
||||
// at least confirmed with Doxygen 1.5.5 and 1.5.6
|
||||
#include "src/Core/Assign.h"
|
||||
#endif
|
||||
|
||||
#include "src/Core/ArrayBase.h"
|
||||
#include "src/Core/util/BlasUtil.h"
|
||||
#include "src/Core/DenseStorage.h"
|
||||
#include "src/Core/NestByValue.h"
|
||||
@@ -383,6 +347,7 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/Random.h"
|
||||
#include "src/Core/Replicate.h"
|
||||
#include "src/Core/Reverse.h"
|
||||
#include "src/Core/ArrayBase.h"
|
||||
#include "src/Core/ArrayWrapper.h"
|
||||
|
||||
#ifdef EIGEN_USE_BLAS
|
||||
@@ -404,4 +369,8 @@ using std::ptrdiff_t;
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
#include "Eigen2Support"
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_CORE_H
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
#include "Dense"
|
||||
#include "Sparse"
|
||||
//#include "Sparse"
|
||||
|
||||
95
Eigen/Eigen2Support
Normal file
95
Eigen/Eigen2Support
Normal file
@@ -0,0 +1,95 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN2SUPPORT_H
|
||||
#define EIGEN2SUPPORT_H
|
||||
|
||||
#if (!defined(EIGEN2_SUPPORT)) || (!defined(EIGEN_CORE_H))
|
||||
#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_NO_EIGEN2_DEPRECATED_WARNING
|
||||
|
||||
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
|
||||
#warning "Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. (Define EIGEN_NO_EIGEN2_DEPRECATED_WARNING to disable this warning)"
|
||||
#else
|
||||
#pragma message ("Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. (Define EIGEN_NO_EIGEN2_DEPRECATED_WARNING to disable this warning)")
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_NO_EIGEN2_DEPRECATED_WARNING
|
||||
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
/** \ingroup Support_modules
|
||||
* \defgroup Eigen2Support_Module Eigen2 support module
|
||||
*
|
||||
* \warning Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3.
|
||||
*
|
||||
* This module provides a couple of deprecated functions improving the compatibility with Eigen2.
|
||||
*
|
||||
* To use it, define EIGEN2_SUPPORT before including any Eigen header
|
||||
* \code
|
||||
* #define EIGEN2_SUPPORT
|
||||
* \endcode
|
||||
*
|
||||
*/
|
||||
|
||||
#include "src/Eigen2Support/Macros.h"
|
||||
#include "src/Eigen2Support/Memory.h"
|
||||
#include "src/Eigen2Support/Meta.h"
|
||||
#include "src/Eigen2Support/Lazy.h"
|
||||
#include "src/Eigen2Support/Cwise.h"
|
||||
#include "src/Eigen2Support/CwiseOperators.h"
|
||||
#include "src/Eigen2Support/TriangularSolver.h"
|
||||
#include "src/Eigen2Support/Block.h"
|
||||
#include "src/Eigen2Support/VectorBlock.h"
|
||||
#include "src/Eigen2Support/Minor.h"
|
||||
#include "src/Eigen2Support/MathFunctions.h"
|
||||
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
// Eigen2 used to include iostream
|
||||
#include<iostream>
|
||||
|
||||
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
|
||||
using Eigen::Matrix##SizeSuffix##TypeSuffix; \
|
||||
using Eigen::Vector##SizeSuffix##TypeSuffix; \
|
||||
using Eigen::RowVector##SizeSuffix##TypeSuffix;
|
||||
|
||||
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
|
||||
|
||||
#define EIGEN_USING_MATRIX_TYPEDEFS \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd)
|
||||
|
||||
// Eigen2-compatibility convenience macro: expands to EIGEN_USING_MATRIX_TYPEDEFS followed by
// using-declarations for Eigen::Matrix, Eigen::MatrixBase and the Eigen::ei_* names listed below.
#define USING_PART_OF_NAMESPACE_EIGEN \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS \
|
||||
using Eigen::Matrix; \
|
||||
using Eigen::MatrixBase; \
|
||||
using Eigen::ei_random; \
|
||||
using Eigen::ei_real; \
|
||||
using Eigen::ei_imag; \
|
||||
using Eigen::ei_conj; \
|
||||
using Eigen::ei_abs; \
|
||||
using Eigen::ei_abs2; \
|
||||
using Eigen::ei_sqrt; \
|
||||
using Eigen::ei_exp; \
|
||||
using Eigen::ei_log; \
|
||||
using Eigen::ei_sin; \
|
||||
using Eigen::ei_cos;
|
||||
|
||||
#endif // EIGEN2SUPPORT_H
|
||||
@@ -33,23 +33,27 @@
|
||||
#include "src/Geometry/OrthoMethods.h"
|
||||
#include "src/Geometry/EulerAngles.h"
|
||||
|
||||
#include "src/Geometry/Homogeneous.h"
|
||||
#include "src/Geometry/RotationBase.h"
|
||||
#include "src/Geometry/Rotation2D.h"
|
||||
#include "src/Geometry/Quaternion.h"
|
||||
#include "src/Geometry/AngleAxis.h"
|
||||
#include "src/Geometry/Transform.h"
|
||||
#include "src/Geometry/Translation.h"
|
||||
#include "src/Geometry/Scaling.h"
|
||||
#include "src/Geometry/Hyperplane.h"
|
||||
#include "src/Geometry/ParametrizedLine.h"
|
||||
#include "src/Geometry/AlignedBox.h"
|
||||
#include "src/Geometry/Umeyama.h"
|
||||
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
|
||||
#include "src/Geometry/Homogeneous.h"
|
||||
#include "src/Geometry/RotationBase.h"
|
||||
#include "src/Geometry/Rotation2D.h"
|
||||
#include "src/Geometry/Quaternion.h"
|
||||
#include "src/Geometry/AngleAxis.h"
|
||||
#include "src/Geometry/Transform.h"
|
||||
#include "src/Geometry/Translation.h"
|
||||
#include "src/Geometry/Scaling.h"
|
||||
#include "src/Geometry/Hyperplane.h"
|
||||
#include "src/Geometry/ParametrizedLine.h"
|
||||
#include "src/Geometry/AlignedBox.h"
|
||||
#include "src/Geometry/Umeyama.h"
|
||||
|
||||
// Use the SSE optimized version whenever possible. At the moment the
|
||||
// SSE version doesn't compile when AVX is enabled
|
||||
#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX
|
||||
#include "src/Geometry/arch/Geometry_SSE.h"
|
||||
#if defined EIGEN_VECTORIZE_SSE
|
||||
#include "src/Geometry/arch/Geometry_SSE.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
#include "src/Eigen2Support/Geometry/All.h"
|
||||
#endif
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
8
Eigen/LU
8
Eigen/LU
@@ -27,12 +27,14 @@
|
||||
#include "src/LU/Determinant.h"
|
||||
#include "src/LU/Inverse.h"
|
||||
|
||||
// Use the SSE optimized version whenever possible. At the moment the
|
||||
// SSE version doesn't compile when AVX is enabled
|
||||
#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX
|
||||
#if defined EIGEN_VECTORIZE_SSE
|
||||
#include "src/LU/arch/Inverse_SSE.h"
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
#include "src/Eigen2Support/LU.h"
|
||||
#endif
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#endif // EIGEN_LU_MODULE_H
|
||||
|
||||
32
Eigen/LeastSquares
Normal file
32
Eigen/LeastSquares
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef EIGEN_REGRESSION_MODULE_H
|
||||
#define EIGEN_REGRESSION_MODULE_H
|
||||
|
||||
#ifndef EIGEN2_SUPPORT
|
||||
#error LeastSquares is only available in Eigen2 support mode (define EIGEN2_SUPPORT)
|
||||
#endif
|
||||
|
||||
// exclude from normal eigen3-only documentation
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
|
||||
#include "Core"
|
||||
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
#include "Eigenvalues"
|
||||
#include "Geometry"
|
||||
|
||||
/** \defgroup LeastSquares_Module LeastSquares module
|
||||
* This module provides linear regression and related features.
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/LeastSquares>
|
||||
* \endcode
|
||||
*/
|
||||
|
||||
#include "src/Eigen2Support/LeastSquares.h"
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#endif // EIGEN2_SUPPORT
|
||||
|
||||
#endif // EIGEN_REGRESSION_MODULE_H
|
||||
12
Eigen/QR
12
Eigen/QR
@@ -15,9 +15,7 @@
|
||||
*
|
||||
* This module provides various QR decompositions
|
||||
* This module also provides some MatrixBase methods, including:
|
||||
* - MatrixBase::householderQr()
|
||||
* - MatrixBase::colPivHouseholderQr()
|
||||
* - MatrixBase::fullPivHouseholderQr()
|
||||
* - MatrixBase::qr(),
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/QR>
|
||||
@@ -33,7 +31,15 @@
|
||||
#include "src/QR/ColPivHouseholderQR_MKL.h"
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
#include "src/Eigen2Support/QR.h"
|
||||
#endif
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
#include "Eigenvalues"
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_QR_MODULE_H
|
||||
/* vim: set filetype=cpp et sw=2 ts=2 ai: */
|
||||
|
||||
@@ -21,13 +21,16 @@
|
||||
*/
|
||||
|
||||
#include "src/misc/Solve.h"
|
||||
#include "src/SVD/SVDBase.h"
|
||||
#include "src/SVD/JacobiSVD.h"
|
||||
#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
|
||||
#include "src/SVD/JacobiSVD_MKL.h"
|
||||
#endif
|
||||
#include "src/SVD/UpperBidiagonalization.h"
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
#include "src/Eigen2Support/SVD.h"
|
||||
#endif
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#endif // EIGEN_SVD_MODULE_H
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
/**
|
||||
* \defgroup SparseCore_Module SparseCore module
|
||||
*
|
||||
* This module provides a sparse matrix representation, and basic associatd matrix manipulations
|
||||
* This module provides a sparse matrix representation, and basic associated matrix manipulations
|
||||
* and operations.
|
||||
*
|
||||
* See the \ref TutorialSparse "Sparse tutorial"
|
||||
|
||||
@@ -43,7 +43,7 @@ namespace internal {
|
||||
* Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky
|
||||
* decomposition to determine whether a system of equations has a solution.
|
||||
*
|
||||
* \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT
|
||||
* \sa MatrixBase::ldlt(), class LLT
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo> class LDLT
|
||||
{
|
||||
@@ -151,6 +151,13 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign;
|
||||
}
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
/** Eigen2-compatibility name, only compiled when EIGEN2_SUPPORT is defined; simply forwards to isPositive(). */
inline bool isPositiveDefinite() const
|
||||
{
|
||||
return isPositive();
|
||||
}
|
||||
#endif
|
||||
|
||||
/** \returns true if the matrix is negative (semidefinite) */
|
||||
inline bool isNegative(void) const
|
||||
@@ -172,7 +179,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
* least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function
|
||||
* computes the least-square solution of \f$ A x = b \f$ if \f$ A \f$ is singular.
|
||||
*
|
||||
* \sa MatrixBase::ldlt(), SelfAdjointView::ldlt()
|
||||
* \sa MatrixBase::ldlt()
|
||||
*/
|
||||
template<typename Rhs>
|
||||
inline const internal::solve_retval<LDLT, Rhs>
|
||||
@@ -184,6 +191,15 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
return internal::solve_retval<LDLT, Rhs>(*this, b.derived());
|
||||
}
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
/** Eigen2-compatibility overload, only compiled when EIGEN2_SUPPORT is defined.
  * Assigns the result of solve(b) to \a *result.
  * \returns always true; no failure condition is checked or reported here.
  */
template<typename OtherDerived, typename ResultType>
|
||||
bool solve(const MatrixBase<OtherDerived>& b, ResultType *result) const
|
||||
{
|
||||
*result = this->solve(b);
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename Derived>
|
||||
bool solveInPlace(MatrixBase<Derived> &bAndX) const;
|
||||
|
||||
@@ -219,6 +235,11 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
/** \internal
|
||||
* Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U.
|
||||
@@ -246,7 +267,6 @@ template<> struct ldlt_inplace<Lower>
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
typedef typename MatrixType::Index Index;
|
||||
typedef typename TranspositionType::StorageIndexType IndexType;
|
||||
eigen_assert(mat.rows()==mat.cols());
|
||||
const Index size = mat.rows();
|
||||
|
||||
@@ -266,7 +286,7 @@ template<> struct ldlt_inplace<Lower>
|
||||
mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner);
|
||||
index_of_biggest_in_corner += k;
|
||||
|
||||
transpositions.coeffRef(k) = IndexType(index_of_biggest_in_corner);
|
||||
transpositions.coeffRef(k) = index_of_biggest_in_corner;
|
||||
if(k != index_of_biggest_in_corner)
|
||||
{
|
||||
// apply the transposition while taking care to consider only
|
||||
@@ -275,7 +295,7 @@ template<> struct ldlt_inplace<Lower>
|
||||
mat.row(k).head(k).swap(mat.row(index_of_biggest_in_corner).head(k));
|
||||
mat.col(k).tail(s).swap(mat.col(index_of_biggest_in_corner).tail(s));
|
||||
std::swap(mat.coeffRef(k,k),mat.coeffRef(index_of_biggest_in_corner,index_of_biggest_in_corner));
|
||||
for(Index i=k+1;i<index_of_biggest_in_corner;++i)
|
||||
for(int i=k+1;i<index_of_biggest_in_corner;++i)
|
||||
{
|
||||
Scalar tmp = mat.coeffRef(i,k);
|
||||
mat.coeffRef(i,k) = numext::conj(mat.coeffRef(index_of_biggest_in_corner,i));
|
||||
@@ -419,6 +439,8 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
|
||||
template<typename MatrixType, int _UpLo>
|
||||
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
eigen_assert(a.rows()==a.cols());
|
||||
const Index size = a.rows();
|
||||
|
||||
@@ -427,6 +449,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
|
||||
m_transpositions.resize(size);
|
||||
m_isInitialized = false;
|
||||
m_temporary.resize(size);
|
||||
m_sign = internal::ZeroSign;
|
||||
|
||||
internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign);
|
||||
|
||||
@@ -441,9 +464,8 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
|
||||
*/
|
||||
template<typename MatrixType, int _UpLo>
|
||||
template<typename Derived>
|
||||
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename NumTraits<typename MatrixType::Scalar>::Real& sigma)
|
||||
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename LDLT<MatrixType,_UpLo>::RealScalar& sigma)
|
||||
{
|
||||
typedef typename TranspositionType::StorageIndexType IndexType;
|
||||
const Index size = w.rows();
|
||||
if (m_isInitialized)
|
||||
{
|
||||
@@ -455,7 +477,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Deri
|
||||
m_matrix.setZero();
|
||||
m_transpositions.resize(size);
|
||||
for (Index i = 0; i < size; i++)
|
||||
m_transpositions.coeffRef(i) = IndexType(i);
|
||||
m_transpositions.coeffRef(i) = i;
|
||||
m_temporary.resize(size);
|
||||
m_sign = sigma>=0 ? internal::PositiveSemiDef : internal::NegativeSemiDef;
|
||||
m_isInitialized = true;
|
||||
@@ -486,7 +508,7 @@ struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
|
||||
// dst = D^-1 (L^-1 P b)
|
||||
// more precisely, use pseudo-inverse of D (see bug 241)
|
||||
using std::abs;
|
||||
EIGEN_USING_STD_MATH(max);
|
||||
using std::max;
|
||||
typedef typename LDLTType::MatrixType MatrixType;
|
||||
typedef typename LDLTType::RealScalar RealScalar;
|
||||
const typename Diagonal<const MatrixType>::RealReturnType vectorD(dec().vectorD());
|
||||
@@ -497,6 +519,7 @@ struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
|
||||
// diagonal element is not well justified and leads to numerical issues in some cases.
|
||||
// Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
|
||||
RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
|
||||
|
||||
for (Index i = 0; i < vectorD.size(); ++i) {
|
||||
if(abs(vectorD(i)) > tolerance)
|
||||
dst.row(i) /= vectorD(i);
|
||||
@@ -563,10 +586,8 @@ MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
|
||||
return res;
|
||||
}
|
||||
|
||||
#ifndef __CUDACC__
|
||||
/** \cholesky_module
|
||||
* \returns the Cholesky decomposition with full pivoting without square root of \c *this
|
||||
* \sa MatrixBase::ldlt()
|
||||
*/
|
||||
template<typename MatrixType, unsigned int UpLo>
|
||||
inline const LDLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>
|
||||
@@ -577,7 +598,6 @@ SelfAdjointView<MatrixType, UpLo>::ldlt() const
|
||||
|
||||
/** \cholesky_module
|
||||
* \returns the Cholesky decomposition with full pivoting without square root of \c *this
|
||||
* \sa SelfAdjointView::ldlt()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const LDLT<typename MatrixBase<Derived>::PlainObject>
|
||||
@@ -585,7 +605,6 @@ MatrixBase<Derived>::ldlt() const
|
||||
{
|
||||
return LDLT<PlainObject>(derived());
|
||||
}
|
||||
#endif // __CUDACC__
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
|
||||
* Example: \include LLT_example.cpp
|
||||
* Output: \verbinclude LLT_example.out
|
||||
*
|
||||
* \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
|
||||
* \sa MatrixBase::llt(), class LDLT
|
||||
*/
|
||||
/* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
|
||||
* Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
|
||||
@@ -115,7 +115,7 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
* Example: \include LLT_solve.cpp
|
||||
* Output: \verbinclude LLT_solve.out
|
||||
*
|
||||
* \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt()
|
||||
* \sa solveInPlace(), MatrixBase::llt()
|
||||
*/
|
||||
template<typename Rhs>
|
||||
inline const internal::solve_retval<LLT, Rhs>
|
||||
@@ -127,6 +127,17 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
return internal::solve_retval<LLT, Rhs>(*this, b.derived());
|
||||
}
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
/** Eigen2-compatibility overload, only compiled when EIGEN2_SUPPORT is defined.
  * Assigns the result of solve(b) to \a *result.
  * \returns always true; no failure condition is checked or reported here.
  */
template<typename OtherDerived, typename ResultType>
|
||||
bool solve(const MatrixBase<OtherDerived>& b, ResultType *result) const
|
||||
{
|
||||
*result = this->solve(b);
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Eigen2-compatibility member. NOTE(review): returns true unconditionally and does not
  * inspect the decomposition state -- callers should not rely on it as a definiteness test. */
bool isPositiveDefinite() const { return true; }
|
||||
#endif
|
||||
|
||||
template<typename Derived>
|
||||
void solveInPlace(MatrixBase<Derived> &bAndX) const;
|
||||
|
||||
@@ -163,6 +174,12 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
/** \internal
|
||||
* Used to compute and store L
|
||||
* The strict upper part is not used and even not initialized.
|
||||
@@ -272,7 +289,7 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
|
||||
return k;
|
||||
mat.coeffRef(k,k) = x = sqrt(x);
|
||||
if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint();
|
||||
if (rs>0) A21 *= RealScalar(1)/x;
|
||||
if (rs>0) A21 /= x;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
@@ -373,6 +390,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
|
||||
template<typename MatrixType, int _UpLo>
|
||||
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
eigen_assert(a.rows()==a.cols());
|
||||
const Index size = a.rows();
|
||||
m_matrix.resize(size, size);
|
||||
@@ -454,10 +473,8 @@ MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
|
||||
return matrixL() * matrixL().adjoint().toDenseMatrix();
|
||||
}
|
||||
|
||||
#ifndef __CUDACC__
|
||||
/** \cholesky_module
|
||||
* \returns the LLT decomposition of \c *this
|
||||
* \sa SelfAdjointView::llt()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const LLT<typename MatrixBase<Derived>::PlainObject>
|
||||
@@ -468,7 +485,6 @@ MatrixBase<Derived>::llt() const
|
||||
|
||||
/** \cholesky_module
|
||||
* \returns the LLT decomposition of \c *this
|
||||
* \sa SelfAdjointView::llt()
|
||||
*/
|
||||
template<typename MatrixType, unsigned int UpLo>
|
||||
inline const LLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>
|
||||
@@ -476,8 +492,7 @@ SelfAdjointView<MatrixType, UpLo>::llt() const
|
||||
{
|
||||
return LLT<PlainObject,UpLo>(m_matrix);
|
||||
}
|
||||
#endif // __CUDACC__
|
||||
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_LLT_H
|
||||
|
||||
@@ -60,7 +60,7 @@ template<> struct mkl_llt<EIGTYPE> \
|
||||
lda = m.outerStride(); \
|
||||
\
|
||||
info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \
|
||||
info = (info==0) ? Success : NumericalIssue; \
|
||||
info = (info==0) ? -1 : info>0 ? info-1 : size; \
|
||||
return info; \
|
||||
} \
|
||||
}; \
|
||||
|
||||
@@ -78,7 +78,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
|
||||
{
|
||||
res.itype = CHOLMOD_INT;
|
||||
}
|
||||
else if (internal::is_same<_Index,UF_long>::value)
|
||||
else if (internal::is_same<_Index,SuiteSparse_long>::value)
|
||||
{
|
||||
res.itype = CHOLMOD_LONG;
|
||||
}
|
||||
@@ -395,7 +395,7 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
|
||||
CholmodSimplicialLLT(const MatrixType& matrix) : Base()
|
||||
{
|
||||
init();
|
||||
compute(matrix);
|
||||
Base::compute(matrix);
|
||||
}
|
||||
|
||||
~CholmodSimplicialLLT() {}
|
||||
@@ -442,7 +442,7 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
|
||||
CholmodSimplicialLDLT(const MatrixType& matrix) : Base()
|
||||
{
|
||||
init();
|
||||
compute(matrix);
|
||||
Base::compute(matrix);
|
||||
}
|
||||
|
||||
~CholmodSimplicialLDLT() {}
|
||||
@@ -487,7 +487,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
|
||||
CholmodSupernodalLLT(const MatrixType& matrix) : Base()
|
||||
{
|
||||
init();
|
||||
compute(matrix);
|
||||
Base::compute(matrix);
|
||||
}
|
||||
|
||||
~CholmodSupernodalLLT() {}
|
||||
@@ -534,7 +534,7 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom
|
||||
CholmodDecomposition(const MatrixType& matrix) : Base()
|
||||
{
|
||||
init();
|
||||
compute(matrix);
|
||||
Base::compute(matrix);
|
||||
}
|
||||
|
||||
~CholmodDecomposition() {}
|
||||
|
||||
@@ -69,7 +69,6 @@ class Array
|
||||
* the usage of 'using'. This should be done only for operator=.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array& operator=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
return Base::operator=(other);
|
||||
@@ -85,7 +84,6 @@ class Array
|
||||
* remain row-vectors and vectors remain vectors.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array& operator=(const ArrayBase<OtherDerived>& other)
|
||||
{
|
||||
return Base::_set(other);
|
||||
@@ -94,7 +92,6 @@ class Array
|
||||
/** This is a special case of the templated operator=. Its purpose is to
|
||||
* prevent a default operator= from hiding the templated operator=.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array& operator=(const Array& other)
|
||||
{
|
||||
return Base::_set(other);
|
||||
@@ -110,7 +107,6 @@ class Array
|
||||
*
|
||||
* \sa resize(Index,Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array() : Base()
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -120,7 +116,6 @@ class Array
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
// FIXME is it still needed ??
|
||||
/** \internal */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Array(internal::constructor_without_unaligned_array_assert)
|
||||
: Base(internal::constructor_without_unaligned_array_assert())
|
||||
{
|
||||
@@ -144,48 +139,41 @@ class Array
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE explicit Array(const T& x)
|
||||
/** Constructs a vector or row-vector with given dimension. \only_for_vectors
|
||||
*
|
||||
* Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
|
||||
* it is redundant to pass the dimension here, so it makes more sense to use the default
|
||||
* constructor Matrix() instead.
|
||||
*/
|
||||
EIGEN_STRONG_INLINE explicit Array(Index dim)
|
||||
: Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::template _init1<T>(x);
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Array)
|
||||
eigen_assert(dim >= 0);
|
||||
eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
|
||||
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename T0, typename T1>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
this->template _init2<T0,T1>(val0, val1);
|
||||
}
|
||||
#else
|
||||
/** \brief Constructs a fixed-sized array initialized with coefficients starting at \a data */
|
||||
EIGEN_DEVICE_FUNC explicit Array(const Scalar *data);
|
||||
/** Constructs a vector or row-vector with given dimension. \only_for_vectors
|
||||
/** constructs an uninitialized matrix with \a rows rows and \a cols columns.
|
||||
*
|
||||
* Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
|
||||
* it is redundant to pass the dimension here, so it makes more sense to use the default
|
||||
* constructor Array() instead.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE explicit Array(Index dim);
|
||||
/** constructs an initialized 1x1 Array with the given coefficient */
|
||||
Array(const Scalar& value);
|
||||
/** constructs an uninitialized array with \a rows rows and \a cols columns.
|
||||
*
|
||||
* This is useful for dynamic-size arrays. For fixed-size arrays,
|
||||
* This is useful for dynamic-size matrices. For fixed-size matrices,
|
||||
* it is redundant to pass these parameters, so one should use the default constructor
|
||||
* Array() instead. */
|
||||
* Matrix() instead. */
|
||||
Array(Index rows, Index cols);
|
||||
/** constructs an initialized 2D vector with given coefficients */
|
||||
Array(const Scalar& val0, const Scalar& val1);
|
||||
#endif
|
||||
|
||||
/** constructs an initialized 3D vector with given coefficients */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -195,7 +183,6 @@ class Array
|
||||
m_storage.data()[2] = val2;
|
||||
}
|
||||
/** constructs an initialized 4D vector with given coefficients */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -206,9 +193,10 @@ class Array
|
||||
m_storage.data()[3] = val3;
|
||||
}
|
||||
|
||||
explicit Array(const Scalar *data);
|
||||
|
||||
/** Constructor copying the value of the expression \a other */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const ArrayBase<OtherDerived>& other)
|
||||
: Base(other.rows() * other.cols(), other.rows(), other.cols())
|
||||
{
|
||||
@@ -216,7 +204,6 @@ class Array
|
||||
Base::_set_noalias(other);
|
||||
}
|
||||
/** Copy constructor */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const Array& other)
|
||||
: Base(other.rows() * other.cols(), other.rows(), other.cols())
|
||||
{
|
||||
@@ -225,7 +212,6 @@ class Array
|
||||
}
|
||||
/** Copy constructor with in-place evaluation */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const ReturnByValue<OtherDerived>& other)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -235,7 +221,6 @@ class Array
|
||||
|
||||
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other)
|
||||
: Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
|
||||
{
|
||||
@@ -251,8 +236,8 @@ class Array
|
||||
void swap(ArrayBase<OtherDerived> const & other)
|
||||
{ this->_swap(other.derived()); }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
|
||||
inline Index innerStride() const { return 1; }
|
||||
inline Index outerStride() const { return this->innerSize(); }
|
||||
|
||||
#ifdef EIGEN_ARRAY_PLUGIN
|
||||
#include EIGEN_ARRAY_PLUGIN
|
||||
|
||||
@@ -46,9 +46,6 @@ template<typename Derived> class ArrayBase
|
||||
|
||||
typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl;
|
||||
|
||||
using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
|
||||
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
|
||||
|
||||
typedef typename internal::traits<Derived>::StorageKind StorageKind;
|
||||
typedef typename internal::traits<Derived>::Index Index;
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
@@ -56,6 +53,7 @@ template<typename Derived> class ArrayBase
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
|
||||
typedef DenseBase<Derived> Base;
|
||||
using Base::operator*;
|
||||
using Base::RowsAtCompileTime;
|
||||
using Base::ColsAtCompileTime;
|
||||
using Base::SizeAtCompileTime;
|
||||
@@ -118,50 +116,40 @@ template<typename Derived> class ArrayBase
|
||||
/** Special case of the template operator=, in order to prevent the compiler
|
||||
* from generating a default operator= (issue hit with g++ 4.1)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const ArrayBase& other)
|
||||
{
|
||||
return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator+=(const Scalar& scalar);
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator-=(const Scalar& scalar);
|
||||
Derived& operator+=(const Scalar& scalar)
|
||||
{ return *this = derived() + scalar; }
|
||||
Derived& operator-=(const Scalar& scalar)
|
||||
{ return *this = derived() - scalar; }
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator+=(const ArrayBase<OtherDerived>& other);
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator-=(const ArrayBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator*=(const ArrayBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator/=(const ArrayBase<OtherDerived>& other);
|
||||
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC
|
||||
ArrayBase<Derived>& array() { return *this; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
const ArrayBase<Derived>& array() const { return *this; }
|
||||
|
||||
/** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array
|
||||
* \sa MatrixBase::array() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
MatrixWrapper<Derived> matrix() { return derived(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MatrixWrapper<const Derived> matrix() const { return derived(); }
|
||||
|
||||
// template<typename Dest>
|
||||
// inline void evalTo(Dest& dst) const { dst = matrix(); }
|
||||
|
||||
protected:
|
||||
EIGEN_DEVICE_FUNC
|
||||
ArrayBase() : Base() {}
|
||||
|
||||
private:
|
||||
|
||||
@@ -53,54 +53,41 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
|
||||
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
||||
inline ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_expression.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return m_expression.cols(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const { return m_expression.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return m_expression.data(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.coeff(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_expression.coeff(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(index);
|
||||
@@ -131,11 +118,9 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
}
|
||||
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void evalTo(Dest& dst) const { dst = m_expression; }
|
||||
|
||||
const typename internal::remove_all<NestedExpressionType>::type&
|
||||
EIGEN_DEVICE_FUNC
|
||||
nestedExpression() const
|
||||
{
|
||||
return m_expression;
|
||||
@@ -143,11 +128,9 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index) */
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index,Index)*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }
|
||||
|
||||
protected:
|
||||
@@ -195,54 +178,41 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
|
||||
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_expression.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return m_expression.cols(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const { return m_expression.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return m_expression.data(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.coeff(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.derived().coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_expression.coeff(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(index);
|
||||
@@ -272,7 +242,6 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<NestedExpressionType>::type&
|
||||
nestedExpression() const
|
||||
{
|
||||
@@ -281,11 +250,9 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index) */
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index,Index)*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }
|
||||
|
||||
protected:
|
||||
|
||||
@@ -105,8 +105,6 @@ public:
|
||||
EIGEN_DEBUG_VAR(DstIsAligned)
|
||||
EIGEN_DEBUG_VAR(SrcIsAligned)
|
||||
EIGEN_DEBUG_VAR(JointAlignment)
|
||||
EIGEN_DEBUG_VAR(Derived::SizeAtCompileTime)
|
||||
EIGEN_DEBUG_VAR(OtherDerived::CoeffReadCost)
|
||||
EIGEN_DEBUG_VAR(InnerSize)
|
||||
EIGEN_DEBUG_VAR(InnerMaxSize)
|
||||
EIGEN_DEBUG_VAR(PacketSize)
|
||||
@@ -141,7 +139,6 @@ struct assign_DefaultTraversal_CompleteUnrolling
|
||||
inner = Index % Derived1::InnerSizeAtCompileTime
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
dst.copyCoeffByOuterInner(outer, inner, src);
|
||||
@@ -152,14 +149,12 @@ struct assign_DefaultTraversal_CompleteUnrolling
|
||||
template<typename Derived1, typename Derived2, int Stop>
|
||||
struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, int Index, int Stop>
|
||||
struct assign_DefaultTraversal_InnerUnrolling
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
|
||||
{
|
||||
dst.copyCoeffByOuterInner(outer, Index, src);
|
||||
@@ -170,7 +165,6 @@ struct assign_DefaultTraversal_InnerUnrolling
|
||||
template<typename Derived1, typename Derived2, int Stop>
|
||||
struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
|
||||
};
|
||||
|
||||
@@ -181,7 +175,6 @@ struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
template<typename Derived1, typename Derived2, int Index, int Stop>
|
||||
struct assign_LinearTraversal_CompleteUnrolling
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
dst.copyCoeff(Index, src);
|
||||
@@ -192,7 +185,6 @@ struct assign_LinearTraversal_CompleteUnrolling
|
||||
template<typename Derived1, typename Derived2, int Stop>
|
||||
struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
@@ -257,7 +249,6 @@ struct assign_impl;
|
||||
template<typename Derived1, typename Derived2, int Unrolling, int Version>
|
||||
struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &, const Derived2 &) { }
|
||||
};
|
||||
|
||||
@@ -265,7 +256,6 @@ template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const Index innerSize = dst.innerSize();
|
||||
@@ -279,7 +269,6 @@ struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
|
||||
template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
|
||||
@@ -291,7 +280,6 @@ template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const Index outerSize = dst.outerSize();
|
||||
@@ -309,7 +297,6 @@ template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const Index size = dst.size();
|
||||
@@ -321,7 +308,6 @@ struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
|
||||
template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
|
||||
@@ -453,19 +439,26 @@ struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Ve
|
||||
typedef typename Derived1::Index Index;
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
typedef packet_traits<typename Derived1::Scalar> PacketTraits;
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
typedef packet_traits<Scalar> PacketTraits;
|
||||
enum {
|
||||
packetSize = PacketTraits::size,
|
||||
alignable = PacketTraits::AlignedOnScalar,
|
||||
dstAlignment = alignable ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
|
||||
dstIsAligned = assign_traits<Derived1,Derived2>::DstIsAligned,
|
||||
dstAlignment = alignable ? Aligned : int(dstIsAligned),
|
||||
srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
|
||||
};
|
||||
const Scalar *dst_ptr = &dst.coeffRef(0,0);
|
||||
if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0)
|
||||
{
|
||||
// the pointer is not aligned on scalar, so alignment is not possible
|
||||
return assign_impl<Derived1,Derived2,DefaultTraversal,NoUnrolling>::run(dst, src);
|
||||
}
|
||||
const Index packetAlignedMask = packetSize - 1;
|
||||
const Index innerSize = dst.innerSize();
|
||||
const Index outerSize = dst.outerSize();
|
||||
const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
|
||||
Index alignedStart = ((!alignable) || assign_traits<Derived1,Derived2>::DstIsAligned) ? 0
|
||||
: internal::first_aligned(&dst.coeffRef(0,0), innerSize);
|
||||
Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned(dst_ptr, innerSize);
|
||||
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
{
|
||||
@@ -506,25 +499,12 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
|
||||
EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
|
||||
#ifdef EIGEN_TEST_EVALUATORS
|
||||
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
internal::copy_using_evaluator_traits<Derived, OtherDerived>::debug();
|
||||
#endif
|
||||
eigen_assert(rows() == other.rows() && cols() == other.cols());
|
||||
internal::call_dense_assignment_loop(derived(),other.derived());
|
||||
|
||||
#else // EIGEN_TEST_EVALUATORS
|
||||
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
internal::assign_traits<Derived, OtherDerived>::debug();
|
||||
#endif
|
||||
eigen_assert(rows() == other.rows() && cols() == other.cols());
|
||||
internal::assign_impl<Derived, OtherDerived, int(SameType) ? int(internal::assign_traits<Derived, OtherDerived>::Traversal)
|
||||
: int(InvalidTraversal)>::run(derived(),other.derived());
|
||||
|
||||
#endif // EIGEN_TEST_EVALUATORS
|
||||
|
||||
: int(InvalidTraversal)>::run(derived(),other.derived());
|
||||
#ifndef EIGEN_NO_DEBUG
|
||||
checkTransposeAliasing(other.derived());
|
||||
#endif
|
||||
@@ -544,28 +524,22 @@ struct assign_selector;
|
||||
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct assign_selector<Derived,OtherDerived,false,false> {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
|
||||
template<typename ActualDerived, typename ActualOtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { other.evalTo(dst); return dst; }
|
||||
};
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct assign_selector<Derived,OtherDerived,true,false> {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
|
||||
};
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct assign_selector<Derived,OtherDerived,false,true> {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
|
||||
template<typename ActualDerived, typename ActualOtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { Transpose<ActualDerived> dstTrans(dst); other.evalTo(dstTrans); return dst; }
|
||||
};
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct assign_selector<Derived,OtherDerived,true,true> {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
|
||||
};
|
||||
|
||||
@@ -573,21 +547,18 @@ struct assign_selector<Derived,OtherDerived,true,true> {
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
|
||||
{
|
||||
return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
|
||||
{
|
||||
return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
|
||||
@@ -595,7 +566,6 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& ot
|
||||
|
||||
template<typename Derived>
|
||||
template <typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
|
||||
@@ -603,7 +573,6 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<Othe
|
||||
|
||||
template<typename Derived>
|
||||
template <typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)
|
||||
{
|
||||
return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());
|
||||
@@ -611,7 +580,6 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<Othe
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
|
||||
{
|
||||
return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());
|
||||
|
||||
@@ -1,842 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2011-2013 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_ASSIGN_EVALUATOR_H
|
||||
#define EIGEN_ASSIGN_EVALUATOR_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
// This implementation is based on Assign.h
|
||||
|
||||
namespace internal {
|
||||
|
||||
/***************************************************************************
|
||||
* Part 1 : the logic deciding a strategy for traversal and unrolling *
|
||||
***************************************************************************/
|
||||
|
||||
// copy_using_evaluator_traits is based on assign_traits
|
||||
|
||||
// Compile-time decision table for assigning an expression of type OtherDerived
// to a destination of type Derived: it selects a traversal strategy (Traversal)
// and an unrolling strategy (Unrolling) from the flags and compile-time sizes
// of both types. Everything is computed through enums; there is no run-time state.
template <typename Derived, typename OtherDerived>
|
||||
struct copy_using_evaluator_traits
|
||||
{
|
||||
public:
|
||||
enum {
|
||||
// Non-zero when the destination's Flags carry AlignedBit (the raw masked bit, not 0/1).
DstIsAligned = Derived::Flags & AlignedBit,
|
||||
// Non-zero when the destination's Flags carry DirectAccessBit.
DstHasDirectAccess = Derived::Flags & DirectAccessBit,
|
||||
// Non-zero when the source's Flags carry AlignedBit.
SrcIsAligned = OtherDerived::Flags & AlignedBit,
|
||||
// Aligned only if both sides are aligned, Unaligned otherwise.
JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned,
|
||||
// True when evaluator_traits<OtherDerived>::HasEvalTo equals 1; this forces AllAtOnceTraversal below.
SrcEvalBeforeAssign = (evaluator_traits<OtherDerived>::HasEvalTo == 1)
|
||||
};
|
||||
|
||||
private:
|
||||
enum {
|
||||
// Compile-time inner size of the destination: the full size for vectors,
// otherwise the column count if row-major, else the row count.
InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
|
||||
: int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
|
||||
: int(Derived::RowsAtCompileTime),
|
||||
// Same selection as InnerSize, but using the Max*AtCompileTime bounds.
InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
|
||||
: int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
|
||||
: int(Derived::MaxRowsAtCompileTime),
|
||||
// NOTE(review): despite its name this is initialised from SizeAtCompileTime,
// not Derived::MaxSizeAtCompileTime -- confirm this is intended.
MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
|
||||
// Number of scalars per packet for the destination's scalar type.
PacketSize = packet_traits<typename Derived::Scalar>::size
|
||||
};
|
||||
|
||||
enum {
|
||||
// Both expressions are row-major or both are column-major.
StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
|
||||
// Precondition shared by all vectorized traversals: matching storage order and
// ActualPacketAccessBit set on both sides.
MightVectorize = StorageOrdersAgree
|
||||
&& (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
|
||||
// Inner vectorization additionally needs a fixed inner size that is a multiple
// of PacketSize, and both sides aligned.
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
|
||||
&& int(DstIsAligned) && int(SrcIsAligned),
|
||||
// Linear (single-index) traversal needs matching storage order and LinearAccessBit on both sides.
MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
|
||||
MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
|
||||
&& (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
|
||||
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
||||
so it's only good for large enough sizes. */
|
||||
MaySliceVectorize = MightVectorize && DstHasDirectAccess
|
||||
&& (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
|
||||
/* slice vectorization can be slow, so we only want it if the slices are big, which is
|
||||
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
|
||||
in a fixed-size matrix */
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
// Selected traversal; the first satisfied condition wins, in this priority order:
// all-at-once, inner vectorized, linear vectorized, slice vectorized, linear, default.
Traversal = int(SrcEvalBeforeAssign) ? int(AllAtOnceTraversal)
|
||||
: int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
|
||||
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
|
||||
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
|
||||
: int(MayLinearize) ? int(LinearTraversal)
|
||||
: int(DefaultTraversal),
|
||||
// True for any of the three packet-based traversals.
Vectorized = int(Traversal) == InnerVectorizedTraversal
|
||||
|| int(Traversal) == LinearVectorizedTraversal
|
||||
|| int(Traversal) == SliceVectorizedTraversal
|
||||
};
|
||||
|
||||
private:
|
||||
enum {
|
||||
// Cost budget for unrolling; scaled by PacketSize when the traversal is vectorized.
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
|
||||
// Full unrolling requires a fixed size, a known read cost, and size * cost within the budget.
MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic
|
||||
&& int(OtherDerived::CoeffReadCost) != Dynamic
|
||||
&& int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
|
||||
// Same test applied to a single inner vector.
MayUnrollInner = int(InnerSize) != Dynamic
|
||||
&& int(OtherDerived::CoeffReadCost) != Dynamic
|
||||
&& int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
// Selected unrolling strategy:
//  - inner-vectorized / default traversal: complete, else inner, else none;
//  - linear-vectorized traversal: complete only if the destination is also aligned, else none;
//  - linear traversal: complete or none;
//  - every other traversal: none.
Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
|
||||
? (
|
||||
int(MayUnrollCompletely) ? int(CompleteUnrolling)
|
||||
: int(MayUnrollInner) ? int(InnerUnrolling)
|
||||
: int(NoUnrolling)
|
||||
)
|
||||
: int(Traversal) == int(LinearVectorizedTraversal)
|
||||
? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling)
|
||||
: int(NoUnrolling) )
|
||||
: int(Traversal) == int(LinearTraversal)
|
||||
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
|
||||
: int(NoUnrolling) )
|
||||
: int(NoUnrolling)
|
||||
};
|
||||
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
// Debug helper, compiled only when EIGEN_DEBUG_ASSIGN is defined: hands every
// intermediate decision to EIGEN_DEBUG_VAR (macro defined elsewhere; presumably
// prints the name and value -- confirm).
static void debug()
|
||||
{
|
||||
EIGEN_DEBUG_VAR(DstIsAligned)
|
||||
EIGEN_DEBUG_VAR(SrcIsAligned)
|
||||
EIGEN_DEBUG_VAR(JointAlignment)
|
||||
EIGEN_DEBUG_VAR(InnerSize)
|
||||
EIGEN_DEBUG_VAR(InnerMaxSize)
|
||||
EIGEN_DEBUG_VAR(PacketSize)
|
||||
EIGEN_DEBUG_VAR(StorageOrdersAgree)
|
||||
EIGEN_DEBUG_VAR(MightVectorize)
|
||||
EIGEN_DEBUG_VAR(MayLinearize)
|
||||
EIGEN_DEBUG_VAR(MayInnerVectorize)
|
||||
EIGEN_DEBUG_VAR(MayLinearVectorize)
|
||||
EIGEN_DEBUG_VAR(MaySliceVectorize)
|
||||
EIGEN_DEBUG_VAR(Traversal)
|
||||
EIGEN_DEBUG_VAR(UnrollingLimit)
|
||||
EIGEN_DEBUG_VAR(MayUnrollCompletely)
|
||||
EIGEN_DEBUG_VAR(MayUnrollInner)
|
||||
EIGEN_DEBUG_VAR(Unrolling)
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Part 2 : meta-unrollers
|
||||
***************************************************************************/
|
||||
|
||||
/************************
|
||||
*** Default traversal ***
|
||||
************************/
|
||||
|
||||
template<typename Kernel, int Index, int Stop>
|
||||
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
|
||||
typedef typename DstEvaluatorType::XprType DstXprType;
|
||||
|
||||
enum {
|
||||
outer = Index / DstXprType::InnerSizeAtCompileTime,
|
||||
inner = Index % DstXprType::InnerSizeAtCompileTime
|
||||
};
|
||||
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
kernel.assignCoeffByOuterInner(outer, inner);
|
||||
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel, int Stop>
|
||||
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel&) { }
|
||||
};
|
||||
|
||||
template<typename Kernel, int Index, int Stop>
|
||||
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel, int outer)
|
||||
{
|
||||
kernel.assignCoeffByOuterInner(outer, Index);
|
||||
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index+1, Stop>::run(kernel, outer);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel, int Stop>
|
||||
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel&, int) { }
|
||||
};
|
||||
|
||||
/***********************
|
||||
*** Linear traversal ***
|
||||
***********************/
|
||||
|
||||
template<typename Kernel, int Index, int Stop>
|
||||
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel& kernel)
|
||||
{
|
||||
kernel.assignCoeff(Index);
|
||||
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel, int Stop>
|
||||
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel&) { }
|
||||
};
|
||||
|
||||
/**************************
|
||||
*** Inner vectorization ***
|
||||
**************************/
|
||||
|
||||
template<typename Kernel, int Index, int Stop>
|
||||
struct copy_using_evaluator_innervec_CompleteUnrolling
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
|
||||
typedef typename DstEvaluatorType::XprType DstXprType;
|
||||
|
||||
enum {
|
||||
outer = Index / DstXprType::InnerSizeAtCompileTime,
|
||||
inner = Index % DstXprType::InnerSizeAtCompileTime,
|
||||
JointAlignment = Kernel::AssignmentTraits::JointAlignment
|
||||
};
|
||||
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
kernel.template assignPacketByOuterInner<Aligned, JointAlignment>(outer, inner);
|
||||
enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
|
||||
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel, int Stop>
|
||||
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel&) { }
|
||||
};
|
||||
|
||||
template<typename Kernel, int Index, int Stop>
|
||||
struct copy_using_evaluator_innervec_InnerUnrolling
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel, int outer)
|
||||
{
|
||||
kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, Index);
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
|
||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel, int Stop>
|
||||
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &, int) { }
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Part 3 : implementation of all cases
|
||||
***************************************************************************/
|
||||
|
||||
// dense_assignment_loop is based on assign_impl
|
||||
|
||||
// Primary template of the assignment loop, declared but not defined here.
// It is selected by the pair (Traversal, Unrolling), both of which default to the
// values published by Kernel::AssignmentTraits.
template<typename Kernel,
|
||||
int Traversal = Kernel::AssignmentTraits::Traversal,
|
||||
int Unrolling = Kernel::AssignmentTraits::Unrolling>
|
||||
struct dense_assignment_loop;
|
||||
|
||||
/************************
|
||||
*** Default traversal ***
|
||||
************************/
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
|
||||
{
|
||||
static void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::Index Index;
|
||||
|
||||
for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
|
||||
for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
|
||||
kernel.assignCoeffByOuterInner(outer, inner);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
|
||||
{
|
||||
typedef typename Kernel::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
|
||||
const Index outerSize = kernel.outerSize();
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
|
||||
}
|
||||
};
|
||||
|
||||
/***************************
|
||||
*** Linear vectorization ***
|
||||
***************************/
|
||||
|
||||
|
||||
// The goal of unaligned_dense_assignment_loop is simply to factorize the handling
|
||||
// of the non vectorizable beginning and ending parts
|
||||
|
||||
template <bool IsAligned = false>
|
||||
struct unaligned_dense_assignment_loop
|
||||
{
|
||||
// if IsAligned = true, then do nothing
|
||||
template <typename Kernel>
|
||||
static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index, typename Kernel::Index) {}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct unaligned_dense_assignment_loop<false>
|
||||
{
|
||||
// MSVC must not inline this functions. If it does, it fails to optimize the
|
||||
// packet access path.
|
||||
// FIXME check which version exhibits this issue
|
||||
#ifdef _MSC_VER
|
||||
template <typename Kernel>
|
||||
static EIGEN_DONT_INLINE void run(Kernel &kernel,
|
||||
typename Kernel::Index start,
|
||||
typename Kernel::Index end)
|
||||
#else
|
||||
template <typename Kernel>
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel,
|
||||
typename Kernel::Index start,
|
||||
typename Kernel::Index end)
|
||||
#endif
|
||||
{
|
||||
for (typename Kernel::Index index = start; index < end; ++index)
|
||||
kernel.assignCoeff(index);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::Index Index;
|
||||
|
||||
const Index size = kernel.size();
|
||||
typedef packet_traits<typename Kernel::Scalar> PacketTraits;
|
||||
enum {
|
||||
packetSize = PacketTraits::size,
|
||||
dstIsAligned = int(Kernel::AssignmentTraits::DstIsAligned),
|
||||
dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned,
|
||||
srcAlignment = Kernel::AssignmentTraits::JointAlignment
|
||||
};
|
||||
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&kernel.dstEvaluator().coeffRef(0), size);
|
||||
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
|
||||
|
||||
unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
|
||||
|
||||
for(Index index = alignedStart; index < alignedEnd; index += packetSize)
|
||||
kernel.template assignPacket<dstAlignment, srcAlignment>(index);
|
||||
|
||||
unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
|
||||
{
|
||||
typedef typename Kernel::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
|
||||
enum { size = DstXprType::SizeAtCompileTime,
|
||||
packetSize = packet_traits<typename Kernel::Scalar>::size,
|
||||
alignedSize = (size/packetSize)*packetSize };
|
||||
|
||||
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
|
||||
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
/**************************
|
||||
*** Inner vectorization ***
|
||||
**************************/
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
static inline void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::Index Index;
|
||||
|
||||
const Index innerSize = kernel.innerSize();
|
||||
const Index outerSize = kernel.outerSize();
|
||||
const Index packetSize = packet_traits<typename Kernel::Scalar>::size;
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
for(Index inner = 0; inner < innerSize; inner+=packetSize)
|
||||
kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, inner);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
|
||||
{
|
||||
typedef typename Kernel::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
const Index outerSize = kernel.outerSize();
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
|
||||
}
|
||||
};
|
||||
|
||||
/***********************
|
||||
*** Linear traversal ***
|
||||
***********************/
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
|
||||
{
|
||||
static inline void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::Index Index;
|
||||
const Index size = kernel.size();
|
||||
for(Index i = 0; i < size; ++i)
|
||||
kernel.assignCoeff(i);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
/**************************
|
||||
*** Slice vectorization ***
|
||||
***************************/
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
static inline void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::Index Index;
|
||||
typedef packet_traits<typename Kernel::Scalar> PacketTraits;
|
||||
enum {
|
||||
packetSize = PacketTraits::size,
|
||||
alignable = PacketTraits::AlignedOnScalar,
|
||||
dstAlignment = alignable ? Aligned : int(Kernel::AssignmentTraits::DstIsAligned)
|
||||
};
|
||||
const Index packetAlignedMask = packetSize - 1;
|
||||
const Index innerSize = kernel.innerSize();
|
||||
const Index outerSize = kernel.outerSize();
|
||||
const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
|
||||
Index alignedStart = ((!alignable) || Kernel::AssignmentTraits::DstIsAligned) ? 0
|
||||
: internal::first_aligned(&kernel.dstEvaluator().coeffRef(0,0), innerSize);
|
||||
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
{
|
||||
const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
|
||||
// do the non-vectorizable part of the assignment
|
||||
for(Index inner = 0; inner<alignedStart ; ++inner)
|
||||
kernel.assignCoeffByOuterInner(outer, inner);
|
||||
|
||||
// do the vectorizable part of the assignment
|
||||
for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
|
||||
kernel.template assignPacketByOuterInner<dstAlignment, Unaligned>(outer, inner);
|
||||
|
||||
// do the non-vectorizable part of the assignment
|
||||
for(Index inner = alignedEnd; inner<innerSize ; ++inner)
|
||||
kernel.assignCoeffByOuterInner(outer, inner);
|
||||
|
||||
alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/****************************
|
||||
*** All-at-once traversal ***
|
||||
****************************/
|
||||
|
||||
// TODO: this 'AllAtOnceTraversal' should be dropped or caught earlier (Gael)
|
||||
// Indeed, what to do with the kernel's functor??
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, AllAtOnceTraversal, NoUnrolling>
|
||||
{
|
||||
static inline void run(Kernel & kernel)
|
||||
{
|
||||
// Evaluate rhs in temporary to prevent aliasing problems in a = a * a;
|
||||
// TODO: Do not pass the xpr object to evalTo() (Jitse)
|
||||
kernel.srcEvaluator().evalTo(kernel.dstEvaluator(), kernel.dstExpression());
|
||||
}
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Part 4 : Generic Assignment routine
|
||||
***************************************************************************/
|
||||
|
||||
// This class generalizes the assignment of a coefficient (or packet) from one dense evaluator
|
||||
// to another dense writable evaluator.
|
||||
// It is parametrized by the two evaluators, and the actual assignment functor.
|
||||
// This abstraction level makes it possible to keep the evaluation loops as simple and as generic as possible.
|
||||
// One can customize the assignment using this generic dense_assignment_kernel with different
|
||||
// functors, or by completely overloading it, by-passing a functor.
|
||||
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
|
||||
class generic_dense_assignment_kernel
|
||||
{
|
||||
protected:
|
||||
typedef typename DstEvaluatorTypeT::XprType DstXprType;
|
||||
typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
|
||||
public:
|
||||
|
||||
typedef DstEvaluatorTypeT DstEvaluatorType;
|
||||
typedef SrcEvaluatorTypeT SrcEvaluatorType;
|
||||
typedef typename DstEvaluatorType::Scalar Scalar;
|
||||
typedef typename DstEvaluatorType::Index Index;
|
||||
typedef copy_using_evaluator_traits<DstXprType, SrcXprType> AssignmentTraits;
|
||||
|
||||
|
||||
generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
|
||||
: m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
|
||||
{}
|
||||
|
||||
Index size() const { return m_dstExpr.size(); }
|
||||
Index innerSize() const { return m_dstExpr.innerSize(); }
|
||||
Index outerSize() const { return m_dstExpr.outerSize(); }
|
||||
Index outerStride() const { return m_dstExpr.outerStride(); }
|
||||
|
||||
// TODO get rid of this one:
|
||||
DstXprType& dstExpression() const { return m_dstExpr; }
|
||||
|
||||
DstEvaluatorType& dstEvaluator() { return m_dst; }
|
||||
const SrcEvaluatorType& srcEvaluator() const { return m_src; }
|
||||
|
||||
void assignCoeff(Index row, Index col)
|
||||
{
|
||||
m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
|
||||
}
|
||||
|
||||
void assignCoeff(Index index)
|
||||
{
|
||||
m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
|
||||
}
|
||||
|
||||
void assignCoeffByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
Index row = rowIndexByOuterInner(outer, inner);
|
||||
Index col = colIndexByOuterInner(outer, inner);
|
||||
assignCoeff(row, col);
|
||||
}
|
||||
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
void assignPacket(Index row, Index col)
|
||||
{
|
||||
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode>(row,col));
|
||||
}
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
void assignPacket(Index index)
|
||||
{
|
||||
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode>(index));
|
||||
}
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
void assignPacketByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
Index row = rowIndexByOuterInner(outer, inner);
|
||||
Index col = colIndexByOuterInner(outer, inner);
|
||||
assignPacket<StoreMode,LoadMode>(row, col);
|
||||
}
|
||||
|
||||
static Index rowIndexByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
typedef typename DstEvaluatorType::ExpressionTraits Traits;
|
||||
return int(Traits::RowsAtCompileTime) == 1 ? 0
|
||||
: int(Traits::ColsAtCompileTime) == 1 ? inner
|
||||
: int(Traits::Flags)&RowMajorBit ? outer
|
||||
: inner;
|
||||
}
|
||||
|
||||
static Index colIndexByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
typedef typename DstEvaluatorType::ExpressionTraits Traits;
|
||||
return int(Traits::ColsAtCompileTime) == 1 ? 0
|
||||
: int(Traits::RowsAtCompileTime) == 1 ? inner
|
||||
: int(Traits::Flags)&RowMajorBit ? inner
|
||||
: outer;
|
||||
}
|
||||
|
||||
protected:
|
||||
DstEvaluatorType& m_dst;
|
||||
const SrcEvaluatorType& m_src;
|
||||
const Functor &m_functor;
|
||||
// TODO find a way to avoid the needs of the original expression
|
||||
DstXprType& m_dstExpr;
|
||||
};
|
||||
|
||||
template<typename DstXprType, typename SrcXprType, typename Functor>
|
||||
void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
|
||||
{
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
// TODO these traits should be computed from information provided by the evaluators
|
||||
internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
|
||||
#endif
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
|
||||
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
|
||||
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
|
||||
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
SrcEvaluatorType srcEvaluator(src);
|
||||
|
||||
typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
|
||||
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
|
||||
|
||||
dense_assignment_loop<Kernel>::run(kernel);
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
|
||||
{
|
||||
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>());
|
||||
}
|
||||
|
||||
/***************************************************************************
|
||||
* Part 5 : Entry points
|
||||
***************************************************************************/
|
||||
|
||||
// Based on DenseBase::LazyAssign()
|
||||
// The following functions are just for testing and they are meant to be moved to operator= and the likes.
|
||||
|
||||
template<typename DstXprType, template <typename> class StorageBase, typename SrcXprType>
|
||||
EIGEN_STRONG_INLINE
|
||||
const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst,
|
||||
const EigenBase<SrcXprType>& src)
|
||||
{
|
||||
return noalias_copy_using_evaluator(dst.expression(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
|
||||
}
|
||||
|
||||
template<typename XprType, int AssumeAliasing = evaluator_traits<XprType>::AssumeAliasing>
|
||||
struct AddEvalIfAssumingAliasing;
|
||||
|
||||
template<typename XprType>
|
||||
struct AddEvalIfAssumingAliasing<XprType, 0>
|
||||
{
|
||||
static const XprType& run(const XprType& xpr)
|
||||
{
|
||||
return xpr;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename XprType>
|
||||
struct AddEvalIfAssumingAliasing<XprType, 1>
|
||||
{
|
||||
static const EvalToTemp<XprType> run(const XprType& xpr)
|
||||
{
|
||||
return EvalToTemp<XprType>(xpr);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename DstXprType, typename SrcXprType, typename Functor>
|
||||
EIGEN_STRONG_INLINE
|
||||
const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
|
||||
{
|
||||
return noalias_copy_using_evaluator(dst.const_cast_derived(),
|
||||
AddEvalIfAssumingAliasing<SrcXprType>::run(src.derived()),
|
||||
func
|
||||
);
|
||||
}
|
||||
|
||||
// this mimics operator=
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
EIGEN_STRONG_INLINE
|
||||
const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src)
|
||||
{
|
||||
return copy_using_evaluator(dst.const_cast_derived(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType, typename Functor>
|
||||
EIGEN_STRONG_INLINE
|
||||
const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
|
||||
{
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
|
||||
#endif
|
||||
#ifdef EIGEN_NO_AUTOMATIC_RESIZING
|
||||
eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size())
|
||||
: (dst.rows() == src.rows() && dst.cols() == src.cols())))
|
||||
&& "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
|
||||
#else
|
||||
dst.const_cast_derived().resizeLike(src.derived());
|
||||
#endif
|
||||
call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func);
|
||||
return dst.derived();
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType, typename Functor>
|
||||
EIGEN_STRONG_INLINE
|
||||
const DstXprType& noalias_copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
|
||||
{
|
||||
call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func);
|
||||
return dst.derived();
|
||||
}
|
||||
|
||||
// Based on DenseBase::swap()
|
||||
// TODO: Check whether we need to do something special for swapping two
|
||||
// Arrays or Matrices. (Jitse)
|
||||
|
||||
// Overload default assignPacket behavior for swapping them
|
||||
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT>
|
||||
class swap_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar> >
|
||||
{
|
||||
typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar> > Base;
|
||||
typedef typename DstEvaluatorTypeT::PacketScalar PacketScalar;
|
||||
using Base::m_dst;
|
||||
using Base::m_src;
|
||||
using Base::m_functor;
|
||||
|
||||
public:
|
||||
typedef typename Base::Scalar Scalar;
|
||||
typedef typename Base::Index Index;
|
||||
typedef typename Base::DstXprType DstXprType;
|
||||
|
||||
swap_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, DstXprType& dstExpr)
|
||||
: Base(dst, src, swap_assign_op<Scalar>(), dstExpr)
|
||||
{}
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
void assignPacket(Index row, Index col)
|
||||
{
|
||||
m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(row,col), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(row,col));
|
||||
}
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
void assignPacket(Index index)
|
||||
{
|
||||
m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(index), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(index));
|
||||
}
|
||||
|
||||
// TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
|
||||
template<int StoreMode, int LoadMode>
|
||||
void assignPacketByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
Index row = Base::rowIndexByOuterInner(outer, inner);
|
||||
Index col = Base::colIndexByOuterInner(outer, inner);
|
||||
assignPacket<StoreMode,LoadMode>(row, col);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src)
|
||||
{
|
||||
// TODO there is too much redundancy with call_dense_assignment_loop
|
||||
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
|
||||
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
|
||||
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
|
||||
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
SrcEvaluatorType srcEvaluator(src);
|
||||
|
||||
typedef swap_kernel<DstEvaluatorType,SrcEvaluatorType> Kernel;
|
||||
Kernel kernel(dstEvaluator, srcEvaluator, dst.const_cast_derived());
|
||||
|
||||
dense_assignment_loop<Kernel>::run(kernel);
|
||||
}
|
||||
|
||||
// Based on MatrixBase::operator+= (in CwiseBinaryOp.h)
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
void add_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
|
||||
{
|
||||
typedef typename DstXprType::Scalar Scalar;
|
||||
copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>());
|
||||
}
|
||||
|
||||
// Based on ArrayBase::operator+=
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
void add_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
|
||||
{
|
||||
typedef typename DstXprType::Scalar Scalar;
|
||||
copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>());
|
||||
}
|
||||
|
||||
// TODO: Add add_assign_using_evaluator for EigenBase ? (Jitse)
|
||||
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
void subtract_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
|
||||
{
|
||||
typedef typename DstXprType::Scalar Scalar;
|
||||
copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>());
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
void subtract_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
|
||||
{
|
||||
typedef typename DstXprType::Scalar Scalar;
|
||||
copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>());
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
void multiply_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
|
||||
{
|
||||
typedef typename DstXprType::Scalar Scalar;
|
||||
copy_using_evaluator(dst.derived(), src.derived(), mul_assign_op<Scalar>());
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
void divide_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
|
||||
{
|
||||
typedef typename DstXprType::Scalar Scalar;
|
||||
copy_using_evaluator(dst.derived(), src.derived(), div_assign_op<Scalar>());
|
||||
}
|
||||
|
||||
|
||||
} // namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_ASSIGN_EVALUATOR_H
|
||||
@@ -202,7 +202,6 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(atan, Atan)
|
||||
//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln)
|
||||
|
||||
@@ -21,9 +21,6 @@ namespace Eigen {
|
||||
* \param XprType the type of the expression in which we are taking a block
|
||||
* \param BlockRows the number of rows of the block we are taking at compile time (optional)
|
||||
* \param BlockCols the number of columns of the block we are taking at compile time (optional)
|
||||
* \param InnerPanel is true, if the block maps to a set of rows of a row major matrix or
|
||||
* to set of columns of a column major matrix (optional). The parameter allows to determine
|
||||
* at compile time whether aligned access is possible on the block expression.
|
||||
*
|
||||
* This class represents an expression of either a fixed-size or dynamic-size block. It is the return
|
||||
* type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
|
||||
@@ -69,8 +66,9 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
|
||||
: ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
|
||||
: int(traits<XprType>::MaxColsAtCompileTime),
|
||||
XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
|
||||
IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
|
||||
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
|
||||
IsDense = is_same<StorageKind,Dense>::value,
|
||||
IsRowMajor = (IsDense&&MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
|
||||
: (IsDense&&MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
|
||||
: XprTypeIsRowMajor,
|
||||
HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),
|
||||
InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
|
||||
@@ -83,7 +81,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
|
||||
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
|
||||
&& (InnerStrideAtCompileTime == 1)
|
||||
? PacketAccessBit : 0,
|
||||
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0,
|
||||
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0,
|
||||
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (traits<XprType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
|
||||
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
|
||||
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
|
||||
@@ -114,7 +112,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
|
||||
|
||||
/** Column or Row constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Block(XprType& xpr, Index i) : Impl(xpr,i)
|
||||
{
|
||||
eigen_assert( (i>=0) && (
|
||||
@@ -124,7 +121,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
|
||||
|
||||
/** Fixed-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Block(XprType& xpr, Index a_startRow, Index a_startCol)
|
||||
: Impl(xpr, a_startRow, a_startCol)
|
||||
{
|
||||
@@ -135,7 +131,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
|
||||
|
||||
/** Dynamic-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Block(XprType& xpr,
|
||||
Index a_startRow, Index a_startCol,
|
||||
Index blockRows, Index blockCols)
|
||||
@@ -159,9 +154,8 @@ class BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, Dense>
|
||||
public:
|
||||
typedef Impl Base;
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
|
||||
EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
|
||||
EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol) : Impl(xpr, a_startRow, a_startCol) {}
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
|
||||
inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol) : Impl(xpr, a_startRow, a_startCol) {}
|
||||
inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol, Index blockRows, Index blockCols)
|
||||
: Impl(xpr, a_startRow, a_startCol, blockRows, blockCols) {}
|
||||
};
|
||||
@@ -183,7 +177,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
|
||||
/** Column or Row constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr, Index i)
|
||||
: m_xpr(xpr),
|
||||
// It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime,
|
||||
@@ -198,7 +191,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
|
||||
/** Fixed-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr, Index a_startRow, Index a_startCol)
|
||||
: m_xpr(xpr), m_startRow(a_startRow), m_startCol(a_startCol),
|
||||
m_blockRows(BlockRows), m_blockCols(BlockCols)
|
||||
@@ -206,7 +198,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
|
||||
/** Dynamic-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr,
|
||||
Index a_startRow, Index a_startCol,
|
||||
Index blockRows, Index blockCols)
|
||||
@@ -214,10 +205,9 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
m_blockRows(blockRows), m_blockCols(blockCols)
|
||||
{}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); }
|
||||
inline Index rows() const { return m_blockRows.value(); }
|
||||
inline Index cols() const { return m_blockCols.value(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(XprType)
|
||||
@@ -225,20 +215,17 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return m_xpr.derived()
|
||||
.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(XprType)
|
||||
@@ -247,7 +234,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_xpr.const_cast_derived()
|
||||
@@ -255,7 +241,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_xpr
|
||||
@@ -295,24 +280,21 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \sa MapBase::data() */
|
||||
EIGEN_DEVICE_FUNC inline const Scalar* data() const;
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const;
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const;
|
||||
inline const Scalar* data() const;
|
||||
inline Index innerStride() const;
|
||||
inline Index outerStride() const;
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
|
||||
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
|
||||
{
|
||||
return m_xpr;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Index startRow() const
|
||||
Index startRow() const
|
||||
{
|
||||
return m_startRow.value();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Index startCol() const
|
||||
{
|
||||
return m_startCol.value();
|
||||
@@ -341,7 +323,6 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
|
||||
/** Column or Row constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr, Index i)
|
||||
: Base(internal::const_cast_ptr(&xpr.coeffRef(
|
||||
(BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0,
|
||||
@@ -355,7 +336,6 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
|
||||
/** Fixed-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
|
||||
: Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol))), m_xpr(xpr)
|
||||
{
|
||||
@@ -364,7 +344,6 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
|
||||
/** Dynamic-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr,
|
||||
Index startRow, Index startCol,
|
||||
Index blockRows, Index blockCols)
|
||||
@@ -374,14 +353,12 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
init();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
|
||||
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
|
||||
{
|
||||
return m_xpr;
|
||||
}
|
||||
|
||||
/** \sa MapBase::innerStride() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const
|
||||
{
|
||||
return internal::traits<BlockType>::HasSameStorageOrderAsXprType
|
||||
@@ -390,7 +367,6 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
}
|
||||
|
||||
/** \sa MapBase::outerStride() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const
|
||||
{
|
||||
return m_outerStride;
|
||||
@@ -404,7 +380,6 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \internal used by allowAligned() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
|
||||
: Base(data, blockRows, blockCols), m_xpr(xpr)
|
||||
{
|
||||
@@ -413,7 +388,6 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
#endif
|
||||
|
||||
protected:
|
||||
EIGEN_DEVICE_FUNC
|
||||
void init()
|
||||
{
|
||||
m_outerStride = internal::traits<BlockType>::HasSameStorageOrderAsXprType
|
||||
|
||||
@@ -8,4 +8,3 @@ INSTALL(FILES
|
||||
ADD_SUBDIRECTORY(products)
|
||||
ADD_SUBDIRECTORY(util)
|
||||
ADD_SUBDIRECTORY(arch)
|
||||
ADD_SUBDIRECTORY(functors)
|
||||
|
||||
@@ -30,7 +30,6 @@ struct CommaInitializer
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::Index Index;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CommaInitializer(XprType& xpr, const Scalar& s)
|
||||
: m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1)
|
||||
{
|
||||
@@ -38,7 +37,6 @@ struct CommaInitializer
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CommaInitializer(XprType& xpr, const DenseBase<OtherDerived>& other)
|
||||
: m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows())
|
||||
{
|
||||
@@ -48,7 +46,6 @@ struct CommaInitializer
|
||||
/* Copy/Move constructor which transfers ownership. This is crucial in
|
||||
* absence of return value optimization to avoid assertions during destruction. */
|
||||
// FIXME in C++11 mode this could be replaced by a proper RValue constructor
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CommaInitializer(const CommaInitializer& o)
|
||||
: m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) {
|
||||
// Mark original object as finished. In absence of R-value references we need to const_cast:
|
||||
@@ -58,7 +55,6 @@ struct CommaInitializer
|
||||
}
|
||||
|
||||
/* inserts a scalar value in the target matrix */
|
||||
EIGEN_DEVICE_FUNC
|
||||
CommaInitializer& operator,(const Scalar& s)
|
||||
{
|
||||
if (m_col==m_xpr.cols())
|
||||
@@ -78,7 +74,6 @@ struct CommaInitializer
|
||||
|
||||
/* inserts a matrix expression in the target matrix */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
CommaInitializer& operator,(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
if(other.cols()==0 || other.rows()==0)
|
||||
@@ -104,7 +99,6 @@ struct CommaInitializer
|
||||
return *this;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ~CommaInitializer()
|
||||
{
|
||||
eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows()
|
||||
@@ -119,10 +113,9 @@ struct CommaInitializer
|
||||
* quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished());
|
||||
* \endcode
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline XprType& finished() { return m_xpr; }
|
||||
|
||||
XprType& m_xpr; // target expression
|
||||
XprType& m_xpr; // target expression
|
||||
Index m_row; // current row id
|
||||
Index m_col; // current col id
|
||||
Index m_currentBlockRows; // current block height
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -81,7 +81,8 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
|
||||
)
|
||||
),
|
||||
Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
|
||||
CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits<BinaryOp>::Cost
|
||||
Cost0 = EIGEN_ADD_COST(LhsCoeffReadCost,RhsCoeffReadCost),
|
||||
CoeffReadCost = EIGEN_ADD_COST(Cost0,functor_traits<BinaryOp>::Cost)
|
||||
};
|
||||
};
|
||||
} // end namespace internal
|
||||
@@ -122,7 +123,6 @@ class CwiseBinaryOp : internal::no_assignment_operator,
|
||||
typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
|
||||
typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp())
|
||||
: m_lhs(aLhs), m_rhs(aRhs), m_functor(func)
|
||||
{
|
||||
@@ -132,7 +132,6 @@ class CwiseBinaryOp : internal::no_assignment_operator,
|
||||
eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rows() const {
|
||||
// return the fixed size type if available to enable compile time optimizations
|
||||
if (internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic)
|
||||
@@ -140,7 +139,6 @@ class CwiseBinaryOp : internal::no_assignment_operator,
|
||||
else
|
||||
return m_lhs.rows();
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index cols() const {
|
||||
// return the fixed size type if available to enable compile time optimizations
|
||||
if (internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic)
|
||||
@@ -150,13 +148,10 @@ class CwiseBinaryOp : internal::no_assignment_operator,
|
||||
}
|
||||
|
||||
/** \returns the left hand side nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const _LhsNested& lhs() const { return m_lhs; }
|
||||
/** \returns the right hand side nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const _RhsNested& rhs() const { return m_rhs; }
|
||||
/** \returns the functor representing the binary operation */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const BinaryOp& functor() const { return m_functor; }
|
||||
|
||||
protected:
|
||||
@@ -175,7 +170,6 @@ class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Dense>
|
||||
typedef typename internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE( Derived )
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return derived().functor()(derived().lhs().coeff(rowId, colId),
|
||||
@@ -189,7 +183,6 @@ class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Dense>
|
||||
derived().rhs().template packet<LoadMode>(rowId, colId));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
|
||||
{
|
||||
return derived().functor()(derived().lhs().coeff(index),
|
||||
@@ -235,4 +228,3 @@ MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_CWISE_BINARY_OP_H
|
||||
|
||||
|
||||
@@ -54,7 +54,6 @@ class CwiseNullaryOp : internal::no_assignment_operator,
|
||||
typedef typename internal::dense_xpr_base<CwiseNullaryOp>::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
CwiseNullaryOp(Index nbRows, Index nbCols, const NullaryOp& func = NullaryOp())
|
||||
: m_rows(nbRows), m_cols(nbCols), m_functor(func)
|
||||
{
|
||||
@@ -64,12 +63,9 @@ class CwiseNullaryOp : internal::no_assignment_operator,
|
||||
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_functor(rowId, colId);
|
||||
@@ -81,7 +77,6 @@ class CwiseNullaryOp : internal::no_assignment_operator,
|
||||
return m_functor.packetOp(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
|
||||
{
|
||||
return m_functor(index);
|
||||
@@ -94,7 +89,6 @@ class CwiseNullaryOp : internal::no_assignment_operator,
|
||||
}
|
||||
|
||||
/** \returns the functor representing the nullary operation */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const NullaryOp& functor() const { return m_functor; }
|
||||
|
||||
protected:
|
||||
@@ -138,9 +132,6 @@ DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f
|
||||
*
|
||||
* The template parameter \a CustomNullaryOp is the type of the functor.
|
||||
*
|
||||
* Here is an example with C++11 random generators: \include random_cpp11.cpp
|
||||
* Output: \verbinclude random_cpp11.out
|
||||
*
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
@@ -749,7 +740,6 @@ namespace internal {
|
||||
template<typename Derived, bool Big = (Derived::SizeAtCompileTime>=16)>
|
||||
struct setIdentity_impl
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& m)
|
||||
{
|
||||
return m = Derived::Identity(m.rows(), m.cols());
|
||||
@@ -760,7 +750,6 @@ template<typename Derived>
|
||||
struct setIdentity_impl<Derived, true>
|
||||
{
|
||||
typedef typename Derived::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& m)
|
||||
{
|
||||
m.setZero();
|
||||
|
||||
@@ -47,7 +47,7 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> >
|
||||
Flags = _XprTypeNested::Flags & (
|
||||
HereditaryBits | LinearAccessBit | AlignedBit
|
||||
| (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
|
||||
CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits<UnaryOp>::Cost
|
||||
CoeffReadCost = EIGEN_ADD_COST(_XprTypeNested::CoeffReadCost, functor_traits<UnaryOp>::Cost)
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -64,26 +64,20 @@ class CwiseUnaryOp : internal::no_assignment_operator,
|
||||
typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
|
||||
: m_xpr(xpr), m_functor(func) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); }
|
||||
|
||||
/** \returns the functor representing the unary operation */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const UnaryOp& functor() const { return m_functor; }
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
nestedExpression() const { return m_xpr; }
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::remove_all<typename XprType::Nested>::type&
|
||||
nestedExpression() { return m_xpr.const_cast_derived(); }
|
||||
|
||||
@@ -104,7 +98,6 @@ class CwiseUnaryOpImpl<UnaryOp,XprType,Dense>
|
||||
typedef typename internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return derived().functor()(derived().nestedExpression().coeff(rowId, colId));
|
||||
@@ -116,14 +109,12 @@ class CwiseUnaryOpImpl<UnaryOp,XprType,Dense>
|
||||
return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(rowId, colId));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
|
||||
{
|
||||
return derived().functor()(derived().nestedExpression().coeff(index));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
|
||||
{
|
||||
return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(index));
|
||||
|
||||
@@ -40,15 +40,14 @@ static inline void check_DenseIndex_is_signed() {
|
||||
*/
|
||||
template<typename Derived> class DenseBase
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
: public internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
|
||||
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>
|
||||
: public internal::special_scalar_op_base<Derived, typename internal::traits<Derived>::Scalar,
|
||||
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real,
|
||||
DenseCoeffsBase<Derived> >
|
||||
#else
|
||||
: public DenseCoeffsBase<Derived>
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
{
|
||||
public:
|
||||
using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
|
||||
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
|
||||
|
||||
class InnerIterator;
|
||||
|
||||
@@ -63,8 +62,9 @@ template<typename Derived> class DenseBase
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
typedef internal::special_scalar_op_base<Derived,Scalar,RealScalar, DenseCoeffsBase<Derived> > Base;
|
||||
|
||||
typedef DenseCoeffsBase<Derived> Base;
|
||||
using Base::operator*;
|
||||
using Base::derived;
|
||||
using Base::const_cast_derived;
|
||||
using Base::rows;
|
||||
@@ -182,19 +182,13 @@ template<typename Derived> class DenseBase
|
||||
|
||||
/** \returns the number of nonzero coefficients which is in practice the number
|
||||
* of stored coefficients. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index nonZeros() const { return size(); }
|
||||
/** \returns true if either the number of rows or the number of columns is equal to 1.
|
||||
* In other words, this function returns
|
||||
* \code rows()==1 || cols()==1 \endcode
|
||||
* \sa rows(), cols(), IsVectorAtCompileTime. */
|
||||
|
||||
/** \returns the outer size.
|
||||
*
|
||||
* \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension
|
||||
* with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a
|
||||
* column-major matrix, and the number of rows for a row-major matrix. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Index outerSize() const
|
||||
{
|
||||
return IsVectorAtCompileTime ? 1
|
||||
@@ -206,7 +200,6 @@ template<typename Derived> class DenseBase
|
||||
* \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension
|
||||
* with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a
|
||||
* column-major matrix, and the number of columns for a row-major matrix. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Index innerSize() const
|
||||
{
|
||||
return IsVectorAtCompileTime ? this->size()
|
||||
@@ -217,7 +210,6 @@ template<typename Derived> class DenseBase
|
||||
* Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
|
||||
* nothing else.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index newSize)
|
||||
{
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(newSize);
|
||||
@@ -228,7 +220,6 @@ template<typename Derived> class DenseBase
|
||||
* Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
|
||||
* nothing else.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index nbRows, Index nbCols)
|
||||
{
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(nbRows);
|
||||
@@ -252,54 +243,44 @@ template<typename Derived> class DenseBase
|
||||
|
||||
/** Copies \a other into *this. \returns a reference to *this. */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const DenseBase<OtherDerived>& other);
|
||||
|
||||
/** Special case of the template operator=, in order to prevent the compiler
|
||||
* from generating a default operator= (issue hit with g++ 4.1)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const DenseBase& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const EigenBase<OtherDerived> &other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator+=(const EigenBase<OtherDerived> &other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator-=(const EigenBase<OtherDerived> &other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const ReturnByValue<OtherDerived>& func);
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** Copies \a other into *this without evaluating other. \returns a reference to *this. */
|
||||
/** \internal Copies \a other into *this without evaluating other. \returns a reference to *this. */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& lazyAssign(const DenseBase<OtherDerived>& other);
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
/** \internal Evaluates \a other into *this. \returns a reference to *this. */
|
||||
template<typename OtherDerived>
|
||||
Derived& lazyAssign(const ReturnByValue<OtherDerived>& other);
|
||||
|
||||
CommaInitializer<Derived> operator<< (const Scalar& s);
|
||||
|
||||
template<unsigned int Added,unsigned int Removed>
|
||||
const Flagged<Derived, Added, Removed> flagged() const;
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Eigen::Transpose<Derived> transpose();
|
||||
typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
|
||||
ConstTransposeReturnType transpose() const;
|
||||
EIGEN_DEVICE_FUNC
|
||||
void transposeInPlace();
|
||||
#ifndef EIGEN_NO_DEBUG
|
||||
protected:
|
||||
@@ -309,68 +290,65 @@ template<typename Derived> class DenseBase
|
||||
#endif
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType
|
||||
static const ConstantReturnType
|
||||
Constant(Index rows, Index cols, const Scalar& value);
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType
|
||||
static const ConstantReturnType
|
||||
Constant(Index size, const Scalar& value);
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType
|
||||
static const ConstantReturnType
|
||||
Constant(const Scalar& value);
|
||||
|
||||
EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
|
||||
static const SequentialLinSpacedReturnType
|
||||
LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high);
|
||||
EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
|
||||
static const RandomAccessLinSpacedReturnType
|
||||
LinSpaced(Index size, const Scalar& low, const Scalar& high);
|
||||
EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
|
||||
static const SequentialLinSpacedReturnType
|
||||
LinSpaced(Sequential_t, const Scalar& low, const Scalar& high);
|
||||
EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
|
||||
static const RandomAccessLinSpacedReturnType
|
||||
LinSpaced(const Scalar& low, const Scalar& high);
|
||||
|
||||
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
|
||||
template<typename CustomNullaryOp>
|
||||
static const CwiseNullaryOp<CustomNullaryOp, Derived>
|
||||
NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
|
||||
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
|
||||
template<typename CustomNullaryOp>
|
||||
static const CwiseNullaryOp<CustomNullaryOp, Derived>
|
||||
NullaryExpr(Index size, const CustomNullaryOp& func);
|
||||
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
|
||||
template<typename CustomNullaryOp>
|
||||
static const CwiseNullaryOp<CustomNullaryOp, Derived>
|
||||
NullaryExpr(const CustomNullaryOp& func);
|
||||
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size);
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Zero();
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols);
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size);
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Ones();
|
||||
static const ConstantReturnType Zero(Index rows, Index cols);
|
||||
static const ConstantReturnType Zero(Index size);
|
||||
static const ConstantReturnType Zero();
|
||||
static const ConstantReturnType Ones(Index rows, Index cols);
|
||||
static const ConstantReturnType Ones(Index size);
|
||||
static const ConstantReturnType Ones();
|
||||
|
||||
EIGEN_DEVICE_FUNC void fill(const Scalar& value);
|
||||
EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value);
|
||||
EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
|
||||
EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high);
|
||||
EIGEN_DEVICE_FUNC Derived& setZero();
|
||||
EIGEN_DEVICE_FUNC Derived& setOnes();
|
||||
EIGEN_DEVICE_FUNC Derived& setRandom();
|
||||
void fill(const Scalar& value);
|
||||
Derived& setConstant(const Scalar& value);
|
||||
Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
|
||||
Derived& setLinSpaced(const Scalar& low, const Scalar& high);
|
||||
Derived& setZero();
|
||||
Derived& setOnes();
|
||||
Derived& setRandom();
|
||||
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC
|
||||
template<typename OtherDerived>
|
||||
bool isApprox(const DenseBase<OtherDerived>& other,
|
||||
const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
EIGEN_DEVICE_FUNC
|
||||
bool isMuchSmallerThan(const RealScalar& other,
|
||||
const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC
|
||||
template<typename OtherDerived>
|
||||
bool isMuchSmallerThan(const DenseBase<OtherDerived>& other,
|
||||
const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
|
||||
EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
|
||||
inline bool hasNaN() const;
|
||||
inline bool allFinite() const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Derived& operator*=(const Scalar& other);
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Derived& operator/=(const Scalar& other);
|
||||
|
||||
typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
|
||||
@@ -379,7 +357,6 @@ template<typename Derived> class DenseBase
|
||||
* Notice that in the case of a plain matrix or vector (not an expression) this function just returns
|
||||
* a const reference, in order to avoid a useless copy.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE EvalReturnType eval() const
|
||||
{
|
||||
// Even though MSVC does not honor strong inlining when the return type
|
||||
@@ -392,7 +369,6 @@ template<typename Derived> class DenseBase
|
||||
*
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void swap(const DenseBase<OtherDerived>& other,
|
||||
int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase)
|
||||
{
|
||||
@@ -403,52 +379,46 @@ template<typename Derived> class DenseBase
|
||||
*
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void swap(PlainObjectBase<OtherDerived>& other)
|
||||
{
|
||||
SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
|
||||
}
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const;
|
||||
EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
|
||||
EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();
|
||||
template<bool Enable> EIGEN_DEVICE_FUNC
|
||||
inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
|
||||
template<bool Enable> EIGEN_DEVICE_FUNC
|
||||
inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
|
||||
inline const NestByValue<Derived> nestByValue() const;
|
||||
inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
|
||||
inline ForceAlignedAccess<Derived> forceAlignedAccess();
|
||||
template<bool Enable> inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
|
||||
template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar sum() const;
|
||||
EIGEN_DEVICE_FUNC Scalar mean() const;
|
||||
EIGEN_DEVICE_FUNC Scalar trace() const;
|
||||
Scalar sum() const;
|
||||
Scalar mean() const;
|
||||
Scalar trace() const;
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar prod() const;
|
||||
Scalar prod() const;
|
||||
|
||||
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;
|
||||
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
|
||||
typename internal::traits<Derived>::Scalar minCoeff() const;
|
||||
typename internal::traits<Derived>::Scalar maxCoeff() const;
|
||||
|
||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
||||
template<typename IndexType>
|
||||
typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
|
||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
||||
template<typename IndexType>
|
||||
typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;
|
||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
||||
template<typename IndexType>
|
||||
typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;
|
||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
||||
template<typename IndexType>
|
||||
typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;
|
||||
|
||||
template<typename BinaryOp>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::result_of<BinaryOp(typename internal::traits<Derived>::Scalar)>::type
|
||||
redux(const BinaryOp& func) const;
|
||||
|
||||
template<typename Visitor>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void visit(Visitor& func) const;
|
||||
|
||||
inline const WithFormat<Derived> format(const IOFormat& fmt) const;
|
||||
|
||||
/** \returns the unique coefficient of a 1x1 expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
CoeffReturnType value() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
|
||||
@@ -456,8 +426,8 @@ template<typename Derived> class DenseBase
|
||||
return derived().coeff(0,0);
|
||||
}
|
||||
|
||||
bool all() const;
|
||||
bool any() const;
|
||||
bool all(void) const;
|
||||
bool any(void) const;
|
||||
Index count() const;
|
||||
|
||||
typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
|
||||
@@ -490,8 +460,10 @@ template<typename Derived> class DenseBase
|
||||
template<int p> RealScalar lpNorm() const;
|
||||
|
||||
template<int RowFactor, int ColFactor>
|
||||
const Replicate<Derived,RowFactor,ColFactor> replicate() const;
|
||||
const Replicate<Derived,Dynamic,Dynamic> replicate(Index rowFacor,Index colFactor) const;
|
||||
inline const Replicate<Derived,RowFactor,ColFactor> replicate() const;
|
||||
|
||||
typedef Replicate<Derived,Dynamic,Dynamic> ReplicateReturnType;
|
||||
inline const ReplicateReturnType replicate(Index rowFacor,Index colFactor) const;
|
||||
|
||||
typedef Reverse<Derived, BothDirections> ReverseReturnType;
|
||||
typedef const Reverse<const Derived, BothDirections> ConstReverseReturnType;
|
||||
@@ -506,18 +478,27 @@ template<typename Derived> class DenseBase
|
||||
# endif
|
||||
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
|
||||
Block<Derived> corner(CornerType type, Index cRows, Index cCols);
|
||||
const Block<Derived> corner(CornerType type, Index cRows, Index cCols) const;
|
||||
template<int CRows, int CCols>
|
||||
Block<Derived, CRows, CCols> corner(CornerType type);
|
||||
template<int CRows, int CCols>
|
||||
const Block<Derived, CRows, CCols> corner(CornerType type) const;
|
||||
|
||||
#endif // EIGEN2_SUPPORT
|
||||
|
||||
|
||||
// disable the use of evalTo for dense objects with a nice compilation error
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void evalTo(Dest& ) const
|
||||
template<typename Dest> inline void evalTo(Dest& ) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Dest,void>::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
|
||||
}
|
||||
|
||||
protected:
|
||||
/** Default constructor. Do nothing. */
|
||||
EIGEN_DEVICE_FUNC DenseBase()
|
||||
DenseBase()
|
||||
{
|
||||
/* Just checks for self-consistency of the flags.
|
||||
* Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down
|
||||
@@ -530,9 +511,9 @@ template<typename Derived> class DenseBase
|
||||
}
|
||||
|
||||
private:
|
||||
EIGEN_DEVICE_FUNC explicit DenseBase(int);
|
||||
EIGEN_DEVICE_FUNC DenseBase(int,int);
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase<OtherDerived>&);
|
||||
explicit DenseBase(int);
|
||||
DenseBase(int,int);
|
||||
template<typename OtherDerived> explicit DenseBase(const DenseBase<OtherDerived>&);
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -61,7 +61,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
using Base::size;
|
||||
using Base::derived;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const
|
||||
{
|
||||
return int(Derived::RowsAtCompileTime) == 1 ? 0
|
||||
@@ -70,7 +69,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
: inner;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const
|
||||
{
|
||||
return int(Derived::ColsAtCompileTime) == 1 ? 0
|
||||
@@ -93,7 +91,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
*
|
||||
* \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
eigen_internal_assert(row >= 0 && row < rows()
|
||||
@@ -101,7 +98,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
return derived().coeff(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
|
||||
{
|
||||
return coeff(rowIndexByOuterInner(outer, inner),
|
||||
@@ -112,7 +108,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
*
|
||||
* \sa operator()(Index,Index), operator[](Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const
|
||||
{
|
||||
eigen_assert(row >= 0 && row < rows()
|
||||
@@ -135,7 +130,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
* \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
coeff(Index index) const
|
||||
{
|
||||
@@ -152,12 +146,13 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
* z() const, w() const
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
operator[](Index index) const
|
||||
{
|
||||
#ifndef EIGEN2_SUPPORT
|
||||
EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
|
||||
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
|
||||
#endif
|
||||
eigen_assert(index >= 0 && index < size());
|
||||
return derived().coeff(index);
|
||||
}
|
||||
@@ -172,7 +167,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
* z() const, w() const
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
operator()(Index index) const
|
||||
{
|
||||
@@ -182,25 +176,21 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
|
||||
/** equivalent to operator[](0). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
x() const { return (*this)[0]; }
|
||||
|
||||
/** equivalent to operator[](1). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
y() const { return (*this)[1]; }
|
||||
|
||||
/** equivalent to operator[](2). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
z() const { return (*this)[2]; }
|
||||
|
||||
/** equivalent to operator[](3). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
w() const { return (*this)[3]; }
|
||||
|
||||
@@ -321,7 +311,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
*
|
||||
* \sa operator()(Index,Index), coeff(Index, Index) const, coeffRef(Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
eigen_internal_assert(row >= 0 && row < rows()
|
||||
@@ -329,7 +318,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
return derived().coeffRef(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
coeffRefByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
@@ -342,7 +330,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
* \sa operator[](Index)
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
operator()(Index row, Index col)
|
||||
{
|
||||
@@ -367,7 +354,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
* \sa operator[](Index), coeff(Index) const, coeffRef(Index,Index)
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
coeffRef(Index index)
|
||||
{
|
||||
@@ -382,12 +368,13 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
* \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
operator[](Index index)
|
||||
{
|
||||
#ifndef EIGEN2_SUPPORT
|
||||
EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
|
||||
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
|
||||
#endif
|
||||
eigen_assert(index >= 0 && index < size());
|
||||
return derived().coeffRef(index);
|
||||
}
|
||||
@@ -401,7 +388,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
* \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
operator()(Index index)
|
||||
{
|
||||
@@ -411,25 +397,21 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
|
||||
/** equivalent to operator[](0). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
x() { return (*this)[0]; }
|
||||
|
||||
/** equivalent to operator[](1). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
y() { return (*this)[1]; }
|
||||
|
||||
/** equivalent to operator[](2). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
z() { return (*this)[2]; }
|
||||
|
||||
/** equivalent to operator[](3). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
w() { return (*this)[3]; }
|
||||
|
||||
@@ -491,7 +473,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
*/
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
eigen_internal_assert(row >= 0 && row < rows()
|
||||
@@ -508,7 +489,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
*/
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
eigen_internal_assert(index >= 0 && index < size());
|
||||
@@ -517,7 +497,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void copyCoeffByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
const Index row = rowIndexByOuterInner(outer,inner);
|
||||
@@ -602,7 +581,6 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
|
||||
*
|
||||
* \sa outerStride(), rowStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const
|
||||
{
|
||||
return derived().innerStride();
|
||||
@@ -613,7 +591,6 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
|
||||
*
|
||||
* \sa innerStride(), rowStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const
|
||||
{
|
||||
return derived().outerStride();
|
||||
@@ -629,7 +606,6 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
|
||||
*
|
||||
* \sa innerStride(), outerStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rowStride() const
|
||||
{
|
||||
return Derived::IsRowMajor ? outerStride() : innerStride();
|
||||
@@ -639,7 +615,6 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
|
||||
*
|
||||
* \sa innerStride(), outerStride(), rowStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index colStride() const
|
||||
{
|
||||
return Derived::IsRowMajor ? innerStride() : outerStride();
|
||||
@@ -677,7 +652,6 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
|
||||
*
|
||||
* \sa outerStride(), rowStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const
|
||||
{
|
||||
return derived().innerStride();
|
||||
@@ -688,7 +662,6 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
|
||||
*
|
||||
* \sa innerStride(), rowStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const
|
||||
{
|
||||
return derived().outerStride();
|
||||
@@ -704,7 +677,6 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
|
||||
*
|
||||
* \sa innerStride(), outerStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rowStride() const
|
||||
{
|
||||
return Derived::IsRowMajor ? outerStride() : innerStride();
|
||||
@@ -714,7 +686,6 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
|
||||
*
|
||||
* \sa innerStride(), outerStride(), rowStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index colStride() const
|
||||
{
|
||||
return Derived::IsRowMajor ? innerStride() : outerStride();
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2010-2013 Hauke Heibel <hauke.heibel@gmail.com>
|
||||
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
@@ -24,9 +24,7 @@ namespace internal {
|
||||
|
||||
struct constructor_without_unaligned_array_assert {};
|
||||
|
||||
template<typename T, int Size>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void check_static_allocation_size()
|
||||
template<typename T, int Size> void check_static_allocation_size()
|
||||
{
|
||||
// if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit
|
||||
#if EIGEN_STACK_ALLOCATION_LIMIT
|
||||
@@ -40,20 +38,18 @@ void check_static_allocation_size()
|
||||
*/
|
||||
template <typename T, int Size, int MatrixOrArrayOptions,
|
||||
int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
|
||||
: (((Size*sizeof(T))%EIGEN_ALIGN_BYTES)==0) ? EIGEN_ALIGN_BYTES
|
||||
: (((Size*sizeof(T))%16)==0) ? 16
|
||||
: 0 >
|
||||
struct plain_array
|
||||
{
|
||||
T array[Size];
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array()
|
||||
plain_array()
|
||||
{
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array(constructor_without_unaligned_array_assert)
|
||||
plain_array(constructor_without_unaligned_array_assert)
|
||||
{
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
@@ -68,31 +64,29 @@ struct plain_array
|
||||
template<typename PtrType>
|
||||
EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; }
|
||||
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
|
||||
eigen_assert((reinterpret_cast<size_t>(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \
|
||||
eigen_assert((reinterpret_cast<size_t>(eigen_unaligned_array_assert_workaround_gcc47(array)) & sizemask) == 0 \
|
||||
&& "this assertion is explained here: " \
|
||||
"http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
|
||||
" **** READ THIS WEB PAGE !!! ****");
|
||||
#else
|
||||
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
|
||||
eigen_assert((reinterpret_cast<size_t>(array) & (sizemask)) == 0 \
|
||||
eigen_assert((reinterpret_cast<size_t>(array) & sizemask) == 0 \
|
||||
&& "this assertion is explained here: " \
|
||||
"http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
|
||||
" **** READ THIS WEB PAGE !!! ****");
|
||||
#endif
|
||||
|
||||
template <typename T, int Size, int MatrixOrArrayOptions>
|
||||
struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES>
|
||||
struct plain_array<T, Size, MatrixOrArrayOptions, 16>
|
||||
{
|
||||
EIGEN_USER_ALIGN_DEFAULT T array[Size];
|
||||
EIGEN_USER_ALIGN16 T array[Size];
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array()
|
||||
{
|
||||
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(EIGEN_ALIGN_BYTES-1);
|
||||
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf);
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array(constructor_without_unaligned_array_assert)
|
||||
{
|
||||
check_static_allocation_size<T,Size>();
|
||||
@@ -102,9 +96,9 @@ struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES>
|
||||
template <typename T, int MatrixOrArrayOptions, int Alignment>
|
||||
struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
|
||||
{
|
||||
EIGEN_USER_ALIGN_DEFAULT T array[1];
|
||||
EIGEN_DEVICE_FUNC plain_array() {}
|
||||
EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
|
||||
EIGEN_USER_ALIGN16 T array[1];
|
||||
plain_array() {}
|
||||
plain_array(constructor_without_unaligned_array_assert) {}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
@@ -128,44 +122,41 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
|
||||
{
|
||||
internal::plain_array<T,Size,_Options> m_data;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() {}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage() {}
|
||||
DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()) {}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage(const DenseStorage& other) : m_data(other.m_data) {}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
{
|
||||
if (this != &other) m_data = other.m_data;
|
||||
return *this;
|
||||
return *this;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
|
||||
EIGEN_DEVICE_FUNC static DenseIndex rows(void) {return _Rows;}
|
||||
EIGEN_DEVICE_FUNC static DenseIndex cols(void) {return _Cols;}
|
||||
EIGEN_DEVICE_FUNC void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
EIGEN_DEVICE_FUNC void resize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
|
||||
DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
|
||||
static DenseIndex rows(void) {return _Rows;}
|
||||
static DenseIndex cols(void) {return _Cols;}
|
||||
void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
void resize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
const T *data() const { return m_data.array; }
|
||||
T *data() { return m_data.array; }
|
||||
};
|
||||
|
||||
// null matrix
|
||||
template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0, _Rows, _Cols, _Options>
|
||||
{
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(internal::constructor_without_unaligned_array_assert) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
|
||||
EIGEN_DEVICE_FUNC DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}
|
||||
EIGEN_DEVICE_FUNC static DenseIndex rows(void) {return _Rows;}
|
||||
EIGEN_DEVICE_FUNC static DenseIndex cols(void) {return _Cols;}
|
||||
EIGEN_DEVICE_FUNC void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
EIGEN_DEVICE_FUNC void resize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return 0; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return 0; }
|
||||
DenseStorage() {}
|
||||
DenseStorage(internal::constructor_without_unaligned_array_assert) {}
|
||||
DenseStorage(const DenseStorage&) {}
|
||||
DenseStorage& operator=(const DenseStorage&) { return *this; }
|
||||
DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
void swap(DenseStorage& ) {}
|
||||
static DenseIndex rows(void) {return _Rows;}
|
||||
static DenseIndex cols(void) {return _Cols;}
|
||||
void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
void resize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
const T *data() const { return 0; }
|
||||
T *data() { return 0; }
|
||||
};
|
||||
|
||||
// more specializations for null matrices; these are necessary to resolve ambiguities
|
||||
@@ -185,29 +176,29 @@ template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic
|
||||
DenseIndex m_rows;
|
||||
DenseIndex m_cols;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
|
||||
DenseStorage() : m_rows(0), m_cols(0) {}
|
||||
DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
|
||||
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
m_data = other.m_data;
|
||||
m_rows = other.m_rows;
|
||||
m_cols = other.m_cols;
|
||||
}
|
||||
return *this;
|
||||
return *this;
|
||||
}
|
||||
DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) : m_rows(nbRows), m_cols(nbCols) {}
|
||||
void swap(DenseStorage& other)
|
||||
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
|
||||
EIGEN_DEVICE_FUNC DenseIndex rows() const {return m_rows;}
|
||||
EIGEN_DEVICE_FUNC DenseIndex cols() const {return m_cols;}
|
||||
DenseIndex rows() const {return m_rows;}
|
||||
DenseIndex cols() const {return m_cols;}
|
||||
void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
|
||||
void resize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
|
||||
const T *data() const { return m_data.array; }
|
||||
T *data() { return m_data.array; }
|
||||
};
|
||||
|
||||
// dynamic-size matrix with fixed-size storage and fixed width
|
||||
@@ -216,27 +207,27 @@ template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Si
|
||||
internal::plain_array<T,Size,_Options> m_data;
|
||||
DenseIndex m_rows;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
|
||||
DenseStorage() : m_rows(0) {}
|
||||
DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
|
||||
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
m_data = other.m_data;
|
||||
m_rows = other.m_rows;
|
||||
}
|
||||
return *this;
|
||||
return *this;
|
||||
}
|
||||
DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex) : m_rows(nbRows) {}
|
||||
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
|
||||
EIGEN_DEVICE_FUNC DenseIndex rows(void) const {return m_rows;}
|
||||
EIGEN_DEVICE_FUNC DenseIndex cols(void) const {return _Cols;}
|
||||
DenseIndex rows(void) const {return m_rows;}
|
||||
DenseIndex cols(void) const {return _Cols;}
|
||||
void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
|
||||
void resize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
|
||||
const T *data() const { return m_data.array; }
|
||||
T *data() { return m_data.array; }
|
||||
};
|
||||
|
||||
// dynamic-size matrix with fixed-size storage and fixed height
|
||||
@@ -245,7 +236,7 @@ template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Si
|
||||
internal::plain_array<T,Size,_Options> m_data;
|
||||
DenseIndex m_cols;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
|
||||
DenseStorage() : m_cols(0) {}
|
||||
DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
|
||||
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
|
||||
@@ -260,12 +251,12 @@ template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Si
|
||||
}
|
||||
DenseStorage(DenseIndex, DenseIndex, DenseIndex nbCols) : m_cols(nbCols) {}
|
||||
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
|
||||
EIGEN_DEVICE_FUNC DenseIndex rows(void) const {return _Rows;}
|
||||
EIGEN_DEVICE_FUNC DenseIndex cols(void) const {return m_cols;}
|
||||
DenseIndex rows(void) const {return _Rows;}
|
||||
DenseIndex cols(void) const {return m_cols;}
|
||||
void conservativeResize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
|
||||
void resize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
|
||||
const T *data() const { return m_data.array; }
|
||||
T *data() { return m_data.array; }
|
||||
};
|
||||
|
||||
// purely dynamic matrix.
|
||||
@@ -275,28 +266,12 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
DenseIndex m_rows;
|
||||
DenseIndex m_cols;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
|
||||
DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
|
||||
DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(0), m_rows(0), m_cols(0) {}
|
||||
DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows), m_cols(nbCols)
|
||||
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
|
||||
DenseStorage(const DenseStorage& other)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols))
|
||||
, m_rows(other.m_rows)
|
||||
, m_cols(other.m_cols)
|
||||
{
|
||||
internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data);
|
||||
}
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
DenseStorage tmp(other);
|
||||
this->swap(tmp);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
DenseStorage(DenseStorage&& other)
|
||||
: m_data(std::move(other.m_data))
|
||||
@@ -317,8 +292,8 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
|
||||
void swap(DenseStorage& other)
|
||||
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
|
||||
EIGEN_DEVICE_FUNC DenseIndex rows(void) const {return m_rows;}
|
||||
EIGEN_DEVICE_FUNC DenseIndex cols(void) const {return m_cols;}
|
||||
DenseIndex rows(void) const {return m_rows;}
|
||||
DenseIndex cols(void) const {return m_cols;}
|
||||
void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
|
||||
{
|
||||
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
|
||||
@@ -339,8 +314,11 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
m_rows = nbRows;
|
||||
m_cols = nbCols;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data; }
|
||||
const T *data() const { return m_data; }
|
||||
T *data() { return m_data; }
|
||||
private:
|
||||
DenseStorage(const DenseStorage&);
|
||||
DenseStorage& operator=(const DenseStorage&);
|
||||
};
|
||||
|
||||
// matrix with dynamic width and fixed height (so that matrix has dynamic size).
|
||||
@@ -349,25 +327,10 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
T *m_data;
|
||||
DenseIndex m_cols;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
|
||||
DenseStorage() : m_data(0), m_cols(0) {}
|
||||
DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
|
||||
DenseStorage(DenseIndex size, DenseIndex, DenseIndex nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(nbCols)
|
||||
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
|
||||
DenseStorage(const DenseStorage& other)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
|
||||
, m_cols(other.m_cols)
|
||||
{
|
||||
internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);
|
||||
}
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
DenseStorage tmp(other);
|
||||
this->swap(tmp);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
DenseStorage(DenseStorage&& other)
|
||||
: m_data(std::move(other.m_data))
|
||||
@@ -385,8 +348,8 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
#endif
|
||||
~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
|
||||
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
|
||||
EIGEN_DEVICE_FUNC static DenseIndex rows(void) {return _Rows;}
|
||||
EIGEN_DEVICE_FUNC DenseIndex cols(void) const {return m_cols;}
|
||||
static DenseIndex rows(void) {return _Rows;}
|
||||
DenseIndex cols(void) const {return m_cols;}
|
||||
void conservativeResize(DenseIndex size, DenseIndex, DenseIndex nbCols)
|
||||
{
|
||||
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
|
||||
@@ -405,8 +368,11 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
}
|
||||
m_cols = nbCols;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data; }
|
||||
const T *data() const { return m_data; }
|
||||
T *data() { return m_data; }
|
||||
private:
|
||||
DenseStorage(const DenseStorage&);
|
||||
DenseStorage& operator=(const DenseStorage&);
|
||||
};
|
||||
|
||||
// matrix with dynamic height and fixed width (so that matrix has dynamic size).
|
||||
@@ -415,25 +381,10 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
T *m_data;
|
||||
DenseIndex m_rows;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
|
||||
DenseStorage() : m_data(0), m_rows(0) {}
|
||||
DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
|
||||
DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows)
|
||||
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
|
||||
DenseStorage(const DenseStorage& other)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
|
||||
, m_rows(other.m_rows)
|
||||
{
|
||||
internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);
|
||||
}
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
DenseStorage tmp(other);
|
||||
this->swap(tmp);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
DenseStorage(DenseStorage&& other)
|
||||
: m_data(std::move(other.m_data))
|
||||
@@ -451,8 +402,8 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
#endif
|
||||
~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
|
||||
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
|
||||
EIGEN_DEVICE_FUNC DenseIndex rows(void) const {return m_rows;}
|
||||
EIGEN_DEVICE_FUNC static DenseIndex cols(void) {return _Cols;}
|
||||
DenseIndex rows(void) const {return m_rows;}
|
||||
static DenseIndex cols(void) {return _Cols;}
|
||||
void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex)
|
||||
{
|
||||
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
|
||||
@@ -471,8 +422,11 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
}
|
||||
m_rows = nbRows;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data; }
|
||||
const T *data() const { return m_data; }
|
||||
T *data() { return m_data; }
|
||||
private:
|
||||
DenseStorage(const DenseStorage&);
|
||||
DenseStorage& operator=(const DenseStorage&);
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -70,30 +70,20 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
|
||||
typedef typename internal::dense_xpr_base<Diagonal>::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}
|
||||
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const
|
||||
{
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
return m_index.value()<0 ? (min)(Index(m_matrix.cols()),Index(m_matrix.rows()+m_index.value()))
|
||||
: (min)(Index(m_matrix.rows()),Index(m_matrix.cols()-m_index.value()));
|
||||
|
||||
}
|
||||
{ return m_index.value()<0 ? (std::min<Index>)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min<Index>)(m_matrix.rows(),m_matrix.cols()-m_index.value()); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return 1; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const
|
||||
{
|
||||
return m_matrix.outerStride() + 1;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const
|
||||
{
|
||||
return 0;
|
||||
@@ -105,57 +95,47 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index row, Index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index row, Index) const
|
||||
{
|
||||
return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index row, Index) const
|
||||
{
|
||||
return m_matrix.coeff(row+rowOffset(), row+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index idx)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index idx) const
|
||||
{
|
||||
return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index idx) const
|
||||
{
|
||||
return m_matrix.coeff(idx+rowOffset(), idx+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename MatrixType::Nested>::type&
|
||||
nestedExpression() const
|
||||
{
|
||||
return m_matrix;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
int index() const
|
||||
{
|
||||
return m_index.value();
|
||||
@@ -167,11 +147,8 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
|
||||
|
||||
private:
|
||||
// some compilers may fail to optimize std::max etc in case of compile-time constants...
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
|
||||
// triger a compile time error is someone try to call packet
|
||||
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
|
||||
@@ -213,18 +190,18 @@ MatrixBase<Derived>::diagonal() const
|
||||
*
|
||||
* \sa MatrixBase::diagonal(), class Diagonal */
|
||||
template<typename Derived>
|
||||
inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<DynamicIndex>::Type
|
||||
inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType
|
||||
MatrixBase<Derived>::diagonal(Index index)
|
||||
{
|
||||
return typename DiagonalIndexReturnType<DynamicIndex>::Type(derived(), index);
|
||||
return DiagonalDynamicIndexReturnType(derived(), index);
|
||||
}
|
||||
|
||||
/** This is the const version of diagonal(Index). */
|
||||
template<typename Derived>
|
||||
inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<DynamicIndex>::Type
|
||||
inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType
|
||||
MatrixBase<Derived>::diagonal(Index index) const
|
||||
{
|
||||
return typename ConstDiagonalIndexReturnType<DynamicIndex>::Type(derived(), index);
|
||||
return ConstDiagonalDynamicIndexReturnType(derived(), index);
|
||||
}
|
||||
|
||||
/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
|
||||
|
||||
@@ -37,64 +37,63 @@ class DiagonalBase : public EigenBase<Derived>
|
||||
typedef DenseMatrixType DenseType;
|
||||
typedef DiagonalMatrix<Scalar,DiagonalVectorType::SizeAtCompileTime,DiagonalVectorType::MaxSizeAtCompileTime> PlainObject;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Derived& derived() { return *static_cast<Derived*>(this); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseMatrixType toDenseMatrix() const { return derived(); }
|
||||
template<typename DenseDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void evalTo(MatrixBase<DenseDerived> &other) const;
|
||||
template<typename DenseDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void addTo(MatrixBase<DenseDerived> &other) const
|
||||
{ other.diagonal() += diagonal(); }
|
||||
template<typename DenseDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void subTo(MatrixBase<DenseDerived> &other) const
|
||||
{ other.diagonal() -= diagonal(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalVectorType& diagonal() { return derived().diagonal(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return diagonal().size(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return diagonal().size(); }
|
||||
|
||||
/** \returns the diagonal matrix product of \c *this by the matrix \a matrix.
|
||||
*/
|
||||
template<typename MatrixDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const DiagonalProduct<MatrixDerived, Derived, OnTheLeft>
|
||||
operator*(const MatrixBase<MatrixDerived> &matrix) const
|
||||
{
|
||||
return DiagonalProduct<MatrixDerived, Derived, OnTheLeft>(matrix.derived(), derived());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
|
||||
inverse() const
|
||||
{
|
||||
return diagonal().cwiseInverse();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
|
||||
operator*(const Scalar& scalar) const
|
||||
{
|
||||
return diagonal() * scalar;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
friend inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
|
||||
operator*(const Scalar& scalar, const DiagonalBase& other)
|
||||
{
|
||||
return other.diagonal() * scalar;
|
||||
}
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<typename OtherDerived>
|
||||
bool isApprox(const DiagonalBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
|
||||
{
|
||||
return diagonal().isApprox(other.diagonal(), precision);
|
||||
}
|
||||
template<typename OtherDerived>
|
||||
bool isApprox(const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
|
||||
{
|
||||
return toDenseMatrix().isApprox(other, precision);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
@@ -152,31 +151,24 @@ class DiagonalMatrix
|
||||
public:
|
||||
|
||||
/** const version of diagonal(). */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const DiagonalVectorType& diagonal() const { return m_diagonal; }
|
||||
/** \returns a reference to the stored vector of diagonal coefficients. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalVectorType& diagonal() { return m_diagonal; }
|
||||
|
||||
/** Default constructor without initialization */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalMatrix() {}
|
||||
|
||||
/** Constructs a diagonal matrix with given dimension */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}
|
||||
|
||||
/** 2D constructor. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x,y) {}
|
||||
|
||||
/** 3D constructor. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {}
|
||||
|
||||
/** Copy constructor. */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalMatrix(const DiagonalBase<OtherDerived>& other) : m_diagonal(other.diagonal()) {}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
@@ -186,13 +178,11 @@ class DiagonalMatrix
|
||||
|
||||
/** generic constructor from expression of the diagonal coefficients */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline DiagonalMatrix(const MatrixBase<OtherDerived>& other) : m_diagonal(other)
|
||||
{}
|
||||
|
||||
/** Copy operator. */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
DiagonalMatrix& operator=(const DiagonalBase<OtherDerived>& other)
|
||||
{
|
||||
m_diagonal = other.diagonal();
|
||||
@@ -203,7 +193,6 @@ class DiagonalMatrix
|
||||
/** This is a special case of the templated operator=. Its purpose is to
|
||||
* prevent a default operator= from hiding the templated operator=.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
DiagonalMatrix& operator=(const DiagonalMatrix& other)
|
||||
{
|
||||
m_diagonal = other.diagonal();
|
||||
@@ -212,19 +201,14 @@ class DiagonalMatrix
|
||||
#endif
|
||||
|
||||
/** Resizes to given size. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void resize(Index size) { m_diagonal.resize(size); }
|
||||
/** Sets all coefficients to zero. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void setZero() { m_diagonal.setZero(); }
|
||||
/** Resizes and sets all coefficients to zero. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void setZero(Index size) { m_diagonal.setZero(size); }
|
||||
/** Sets this matrix to be the identity matrix of the current size. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void setIdentity() { m_diagonal.setOnes(); }
|
||||
/** Sets this matrix to be the identity matrix of the given size. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void setIdentity(Index size) { m_diagonal.setOnes(size); }
|
||||
};
|
||||
|
||||
@@ -271,11 +255,9 @@ class DiagonalWrapper
|
||||
#endif
|
||||
|
||||
/** Constructor from expression of diagonal coefficients to wrap. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {}
|
||||
|
||||
/** \returns a const reference to the wrapped expression of diagonal coefficients. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const DiagonalVectorType& diagonal() const { return m_diagonal; }
|
||||
|
||||
protected:
|
||||
|
||||
@@ -34,8 +34,9 @@ struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
|
||||
_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))),
|
||||
_LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0,
|
||||
|
||||
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit,//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit),
|
||||
CoeffReadCost = NumTraits<Scalar>::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost
|
||||
Flags = ((HereditaryBits|_LinearAccessMask|AlignedBit) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0),//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit),
|
||||
Cost0 = EIGEN_ADD_COST(NumTraits<Scalar>::MulCost, MatrixType::CoeffReadCost),
|
||||
CoeffReadCost = EIGEN_ADD_COST(Cost0,DiagonalType::DiagonalVectorType::CoeffReadCost)
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
@@ -29,7 +29,6 @@ template<typename T, typename U,
|
||||
struct dot_nocheck
|
||||
{
|
||||
typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
|
||||
{
|
||||
return a.template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
|
||||
@@ -40,7 +39,6 @@ template<typename T, typename U>
|
||||
struct dot_nocheck<T, U, true>
|
||||
{
|
||||
typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
|
||||
{
|
||||
return a.transpose().template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
|
||||
@@ -61,7 +59,6 @@ struct dot_nocheck<T, U, true>
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
|
||||
MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
|
||||
{
|
||||
@@ -76,6 +73,34 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
|
||||
return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);
|
||||
}
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
/** \returns the dot product of *this with other, with the Eigen2 convention that the dot product is linear in the first variable
|
||||
* (conjugating the second variable). Of course this only makes a difference in the complex case.
|
||||
*
|
||||
* This method is only available in EIGEN2_SUPPORT mode.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* \sa dot()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
typename internal::traits<Derived>::Scalar
|
||||
MatrixBase<Derived>::eigen2_dot(const MatrixBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
|
||||
eigen_assert(size() == other.size());
|
||||
|
||||
return internal::dot_nocheck<OtherDerived,Derived>::run(other,*this);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
//---------- implementation of L2 norm and related functions ----------
|
||||
|
||||
/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm.
|
||||
@@ -139,7 +164,6 @@ template<typename Derived, int p>
|
||||
struct lpNorm_selector
|
||||
{
|
||||
typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
using std::pow;
|
||||
@@ -150,7 +174,6 @@ struct lpNorm_selector
|
||||
template<typename Derived>
|
||||
struct lpNorm_selector<Derived, 1>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
return m.cwiseAbs().sum();
|
||||
@@ -160,7 +183,6 @@ struct lpNorm_selector<Derived, 1>
|
||||
template<typename Derived>
|
||||
struct lpNorm_selector<Derived, 2>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
return m.norm();
|
||||
@@ -170,7 +192,6 @@ struct lpNorm_selector<Derived, 2>
|
||||
template<typename Derived>
|
||||
struct lpNorm_selector<Derived, Infinity>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
return m.cwiseAbs().maxCoeff();
|
||||
|
||||
@@ -31,40 +31,29 @@ template<typename Derived> struct EigenBase
|
||||
typedef typename internal::traits<Derived>::Index Index;
|
||||
|
||||
/** \returns a reference to the derived object */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& derived() { return *static_cast<Derived*>(this); }
|
||||
/** \returns a const reference to the derived object */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Derived& const_cast_derived() const
|
||||
{ return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Derived& const_derived() const
|
||||
{ return *static_cast<const Derived*>(this); }
|
||||
|
||||
/** \returns the number of rows. \sa cols(), RowsAtCompileTime */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return derived().rows(); }
|
||||
/** \returns the number of columns. \sa rows(), ColsAtCompileTime*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return derived().cols(); }
|
||||
/** \returns the number of coefficients, which is rows()*cols().
|
||||
* \sa rows(), cols(), SizeAtCompileTime. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index size() const { return rows() * cols(); }
|
||||
|
||||
/** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void evalTo(Dest& dst) const
|
||||
template<typename Dest> inline void evalTo(Dest& dst) const
|
||||
{ derived().evalTo(dst); }
|
||||
|
||||
/** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void addTo(Dest& dst) const
|
||||
template<typename Dest> inline void addTo(Dest& dst) const
|
||||
{
|
||||
// This is the default implementation,
|
||||
// derived class can reimplement it in a more optimized way.
|
||||
@@ -74,9 +63,7 @@ template<typename Derived> struct EigenBase
|
||||
}
|
||||
|
||||
/** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void subTo(Dest& dst) const
|
||||
template<typename Dest> inline void subTo(Dest& dst) const
|
||||
{
|
||||
// This is the default implementation,
|
||||
// derived class can reimplement it in a more optimized way.
|
||||
@@ -86,8 +73,7 @@ template<typename Derived> struct EigenBase
|
||||
}
|
||||
|
||||
/** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const
|
||||
template<typename Dest> inline void applyThisOnTheRight(Dest& dst) const
|
||||
{
|
||||
// This is the default implementation,
|
||||
// derived class can reimplement it in a more optimized way.
|
||||
@@ -95,8 +81,7 @@ template<typename Derived> struct EigenBase
|
||||
}
|
||||
|
||||
/** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const
|
||||
template<typename Dest> inline void applyThisOnTheLeft(Dest& dst) const
|
||||
{
|
||||
// This is the default implementation,
|
||||
// derived class can reimplement it in a more optimized way.
|
||||
|
||||
1026
Eigen/src/Core/Functors.h
Normal file
1026
Eigen/src/Core/Functors.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -19,10 +19,9 @@ namespace internal
|
||||
template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
|
||||
struct isApprox_selector
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
using std::min;
|
||||
typename internal::nested<Derived,2>::type nested(x);
|
||||
typename internal::nested<OtherDerived,2>::type otherNested(y);
|
||||
return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
|
||||
@@ -32,7 +31,6 @@ struct isApprox_selector
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct isApprox_selector<Derived, OtherDerived, true>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&)
|
||||
{
|
||||
return x.matrix() == y.matrix();
|
||||
@@ -42,7 +40,6 @@ struct isApprox_selector<Derived, OtherDerived, true>
|
||||
template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
|
||||
struct isMuchSmallerThan_object_selector
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
|
||||
{
|
||||
return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum();
|
||||
@@ -52,7 +49,6 @@ struct isMuchSmallerThan_object_selector
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&)
|
||||
{
|
||||
return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
|
||||
@@ -62,7 +58,6 @@ struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
|
||||
template<typename Derived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
|
||||
struct isMuchSmallerThan_scalar_selector
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const typename Derived::RealScalar& y, const typename Derived::RealScalar& prec)
|
||||
{
|
||||
return x.cwiseAbs2().sum() <= numext::abs2(prec * y);
|
||||
@@ -72,7 +67,6 @@ struct isMuchSmallerThan_scalar_selector
|
||||
template<typename Derived>
|
||||
struct isMuchSmallerThan_scalar_selector<Derived, true>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const typename Derived::RealScalar&, const typename Derived::RealScalar&)
|
||||
{
|
||||
return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
|
||||
|
||||
@@ -66,7 +66,8 @@ template<typename Lhs, typename Rhs> struct product_type
|
||||
MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
|
||||
_Rhs::MaxRowsAtCompileTime),
|
||||
Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
|
||||
_Rhs::RowsAtCompileTime)
|
||||
_Rhs::RowsAtCompileTime),
|
||||
LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
};
|
||||
|
||||
// the splitting into different lines of code here, introducing the _select enums and the typedef below,
|
||||
@@ -256,7 +257,7 @@ template<typename Lhs, typename Rhs>
|
||||
class GeneralProduct<Lhs, Rhs, OuterProduct>
|
||||
: public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
|
||||
{
|
||||
template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
|
||||
template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
|
||||
|
||||
public:
|
||||
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
|
||||
@@ -280,22 +281,22 @@ class GeneralProduct<Lhs, Rhs, OuterProduct>
|
||||
|
||||
template<typename Dest>
|
||||
inline void evalTo(Dest& dest) const {
|
||||
internal::outer_product_selector_run(*this, dest, set(), IsRowMajor<Dest>());
|
||||
internal::outer_product_selector_run(*this, dest, set(), is_row_major<Dest>());
|
||||
}
|
||||
|
||||
template<typename Dest>
|
||||
inline void addTo(Dest& dest) const {
|
||||
internal::outer_product_selector_run(*this, dest, add(), IsRowMajor<Dest>());
|
||||
internal::outer_product_selector_run(*this, dest, add(), is_row_major<Dest>());
|
||||
}
|
||||
|
||||
template<typename Dest>
|
||||
inline void subTo(Dest& dest) const {
|
||||
internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor<Dest>());
|
||||
internal::outer_product_selector_run(*this, dest, sub(), is_row_major<Dest>());
|
||||
}
|
||||
|
||||
template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
|
||||
{
|
||||
internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor<Dest>());
|
||||
internal::outer_product_selector_run(*this, dest, adds(alpha), is_row_major<Dest>());
|
||||
}
|
||||
};
|
||||
|
||||
@@ -396,7 +397,7 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
|
||||
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
|
||||
EIGEN_STRONG_INLINE Scalar* data() {
|
||||
return ForceAlignment
|
||||
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES)
|
||||
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(15))) + 16)
|
||||
: m_data.array;
|
||||
}
|
||||
#endif
|
||||
@@ -445,7 +446,7 @@ template<> struct gemv_selector<OnTheRight,ColMajor,true>
|
||||
if(!evalToDest)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
Index size = dest.size();
|
||||
int size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
if(!alphaIsCompatible)
|
||||
@@ -510,7 +511,7 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true>
|
||||
if(!DirectlyUseRhs)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
Index size = actualRhs.size();
|
||||
int size = actualRhs.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
|
||||
@@ -564,40 +565,6 @@ template<> struct gemv_selector<OnTheRight,RowMajor,false>
|
||||
*
|
||||
* \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
|
||||
*/
|
||||
#ifndef __CUDACC__
|
||||
|
||||
#ifdef EIGEN_TEST_EVALUATORS
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline const Product<Derived, OtherDerived>
|
||||
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
|
||||
{
|
||||
// A note regarding the function declaration: In MSVC, this function will sometimes
|
||||
// not be inlined since DenseStorage is an unwindable object for dynamic
|
||||
// matrices and product types are holding a member to store the result.
|
||||
// Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
|
||||
enum {
|
||||
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|
||||
|| OtherDerived::RowsAtCompileTime==Dynamic
|
||||
|| int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
|
||||
AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
|
||||
SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
|
||||
};
|
||||
// note to the lost user:
|
||||
// * for a dot product use: v1.dot(v2)
|
||||
// * for a coeff-wise product use: v1.cwiseProduct(v2)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
|
||||
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
|
||||
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
|
||||
#ifdef EIGEN_DEBUG_PRODUCT
|
||||
internal::product_type<Derived,OtherDerived>::debug();
|
||||
#endif
|
||||
|
||||
return Product<Derived, OtherDerived>(derived(), other.derived());
|
||||
}
|
||||
#else
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline const typename ProductReturnType<Derived, OtherDerived>::Type
|
||||
@@ -627,9 +594,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
|
||||
#endif
|
||||
return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
|
||||
*
|
||||
* The returned product will behave like any other expressions: the coefficients of the product will be
|
||||
|
||||
@@ -42,8 +42,6 @@ namespace internal {
|
||||
struct default_packet_traits
|
||||
{
|
||||
enum {
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
@@ -73,12 +71,10 @@ struct default_packet_traits
|
||||
template<typename T> struct packet_traits : default_packet_traits
|
||||
{
|
||||
typedef T type;
|
||||
typedef T half;
|
||||
enum {
|
||||
Vectorizable = 0,
|
||||
size = 1,
|
||||
AlignedOnScalar = 0,
|
||||
HasHalfPacket = 0
|
||||
AlignedOnScalar = 0
|
||||
};
|
||||
enum {
|
||||
HasAdd = 0,
|
||||
@@ -95,149 +91,94 @@ template<typename T> struct packet_traits : default_packet_traits
|
||||
};
|
||||
|
||||
/** \internal \returns a + b (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> inline Packet
|
||||
padd(const Packet& a,
|
||||
const Packet& b) { return a+b; }
|
||||
|
||||
/** \internal \returns a - b (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> inline Packet
|
||||
psub(const Packet& a,
|
||||
const Packet& b) { return a-b; }
|
||||
|
||||
/** \internal \returns -a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> inline Packet
|
||||
pnegate(const Packet& a) { return -a; }
|
||||
|
||||
/** \internal \returns conj(a) (coeff-wise) */
|
||||
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> inline Packet
|
||||
pconj(const Packet& a) { return numext::conj(a); }
|
||||
|
||||
/** \internal \returns a * b (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> inline Packet
|
||||
pmul(const Packet& a,
|
||||
const Packet& b) { return a*b; }
|
||||
|
||||
/** \internal \returns a / b (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> inline Packet
|
||||
pdiv(const Packet& a,
|
||||
const Packet& b) { return a/b; }
|
||||
|
||||
/** \internal \returns the min of \a a and \a b (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> inline Packet
|
||||
pmin(const Packet& a,
|
||||
const Packet& b) { EIGEN_USING_STD_MATH(min); return (min)(a, b); }
|
||||
const Packet& b) { using std::min; return (min)(a, b); }
|
||||
|
||||
/** \internal \returns the max of \a a and \a b (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> inline Packet
|
||||
pmax(const Packet& a,
|
||||
const Packet& b) { EIGEN_USING_STD_MATH(max); return (max)(a, b); }
|
||||
const Packet& b) { using std::max; return (max)(a, b); }
|
||||
|
||||
/** \internal \returns the absolute value of \a a */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> inline Packet
|
||||
pabs(const Packet& a) { using std::abs; return abs(a); }
|
||||
|
||||
/** \internal \returns the bitwise and of \a a and \a b */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> inline Packet
|
||||
pand(const Packet& a, const Packet& b) { return a & b; }
|
||||
|
||||
/** \internal \returns the bitwise or of \a a and \a b */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> inline Packet
|
||||
por(const Packet& a, const Packet& b) { return a | b; }
|
||||
|
||||
/** \internal \returns the bitwise xor of \a a and \a b */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> inline Packet
|
||||
pxor(const Packet& a, const Packet& b) { return a ^ b; }
|
||||
|
||||
/** \internal \returns the bitwise andnot of \a a and \a b */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> inline Packet
|
||||
pandnot(const Packet& a, const Packet& b) { return a & (!b); }
|
||||
|
||||
/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> inline Packet
|
||||
pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
||||
|
||||
/** \internal \returns a packet version of \a *from, (un-aligned load) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> inline Packet
|
||||
ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
||||
|
||||
/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
|
||||
|
||||
/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); }
|
||||
|
||||
/** \internal \returns a packet with elements of \a *from duplicated.
|
||||
* For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
|
||||
* duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
|
||||
* For instance, for a packet of 8 elements, 4 scalar will be read from \a *from and
|
||||
* duplicated to form: {from[0],from[0],from[1],from[1],,from[2],from[2],,from[3],from[3]}
|
||||
* Currently, this function is only used for scalar * complex products.
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
*/
|
||||
template<typename Packet> inline Packet
|
||||
ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
||||
|
||||
/** \internal \returns a packet with elements of \a *from quadrupled.
|
||||
* For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
|
||||
* replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
|
||||
* Currently, this function is only used in matrix products.
|
||||
* For packet-size smaller or equal to 4, this function is equivalent to pload1
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
ploadquad(const typename unpacket_traits<Packet>::type* from)
|
||||
{ return pload1<Packet>(from); }
|
||||
|
||||
/** \internal equivalent to
|
||||
* \code
|
||||
* a0 = pload1(a+0);
|
||||
* a1 = pload1(a+1);
|
||||
* a2 = pload1(a+2);
|
||||
* a3 = pload1(a+3);
|
||||
* \endcode
|
||||
* \sa pset1, pload1, ploaddup, pbroadcast2
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC
|
||||
inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
|
||||
Packet& a0, Packet& a1, Packet& a2, Packet& a3)
|
||||
{
|
||||
a0 = pload1<Packet>(a+0);
|
||||
a1 = pload1<Packet>(a+1);
|
||||
a2 = pload1<Packet>(a+2);
|
||||
a3 = pload1<Packet>(a+3);
|
||||
}
|
||||
|
||||
/** \internal equivalent to
|
||||
* \code
|
||||
* a0 = pload1(a+0);
|
||||
* a1 = pload1(a+1);
|
||||
* \endcode
|
||||
* \sa pset1, pload1, ploaddup, pbroadcast4
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC
|
||||
inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
|
||||
Packet& a0, Packet& a1)
|
||||
{
|
||||
a0 = pload1<Packet>(a+0);
|
||||
a1 = pload1<Packet>(a+1);
|
||||
}
|
||||
/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
|
||||
template<typename Packet> inline Packet
|
||||
pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
|
||||
|
||||
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
|
||||
template<typename Scalar> inline typename packet_traits<Scalar>::type
|
||||
plset(const Scalar& a) { return a; }
|
||||
|
||||
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
|
||||
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
|
||||
template<typename Scalar, typename Packet> inline void pstore(Scalar* to, const Packet& from)
|
||||
{ (*to) = from; }
|
||||
|
||||
/** \internal copy the packet \a from to \a *to, (un-aligned store) */
|
||||
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
|
||||
{ (*to) = from; }
|
||||
|
||||
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, DenseIndex /*stride*/)
|
||||
{ return ploadu<Packet>(from); }
|
||||
|
||||
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, DenseIndex /*stride*/)
|
||||
{ pstore(to, from); }
|
||||
template<typename Scalar, typename Packet> inline void pstoreu(Scalar* to, const Packet& from)
|
||||
{ (*to) = from; }
|
||||
|
||||
/** \internal tries to do cache prefetching of \a addr */
|
||||
template<typename Scalar> inline void prefetch(const Scalar* addr)
|
||||
@@ -248,45 +189,36 @@ __builtin_prefetch(addr);
|
||||
}
|
||||
|
||||
/** \internal \returns the first element of a packet */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
|
||||
template<typename Packet> inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> inline Packet
|
||||
preduxp(const Packet* vecs) { return vecs[0]; }
|
||||
|
||||
/** \internal \returns the sum of the elements of \a a*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the sum of the elements of \a a by block of 4 elements.
|
||||
* For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
|
||||
* For packet-size smaller or equal to 4, this boils down to a noop.
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline
|
||||
typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
|
||||
predux4(const Packet& a)
|
||||
template<typename Packet> inline typename unpacket_traits<Packet>::type predux(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the product of the elements of \a a*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
|
||||
template<typename Packet> inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the min of the elements of \a a*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
|
||||
template<typename Packet> inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the max of the elements of \a a*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
|
||||
template<typename Packet> inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the reversed elements of \a a*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
|
||||
template<typename Packet> inline Packet preverse(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
|
||||
/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
|
||||
template<typename Packet> inline Packet pcplxflip(const Packet& a)
|
||||
{
|
||||
// FIXME: uncomment the following in case we drop the internal imag and real functions.
|
||||
// using std::imag;
|
||||
@@ -318,10 +250,6 @@ Packet pasin(const Packet& a) { using std::asin; return asin(a); }
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pacos(const Packet& a) { using std::acos; return acos(a); }
|
||||
|
||||
/** \internal \returns the atan of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet patan(const Packet& a) { using std::atan; return atan(a); }
|
||||
|
||||
/** \internal \returns the exp of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pexp(const Packet& a) { using std::exp; return exp(a); }
|
||||
@@ -347,7 +275,7 @@ inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename u
|
||||
}
|
||||
|
||||
/** \internal \returns a * b + c (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> inline Packet
|
||||
pmadd(const Packet& a,
|
||||
const Packet& b,
|
||||
const Packet& c)
|
||||
@@ -408,33 +336,15 @@ inline void palign(PacketType& first, const PacketType& second)
|
||||
* Fast complex products (GCC generates a function call which is very slow)
|
||||
***************************************************************************/
|
||||
|
||||
// Eigen+CUDA does not support complexes.
|
||||
#ifndef __CUDACC__
|
||||
|
||||
template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
|
||||
{ return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
|
||||
|
||||
template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
|
||||
{ return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
* PacketBlock, that is a collection of N packets where the number of words
|
||||
* in the packet is a multiple of N.
|
||||
***************************************************************************/
|
||||
template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
|
||||
Packet packet[N];
|
||||
};
|
||||
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
|
||||
// Nothing to do in the scalar case, i.e. a 1x1 matrix.
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_GENERIC_PACKET_MATH_H
|
||||
|
||||
|
||||
@@ -45,7 +45,6 @@ namespace Eigen
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op)
|
||||
|
||||
@@ -49,7 +49,7 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
|
||||
*/
|
||||
struct IOFormat
|
||||
{
|
||||
/** Default constructor, see class IOFormat for the meaning of the parameters */
|
||||
/** Default contructor, see class IOFormat for the meaning of the parameters */
|
||||
IOFormat(int _precision = StreamPrecision, int _flags = 0,
|
||||
const std::string& _coeffSeparator = " ",
|
||||
const std::string& _rowSeparator = "\n", const std::string& _rowPrefix="", const std::string& _rowSuffix="",
|
||||
@@ -57,10 +57,6 @@ struct IOFormat
|
||||
: matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator),
|
||||
rowSpacer(""), coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags)
|
||||
{
|
||||
// TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline
|
||||
// don't add rowSpacer if columns are not to be aligned
|
||||
if((flags & DontAlignCols))
|
||||
return;
|
||||
int i = int(matSuffix.length())-1;
|
||||
while (i>=0 && matSuffix[i]!='\n')
|
||||
{
|
||||
|
||||
@@ -88,7 +88,7 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
|
||||
&& ( bool(IsDynamicSize)
|
||||
|| HasNoOuterStride
|
||||
|| ( OuterStrideAtCompileTime!=Dynamic
|
||||
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ),
|
||||
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ),
|
||||
Flags0 = TraitsBase::Flags & (~NestByRefBit),
|
||||
Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
|
||||
Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
|
||||
@@ -110,17 +110,19 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Map)
|
||||
|
||||
typedef typename Base::PointerType PointerType;
|
||||
#if EIGEN2_SUPPORT_STAGE <= STAGE30_FULL_EIGEN3_API
|
||||
typedef const Scalar* PointerArgType;
|
||||
inline PointerType cast_to_pointer_type(PointerArgType ptr) { return const_cast<PointerType>(ptr); }
|
||||
#else
|
||||
typedef PointerType PointerArgType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const
|
||||
{
|
||||
return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const
|
||||
{
|
||||
return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
|
||||
@@ -134,7 +136,6 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
|
||||
* \param dataPtr pointer to the array to map
|
||||
* \param a_stride optional Stride object, passing the strides.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType())
|
||||
: Base(cast_to_pointer_type(dataPtr)), m_stride(a_stride)
|
||||
{
|
||||
@@ -147,7 +148,6 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
|
||||
* \param a_size the size of the vector expression
|
||||
* \param a_stride optional Stride object, passing the strides.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Map(PointerArgType dataPtr, Index a_size, const StrideType& a_stride = StrideType())
|
||||
: Base(cast_to_pointer_type(dataPtr), a_size), m_stride(a_stride)
|
||||
{
|
||||
@@ -161,7 +161,6 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
|
||||
* \param nbCols the number of columns of the matrix expression
|
||||
* \param a_stride optional Stride object, passing the strides.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Map(PointerArgType dataPtr, Index nbRows, Index nbCols, const StrideType& a_stride = StrideType())
|
||||
: Base(cast_to_pointer_type(dataPtr), nbRows, nbCols), m_stride(a_stride)
|
||||
{
|
||||
@@ -174,6 +173,19 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
|
||||
StrideType m_stride;
|
||||
};
|
||||
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
inline Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>
|
||||
::Array(const Scalar *data)
|
||||
{
|
||||
this->_set_noalias(Eigen::Map<const Array>(data));
|
||||
}
|
||||
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
inline Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>
|
||||
::Matrix(const Scalar *data)
|
||||
{
|
||||
this->_set_noalias(Eigen::Map<const Matrix>(data));
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
|
||||
@@ -76,8 +76,8 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
|
||||
typedef typename Base::CoeffReturnType CoeffReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); }
|
||||
inline Index rows() const { return m_rows.value(); }
|
||||
inline Index cols() const { return m_cols.value(); }
|
||||
|
||||
/** Returns a pointer to the first coefficient of the matrix or vector.
|
||||
*
|
||||
@@ -87,26 +87,22 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
*/
|
||||
inline const Scalar* data() const { return m_data; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_data[colId * colStride() + rowId * rowStride()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeff(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
|
||||
return m_data[index * innerStride()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return this->m_data[colId * colStride() + rowId * rowStride()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
|
||||
@@ -127,14 +123,12 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
|
||||
explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
checkSanity();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline MapBase(PointerType dataPtr, Index vecSize)
|
||||
: m_data(dataPtr),
|
||||
m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)),
|
||||
@@ -146,7 +140,6 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
checkSanity();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols)
|
||||
: m_data(dataPtr), m_rows(nbRows), m_cols(nbCols)
|
||||
{
|
||||
@@ -158,14 +151,13 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
|
||||
protected:
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
void checkSanity() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
|
||||
internal::inner_stride_at_compile_time<Derived>::ret==1),
|
||||
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
|
||||
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0)
|
||||
&& "data is not aligned");
|
||||
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0)
|
||||
&& "input pointer is not aligned on a 16 byte boundary");
|
||||
}
|
||||
|
||||
PointerType m_data;
|
||||
@@ -176,6 +168,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
template<typename Derived> class MapBase<Derived, WriteAccessors>
|
||||
: public MapBase<Derived, ReadOnlyAccessors>
|
||||
{
|
||||
typedef MapBase<Derived, ReadOnlyAccessors> ReadOnlyMapBase;
|
||||
public:
|
||||
|
||||
typedef MapBase<Derived, ReadOnlyAccessors> Base;
|
||||
@@ -203,18 +196,14 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return this->m_data; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)
|
||||
{
|
||||
return this->m_data[col * colStride() + row * rowStride()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
|
||||
@@ -236,18 +225,19 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
|
||||
(this->m_data + index * innerStride(), val);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
|
||||
EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
|
||||
EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) : Base(dataPtr, nbRows, nbCols) {}
|
||||
explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
|
||||
inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
|
||||
inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) : Base(dataPtr, nbRows, nbCols) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const MapBase& other)
|
||||
{
|
||||
Base::Base::operator=(other);
|
||||
ReadOnlyMapBase::Base::operator=(other);
|
||||
return derived();
|
||||
}
|
||||
|
||||
using Base::Base::operator=;
|
||||
// In theory we could simply refer to Base:Base::operator=, but MSVC does not like Base::Base,
|
||||
// see bugs 821 and 920.
|
||||
using ReadOnlyMapBase::Base::operator=;
|
||||
};
|
||||
|
||||
#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS
|
||||
|
||||
@@ -12,15 +12,6 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
// On WINCE, std::abs is defined for int only, so let's defined our own overloads:
|
||||
// This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too.
|
||||
#if defined(_WIN32_WCE) && defined(_MSC_VER) && _MSC_VER<=1500
|
||||
long abs(long x) { return (labs(x)); }
|
||||
double abs(double x) { return (fabs(x)); }
|
||||
float abs(float x) { return (fabsf(x)); }
|
||||
long double abs(long double x) { return (fabsl(x)); }
|
||||
#endif
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \internal \struct global_math_functions_filtering_base
|
||||
@@ -71,7 +62,6 @@ template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
|
||||
struct real_default_impl
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const Scalar& x)
|
||||
{
|
||||
return x;
|
||||
@@ -82,7 +72,6 @@ template<typename Scalar>
|
||||
struct real_default_impl<Scalar,true>
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const Scalar& x)
|
||||
{
|
||||
using std::real;
|
||||
@@ -98,6 +87,7 @@ struct real_retval
|
||||
typedef typename NumTraits<Scalar>::Real type;
|
||||
};
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Implementation of imag *
|
||||
****************************************************************************/
|
||||
@@ -106,7 +96,6 @@ template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
|
||||
struct imag_default_impl
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const Scalar&)
|
||||
{
|
||||
return RealScalar(0);
|
||||
@@ -117,7 +106,6 @@ template<typename Scalar>
|
||||
struct imag_default_impl<Scalar,true>
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const Scalar& x)
|
||||
{
|
||||
using std::imag;
|
||||
@@ -141,12 +129,10 @@ template<typename Scalar>
|
||||
struct real_ref_impl
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar& run(Scalar& x)
|
||||
{
|
||||
return reinterpret_cast<RealScalar*>(&x)[0];
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline const RealScalar& run(const Scalar& x)
|
||||
{
|
||||
return reinterpret_cast<const RealScalar*>(&x)[0];
|
||||
@@ -167,12 +153,10 @@ template<typename Scalar, bool IsComplex>
|
||||
struct imag_ref_default_impl
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar& run(Scalar& x)
|
||||
{
|
||||
return reinterpret_cast<RealScalar*>(&x)[1];
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline const RealScalar& run(const Scalar& x)
|
||||
{
|
||||
return reinterpret_cast<RealScalar*>(&x)[1];
|
||||
@@ -182,12 +166,10 @@ struct imag_ref_default_impl
|
||||
template<typename Scalar>
|
||||
struct imag_ref_default_impl<Scalar, false>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Scalar run(Scalar&)
|
||||
{
|
||||
return Scalar(0);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline const Scalar run(const Scalar&)
|
||||
{
|
||||
return Scalar(0);
|
||||
@@ -210,7 +192,6 @@ struct imag_ref_retval
|
||||
template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
|
||||
struct conj_impl
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Scalar run(const Scalar& x)
|
||||
{
|
||||
return x;
|
||||
@@ -220,7 +201,6 @@ struct conj_impl
|
||||
template<typename Scalar>
|
||||
struct conj_impl<Scalar,true>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Scalar run(const Scalar& x)
|
||||
{
|
||||
using std::conj;
|
||||
@@ -242,7 +222,6 @@ template<typename Scalar>
|
||||
struct abs2_impl
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const Scalar& x)
|
||||
{
|
||||
return x*x;
|
||||
@@ -252,7 +231,6 @@ struct abs2_impl
|
||||
template<typename RealScalar>
|
||||
struct abs2_impl<std::complex<RealScalar> >
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const std::complex<RealScalar>& x)
|
||||
{
|
||||
return real(x)*real(x) + imag(x)*imag(x);
|
||||
@@ -273,7 +251,6 @@ template<typename Scalar, bool IsComplex>
|
||||
struct norm1_default_impl
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const Scalar& x)
|
||||
{
|
||||
using std::abs;
|
||||
@@ -284,7 +261,6 @@ struct norm1_default_impl
|
||||
template<typename Scalar>
|
||||
struct norm1_default_impl<Scalar, false>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Scalar run(const Scalar& x)
|
||||
{
|
||||
using std::abs;
|
||||
@@ -311,23 +287,16 @@ struct hypot_impl
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
static inline RealScalar run(const Scalar& x, const Scalar& y)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(max);
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
using std::max;
|
||||
using std::min;
|
||||
using std::abs;
|
||||
using std::sqrt;
|
||||
RealScalar _x = abs(x);
|
||||
RealScalar _y = abs(y);
|
||||
Scalar p, qp;
|
||||
if(_x>_y)
|
||||
{
|
||||
p = _x;
|
||||
qp = _y / p;
|
||||
}
|
||||
else
|
||||
{
|
||||
p = _y;
|
||||
qp = _x / p;
|
||||
}
|
||||
RealScalar p = (max)(_x, _y);
|
||||
if(p==RealScalar(0)) return RealScalar(0);
|
||||
RealScalar q = (min)(_x, _y);
|
||||
RealScalar qp = q/p;
|
||||
return p * sqrt(RealScalar(1) + qp*qp);
|
||||
}
|
||||
};
|
||||
@@ -363,45 +332,37 @@ inline NewType cast(const OldType& x)
|
||||
* Implementation of atanh2 *
|
||||
****************************************************************************/
|
||||
|
||||
template<typename Scalar>
|
||||
struct atanh2_impl
|
||||
template<typename Scalar, bool IsInteger>
|
||||
struct atanh2_default_impl
|
||||
{
|
||||
static inline Scalar run(const Scalar& x, const Scalar& r)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
|
||||
#if (__cplusplus >= 201103L) && !defined(__CYGWIN__)
|
||||
using std::log1p;
|
||||
return log1p(2 * x / (r - x)) / 2;
|
||||
#else
|
||||
using std::abs;
|
||||
using std::log;
|
||||
using std::sqrt;
|
||||
Scalar z = x / r;
|
||||
if (r == 0 || abs(z) > sqrt(NumTraits<Scalar>::epsilon()))
|
||||
return log((r + x) / (r - x)) / 2;
|
||||
else
|
||||
return z + z*z*z / 3;
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<typename RealScalar>
|
||||
struct atanh2_impl<std::complex<RealScalar> >
|
||||
{
|
||||
typedef std::complex<RealScalar> Scalar;
|
||||
static inline Scalar run(const Scalar& x, const Scalar& r)
|
||||
typedef Scalar retval;
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
static inline Scalar run(const Scalar& x, const Scalar& y)
|
||||
{
|
||||
using std::abs;
|
||||
using std::log;
|
||||
using std::norm;
|
||||
using std::sqrt;
|
||||
Scalar z = x / r;
|
||||
if (r == Scalar(0) || norm(z) > NumTraits<RealScalar>::epsilon())
|
||||
return RealScalar(0.5) * log((r + x) / (r - x));
|
||||
Scalar z = x / y;
|
||||
if (y == Scalar(0) || abs(z) > sqrt(NumTraits<RealScalar>::epsilon()))
|
||||
return RealScalar(0.5) * log((y + x) / (y - x));
|
||||
else
|
||||
return z + z*z*z / RealScalar(3);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar>
|
||||
struct atanh2_default_impl<Scalar, true>
|
||||
{
|
||||
static inline Scalar run(const Scalar&, const Scalar&)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
|
||||
return Scalar(0);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar>
|
||||
struct atanh2_impl : atanh2_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
|
||||
|
||||
template<typename Scalar>
|
||||
struct atanh2_retval
|
||||
{
|
||||
@@ -593,84 +554,72 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()
|
||||
namespace numext {
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x)
|
||||
{
|
||||
return internal::real_ref_impl<Scalar>::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x)
|
||||
{
|
||||
return internal::imag_ref_impl<Scalar>::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(atanh2, Scalar) atanh2(const Scalar& x, const Scalar& y)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(atanh2, Scalar)::run(x, y);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y);
|
||||
@@ -678,22 +627,11 @@ inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y)
|
||||
|
||||
// std::isfinite is non standard, so let's define our own version,
|
||||
// even though it is not very efficient.
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
bool (isfinite)(const T& x)
|
||||
template<typename T> bool (isfinite)(const T& x)
|
||||
{
|
||||
return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
bool (isfinite)(const std::complex<T>& x)
|
||||
{
|
||||
using std::real;
|
||||
using std::imag;
|
||||
return isfinite(real(x)) && isfinite(imag(x));
|
||||
}
|
||||
|
||||
} // end namespace numext
|
||||
|
||||
namespace internal {
|
||||
@@ -711,20 +649,18 @@ template<typename Scalar>
|
||||
struct scalar_fuzzy_default_impl<Scalar, false, false>
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
template<typename OtherScalar> EIGEN_DEVICE_FUNC
|
||||
template<typename OtherScalar>
|
||||
static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
|
||||
{
|
||||
using std::abs;
|
||||
return abs(x) <= abs(y) * prec;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
using std::min;
|
||||
using std::abs;
|
||||
return abs(x - y) <= (min)(abs(x), abs(y)) * prec;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
|
||||
{
|
||||
return x <= y || isApprox(x, y, prec);
|
||||
@@ -735,17 +671,15 @@ template<typename Scalar>
|
||||
struct scalar_fuzzy_default_impl<Scalar, false, true>
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
template<typename OtherScalar> EIGEN_DEVICE_FUNC
|
||||
template<typename OtherScalar>
|
||||
static inline bool isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&)
|
||||
{
|
||||
return x == Scalar(0);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&)
|
||||
{
|
||||
return x == y;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&)
|
||||
{
|
||||
return x <= y;
|
||||
@@ -763,7 +697,7 @@ struct scalar_fuzzy_default_impl<Scalar, true, false>
|
||||
}
|
||||
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
using std::min;
|
||||
return numext::abs2(x - y) <= (min)(numext::abs2(x), numext::abs2(y)) * prec * prec;
|
||||
}
|
||||
};
|
||||
@@ -771,21 +705,21 @@ struct scalar_fuzzy_default_impl<Scalar, true, false>
|
||||
template<typename Scalar>
|
||||
struct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
|
||||
|
||||
template<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC
|
||||
template<typename Scalar, typename OtherScalar>
|
||||
inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
|
||||
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
|
||||
{
|
||||
return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision);
|
||||
}
|
||||
|
||||
template<typename Scalar> EIGEN_DEVICE_FUNC
|
||||
template<typename Scalar>
|
||||
inline bool isApprox(const Scalar& x, const Scalar& y,
|
||||
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
|
||||
{
|
||||
return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision);
|
||||
}
|
||||
|
||||
template<typename Scalar> EIGEN_DEVICE_FUNC
|
||||
template<typename Scalar>
|
||||
inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y,
|
||||
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
|
||||
{
|
||||
@@ -808,19 +742,17 @@ template<> struct scalar_fuzzy_impl<bool>
|
||||
{
|
||||
typedef bool RealScalar;
|
||||
|
||||
template<typename OtherScalar> EIGEN_DEVICE_FUNC
|
||||
template<typename OtherScalar>
|
||||
static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&)
|
||||
{
|
||||
return !x;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline bool isApprox(bool x, bool y, bool)
|
||||
{
|
||||
return x == y;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&)
|
||||
{
|
||||
return (!x) || y;
|
||||
|
||||
@@ -151,7 +151,6 @@ class Matrix
|
||||
*
|
||||
* \callgraph
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other)
|
||||
{
|
||||
return Base::_set(other);
|
||||
@@ -168,7 +167,6 @@ class Matrix
|
||||
* remain row-vectors and vectors remain vectors.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix& operator=(const MatrixBase<OtherDerived>& other)
|
||||
{
|
||||
return Base::_set(other);
|
||||
@@ -181,14 +179,12 @@ class Matrix
|
||||
* \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
return Base::operator=(other);
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue<OtherDerived>& func)
|
||||
{
|
||||
return Base::operator=(func);
|
||||
@@ -204,7 +200,6 @@ class Matrix
|
||||
*
|
||||
* \sa resize(Index,Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix() : Base()
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -212,7 +207,6 @@ class Matrix
|
||||
}
|
||||
|
||||
// FIXME is it still needed
|
||||
EIGEN_DEVICE_FUNC
|
||||
Matrix(internal::constructor_without_unaligned_array_assert)
|
||||
: Base(internal::constructor_without_unaligned_array_assert())
|
||||
{ Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
|
||||
@@ -232,65 +226,41 @@ class Matrix
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
// This constructor is for both 1x1 matrices and dynamic vectors
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE explicit Matrix(const T& x)
|
||||
/** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
|
||||
*
|
||||
* Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
|
||||
* it is redundant to pass the dimension here, so it makes more sense to use the default
|
||||
* constructor Matrix() instead.
|
||||
*/
|
||||
EIGEN_STRONG_INLINE explicit Matrix(Index dim)
|
||||
: Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::template _init1<T>(x);
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix)
|
||||
eigen_assert(dim >= 0);
|
||||
eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
|
||||
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename T0, typename T1>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::template _init2<T0,T1>(x, y);
|
||||
}
|
||||
#else
|
||||
/** \brief Constructs a fixed-sized matrix initialized with coefficients starting at \a data */
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit Matrix(const Scalar *data);
|
||||
|
||||
/** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
|
||||
*
|
||||
* This is useful for dynamic-size vectors. For fixed-size vectors,
|
||||
* it is redundant to pass these parameters, so one should use the default constructor
|
||||
* Matrix() instead.
|
||||
*
|
||||
* \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance,
|
||||
* calling Matrix<double,1,1>(1) will call the initialization constructor: Matrix(const Scalar&).
|
||||
* For fixed-size \c 1x1 matrices it is thefore recommended to use the default
|
||||
* constructor Matrix() instead, especilly when using one of the non standard
|
||||
* \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
|
||||
*/
|
||||
EIGEN_STRONG_INLINE explicit Matrix(Index dim);
|
||||
/** \brief Constructs an initialized 1x1 matrix with the given coefficient */
|
||||
Matrix(const Scalar& x);
|
||||
/** \brief Constructs an uninitialized matrix with \a rows rows and \a cols columns.
|
||||
*
|
||||
* This is useful for dynamic-size matrices. For fixed-size matrices,
|
||||
* it is redundant to pass these parameters, so one should use the default constructor
|
||||
* Matrix() instead.
|
||||
*
|
||||
* \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance,
|
||||
* calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y).
|
||||
* For fixed-size \c 1x2 or \c 2x1 vectors it is thefore recommended to use the default
|
||||
* constructor Matrix() instead, especilly when using one of the non standard
|
||||
* \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
* Matrix() instead. */
|
||||
Matrix(Index rows, Index cols);
|
||||
|
||||
/** \brief Constructs an initialized 2D vector with given coefficients */
|
||||
Matrix(const Scalar& x, const Scalar& y);
|
||||
#endif
|
||||
|
||||
/** \brief Constructs an initialized 3D vector with given coefficients */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -300,7 +270,6 @@ class Matrix
|
||||
m_storage.data()[2] = z;
|
||||
}
|
||||
/** \brief Constructs an initialized 4D vector with given coefficients */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -311,10 +280,10 @@ class Matrix
|
||||
m_storage.data()[3] = w;
|
||||
}
|
||||
|
||||
explicit Matrix(const Scalar *data);
|
||||
|
||||
/** \brief Constructor copying the value of the expression \a other */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const MatrixBase<OtherDerived>& other)
|
||||
: Base(other.rows() * other.cols(), other.rows(), other.cols())
|
||||
{
|
||||
@@ -327,7 +296,6 @@ class Matrix
|
||||
Base::_set_noalias(other);
|
||||
}
|
||||
/** \brief Copy constructor */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const Matrix& other)
|
||||
: Base(other.rows() * other.cols(), other.rows(), other.cols())
|
||||
{
|
||||
@@ -336,7 +304,6 @@ class Matrix
|
||||
}
|
||||
/** \brief Copy constructor with in-place evaluation */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const ReturnByValue<OtherDerived>& other)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
@@ -348,7 +315,6 @@ class Matrix
|
||||
* \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
|
||||
: Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
|
||||
{
|
||||
@@ -364,22 +330,26 @@ class Matrix
|
||||
* of same type it is enough to swap the data pointers.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void swap(MatrixBase<OtherDerived> const & other)
|
||||
{ this->_swap(other.derived()); }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
|
||||
inline Index innerStride() const { return 1; }
|
||||
inline Index outerStride() const { return this->innerSize(); }
|
||||
|
||||
/////////// Geometry module ///////////
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Matrix& operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<typename OtherDerived>
|
||||
explicit Matrix(const eigen2_RotationBase<OtherDerived,ColsAtCompileTime>& r);
|
||||
template<typename OtherDerived>
|
||||
Matrix& operator=(const eigen2_RotationBase<OtherDerived,ColsAtCompileTime>& r);
|
||||
#endif
|
||||
|
||||
// allow to extend Matrix outside Eigen
|
||||
#ifdef EIGEN_MATRIX_PLUGIN
|
||||
#include EIGEN_MATRIX_PLUGIN
|
||||
|
||||
@@ -81,7 +81,6 @@ template<typename Derived> class MatrixBase
|
||||
using Base::operator-=;
|
||||
using Base::operator*=;
|
||||
using Base::operator/=;
|
||||
using Base::operator*;
|
||||
|
||||
typedef typename Base::CoeffReturnType CoeffReturnType;
|
||||
typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
|
||||
@@ -99,7 +98,6 @@ template<typename Derived> class MatrixBase
|
||||
|
||||
/** \returns the size of the main diagonal, which is min(rows(),cols()).
|
||||
* \sa rows(), cols(), SizeAtCompileTime. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index diagonalSize() const { return (std::min)(rows(),cols()); }
|
||||
|
||||
/** \brief The plain matrix type corresponding to this expression.
|
||||
@@ -147,59 +145,36 @@ template<typename Derived> class MatrixBase
|
||||
/** Special case of the template operator=, in order to prevent the compiler
|
||||
* from generating a default operator= (issue hit with g++ 4.1)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const MatrixBase& other);
|
||||
|
||||
// We cannot inherit here via Base::operator= since it is causing
|
||||
// trouble with MSVC.
|
||||
|
||||
template <typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const DenseBase<OtherDerived>& other);
|
||||
|
||||
template <typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const EigenBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const ReturnByValue<OtherDerived>& other);
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& lazyAssign(const ProductBase<ProductDerived, Lhs,Rhs>& other);
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
template<typename MatrixPower, typename Lhs, typename Rhs>
|
||||
Derived& lazyAssign(const MatrixPowerProduct<MatrixPower, Lhs,Rhs>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator+=(const MatrixBase<OtherDerived>& other);
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator-=(const MatrixBase<OtherDerived>& other);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename LazyProductReturnType<Derived,OtherDerived>::Type
|
||||
operator*(const MatrixBase<OtherDerived> &other) const
|
||||
{ return this->lazyProduct(other); }
|
||||
#else
|
||||
|
||||
#ifdef EIGEN_TEST_EVALUATORS
|
||||
template<typename OtherDerived>
|
||||
const Product<Derived,OtherDerived>
|
||||
operator*(const MatrixBase<OtherDerived> &other) const;
|
||||
#else
|
||||
template<typename OtherDerived>
|
||||
const typename ProductReturnType<Derived,OtherDerived>::Type
|
||||
operator*(const MatrixBase<OtherDerived> &other) const;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename LazyProductReturnType<Derived,OtherDerived>::Type
|
||||
lazyProduct(const MatrixBase<OtherDerived> &other) const;
|
||||
|
||||
@@ -213,96 +188,84 @@ template<typename Derived> class MatrixBase
|
||||
void applyOnTheRight(const EigenBase<OtherDerived>& other);
|
||||
|
||||
template<typename DiagonalDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const DiagonalProduct<Derived, DiagonalDerived, OnTheRight>
|
||||
operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
|
||||
dot(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;
|
||||
EIGEN_DEVICE_FUNC RealScalar norm() const;
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<typename OtherDerived>
|
||||
Scalar eigen2_dot(const MatrixBase<OtherDerived>& other) const;
|
||||
#endif
|
||||
|
||||
RealScalar squaredNorm() const;
|
||||
RealScalar norm() const;
|
||||
RealScalar stableNorm() const;
|
||||
RealScalar blueNorm() const;
|
||||
RealScalar hypotNorm() const;
|
||||
EIGEN_DEVICE_FUNC const PlainObject normalized() const;
|
||||
EIGEN_DEVICE_FUNC void normalize();
|
||||
const PlainObject normalized() const;
|
||||
void normalize();
|
||||
|
||||
EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
|
||||
EIGEN_DEVICE_FUNC void adjointInPlace();
|
||||
const AdjointReturnType adjoint() const;
|
||||
void adjointInPlace();
|
||||
|
||||
typedef Diagonal<Derived> DiagonalReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
DiagonalReturnType diagonal();
|
||||
|
||||
typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
ConstDiagonalReturnType diagonal() const;
|
||||
|
||||
template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
|
||||
template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
|
||||
|
||||
template<int Index>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename DiagonalIndexReturnType<Index>::Type diagonal();
|
||||
|
||||
template<int Index>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
|
||||
|
||||
// Note: The "MatrixBase::" prefixes are added to help MSVC9 to match these declarations with the later implementations.
|
||||
// On the other hand they confuse MSVC8...
|
||||
#if (defined _MSC_VER) && (_MSC_VER >= 1500) // 2008 or later
|
||||
typename MatrixBase::template DiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index);
|
||||
typename MatrixBase::template ConstDiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index) const;
|
||||
#else
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename DiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index);
|
||||
template<int Index> typename DiagonalIndexReturnType<Index>::Type diagonal();
|
||||
template<int Index> typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename ConstDiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index) const;
|
||||
#endif
|
||||
typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType;
|
||||
typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType;
|
||||
|
||||
DiagonalDynamicIndexReturnType diagonal(Index index);
|
||||
ConstDiagonalDynamicIndexReturnType diagonal(Index index) const;
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<unsigned int Mode> typename internal::eigen2_part_return_type<Derived, Mode>::type part();
|
||||
template<unsigned int Mode> const typename internal::eigen2_part_return_type<Derived, Mode>::type part() const;
|
||||
|
||||
// huuuge hack. make Eigen2's matrix.part<Diagonal>() work in eigen3. Problem: Diagonal is now a class template instead
|
||||
// of an integer constant. Solution: overload the part() method template wrt template parameters list.
|
||||
template<template<typename T, int N> class U>
|
||||
const DiagonalWrapper<ConstDiagonalReturnType> part() const
|
||||
{ return diagonal().asDiagonal(); }
|
||||
#endif // EIGEN2_SUPPORT
|
||||
|
||||
template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
|
||||
template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
|
||||
|
||||
template<unsigned int Mode>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename TriangularViewReturnType<Mode>::Type triangularView();
|
||||
template<unsigned int Mode>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
|
||||
template<unsigned int Mode> typename TriangularViewReturnType<Mode>::Type triangularView();
|
||||
template<unsigned int Mode> typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
|
||||
|
||||
template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
|
||||
template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };
|
||||
|
||||
template<unsigned int UpLo>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
|
||||
template<unsigned int UpLo>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
|
||||
template<unsigned int UpLo> typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
|
||||
template<unsigned int UpLo> typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
|
||||
|
||||
const SparseView<Derived> sparseView(const Scalar& m_reference = Scalar(0),
|
||||
const typename NumTraits<Scalar>::Real& m_epsilon = NumTraits<Scalar>::dummy_precision()) const;
|
||||
EIGEN_DEVICE_FUNC static const IdentityReturnType Identity();
|
||||
EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols);
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i);
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i);
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType UnitX();
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType UnitY();
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ();
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType UnitW();
|
||||
static const IdentityReturnType Identity();
|
||||
static const IdentityReturnType Identity(Index rows, Index cols);
|
||||
static const BasisReturnType Unit(Index size, Index i);
|
||||
static const BasisReturnType Unit(Index i);
|
||||
static const BasisReturnType UnitX();
|
||||
static const BasisReturnType UnitY();
|
||||
static const BasisReturnType UnitZ();
|
||||
static const BasisReturnType UnitW();
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const DiagonalWrapper<const Derived> asDiagonal() const;
|
||||
const PermutationWrapper<const Derived> asPermutation() const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& setIdentity();
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& setIdentity(Index rows, Index cols);
|
||||
|
||||
bool isIdentity(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
@@ -341,26 +304,42 @@ template<typename Derived> class MatrixBase
|
||||
|
||||
Scalar trace() const;
|
||||
|
||||
template<int p> EIGEN_DEVICE_FUNC RealScalar lpNorm() const;
|
||||
/////////// Array module ///////////
|
||||
|
||||
EIGEN_DEVICE_FUNC MatrixBase<Derived>& matrix() { return *this; }
|
||||
EIGEN_DEVICE_FUNC const MatrixBase<Derived>& matrix() const { return *this; }
|
||||
template<int p> RealScalar lpNorm() const;
|
||||
|
||||
MatrixBase<Derived>& matrix() { return *this; }
|
||||
const MatrixBase<Derived>& matrix() const { return *this; }
|
||||
|
||||
/** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix
|
||||
* \sa ArrayBase::matrix() */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return derived(); }
|
||||
/** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix
|
||||
* \sa ArrayBase::matrix() */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return derived(); }
|
||||
ArrayWrapper<Derived> array() { return derived(); }
|
||||
const ArrayWrapper<const Derived> array() const { return derived(); }
|
||||
|
||||
/////////// LU module ///////////
|
||||
|
||||
EIGEN_DEVICE_FUNC const FullPivLU<PlainObject> fullPivLu() const;
|
||||
EIGEN_DEVICE_FUNC const PartialPivLU<PlainObject> partialPivLu() const;
|
||||
const FullPivLU<PlainObject> fullPivLu() const;
|
||||
const PartialPivLU<PlainObject> partialPivLu() const;
|
||||
|
||||
#if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS
|
||||
const LU<PlainObject> lu() const;
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
const LU<PlainObject> eigen2_lu() const;
|
||||
#endif
|
||||
|
||||
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
|
||||
const PartialPivLU<PlainObject> lu() const;
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<typename ResultType>
|
||||
void computeInverse(MatrixBase<ResultType> *result) const {
|
||||
*result = this->inverse();
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const internal::inverse_impl<Derived> inverse() const;
|
||||
template<typename ResultType>
|
||||
void computeInverseAndDetWithCheck(
|
||||
@@ -387,6 +366,10 @@ template<typename Derived> class MatrixBase
|
||||
const HouseholderQR<PlainObject> householderQr() const;
|
||||
const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
|
||||
const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
const QR<PlainObject> qr() const;
|
||||
#endif
|
||||
|
||||
EigenvaluesReturnType eigenvalues() const;
|
||||
RealScalar operatorNorm() const;
|
||||
@@ -395,6 +378,10 @@ template<typename Derived> class MatrixBase
|
||||
|
||||
JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
SVD<PlainObject> svd() const;
|
||||
#endif
|
||||
|
||||
/////////// Geometry module ///////////
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
@@ -405,24 +392,20 @@ template<typename Derived> class MatrixBase
|
||||
};
|
||||
#endif // EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename cross_product_return_type<OtherDerived>::type
|
||||
cross(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
PlainObject unitOrthogonal(void) const;
|
||||
|
||||
Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
|
||||
|
||||
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
|
||||
ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
|
||||
// put this as separate enum value to work around possible GCC 4.3 bug (?)
|
||||
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1?Vertical:Horizontal };
|
||||
typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
|
||||
HomogeneousReturnType homogeneous() const;
|
||||
#endif
|
||||
|
||||
enum {
|
||||
SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
|
||||
@@ -457,6 +440,15 @@ template<typename Derived> class MatrixBase
|
||||
template<typename OtherScalar>
|
||||
void applyOnTheRight(Index p, Index q, const JacobiRotation<OtherScalar>& j);
|
||||
|
||||
///////// SparseCore module /////////
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE const typename SparseMatrixBase<OtherDerived>::template CwiseProductDenseReturnType<Derived>::Type
|
||||
cwiseProduct(const SparseMatrixBase<OtherDerived> &other) const
|
||||
{
|
||||
return other.cwiseProduct(derived());
|
||||
}
|
||||
|
||||
///////// MatrixFunctions module /////////
|
||||
|
||||
typedef typename internal::stem_function<Scalar>::type StemFunction;
|
||||
@@ -469,15 +461,49 @@ template<typename Derived> class MatrixBase
|
||||
const MatrixSquareRootReturnValue<Derived> sqrt() const;
|
||||
const MatrixLogarithmReturnValue<Derived> log() const;
|
||||
const MatrixPowerReturnValue<Derived> pow(const RealScalar& p) const;
|
||||
const MatrixComplexPowerReturnValue<Derived> pow(const std::complex<RealScalar>& p) const;
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
Derived& operator+=(const Flagged<ProductBase<ProductDerived, Lhs,Rhs>, 0,
|
||||
EvalBeforeAssigningBit>& other);
|
||||
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
Derived& operator-=(const Flagged<ProductBase<ProductDerived, Lhs,Rhs>, 0,
|
||||
EvalBeforeAssigningBit>& other);
|
||||
|
||||
/** \deprecated because .lazy() is deprecated
|
||||
* Overloaded for cache friendly product evaluation */
|
||||
template<typename OtherDerived>
|
||||
Derived& lazyAssign(const Flagged<OtherDerived, 0, EvalBeforeAssigningBit>& other)
|
||||
{ return lazyAssign(other._expression()); }
|
||||
|
||||
template<unsigned int Added>
|
||||
const Flagged<Derived, Added, 0> marked() const;
|
||||
const Flagged<Derived, 0, EvalBeforeAssigningBit> lazy() const;
|
||||
|
||||
inline const Cwise<Derived> cwise() const;
|
||||
inline Cwise<Derived> cwise();
|
||||
|
||||
VectorBlock<Derived> start(Index size);
|
||||
const VectorBlock<const Derived> start(Index size) const;
|
||||
VectorBlock<Derived> end(Index size);
|
||||
const VectorBlock<const Derived> end(Index size) const;
|
||||
template<int Size> VectorBlock<Derived,Size> start();
|
||||
template<int Size> const VectorBlock<const Derived,Size> start() const;
|
||||
template<int Size> VectorBlock<Derived,Size> end();
|
||||
template<int Size> const VectorBlock<const Derived,Size> end() const;
|
||||
|
||||
Minor<Derived> minor(Index row, Index col);
|
||||
const Minor<Derived> minor(Index row, Index col) const;
|
||||
#endif
|
||||
|
||||
protected:
|
||||
EIGEN_DEVICE_FUNC MatrixBase() : Base() {}
|
||||
MatrixBase() : Base() {}
|
||||
|
||||
private:
|
||||
EIGEN_DEVICE_FUNC explicit MatrixBase(int);
|
||||
EIGEN_DEVICE_FUNC MatrixBase(int,int);
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase<OtherDerived>&);
|
||||
explicit MatrixBase(int);
|
||||
MatrixBase(int,int);
|
||||
template<typename OtherDerived> explicit MatrixBase(const MatrixBase<OtherDerived>&);
|
||||
protected:
|
||||
// mixing arrays and matrices is not legal
|
||||
template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
|
||||
|
||||
@@ -37,13 +37,11 @@ class NoAlias
|
||||
/** Behaves like MatrixBase::lazyAssign(other)
|
||||
* \sa MatrixBase::lazyAssign() */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
|
||||
{ return internal::assign_selector<ExpressionType,OtherDerived,false>::run(m_expression,other.derived()); }
|
||||
|
||||
/** \sa MatrixBase::operator+= */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
|
||||
{
|
||||
typedef SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
|
||||
@@ -56,7 +54,6 @@ class NoAlias
|
||||
|
||||
/** \sa MatrixBase::operator-= */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
|
||||
{
|
||||
typedef SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
|
||||
@@ -69,12 +66,10 @@ class NoAlias
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
|
||||
{ other.derived().addTo(m_expression); return m_expression; }
|
||||
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
|
||||
{ other.derived().subTo(m_expression); return m_expression; }
|
||||
|
||||
@@ -83,7 +78,6 @@ class NoAlias
|
||||
{ return m_expression.derived() += CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
|
||||
|
||||
template<typename Lhs, typename Rhs, int NestingFlags>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator-=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other)
|
||||
{ return m_expression.derived() -= CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
|
||||
|
||||
@@ -92,7 +86,6 @@ class NoAlias
|
||||
{ return m_expression = func; }
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
ExpressionType& expression() const
|
||||
{
|
||||
return m_expression;
|
||||
|
||||
@@ -68,16 +68,7 @@ template<typename T> struct GenericNumTraits
|
||||
>::type NonInteger;
|
||||
typedef T Nested;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Real epsilon()
|
||||
{
|
||||
#if defined(__CUDA_ARCH__)
|
||||
return internal::device::numeric_limits<T>::epsilon();
|
||||
#else
|
||||
return std::numeric_limits<T>::epsilon();
|
||||
#endif
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Real epsilon() { return std::numeric_limits<T>::epsilon(); }
|
||||
static inline Real dummy_precision()
|
||||
{
|
||||
// make sure to override this for floating-point types
|
||||
@@ -85,6 +76,13 @@ template<typename T> struct GenericNumTraits
|
||||
}
|
||||
static inline T highest() { return (std::numeric_limits<T>::max)(); }
|
||||
static inline T lowest() { return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)()); }
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
enum {
|
||||
HasFloatingPoint = !IsInteger
|
||||
};
|
||||
typedef NonInteger FloatingPoint;
|
||||
#endif
|
||||
};
|
||||
|
||||
template<typename T> struct NumTraits : GenericNumTraits<T>
|
||||
@@ -93,13 +91,11 @@ template<typename T> struct NumTraits : GenericNumTraits<T>
|
||||
template<> struct NumTraits<float>
|
||||
: GenericNumTraits<float>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline float dummy_precision() { return 1e-5f; }
|
||||
};
|
||||
|
||||
template<> struct NumTraits<double> : GenericNumTraits<double>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline double dummy_precision() { return 1e-12; }
|
||||
};
|
||||
|
||||
|
||||
@@ -66,11 +66,11 @@ class PermutationBase : public EigenBase<Derived>
|
||||
MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = Traits::MaxColsAtCompileTime
|
||||
};
|
||||
typedef typename Traits::StorageIndexType StorageIndexType;
|
||||
typedef typename Traits::Scalar Scalar;
|
||||
typedef typename Traits::Index Index;
|
||||
typedef Matrix<StorageIndexType,RowsAtCompileTime,ColsAtCompileTime,0,MaxRowsAtCompileTime,MaxColsAtCompileTime>
|
||||
typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime,0,MaxRowsAtCompileTime,MaxColsAtCompileTime>
|
||||
DenseMatrixType;
|
||||
typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,StorageIndexType>
|
||||
typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,Index>
|
||||
PlainPermutationType;
|
||||
using Base::derived;
|
||||
#endif
|
||||
@@ -147,7 +147,7 @@ class PermutationBase : public EigenBase<Derived>
|
||||
/** Sets *this to be the identity permutation matrix */
|
||||
void setIdentity()
|
||||
{
|
||||
for(StorageIndexType i = 0; i < size(); ++i)
|
||||
for(Index i = 0; i < size(); ++i)
|
||||
indices().coeffRef(i) = i;
|
||||
}
|
||||
|
||||
@@ -173,8 +173,8 @@ class PermutationBase : public EigenBase<Derived>
|
||||
eigen_assert(i>=0 && j>=0 && i<size() && j<size());
|
||||
for(Index k = 0; k < size(); ++k)
|
||||
{
|
||||
if(indices().coeff(k) == i) indices().coeffRef(k) = StorageIndexType(j);
|
||||
else if(indices().coeff(k) == j) indices().coeffRef(k) = StorageIndexType(i);
|
||||
if(indices().coeff(k) == i) indices().coeffRef(k) = j;
|
||||
else if(indices().coeff(k) == j) indices().coeffRef(k) = i;
|
||||
}
|
||||
return derived();
|
||||
}
|
||||
@@ -250,6 +250,35 @@ class PermutationBase : public EigenBase<Derived>
|
||||
template<typename Other> friend
|
||||
inline PlainPermutationType operator*(const Transpose<PermutationBase<Other> >& other, const PermutationBase& perm)
|
||||
{ return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); }
|
||||
|
||||
/** \returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the permutation.
|
||||
*
|
||||
* This function is an O(\c n) procedure that allocates a buffer of \c n booleans.
|
||||
*/
|
||||
Index determinant() const
|
||||
{
|
||||
Index res = 1;
|
||||
Index n = size();
|
||||
Matrix<bool,RowsAtCompileTime,1,0,MaxRowsAtCompileTime> mask(n);
|
||||
mask.fill(false);
|
||||
Index r = 0;
|
||||
while(r < n)
|
||||
{
|
||||
// search for the next seed
|
||||
while(r<n && mask[r]) r++;
|
||||
if(r>=n)
|
||||
break;
|
||||
// we got one, let's follow it until we are back to the seed
|
||||
Index k0 = r++;
|
||||
mask.coeffRef(k0) = true;
|
||||
for(Index k=indices().coeff(k0); k!=k0; k=indices().coeff(k))
|
||||
{
|
||||
mask.coeffRef(k) = true;
|
||||
res = -res;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
@@ -262,7 +291,7 @@ class PermutationBase : public EigenBase<Derived>
|
||||
*
|
||||
* \param SizeAtCompileTime the number of rows/cols, or Dynamic
|
||||
* \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
|
||||
* \param StorageIndexType the integer type of the indices
|
||||
* \param IndexType the integer type of the indices
|
||||
*
|
||||
* This class represents a permutation matrix, internally stored as a vector of integers.
|
||||
*
|
||||
@@ -270,18 +299,17 @@ class PermutationBase : public EigenBase<Derived>
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType>
|
||||
struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType> >
|
||||
: traits<Matrix<_StorageIndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
|
||||
struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
|
||||
: traits<Matrix<IndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
|
||||
{
|
||||
typedef Matrix<_StorageIndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
typedef _StorageIndexType StorageIndexType;
|
||||
typedef IndexType Index;
|
||||
typedef Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
|
||||
};
|
||||
}
|
||||
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType>
|
||||
class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
|
||||
class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
|
||||
{
|
||||
typedef PermutationBase<PermutationMatrix> Base;
|
||||
typedef internal::traits<PermutationMatrix> Traits;
|
||||
@@ -289,8 +317,6 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename Traits::StorageIndexType StorageIndexType;
|
||||
typedef typename Traits::Index Index;
|
||||
#endif
|
||||
|
||||
inline PermutationMatrix()
|
||||
@@ -298,7 +324,7 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
|
||||
|
||||
/** Constructs an uninitialized permutation matrix of given size.
|
||||
*/
|
||||
inline PermutationMatrix(Index size) : m_indices(size)
|
||||
inline PermutationMatrix(int size) : m_indices(size)
|
||||
{}
|
||||
|
||||
/** Copy constructor. */
|
||||
@@ -387,19 +413,18 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
|
||||
|
||||
|
||||
namespace internal {
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType, int _PacketAccess>
|
||||
struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType>,_PacketAccess> >
|
||||
: traits<Matrix<_StorageIndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
|
||||
struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess> >
|
||||
: traits<Matrix<IndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
|
||||
{
|
||||
typedef Map<const Matrix<_StorageIndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
typedef _StorageIndexType StorageIndexType;
|
||||
typedef IndexType Index;
|
||||
typedef Map<const Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
|
||||
};
|
||||
}
|
||||
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType, int _PacketAccess>
|
||||
class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType>,_PacketAccess>
|
||||
: public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType>,_PacketAccess> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
|
||||
class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess>
|
||||
: public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess> >
|
||||
{
|
||||
typedef PermutationBase<Map> Base;
|
||||
typedef internal::traits<Map> Traits;
|
||||
@@ -407,15 +432,14 @@ class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageInd
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename IndicesType::Scalar StorageIndexType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
typedef typename IndicesType::Scalar Index;
|
||||
#endif
|
||||
|
||||
inline Map(const StorageIndexType* indicesPtr)
|
||||
inline Map(const Index* indicesPtr)
|
||||
: m_indices(indicesPtr)
|
||||
{}
|
||||
|
||||
inline Map(const StorageIndexType* indicesPtr, Index size)
|
||||
inline Map(const Index* indicesPtr, Index size)
|
||||
: m_indices(indicesPtr,size)
|
||||
{}
|
||||
|
||||
@@ -471,8 +495,7 @@ struct traits<PermutationWrapper<_IndicesType> >
|
||||
{
|
||||
typedef PermutationStorage StorageKind;
|
||||
typedef typename _IndicesType::Scalar Scalar;
|
||||
typedef typename _IndicesType::Scalar StorageIndexType;
|
||||
typedef typename _IndicesType::Index Index;
|
||||
typedef typename _IndicesType::Scalar Index;
|
||||
typedef _IndicesType IndicesType;
|
||||
enum {
|
||||
RowsAtCompileTime = _IndicesType::SizeAtCompileTime,
|
||||
@@ -561,7 +584,10 @@ struct permut_matrix_product_retval
|
||||
const Index n = Side==OnTheLeft ? rows() : cols();
|
||||
// FIXME we need an is_same for expression that is not sensitive to constness. For instance
|
||||
// is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
|
||||
if(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix))
|
||||
if( is_same<MatrixTypeNestedCleaned,Dest>::value
|
||||
&& blas_traits<MatrixTypeNestedCleaned>::HasUsableDirectAccess
|
||||
&& blas_traits<Dest>::HasUsableDirectAccess
|
||||
&& extract_data(dst) == extract_data(m_matrix))
|
||||
{
|
||||
// apply the permutation inplace
|
||||
Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
|
||||
|
||||
@@ -28,7 +28,6 @@ namespace internal {
|
||||
|
||||
template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_ALWAYS_INLINE void run(Index, Index)
|
||||
{
|
||||
}
|
||||
@@ -36,7 +35,6 @@ template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
|
||||
|
||||
template<> struct check_rows_cols_for_overflow<Dynamic> {
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols)
|
||||
{
|
||||
// http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
|
||||
@@ -131,17 +129,12 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::Flags & AlignedBit) != 0 };
|
||||
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Base& base() { return *static_cast<Base*>(this); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Base& base() const { return *static_cast<const Base*>(this); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const
|
||||
{
|
||||
if(Flags & RowMajorBit)
|
||||
@@ -150,13 +143,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
return m_storage.data()[rowId + colId * m_storage.rows()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
|
||||
{
|
||||
return m_storage.data()[index];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
if(Flags & RowMajorBit)
|
||||
@@ -165,13 +156,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
return m_storage.data()[rowId + colId * m_storage.rows()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_storage.data()[index];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
if(Flags & RowMajorBit)
|
||||
@@ -180,7 +169,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
return m_storage.data()[rowId + colId * m_storage.rows()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_storage.data()[index];
|
||||
@@ -244,7 +232,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
*
|
||||
* \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void resize(Index nbRows, Index nbCols)
|
||||
{
|
||||
eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,nbRows==RowsAtCompileTime)
|
||||
@@ -275,7 +262,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
*
|
||||
* \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void resize(Index size)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
|
||||
@@ -300,7 +286,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
*
|
||||
* \sa resize(Index,Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void resize(NoChange_t, Index nbCols)
|
||||
{
|
||||
resize(rows(), nbCols);
|
||||
@@ -314,7 +299,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
*
|
||||
* \sa resize(Index,Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void resize(Index nbRows, NoChange_t)
|
||||
{
|
||||
resize(nbRows, cols());
|
||||
@@ -328,7 +312,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* remain row-vectors and vectors remain vectors.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
|
||||
{
|
||||
const OtherDerived& other = _other.derived();
|
||||
@@ -356,7 +339,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* Matrices are resized relative to the top-left element. In case values need to be
|
||||
* appended to the matrix they will be uninitialized.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, Index nbCols)
|
||||
{
|
||||
internal::conservative_resize_like_impl<Derived>::run(*this, nbRows, nbCols);
|
||||
@@ -369,7 +351,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
*
|
||||
* In case the matrix is growing, new rows will be uninitialized.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, NoChange_t)
|
||||
{
|
||||
// Note: see the comment in conservativeResize(Index,Index)
|
||||
@@ -383,7 +364,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
*
|
||||
* In case the matrix is growing, new columns will be uninitialized.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index nbCols)
|
||||
{
|
||||
// Note: see the comment in conservativeResize(Index,Index)
|
||||
@@ -398,7 +378,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
*
|
||||
* When values are appended, they will be uninitialized.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void conservativeResize(Index size)
|
||||
{
|
||||
internal::conservative_resize_like_impl<Derived>::run(*this, size);
|
||||
@@ -414,7 +393,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* appended to the matrix they will be copied from \c other.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
internal::conservative_resize_like_impl<Derived,OtherDerived>::run(*this, other);
|
||||
@@ -423,7 +401,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
/** This is a special case of the templated operator=. Its purpose is to
|
||||
* prevent a default operator= from hiding the templated operator=.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other)
|
||||
{
|
||||
return _set(other);
|
||||
@@ -431,7 +408,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
|
||||
/** \sa MatrixBase::lazyAssign() */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
_resize_to_match(other);
|
||||
@@ -439,14 +415,12 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue<OtherDerived>& func)
|
||||
{
|
||||
resize(func.rows(), func.cols());
|
||||
return Base::operator=(func);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE PlainObjectBase() : m_storage()
|
||||
{
|
||||
// _check_template_params();
|
||||
@@ -456,7 +430,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
// FIXME is it still needed ?
|
||||
/** \internal */
|
||||
EIGEN_DEVICE_FUNC
|
||||
PlainObjectBase(internal::constructor_without_unaligned_array_assert)
|
||||
: m_storage(internal::constructor_without_unaligned_array_assert())
|
||||
{
|
||||
@@ -465,13 +438,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
PlainObjectBase(PlainObjectBase&& other)
|
||||
: m_storage( std::move(other.m_storage) )
|
||||
{
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
PlainObjectBase& operator=(PlainObjectBase&& other)
|
||||
{
|
||||
using std::swap;
|
||||
@@ -480,7 +451,22 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
/** Copy constructor */
|
||||
EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other)
|
||||
: m_storage()
|
||||
{
|
||||
_check_template_params();
|
||||
lazyAssign(other);
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived> &other)
|
||||
: m_storage()
|
||||
{
|
||||
_check_template_params();
|
||||
lazyAssign(other);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE PlainObjectBase(Index a_size, Index nbRows, Index nbCols)
|
||||
: m_storage(a_size, nbRows, nbCols)
|
||||
{
|
||||
@@ -491,7 +477,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
/** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
_resize_to_match(other);
|
||||
@@ -501,7 +486,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
|
||||
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
|
||||
: m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
|
||||
{
|
||||
@@ -584,16 +568,16 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
//@}
|
||||
|
||||
using Base::setConstant;
|
||||
EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& value);
|
||||
EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& value);
|
||||
Derived& setConstant(Index size, const Scalar& value);
|
||||
Derived& setConstant(Index rows, Index cols, const Scalar& value);
|
||||
|
||||
using Base::setZero;
|
||||
EIGEN_DEVICE_FUNC Derived& setZero(Index size);
|
||||
EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols);
|
||||
Derived& setZero(Index size);
|
||||
Derived& setZero(Index rows, Index cols);
|
||||
|
||||
using Base::setOnes;
|
||||
EIGEN_DEVICE_FUNC Derived& setOnes(Index size);
|
||||
EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols);
|
||||
Derived& setOnes(Index size);
|
||||
Derived& setOnes(Index rows, Index cols);
|
||||
|
||||
using Base::setRandom;
|
||||
Derived& setRandom(Index size);
|
||||
@@ -612,7 +596,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* remain row-vectors and vectors remain vectors.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase<OtherDerived>& other)
|
||||
{
|
||||
#ifdef EIGEN_NO_AUTOMATIC_RESIZING
|
||||
@@ -620,6 +603,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
: (rows() == other.rows() && cols() == other.cols())))
|
||||
&& "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(other);
|
||||
if(this->size()==0)
|
||||
resizeLike(other);
|
||||
#else
|
||||
resizeLike(other);
|
||||
#endif
|
||||
@@ -640,7 +625,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* \internal
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
_set_selector(other.derived(), typename internal::conditional<static_cast<bool>(int(OtherDerived::Flags) & EvalBeforeAssigningBit), internal::true_type, internal::false_type>::type());
|
||||
@@ -648,11 +632,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::true_type&) { _set_noalias(other.eval()); }
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::false_type&) { _set_noalias(other); }
|
||||
|
||||
/** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which
|
||||
@@ -661,7 +643,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* \sa operator=(const MatrixBase<OtherDerived>&), _set()
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
// I don't think we need this resize call since the lazyAssign will anyways resize
|
||||
@@ -673,7 +654,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
}
|
||||
|
||||
template<typename T0, typename T1>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init2(Index nbRows, Index nbCols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
|
||||
@@ -681,91 +661,13 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
|
||||
resize(nbRows,nbCols);
|
||||
}
|
||||
|
||||
template<typename T0, typename T1>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init2(const Scalar& val0, const Scalar& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
|
||||
m_storage.data()[0] = val0;
|
||||
m_storage.data()[1] = val1;
|
||||
}
|
||||
|
||||
template<typename T0, typename T1>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1,
|
||||
typename internal::enable_if< (!internal::is_same<Index,Scalar>::value)
|
||||
&& (internal::is_same<T0,Index>::value)
|
||||
&& (internal::is_same<T1,Index>::value)
|
||||
&& Base::SizeAtCompileTime==2,T1>::type* = 0)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
|
||||
m_storage.data()[0] = Scalar(val0);
|
||||
m_storage.data()[1] = Scalar(val1);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if<Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value,T>::type* = 0)
|
||||
{
|
||||
// NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
|
||||
const bool is_integer = NumTraits<T>::IsInteger;
|
||||
EIGEN_STATIC_ASSERT(is_integer,
|
||||
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
|
||||
resize(size);
|
||||
}
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if<Base::SizeAtCompileTime==1 && internal::is_convertible<T, Scalar>::value,T>::type* = 0)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
|
||||
m_storage.data()[0] = val0;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const Index& val0,
|
||||
typename internal::enable_if< (!internal::is_same<Index,Scalar>::value)
|
||||
&& (internal::is_same<Index,T>::value)
|
||||
&& Base::SizeAtCompileTime==1
|
||||
&& internal::is_convertible<T, Scalar>::value,T*>::type* = 0)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
|
||||
m_storage.data()[0] = Scalar(val0);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const Scalar* data){
|
||||
this->_set_noalias(ConstMapType(data));
|
||||
}
|
||||
|
||||
template<typename T, typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const DenseBase<OtherDerived>& other){
|
||||
this->_set_noalias(other);
|
||||
}
|
||||
|
||||
template<typename T, typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const EigenBase<OtherDerived>& other){
|
||||
this->derived() = other;
|
||||
}
|
||||
|
||||
template<typename T, typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const ReturnByValue<OtherDerived>& other)
|
||||
{
|
||||
resize(other.rows(), other.cols());
|
||||
other.evalTo(this->derived());
|
||||
}
|
||||
|
||||
template<typename T, typename OtherDerived, int ColsAtCompileTime>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
|
||||
{
|
||||
this->derived() = r;
|
||||
}
|
||||
|
||||
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
|
||||
friend struct internal::matrix_swap_impl;
|
||||
@@ -774,7 +676,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* data pointers.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void _swap(DenseBase<OtherDerived> const & other)
|
||||
{
|
||||
enum { SwapPointers = internal::is_same<Derived, OtherDerived>::value && Base::SizeAtCompileTime==Dynamic };
|
||||
@@ -783,7 +684,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
|
||||
public:
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void _check_template_params()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
|
||||
@@ -900,7 +800,6 @@ struct conservative_resize_like_impl<Derived,OtherDerived,true>
|
||||
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
|
||||
struct matrix_swap_impl
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(MatrixTypeA& a, MatrixTypeB& b)
|
||||
{
|
||||
a.base().swap(b);
|
||||
@@ -910,7 +809,6 @@ struct matrix_swap_impl
|
||||
template<typename MatrixTypeA, typename MatrixTypeB>
|
||||
struct matrix_swap_impl<MatrixTypeA, MatrixTypeB, true>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(MatrixTypeA& a, MatrixTypeB& b)
|
||||
{
|
||||
static_cast<typename MatrixTypeA::Base&>(a).m_storage.swap(static_cast<typename MatrixTypeB::Base&>(b).m_storage);
|
||||
|
||||
@@ -1,107 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_PRODUCT_H
|
||||
#define EIGEN_PRODUCT_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
template<typename Lhs, typename Rhs> class Product;
|
||||
template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl;
|
||||
|
||||
/** \class Product
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the product of two arbitrary matrices or vectors
|
||||
*
|
||||
* \param Lhs the type of the left-hand side expression
|
||||
* \param Rhs the type of the right-hand side expression
|
||||
*
|
||||
* This class represents an expression of the product of two arbitrary matrices.
|
||||
*
|
||||
*/
|
||||
|
||||
// Use ProductReturnType to get correct traits, in particular vectorization flags
|
||||
namespace internal {
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct traits<Product<Lhs, Rhs> >
|
||||
: traits<typename ProductReturnType<Lhs, Rhs>::Type>
|
||||
{
|
||||
// We want A+B*C to be of type Product<Matrix, Sum> and not Product<Matrix, Matrix>
|
||||
// TODO: This flag should eventually go in a separate evaluator traits class
|
||||
enum {
|
||||
Flags = traits<typename ProductReturnType<Lhs, Rhs>::Type>::Flags & ~(EvalBeforeNestingBit | DirectAccessBit)
|
||||
};
|
||||
};
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
|
||||
typename internal::traits<Rhs>::StorageKind>::ret>
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename ProductImpl<
|
||||
Lhs, Rhs,
|
||||
typename internal::promote_storage_type<typename Lhs::StorageKind,
|
||||
typename Rhs::StorageKind>::ret>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
|
||||
|
||||
typedef typename Lhs::Nested LhsNested;
|
||||
typedef typename Rhs::Nested RhsNested;
|
||||
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
|
||||
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
|
||||
|
||||
Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
|
||||
{
|
||||
eigen_assert(lhs.cols() == rhs.rows()
|
||||
&& "invalid matrix product"
|
||||
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
|
||||
}
|
||||
|
||||
inline Index rows() const { return m_lhs.rows(); }
|
||||
inline Index cols() const { return m_rhs.cols(); }
|
||||
|
||||
const LhsNestedCleaned& lhs() const { return m_lhs; }
|
||||
const RhsNestedCleaned& rhs() const { return m_rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
LhsNested m_lhs;
|
||||
RhsNested m_rhs;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
class ProductImpl<Lhs,Rhs,Dense> : public internal::dense_xpr_base<Product<Lhs,Rhs> >::type
|
||||
{
|
||||
typedef Product<Lhs, Rhs> Derived;
|
||||
public:
|
||||
|
||||
typedef typename internal::dense_xpr_base<Product<Lhs, Rhs> >::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of matrix base methods
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
/** \internal used to test the evaluator only
|
||||
*/
|
||||
template<typename Lhs,typename Rhs>
|
||||
const Product<Lhs,Rhs>
|
||||
prod(const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
return Product<Lhs,Rhs>(lhs,rhs);
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PRODUCT_H
|
||||
@@ -85,7 +85,14 @@ class ProductBase : public MatrixBase<Derived>
|
||||
|
||||
public:
|
||||
|
||||
#ifndef EIGEN_NO_MALLOC
|
||||
typedef typename Base::PlainObject BasePlainObject;
|
||||
typedef Matrix<Scalar,RowsAtCompileTime==1?1:Dynamic,ColsAtCompileTime==1?1:Dynamic,BasePlainObject::Options> DynPlainObject;
|
||||
typedef typename internal::conditional<(BasePlainObject::SizeAtCompileTime==Dynamic) || (BasePlainObject::SizeAtCompileTime*int(sizeof(Scalar)) < int(EIGEN_STACK_ALLOCATION_LIMIT)),
|
||||
BasePlainObject, DynPlainObject>::type PlainObject;
|
||||
#else
|
||||
typedef typename Base::PlainObject PlainObject;
|
||||
#endif
|
||||
|
||||
ProductBase(const Lhs& a_lhs, const Rhs& a_rhs)
|
||||
: m_lhs(a_lhs), m_rhs(a_rhs)
|
||||
@@ -131,13 +138,17 @@ class ProductBase : public MatrixBase<Derived>
|
||||
const Diagonal<FullyLazyCoeffBaseProductType,Dynamic> diagonal(Index index) const
|
||||
{ return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); }
|
||||
|
||||
// restrict coeff accessors to 1x1 expressions. No need to care about mutators here since this isn't an Lvalue expression
|
||||
// restrict coeff accessors to 1x1 expressions. No need to care about mutators here since this isnt a Lvalue expression
|
||||
typename Base::CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
return lhs().row(row).cwiseProduct(rhs().col(col).transpose()).sum();
|
||||
#else
|
||||
EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
|
||||
eigen_assert(this->rows() == 1 && this->cols() == 1);
|
||||
Matrix<Scalar,1,1> result = *this;
|
||||
return result.coeff(row,col);
|
||||
#endif
|
||||
}
|
||||
|
||||
typename Base::CoeffReturnType coeff(Index i) const
|
||||
@@ -176,7 +187,12 @@ namespace internal {
|
||||
template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject>
|
||||
struct nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject>
|
||||
{
|
||||
typedef PlainObject const& type;
|
||||
typedef typename GeneralProduct<Lhs,Rhs,Mode>::PlainObject const& type;
|
||||
};
|
||||
template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject>
|
||||
struct nested<const GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject>
|
||||
{
|
||||
typedef typename GeneralProduct<Lhs,Rhs,Mode>::PlainObject const& type;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -1,411 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
|
||||
#ifndef EIGEN_PRODUCTEVALUATORS_H
|
||||
#define EIGEN_PRODUCTEVALUATORS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// We can evaluate the product either all at once, like GeneralProduct and its evalTo() function, or
|
||||
// traverse the matrix coefficient by coefficient, like CoeffBasedProduct. Use the existing logic
|
||||
// in ProductReturnType to decide.
|
||||
|
||||
template<typename XprType, typename ProductType>
|
||||
struct product_evaluator_dispatcher;
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct evaluator_impl<Product<Lhs, Rhs> >
|
||||
: product_evaluator_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type>
|
||||
{
|
||||
typedef Product<Lhs, Rhs> XprType;
|
||||
typedef product_evaluator_dispatcher<XprType, typename ProductReturnType<Lhs, Rhs>::Type> Base;
|
||||
|
||||
evaluator_impl(const XprType& xpr) : Base(xpr)
|
||||
{ }
|
||||
};
|
||||
|
||||
template<typename XprType, typename ProductType>
|
||||
struct product_evaluator_traits_dispatcher;
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct evaluator_traits<Product<Lhs, Rhs> >
|
||||
: product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type>
|
||||
{
|
||||
static const int AssumeAliasing = 1;
|
||||
};
|
||||
|
||||
// Case 1: Evaluate all at once
|
||||
//
|
||||
// We can view the GeneralProduct class as a part of the product evaluator.
|
||||
// Four sub-cases: InnerProduct, OuterProduct, GemmProduct and GemvProduct.
|
||||
// InnerProduct is special because GeneralProduct does not have an evalTo() method in this case.
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> >
|
||||
{
|
||||
static const int HasEvalTo = 0;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> >
|
||||
: public evaluator<typename Product<Lhs, Rhs>::PlainObject>::type
|
||||
{
|
||||
typedef Product<Lhs, Rhs> XprType;
|
||||
typedef typename XprType::PlainObject PlainObject;
|
||||
typedef typename evaluator<PlainObject>::type evaluator_base;
|
||||
|
||||
// TODO: Computation is too early (?)
|
||||
product_evaluator_dispatcher(const XprType& xpr) : evaluator_base(m_result)
|
||||
{
|
||||
m_result.coeffRef(0,0) = (xpr.lhs().transpose().cwiseProduct(xpr.rhs())).sum();
|
||||
}
|
||||
|
||||
protected:
|
||||
PlainObject m_result;
|
||||
};
|
||||
|
||||
// For the other three subcases, simply call the evalTo() method of GeneralProduct
|
||||
// TODO: GeneralProduct should take evaluators, not expression objects.
|
||||
|
||||
template<typename Lhs, typename Rhs, int ProductType>
|
||||
struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> >
|
||||
{
|
||||
static const int HasEvalTo = 1;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int ProductType>
|
||||
struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> >
|
||||
{
|
||||
typedef Product<Lhs, Rhs> XprType;
|
||||
typedef typename XprType::PlainObject PlainObject;
|
||||
typedef typename evaluator<PlainObject>::type evaluator_base;
|
||||
|
||||
product_evaluator_dispatcher(const XprType& xpr) : m_xpr(xpr)
|
||||
{ }
|
||||
|
||||
template<typename DstEvaluatorType, typename DstXprType>
|
||||
void evalTo(DstEvaluatorType /* not used */, DstXprType& dst) const
|
||||
{
|
||||
dst.resize(m_xpr.rows(), m_xpr.cols());
|
||||
GeneralProduct<Lhs, Rhs, ProductType>(m_xpr.lhs(), m_xpr.rhs()).evalTo(dst);
|
||||
}
|
||||
|
||||
protected:
|
||||
const XprType& m_xpr;
|
||||
};
|
||||
|
||||
// Case 2: Evaluate coeff by coeff
|
||||
//
|
||||
// This is mostly taken from CoeffBasedProduct.h
|
||||
// The main difference is that we add an extra argument to the etor_product_*_impl::run() function
|
||||
// for the inner dimension of the product, because evaluator objects do not know their size.
|
||||
|
||||
template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct etor_product_coeff_impl;
|
||||
|
||||
template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl;
|
||||
|
||||
template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags>
|
||||
struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> >
|
||||
{
|
||||
static const int HasEvalTo = 0;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags>
|
||||
struct product_evaluator_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> >
|
||||
: evaluator_impl_base<Product<Lhs, Rhs> >
|
||||
{
|
||||
typedef Product<Lhs, Rhs> XprType;
|
||||
typedef CoeffBasedProduct<LhsNested, RhsNested, Flags> CoeffBasedProductType;
|
||||
|
||||
product_evaluator_dispatcher(const XprType& xpr)
|
||||
: m_lhsImpl(xpr.lhs()),
|
||||
m_rhsImpl(xpr.rhs()),
|
||||
m_innerDim(xpr.lhs().cols())
|
||||
{ }
|
||||
|
||||
typedef typename XprType::Index Index;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
|
||||
// Everything below here is taken from CoeffBasedProduct.h
|
||||
|
||||
enum {
|
||||
RowsAtCompileTime = traits<CoeffBasedProductType>::RowsAtCompileTime,
|
||||
PacketSize = packet_traits<Scalar>::size,
|
||||
InnerSize = traits<CoeffBasedProductType>::InnerSize,
|
||||
CoeffReadCost = traits<CoeffBasedProductType>::CoeffReadCost,
|
||||
Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
|
||||
CanVectorizeInner = traits<CoeffBasedProductType>::CanVectorizeInner
|
||||
};
|
||||
|
||||
typedef typename evaluator<Lhs>::type LhsEtorType;
|
||||
typedef typename evaluator<Rhs>::type RhsEtorType;
|
||||
typedef etor_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
|
||||
Unroll ? InnerSize-1 : Dynamic,
|
||||
LhsEtorType, RhsEtorType, Scalar> CoeffImpl;
|
||||
|
||||
const CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
Scalar res;
|
||||
CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
|
||||
* which is why we don't set the LinearAccessBit.
|
||||
*/
|
||||
const CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
Scalar res;
|
||||
const Index row = RowsAtCompileTime == 1 ? 0 : index;
|
||||
const Index col = RowsAtCompileTime == 1 ? index : 0;
|
||||
CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
const PacketReturnType packet(Index row, Index col) const
|
||||
{
|
||||
PacketScalar res;
|
||||
typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
|
||||
Unroll ? InnerSize-1 : Dynamic,
|
||||
LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl;
|
||||
PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
|
||||
return res;
|
||||
}
|
||||
|
||||
protected:
|
||||
typename evaluator<Lhs>::type m_lhsImpl;
|
||||
typename evaluator<Rhs>::type m_rhsImpl;
|
||||
|
||||
// TODO: Get rid of m_innerDim if known at compile time
|
||||
Index m_innerDim;
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Normal product .coeff() implementation (with meta-unrolling)
|
||||
***************************************************************************/
|
||||
|
||||
/**************************************
|
||||
*** Scalar path - no vectorization ***
|
||||
**************************************/
|
||||
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct etor_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
|
||||
{
|
||||
etor_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, innerDim, res);
|
||||
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct etor_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res)
|
||||
{
|
||||
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct etor_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res)
|
||||
{
|
||||
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
|
||||
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
|
||||
for(Index i = 1; i < innerDim; ++i)
|
||||
res += lhs.coeff(row, i) * rhs.coeff(i, col);
|
||||
}
|
||||
};
|
||||
|
||||
/*******************************************
|
||||
*** Scalar path with inner vectorization ***
|
||||
*******************************************/
|
||||
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
|
||||
struct etor_product_coeff_vectorized_unroller
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres)
|
||||
{
|
||||
etor_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
|
||||
pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet>
|
||||
struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres)
|
||||
{
|
||||
pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
|
||||
}
|
||||
};
|
||||
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct etor_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::PacketScalar Packet;
|
||||
typedef typename Lhs::Index Index;
|
||||
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
|
||||
{
|
||||
Packet pres;
|
||||
etor_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
|
||||
etor_product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, innerDim, res);
|
||||
res = predux(pres);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
|
||||
struct etor_product_coeff_vectorized_dyn_selector
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
|
||||
}
|
||||
};
|
||||
|
||||
// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
|
||||
// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
|
||||
template<typename Lhs, typename Rhs, int RhsCols>
|
||||
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int LhsRows>
|
||||
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.transpose().cwiseProduct(rhs).sum();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct etor_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res)
|
||||
{
|
||||
etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, innerDim, res);
|
||||
}
|
||||
};
|
||||
|
||||
/*******************
|
||||
*** Packet path ***
|
||||
*******************/
|
||||
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
||||
{
|
||||
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
|
||||
}
|
||||
};
|
||||
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
||||
{
|
||||
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
||||
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
||||
{
|
||||
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
|
||||
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
|
||||
for(Index i = 1; i < innerDim; ++i)
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
||||
{
|
||||
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
|
||||
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
|
||||
for(Index i = 1; i < innerDim; ++i)
|
||||
res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PRODUCTEVALUATORS_H
|
||||
@@ -28,18 +28,12 @@ struct functor_traits<scalar_random_op<Scalar> >
|
||||
|
||||
/** \returns a random matrix expression
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* The parameters \a rows and \a cols are the number of rows and of columns of
|
||||
* the returned matrix. Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* \not_reentrant
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
|
||||
* it is redundant to pass \a rows and \a cols as arguments, so Random() should be used
|
||||
* instead.
|
||||
*
|
||||
*
|
||||
* Example: \include MatrixBase_random_int_int.cpp
|
||||
* Output: \verbinclude MatrixBase_random_int_int.out
|
||||
@@ -47,10 +41,8 @@ struct functor_traits<scalar_random_op<Scalar> >
|
||||
* This expression has the "evaluate before nesting" flag so that it will be evaluated into
|
||||
* a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
|
||||
* behavior with expressions involving random matrices.
|
||||
*
|
||||
* See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators.
|
||||
*
|
||||
* \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random()
|
||||
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index), MatrixBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
|
||||
@@ -60,15 +52,11 @@ DenseBase<Derived>::Random(Index rows, Index cols)
|
||||
}
|
||||
|
||||
/** \returns a random vector expression
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* The parameter \a size is the size of the returned vector.
|
||||
* Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* \only_for_vectors
|
||||
* \not_reentrant
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
|
||||
* it is redundant to pass \a size as argument, so Random() should be used
|
||||
@@ -81,7 +69,7 @@ DenseBase<Derived>::Random(Index rows, Index cols)
|
||||
* a temporary vector whenever it is nested in a larger expression. This prevents unexpected
|
||||
* behavior with expressions involving random matrices.
|
||||
*
|
||||
* \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random()
|
||||
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
|
||||
@@ -92,9 +80,6 @@ DenseBase<Derived>::Random(Index size)
|
||||
|
||||
/** \returns a fixed-size random matrix or vector expression
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
|
||||
* need to use the variants taking size arguments.
|
||||
*
|
||||
@@ -104,10 +89,8 @@ DenseBase<Derived>::Random(Index size)
|
||||
* This expression has the "evaluate before nesting" flag so that it will be evaluated into
|
||||
* a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
|
||||
* behavior with expressions involving random matrices.
|
||||
*
|
||||
* \not_reentrant
|
||||
*
|
||||
* \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index)
|
||||
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random(Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
|
||||
@@ -118,11 +101,6 @@ DenseBase<Derived>::Random()
|
||||
|
||||
/** Sets all coefficients in this expression to random values.
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* \not_reentrant
|
||||
*
|
||||
* Example: \include MatrixBase_setRandom.cpp
|
||||
* Output: \verbinclude MatrixBase_setRandom.out
|
||||
*
|
||||
@@ -136,16 +114,12 @@ inline Derived& DenseBase<Derived>::setRandom()
|
||||
|
||||
/** Resizes to the given \a newSize, and sets all coefficients in this expression to random values.
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* \only_for_vectors
|
||||
* \not_reentrant
|
||||
*
|
||||
* Example: \include Matrix_setRandom_int.cpp
|
||||
* Output: \verbinclude Matrix_setRandom_int.out
|
||||
*
|
||||
* \sa DenseBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, DenseBase::Random()
|
||||
* \sa MatrixBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, MatrixBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
@@ -157,18 +131,13 @@ PlainObjectBase<Derived>::setRandom(Index newSize)
|
||||
|
||||
/** Resizes to the given size, and sets all coefficients in this expression to random values.
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* \not_reentrant
|
||||
*
|
||||
* \param nbRows the new number of rows
|
||||
* \param nbCols the new number of columns
|
||||
*
|
||||
* Example: \include Matrix_setRandom_int_int.cpp
|
||||
* Output: \verbinclude Matrix_setRandom_int_int.out
|
||||
*
|
||||
* \sa DenseBase::setRandom(), setRandom(Index), class CwiseNullaryOp, DenseBase::Random()
|
||||
* \sa MatrixBase::setRandom(), setRandom(Index), class CwiseNullaryOp, MatrixBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
|
||||
@@ -82,7 +82,6 @@ struct redux_novec_unroller
|
||||
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
|
||||
{
|
||||
return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
|
||||
@@ -100,7 +99,6 @@ struct redux_novec_unroller<Func, Derived, Start, 1>
|
||||
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
|
||||
{
|
||||
return mat.coeffByOuterInner(outer, inner);
|
||||
@@ -114,7 +112,6 @@ template<typename Func, typename Derived, int Start>
|
||||
struct redux_novec_unroller<Func, Derived, Start, 0>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
|
||||
};
|
||||
|
||||
@@ -173,7 +170,6 @@ struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename Derived::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
|
||||
{
|
||||
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
|
||||
@@ -207,7 +203,7 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
|
||||
const Index packetSize = packet_traits<Scalar>::size;
|
||||
const Index alignedStart = internal::first_aligned(mat);
|
||||
enum {
|
||||
alignment = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) || bool(Derived::Flags & AlignedBit)
|
||||
alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
|
||||
? Aligned : Unaligned
|
||||
};
|
||||
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
|
||||
@@ -251,8 +247,9 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Func, typename Derived>
|
||||
struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
|
||||
// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
|
||||
template<typename Func, typename Derived, int Unrolling>
|
||||
struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename packet_traits<Scalar>::type PacketScalar;
|
||||
@@ -303,15 +300,10 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
|
||||
{
|
||||
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
|
||||
if (VectorizedSize > 0) {
|
||||
Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
|
||||
if (VectorizedSize != Size)
|
||||
res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
|
||||
return res;
|
||||
}
|
||||
else {
|
||||
return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
|
||||
}
|
||||
Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
|
||||
if (VectorizedSize != Size)
|
||||
res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -19,17 +19,17 @@ template<typename PlainObjectType, int Options = 0,
|
||||
/** \class Ref
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief A matrix or vector expression mapping an existing expression
|
||||
* \brief A matrix or vector expression mapping an existing expressions
|
||||
*
|
||||
* \tparam PlainObjectType the equivalent matrix type of the mapped data
|
||||
* \tparam Options specifies whether the pointer is \c #Aligned, or \c #Unaligned.
|
||||
* The default is \c #Unaligned.
|
||||
* \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
|
||||
* but accepts a variable outer stride (leading dimension).
|
||||
* but accept a variable outer stride (leading dimension).
|
||||
* This can be overridden by specifying strides.
|
||||
* The type passed here must be a specialization of the Stride template, see examples below.
|
||||
*
|
||||
* This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
|
||||
* This class permits to write non template functions taking Eigen's object as parameters while limiting the number of copies.
|
||||
* A Ref<> object can represent either a const expression or a l-value:
|
||||
* \code
|
||||
* // in-out argument:
|
||||
@@ -39,10 +39,10 @@ template<typename PlainObjectType, int Options = 0,
|
||||
* void foo2(const Ref<const VectorXf>& x);
|
||||
* \endcode
|
||||
*
|
||||
* In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
|
||||
* In the in-out case, the input argument must satisfies the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
|
||||
* By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
|
||||
* Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with
|
||||
* the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)
|
||||
* Likewise, a Ref<MatrixXf> can reference any column major dense matrix expression of float whose column's elements are contiguously stored with
|
||||
* the possibility to have a constant space inbetween each column, i.e.: the inner stride mmust be equal to 1, but the outer-stride (or leading dimension),
|
||||
* can be greater than the number of rows.
|
||||
*
|
||||
* In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
|
||||
@@ -58,15 +58,15 @@ template<typename PlainObjectType, int Options = 0,
|
||||
* foo2(A.col().segment(2,4)); // No temporary
|
||||
* \endcode
|
||||
*
|
||||
* The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters.
|
||||
* The range of inputs that can be referenced without temporary can be enlarged using the last two template parameter.
|
||||
* Here is an example accepting an innerstride!=1:
|
||||
* \code
|
||||
* // in-out argument:
|
||||
* void foo3(Ref<VectorXf,0,InnerStride<> > x);
|
||||
* foo3(A.row()); // OK
|
||||
* \endcode
|
||||
* The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
|
||||
* expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a
|
||||
* The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involved more
|
||||
* expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overloads internally calling a
|
||||
* template function, e.g.:
|
||||
* \code
|
||||
* // in the .h:
|
||||
@@ -108,7 +108,8 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
|
||||
OuterStrideMatch = Derived::IsVectorAtCompileTime
|
||||
|| int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
|
||||
AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits<Derived>::Flags&AlignedBit)==AlignedBit),
|
||||
MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch
|
||||
ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
|
||||
MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
|
||||
};
|
||||
typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;
|
||||
};
|
||||
@@ -187,7 +188,11 @@ protected:
|
||||
template<typename PlainObjectType, int Options, typename StrideType> class Ref
|
||||
: public RefBase<Ref<PlainObjectType, Options, StrideType> >
|
||||
{
|
||||
private:
|
||||
typedef internal::traits<Ref> Traits;
|
||||
template<typename Derived>
|
||||
inline Ref(const PlainObjectBase<Derived>& expr,
|
||||
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0);
|
||||
public:
|
||||
|
||||
typedef RefBase<Ref> Base;
|
||||
@@ -199,17 +204,20 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
|
||||
inline Ref(PlainObjectBase<Derived>& expr,
|
||||
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
|
||||
{
|
||||
Base::construct(expr);
|
||||
EIGEN_STATIC_ASSERT(static_cast<bool>(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
|
||||
Base::construct(expr.derived());
|
||||
}
|
||||
template<typename Derived>
|
||||
inline Ref(const DenseBase<Derived>& expr,
|
||||
typename internal::enable_if<bool(internal::is_lvalue<Derived>::value&&bool(Traits::template match<Derived>::MatchAtCompileTime)),Derived>::type* = 0,
|
||||
int = Derived::ThisConstantIsPrivateInPlainObjectBase)
|
||||
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
|
||||
#else
|
||||
template<typename Derived>
|
||||
inline Ref(DenseBase<Derived>& expr)
|
||||
#endif
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(static_cast<bool>(internal::is_lvalue<Derived>::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
|
||||
EIGEN_STATIC_ASSERT(static_cast<bool>(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
|
||||
enum { THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY = Derived::ThisConstantIsPrivateInPlainObjectBase};
|
||||
Base::construct(expr.const_cast_derived());
|
||||
}
|
||||
|
||||
@@ -228,13 +236,23 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Ref)
|
||||
|
||||
template<typename Derived>
|
||||
inline Ref(const DenseBase<Derived>& expr)
|
||||
inline Ref(const DenseBase<Derived>& expr,
|
||||
typename internal::enable_if<bool(Traits::template match<Derived>::ScalarTypeMatch),Derived>::type* = 0)
|
||||
{
|
||||
// std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n";
|
||||
// std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n";
|
||||
// std::cout << int(StrideType::InnerStrideAtCompileTime) << " - " << int(Derived::InnerStrideAtCompileTime) << "\n";
|
||||
construct(expr.derived(), typename Traits::template match<Derived>::type());
|
||||
}
|
||||
|
||||
inline Ref(const Ref& other) : Base(other) {
|
||||
// copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy
|
||||
}
|
||||
|
||||
template<typename OtherRef>
|
||||
inline Ref(const RefBase<OtherRef>& other) {
|
||||
construct(other.derived(), typename Traits::template match<OtherRef>::type());
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
|
||||
@@ -135,7 +135,7 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<int RowFactor, int ColFactor>
|
||||
inline const Replicate<Derived,RowFactor,ColFactor>
|
||||
const Replicate<Derived,RowFactor,ColFactor>
|
||||
DenseBase<Derived>::replicate() const
|
||||
{
|
||||
return Replicate<Derived,RowFactor,ColFactor>(derived());
|
||||
@@ -150,7 +150,7 @@ DenseBase<Derived>::replicate() const
|
||||
* \sa VectorwiseOp::replicate(), DenseBase::replicate<int,int>(), class Replicate
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const Replicate<Derived,Dynamic,Dynamic>
|
||||
const typename DenseBase<Derived>::ReplicateReturnType
|
||||
DenseBase<Derived>::replicate(Index rowFactor,Index colFactor) const
|
||||
{
|
||||
return Replicate<Derived,Dynamic,Dynamic>(derived(),rowFactor,colFactor);
|
||||
|
||||
@@ -57,11 +57,10 @@ template<typename Derived> class ReturnByValue
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue)
|
||||
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void evalTo(Dest& dst) const
|
||||
{ static_cast<const Derived*>(this)->evalTo(dst); }
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
|
||||
inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
|
||||
inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
#define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT
|
||||
@@ -73,6 +72,8 @@ template<typename Derived> class ReturnByValue
|
||||
const Unusable& coeff(Index,Index) const { return *reinterpret_cast<const Unusable*>(this); }
|
||||
Unusable& coeffRef(Index) { return *reinterpret_cast<Unusable*>(this); }
|
||||
Unusable& coeffRef(Index,Index) { return *reinterpret_cast<Unusable*>(this); }
|
||||
template<int LoadMode> Unusable& packet(Index) const;
|
||||
template<int LoadMode> Unusable& packet(Index, Index) const;
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -84,6 +85,15 @@ Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
Derived& DenseBase<Derived>::lazyAssign(const ReturnByValue<OtherDerived>& other)
|
||||
{
|
||||
other.evalTo(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_RETURNBYVALUE_H
|
||||
|
||||
@@ -69,23 +69,17 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
};
|
||||
typedef typename MatrixType::PlainObject PlainObject;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
|
||||
{}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const { return m_matrix.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const { return m_matrix.innerStride(); }
|
||||
|
||||
/** \sa MatrixBase::coeff()
|
||||
* \warning the coordinates must fit into the referenced triangular part
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar coeff(Index row, Index col) const
|
||||
{
|
||||
Base::check_coordinates_internal(row, col);
|
||||
@@ -95,7 +89,6 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
/** \sa MatrixBase::coeffRef()
|
||||
* \warning the coordinates must fit into the referenced triangular part
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
Base::check_coordinates_internal(row, col);
|
||||
@@ -103,17 +96,13 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MatrixTypeNestedCleaned& _expression() const { return m_matrix; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
|
||||
|
||||
/** Efficient self-adjoint matrix times vector/matrix product */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
SelfadjointProductMatrix<MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime>
|
||||
operator*(const MatrixBase<OtherDerived>& rhs) const
|
||||
{
|
||||
@@ -124,7 +113,6 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
|
||||
/** Efficient vector/matrix times self-adjoint matrix product */
|
||||
template<typename OtherDerived> friend
|
||||
EIGEN_DEVICE_FUNC
|
||||
SelfadjointProductMatrix<OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false>
|
||||
operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView& rhs)
|
||||
{
|
||||
@@ -144,7 +132,6 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
* \sa rankUpdate(const MatrixBase<DerivedU>&, Scalar)
|
||||
*/
|
||||
template<typename DerivedU, typename DerivedV>
|
||||
EIGEN_DEVICE_FUNC
|
||||
SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha = Scalar(1));
|
||||
|
||||
/** Perform a symmetric rank K update of the selfadjoint matrix \c *this:
|
||||
@@ -158,7 +145,6 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
* \sa rankUpdate(const MatrixBase<DerivedU>&, const MatrixBase<DerivedV>&, Scalar)
|
||||
*/
|
||||
template<typename DerivedU>
|
||||
EIGEN_DEVICE_FUNC
|
||||
SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
|
||||
|
||||
/////////// Cholesky module ///////////
|
||||
@@ -173,10 +159,31 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
/** Return type of eigenvalues() */
|
||||
typedef Matrix<RealScalar, internal::traits<MatrixType>::ColsAtCompileTime, 1> EigenvaluesReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EigenvaluesReturnType eigenvalues() const;
|
||||
EIGEN_DEVICE_FUNC
|
||||
RealScalar operatorNorm() const;
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<typename OtherDerived>
|
||||
SelfAdjointView& operator=(const MatrixBase<OtherDerived>& other)
|
||||
{
|
||||
enum {
|
||||
OtherPart = UpLo == Upper ? StrictlyLower : StrictlyUpper
|
||||
};
|
||||
m_matrix.const_cast_derived().template triangularView<UpLo>() = other;
|
||||
m_matrix.const_cast_derived().template triangularView<OtherPart>() = other.adjoint();
|
||||
return *this;
|
||||
}
|
||||
template<typename OtherMatrixType, unsigned int OtherMode>
|
||||
SelfAdjointView& operator=(const TriangularView<OtherMatrixType, OtherMode>& other)
|
||||
{
|
||||
enum {
|
||||
OtherPart = UpLo == Upper ? StrictlyLower : StrictlyUpper
|
||||
};
|
||||
m_matrix.const_cast_derived().template triangularView<UpLo>() = other.toDenseMatrix();
|
||||
m_matrix.const_cast_derived().template triangularView<OtherPart>() = other.toDenseMatrix().adjoint();
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
protected:
|
||||
MatrixTypeNested m_matrix;
|
||||
@@ -202,7 +209,6 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
|
||||
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);
|
||||
@@ -217,7 +223,6 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
|
||||
template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
@@ -229,7 +234,6 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
|
||||
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);
|
||||
@@ -244,7 +248,6 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
|
||||
template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
@@ -252,7 +255,6 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
@@ -270,7 +272,6 @@ struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dyn
|
||||
template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
|
||||
@@ -35,7 +35,7 @@ struct traits<SelfCwiseBinaryOp<BinaryOp,Lhs,Rhs> >
|
||||
enum {
|
||||
// Note that it is still a good idea to preserve the DirectAccessBit
|
||||
// so that assign can correctly align the data.
|
||||
Flags = traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&AlignedBit) | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit),
|
||||
Flags = traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit),
|
||||
OuterStrideAtCompileTime = Lhs::OuterStrideAtCompileTime,
|
||||
InnerStrideAtCompileTime = Lhs::InnerStrideAtCompileTime
|
||||
};
|
||||
@@ -52,24 +52,21 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
|
||||
|
||||
typedef typename internal::packet_traits<Scalar>::type Packet;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline SelfCwiseBinaryOp(Lhs& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_matrix.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_matrix.innerStride(); }
|
||||
EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_matrix.data(); }
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
inline Index outerStride() const { return m_matrix.outerStride(); }
|
||||
inline Index innerStride() const { return m_matrix.innerStride(); }
|
||||
inline const Scalar* data() const { return m_matrix.data(); }
|
||||
|
||||
// note that this function is needed by assign to correctly align loads/stores
|
||||
// TODO make Assign use .data()
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(Lhs)
|
||||
return m_matrix.const_cast_derived().coeffRef(row, col);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index row, Index col) const
|
||||
{
|
||||
return m_matrix.coeffRef(row, col);
|
||||
@@ -77,20 +74,17 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
|
||||
|
||||
// note that this function is needed by assign to correctly align loads/stores
|
||||
// TODO make Assign use .data()
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(Lhs)
|
||||
return m_matrix.const_cast_derived().coeffRef(index);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_matrix.const_cast_derived().coeffRef(index);
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
OtherDerived& _other = other.const_cast_derived();
|
||||
@@ -101,7 +95,6 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
OtherDerived& _other = other.const_cast_derived();
|
||||
@@ -132,7 +125,6 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
|
||||
// reimplement lazyAssign to handle complex *= real
|
||||
// see CwiseBinaryOp ctor for details
|
||||
template<typename RhsDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE SelfCwiseBinaryOp& lazyAssign(const DenseBase<RhsDerived>& rhs)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs,RhsDerived)
|
||||
@@ -152,20 +144,17 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
|
||||
// overloaded to honor evaluation of special matrices
|
||||
// maybe another solution would be to not use SelfCwiseBinaryOp
|
||||
// at first...
|
||||
EIGEN_DEVICE_FUNC
|
||||
SelfCwiseBinaryOp& operator=(const Rhs& _rhs)
|
||||
{
|
||||
typename internal::nested<Rhs>::type rhs(_rhs);
|
||||
return Base::operator=(rhs);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Lhs& expression() const
|
||||
{
|
||||
return m_matrix;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const BinaryOp& functor() const
|
||||
{
|
||||
return m_functor;
|
||||
@@ -188,24 +177,6 @@ inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
inline Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
|
||||
{
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
|
||||
tmp = PlainObject::Constant(rows(),cols(),other);
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
inline Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
|
||||
{
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
|
||||
tmp = PlainObject::Constant(rows(),cols(),other);
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
|
||||
{
|
||||
|
||||
@@ -20,7 +20,7 @@ inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& sc
|
||||
using std::max;
|
||||
Scalar maxCoeff = bl.cwiseAbs().maxCoeff();
|
||||
|
||||
if(maxCoeff>scale)
|
||||
if (maxCoeff>scale)
|
||||
{
|
||||
ssq = ssq * numext::abs2(scale/maxCoeff);
|
||||
Scalar tmp = Scalar(1)/maxCoeff;
|
||||
@@ -29,21 +29,12 @@ inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& sc
|
||||
invScale = NumTraits<Scalar>::highest();
|
||||
scale = Scalar(1)/invScale;
|
||||
}
|
||||
else if(maxCoeff>NumTraits<Scalar>::highest()) // we got a INF
|
||||
{
|
||||
invScale = Scalar(1);
|
||||
scale = maxCoeff;
|
||||
}
|
||||
else
|
||||
{
|
||||
scale = maxCoeff;
|
||||
invScale = tmp;
|
||||
}
|
||||
}
|
||||
else if(maxCoeff!=maxCoeff) // we got a NaN
|
||||
{
|
||||
scale = maxCoeff;
|
||||
}
|
||||
|
||||
// TODO if the maxCoeff is much much smaller than the current scale,
|
||||
// then we can neglect this sub vector
|
||||
@@ -58,13 +49,13 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
|
||||
typedef typename Derived::RealScalar RealScalar;
|
||||
typedef typename Derived::Index Index;
|
||||
using std::pow;
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
EIGEN_USING_STD_MATH(max);
|
||||
using std::min;
|
||||
using std::max;
|
||||
using std::sqrt;
|
||||
using std::abs;
|
||||
const Derived& vec(_vec.derived());
|
||||
static bool initialized = false;
|
||||
static RealScalar b1, b2, s1m, s2m, rbig, relerr;
|
||||
static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr;
|
||||
if(!initialized)
|
||||
{
|
||||
int ibeta, it, iemin, iemax, iexp;
|
||||
@@ -93,6 +84,7 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
|
||||
iexp = - ((iemax+it)/2);
|
||||
s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
|
||||
|
||||
overfl = rbig*s2m; // overflow boundary for abig
|
||||
eps = RealScalar(pow(double(ibeta), 1-it));
|
||||
relerr = sqrt(eps); // tolerance for neglecting asml
|
||||
initialized = true;
|
||||
@@ -109,13 +101,13 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
|
||||
else if(ax < b1) asml += numext::abs2(ax*s1m);
|
||||
else amed += numext::abs2(ax);
|
||||
}
|
||||
if(amed!=amed)
|
||||
return amed; // we got a NaN
|
||||
if(abig > RealScalar(0))
|
||||
{
|
||||
abig = sqrt(abig);
|
||||
if(abig > rbig) // overflow, or *this contains INF values
|
||||
return abig; // return INF
|
||||
if(abig > overfl)
|
||||
{
|
||||
return rbig;
|
||||
}
|
||||
if(amed > RealScalar(0))
|
||||
{
|
||||
abig = abig/s2m;
|
||||
@@ -160,7 +152,7 @@ template<typename Derived>
|
||||
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
|
||||
MatrixBase<Derived>::stableNorm() const
|
||||
{
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
using std::min;
|
||||
using std::sqrt;
|
||||
const Index blockSize = 4096;
|
||||
RealScalar scale(0);
|
||||
|
||||
@@ -51,7 +51,6 @@ class Stride
|
||||
};
|
||||
|
||||
/** Default constructor, for use when strides are fixed at compile time */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Stride()
|
||||
: m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
|
||||
{
|
||||
@@ -59,7 +58,6 @@ class Stride
|
||||
}
|
||||
|
||||
/** Constructor allowing to pass the strides at runtime */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Stride(Index outerStride, Index innerStride)
|
||||
: m_outer(outerStride), m_inner(innerStride)
|
||||
{
|
||||
@@ -67,16 +65,13 @@ class Stride
|
||||
}
|
||||
|
||||
/** Copy constructor */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Stride(const Stride& other)
|
||||
: m_outer(other.outer()), m_inner(other.inner())
|
||||
{}
|
||||
|
||||
/** \returns the outer stride */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outer() const { return m_outer.value(); }
|
||||
/** \returns the inner stride */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index inner() const { return m_inner.value(); }
|
||||
|
||||
protected:
|
||||
@@ -92,8 +87,8 @@ class InnerStride : public Stride<0, Value>
|
||||
typedef Stride<0, Value> Base;
|
||||
public:
|
||||
typedef DenseIndex Index;
|
||||
EIGEN_DEVICE_FUNC InnerStride() : Base() {}
|
||||
EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {}
|
||||
InnerStride() : Base() {}
|
||||
InnerStride(Index v) : Base(0, v) {}
|
||||
};
|
||||
|
||||
/** \brief Convenience specialization of Stride to specify only an outer stride
|
||||
@@ -104,8 +99,8 @@ class OuterStride : public Stride<Value, 0>
|
||||
typedef Stride<Value, 0> Base;
|
||||
public:
|
||||
typedef DenseIndex Index;
|
||||
EIGEN_DEVICE_FUNC OuterStride() : Base() {}
|
||||
EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v,0) {}
|
||||
OuterStride() : Base() {}
|
||||
OuterStride(Index v) : Base(v,0) {}
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -33,16 +33,11 @@ template<typename ExpressionType> class SwapWrapper
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(SwapWrapper)
|
||||
typedef typename internal::packet_traits<Scalar>::type Packet;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline SwapWrapper(ExpressionType& xpr) : m_expression(xpr) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_expression.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return m_expression.cols(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const { return m_expression.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
typedef typename internal::conditional<
|
||||
@@ -51,37 +46,30 @@ template<typename ExpressionType> class SwapWrapper
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return m_expression.data(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_expression.coeffRef(index);
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void copyCoeff(Index rowId, Index colId, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
OtherDerived& _other = other.const_cast_derived();
|
||||
@@ -93,7 +81,6 @@ template<typename ExpressionType> class SwapWrapper
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
OtherDerived& _other = other.const_cast_derived();
|
||||
@@ -128,7 +115,6 @@ template<typename ExpressionType> class SwapWrapper
|
||||
_other.template writePacket<LoadMode>(index, tmp);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
ExpressionType& expression() const { return m_expression; }
|
||||
|
||||
protected:
|
||||
|
||||
@@ -62,21 +62,18 @@ template<typename MatrixType> class Transpose
|
||||
typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Transpose(MatrixType& a_matrix) : m_matrix(a_matrix) {}
|
||||
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.cols(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.rows(); }
|
||||
inline Index rows() const { return m_matrix.cols(); }
|
||||
inline Index cols() const { return m_matrix.rows(); }
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename MatrixType::Nested>::type&
|
||||
nestedExpression() const { return m_matrix; }
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::remove_all<typename MatrixType::Nested>::type&
|
||||
nestedExpression() { return m_matrix.const_cast_derived(); }
|
||||
|
||||
@@ -109,8 +106,8 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl)
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
|
||||
inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
|
||||
inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
|
||||
|
||||
typedef typename internal::conditional<
|
||||
internal::is_lvalue<MatrixType>::value,
|
||||
@@ -121,39 +118,33 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
|
||||
inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
|
||||
inline const Scalar* data() const { return derived().nestedExpression().data(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
return derived().nestedExpression().const_cast_derived().coeffRef(colId, rowId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
return derived().nestedExpression().const_cast_derived().coeffRef(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return derived().nestedExpression().coeffRef(colId, rowId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return derived().nestedExpression().coeffRef(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return derived().nestedExpression().coeff(colId, rowId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return derived().nestedExpression().coeff(index);
|
||||
|
||||
@@ -53,8 +53,7 @@ class TranspositionsBase
|
||||
public:
|
||||
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename IndicesType::Scalar StorageIndexType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
typedef typename IndicesType::Scalar Index;
|
||||
|
||||
Derived& derived() { return *static_cast<Derived*>(this); }
|
||||
const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||
@@ -82,17 +81,17 @@ class TranspositionsBase
|
||||
inline Index size() const { return indices().size(); }
|
||||
|
||||
/** Direct access to the underlying index vector */
|
||||
inline const StorageIndexType& coeff(Index i) const { return indices().coeff(i); }
|
||||
inline const Index& coeff(Index i) const { return indices().coeff(i); }
|
||||
/** Direct access to the underlying index vector */
|
||||
inline StorageIndexType& coeffRef(Index i) { return indices().coeffRef(i); }
|
||||
inline Index& coeffRef(Index i) { return indices().coeffRef(i); }
|
||||
/** Direct access to the underlying index vector */
|
||||
inline const StorageIndexType& operator()(Index i) const { return indices()(i); }
|
||||
inline const Index& operator()(Index i) const { return indices()(i); }
|
||||
/** Direct access to the underlying index vector */
|
||||
inline StorageIndexType& operator()(Index i) { return indices()(i); }
|
||||
inline Index& operator()(Index i) { return indices()(i); }
|
||||
/** Direct access to the underlying index vector */
|
||||
inline const StorageIndexType& operator[](Index i) const { return indices()(i); }
|
||||
inline const Index& operator[](Index i) const { return indices()(i); }
|
||||
/** Direct access to the underlying index vector */
|
||||
inline StorageIndexType& operator[](Index i) { return indices()(i); }
|
||||
inline Index& operator[](Index i) { return indices()(i); }
|
||||
|
||||
/** const version of indices(). */
|
||||
const IndicesType& indices() const { return derived().indices(); }
|
||||
@@ -100,7 +99,7 @@ class TranspositionsBase
|
||||
IndicesType& indices() { return derived().indices(); }
|
||||
|
||||
/** Resizes to given size. */
|
||||
inline void resize(Index newSize)
|
||||
inline void resize(int newSize)
|
||||
{
|
||||
indices().resize(newSize);
|
||||
}
|
||||
@@ -108,7 +107,7 @@ class TranspositionsBase
|
||||
/** Sets \c *this to represents an identity transformation */
|
||||
void setIdentity()
|
||||
{
|
||||
for(StorageIndexType i = 0; i < indices().size(); ++i)
|
||||
for(int i = 0; i < indices().size(); ++i)
|
||||
coeffRef(i) = i;
|
||||
}
|
||||
|
||||
@@ -145,26 +144,23 @@ class TranspositionsBase
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType>
|
||||
struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndexType> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
|
||||
struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType> >
|
||||
{
|
||||
typedef Matrix<_StorageIndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
typedef _StorageIndexType StorageIndexType;
|
||||
typedef IndexType Index;
|
||||
typedef Matrix<Index, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
|
||||
};
|
||||
}
|
||||
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType>
|
||||
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndexType> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
|
||||
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType> >
|
||||
{
|
||||
typedef internal::traits<Transpositions> Traits;
|
||||
public:
|
||||
|
||||
typedef TranspositionsBase<Transpositions> Base;
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename IndicesType::Scalar StorageIndexType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
|
||||
typedef typename IndicesType::Scalar Index;
|
||||
|
||||
inline Transpositions() {}
|
||||
|
||||
@@ -219,32 +215,30 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim
|
||||
|
||||
|
||||
namespace internal {
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType, int _PacketAccess>
|
||||
struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndexType>,_PacketAccess> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
|
||||
struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,_PacketAccess> >
|
||||
{
|
||||
typedef Map<const Matrix<_StorageIndexType,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
typedef _StorageIndexType StorageIndexType;
|
||||
typedef IndexType Index;
|
||||
typedef Map<const Matrix<Index,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;
|
||||
};
|
||||
}
|
||||
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType, int PacketAccess>
|
||||
class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndexType>,PacketAccess>
|
||||
: public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndexType>,PacketAccess> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int PacketAccess>
|
||||
class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,PacketAccess>
|
||||
: public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,PacketAccess> >
|
||||
{
|
||||
typedef internal::traits<Map> Traits;
|
||||
public:
|
||||
|
||||
typedef TranspositionsBase<Map> Base;
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename IndicesType::Scalar StorageIndexType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
typedef typename IndicesType::Scalar Index;
|
||||
|
||||
inline Map(const StorageIndexType* indicesPtr)
|
||||
inline Map(const Index* indicesPtr)
|
||||
: m_indices(indicesPtr)
|
||||
{}
|
||||
|
||||
inline Map(const StorageIndexType* indicesPtr, Index size)
|
||||
inline Map(const Index* indicesPtr, Index size)
|
||||
: m_indices(indicesPtr,size)
|
||||
{}
|
||||
|
||||
@@ -281,8 +275,7 @@ namespace internal {
|
||||
template<typename _IndicesType>
|
||||
struct traits<TranspositionsWrapper<_IndicesType> >
|
||||
{
|
||||
typedef typename _IndicesType::Scalar StorageIndexType;
|
||||
typedef typename _IndicesType::Index Index;
|
||||
typedef typename _IndicesType::Scalar Index;
|
||||
typedef _IndicesType IndicesType;
|
||||
};
|
||||
}
|
||||
@@ -296,8 +289,7 @@ class TranspositionsWrapper
|
||||
|
||||
typedef TranspositionsBase<TranspositionsWrapper> Base;
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename IndicesType::Scalar StorageIndexType;
|
||||
typedef typename IndicesType::Index Index;
|
||||
typedef typename IndicesType::Scalar Index;
|
||||
|
||||
inline TranspositionsWrapper(IndicesType& a_indices)
|
||||
: m_indices(a_indices)
|
||||
@@ -371,25 +363,24 @@ struct transposition_matrix_product_retval
|
||||
{
|
||||
typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
|
||||
typedef typename TranspositionType::Index Index;
|
||||
typedef typename TranspositionType::StorageIndexType StorageIndexType;
|
||||
|
||||
transposition_matrix_product_retval(const TranspositionType& tr, const MatrixType& matrix)
|
||||
: m_transpositions(tr), m_matrix(matrix)
|
||||
{}
|
||||
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
inline int rows() const { return m_matrix.rows(); }
|
||||
inline int cols() const { return m_matrix.cols(); }
|
||||
|
||||
template<typename Dest> inline void evalTo(Dest& dst) const
|
||||
{
|
||||
const Index size = m_transpositions.size();
|
||||
StorageIndexType j = 0;
|
||||
const int size = m_transpositions.size();
|
||||
Index j = 0;
|
||||
|
||||
if(!(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix)))
|
||||
dst = m_matrix;
|
||||
|
||||
for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
|
||||
if(Index(j=m_transpositions.coeff(k))!=k)
|
||||
for(int k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
|
||||
if((j=m_transpositions.coeff(k))!=k)
|
||||
{
|
||||
if(Side==OnTheLeft)
|
||||
dst.row(k).swap(dst.row(j));
|
||||
|
||||
@@ -44,39 +44,29 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
|
||||
typedef typename internal::traits<Derived>::DenseMatrixType DenseMatrixType;
|
||||
typedef DenseMatrixType DenseType;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline TriangularBase() { eigen_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return derived().rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return derived().cols(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const { return derived().outerStride(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const { return derived().innerStride(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar coeff(Index row, Index col) const { return derived().coeff(row,col); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index row, Index col) { return derived().coeffRef(row,col); }
|
||||
|
||||
/** \see MatrixBase::copyCoeff(row,col)
|
||||
*/
|
||||
template<typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, Other& other)
|
||||
{
|
||||
derived().coeffRef(row, col) = other.coeff(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar operator()(Index row, Index col) const
|
||||
{
|
||||
check_coordinates(row, col);
|
||||
return coeff(row,col);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& operator()(Index row, Index col)
|
||||
{
|
||||
check_coordinates(row, col);
|
||||
@@ -84,20 +74,15 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Derived& derived() { return *static_cast<Derived*>(this); }
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
template<typename DenseDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void evalTo(MatrixBase<DenseDerived> &other) const;
|
||||
template<typename DenseDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void evalToLazy(MatrixBase<DenseDerived> &other) const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseMatrixType toDenseMatrix() const
|
||||
{
|
||||
DenseMatrixType res(rows(), cols());
|
||||
@@ -204,52 +189,36 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
| (Mode & (ZeroDiag))
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline TriangularView(const MatrixType& matrix) : m_matrix(matrix)
|
||||
{}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const { return m_matrix.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const { return m_matrix.innerStride(); }
|
||||
|
||||
/** \sa MatrixBase::operator+=() */
|
||||
template<typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& operator+=(const DenseBase<Other>& other) { return *this = m_matrix + other.derived(); }
|
||||
/** \sa MatrixBase::operator+=() */
|
||||
template<typename Other> TriangularView& operator+=(const DenseBase<Other>& other) { return *this = m_matrix + other.derived(); }
|
||||
/** \sa MatrixBase::operator-=() */
|
||||
template<typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& operator-=(const DenseBase<Other>& other) { return *this = m_matrix - other.derived(); }
|
||||
template<typename Other> TriangularView& operator-=(const DenseBase<Other>& other) { return *this = m_matrix - other.derived(); }
|
||||
/** \sa MatrixBase::operator*=() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix * other; }
|
||||
/** \sa MatrixBase::operator/=() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix / other; }
|
||||
|
||||
/** \sa MatrixBase::fill() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
void fill(const Scalar& value) { setConstant(value); }
|
||||
/** \sa MatrixBase::setConstant() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& setConstant(const Scalar& value)
|
||||
{ return *this = MatrixType::Constant(rows(), cols(), value); }
|
||||
/** \sa MatrixBase::setZero() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& setZero() { return setConstant(Scalar(0)); }
|
||||
/** \sa MatrixBase::setOnes() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& setOnes() { return setConstant(Scalar(1)); }
|
||||
|
||||
/** \sa MatrixBase::coeff()
|
||||
* \warning the coordinates must fit into the referenced triangular part
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar coeff(Index row, Index col) const
|
||||
{
|
||||
Base::check_coordinates_internal(row, col);
|
||||
@@ -259,62 +228,49 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
/** \sa MatrixBase::coeffRef()
|
||||
* \warning the coordinates must fit into the referenced triangular part
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
Base::check_coordinates_internal(row, col);
|
||||
return m_matrix.const_cast_derived().coeffRef(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
|
||||
|
||||
/** Assigns a triangular matrix to a triangular part of a dense matrix */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& operator=(const TriangularBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& operator=(const MatrixBase<OtherDerived>& other);
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularView& operator=(const TriangularView& other)
|
||||
{ return *this = other.nestedExpression(); }
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void lazyAssign(const TriangularBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void lazyAssign(const MatrixBase<OtherDerived>& other);
|
||||
|
||||
/** \sa MatrixBase::conjugate() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline TriangularView<MatrixConjugateReturnType,Mode> conjugate()
|
||||
{ return m_matrix.conjugate(); }
|
||||
/** \sa MatrixBase::conjugate() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const
|
||||
{ return m_matrix.conjugate(); }
|
||||
|
||||
/** \sa MatrixBase::adjoint() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
|
||||
{ return m_matrix.adjoint(); }
|
||||
|
||||
/** \sa MatrixBase::transpose() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline TriangularView<Transpose<MatrixType>,TransposeMode> transpose()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
return m_matrix.const_cast_derived().transpose();
|
||||
}
|
||||
/** \sa MatrixBase::transpose() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const
|
||||
{
|
||||
return m_matrix.transpose();
|
||||
@@ -322,7 +278,6 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
|
||||
/** Efficient triangular matrix times vector/matrix product */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularProduct<Mode, true, MatrixType, false, OtherDerived, OtherDerived::ColsAtCompileTime==1>
|
||||
operator*(const MatrixBase<OtherDerived>& rhs) const
|
||||
{
|
||||
@@ -333,7 +288,6 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
|
||||
/** Efficient vector/matrix times triangular matrix product */
|
||||
template<typename OtherDerived> friend
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularProduct<Mode, false, OtherDerived, OtherDerived::RowsAtCompileTime==1, MatrixType, false>
|
||||
operator*(const MatrixBase<OtherDerived>& lhs, const TriangularView& rhs)
|
||||
{
|
||||
@@ -342,33 +296,56 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
(lhs.derived(),rhs.m_matrix);
|
||||
}
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<typename OtherDerived>
|
||||
struct eigen2_product_return_type
|
||||
{
|
||||
typedef typename TriangularView<MatrixType,Mode>::DenseMatrixType DenseMatrixType;
|
||||
typedef typename OtherDerived::PlainObject::DenseType OtherPlainObject;
|
||||
typedef typename ProductReturnType<DenseMatrixType, OtherPlainObject>::Type ProdRetType;
|
||||
typedef typename ProdRetType::PlainObject type;
|
||||
};
|
||||
template<typename OtherDerived>
|
||||
const typename eigen2_product_return_type<OtherDerived>::type
|
||||
operator*(const EigenBase<OtherDerived>& rhs) const
|
||||
{
|
||||
typename OtherDerived::PlainObject::DenseType rhsPlainObject;
|
||||
rhs.evalTo(rhsPlainObject);
|
||||
return this->toDenseMatrix() * rhsPlainObject;
|
||||
}
|
||||
template<typename OtherMatrixType>
|
||||
bool isApprox(const TriangularView<OtherMatrixType, Mode>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
|
||||
{
|
||||
return this->toDenseMatrix().isApprox(other.toDenseMatrix(), precision);
|
||||
}
|
||||
template<typename OtherDerived>
|
||||
bool isApprox(const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
|
||||
{
|
||||
return this->toDenseMatrix().isApprox(other, precision);
|
||||
}
|
||||
#endif // EIGEN2_SUPPORT
|
||||
|
||||
template<int Side, typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const internal::triangular_solve_retval<Side,TriangularView, Other>
|
||||
solve(const MatrixBase<Other>& other) const;
|
||||
|
||||
template<int Side, typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void solveInPlace(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
template<typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const internal::triangular_solve_retval<OnTheLeft,TriangularView, Other>
|
||||
solve(const MatrixBase<Other>& other) const
|
||||
{ return solve<OnTheLeft>(other); }
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void solveInPlace(const MatrixBase<OtherDerived>& other) const
|
||||
{ return solveInPlace<OnTheLeft>(other); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
|
||||
return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
|
||||
@@ -376,21 +353,18 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void swap(TriangularBase<OtherDerived> const & other)
|
||||
{
|
||||
TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived());
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void swap(MatrixBase<OtherDerived> const & other)
|
||||
{
|
||||
SwapWrapper<MatrixType> swaper(const_cast<MatrixType&>(m_matrix));
|
||||
TriangularView<SwapWrapper<MatrixType>,Mode>(swaper).lazyAssign(other.derived());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Scalar determinant() const
|
||||
{
|
||||
if (Mode & UnitDiag)
|
||||
@@ -403,55 +377,57 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
|
||||
// TODO simplify the following:
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularView& operator=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
|
||||
{
|
||||
setZero();
|
||||
return assignProduct(other,1);
|
||||
return assignProduct(other.derived(),1);
|
||||
}
|
||||
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularView& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
|
||||
{
|
||||
return assignProduct(other,1);
|
||||
return assignProduct(other.derived(),1);
|
||||
}
|
||||
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularView& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
|
||||
{
|
||||
return assignProduct(other,-1);
|
||||
return assignProduct(other.derived(),-1);
|
||||
}
|
||||
|
||||
|
||||
template<typename ProductDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularView& operator=(const ScaledProduct<ProductDerived>& other)
|
||||
{
|
||||
setZero();
|
||||
return assignProduct(other,other.alpha());
|
||||
return assignProduct(other.derived(),other.alpha());
|
||||
}
|
||||
|
||||
template<typename ProductDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularView& operator+=(const ScaledProduct<ProductDerived>& other)
|
||||
{
|
||||
return assignProduct(other,other.alpha());
|
||||
return assignProduct(other.derived(),other.alpha());
|
||||
}
|
||||
|
||||
template<typename ProductDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularView& operator-=(const ScaledProduct<ProductDerived>& other)
|
||||
{
|
||||
return assignProduct(other,-other.alpha());
|
||||
return assignProduct(other.derived(),-other.alpha());
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha);
|
||||
|
||||
template<int Mode, bool LhsIsTriangular,
|
||||
typename Lhs, bool LhsIsVector,
|
||||
typename Rhs, bool RhsIsVector>
|
||||
EIGEN_STRONG_INLINE TriangularView& assignProduct(const TriangularProduct<Mode, LhsIsTriangular, Lhs, LhsIsVector, Rhs, RhsIsVector>& prod, const Scalar& alpha)
|
||||
{
|
||||
lazyAssign(alpha*prod.eval());
|
||||
return *this;
|
||||
}
|
||||
|
||||
MatrixTypeNested m_matrix;
|
||||
};
|
||||
@@ -472,7 +448,6 @@ struct triangular_assignment_selector
|
||||
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src);
|
||||
@@ -501,7 +476,6 @@ struct triangular_assignment_selector
|
||||
template<typename Derived1, typename Derived2, unsigned int Mode, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
@@ -510,7 +484,6 @@ struct triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearO
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
@@ -529,7 +502,6 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
@@ -549,7 +521,6 @@ struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
@@ -568,7 +539,6 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
@@ -587,7 +557,6 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
@@ -608,7 +577,6 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
@@ -751,6 +719,41 @@ void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
|
||||
* Implementation of MatrixBase methods
|
||||
***************************************************************************/
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
|
||||
// implementation of part<>(), including the SelfAdjoint case.
|
||||
|
||||
namespace internal {
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
struct eigen2_part_return_type
|
||||
{
|
||||
typedef TriangularView<MatrixType, Mode> type;
|
||||
};
|
||||
|
||||
template<typename MatrixType>
|
||||
struct eigen2_part_return_type<MatrixType, SelfAdjoint>
|
||||
{
|
||||
typedef SelfAdjointView<MatrixType, Upper> type;
|
||||
};
|
||||
}
|
||||
|
||||
/** \deprecated use MatrixBase::triangularView() */
|
||||
template<typename Derived>
|
||||
template<unsigned int Mode>
|
||||
const typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Derived>::part() const
|
||||
{
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \deprecated use MatrixBase::triangularView() */
|
||||
template<typename Derived>
|
||||
template<unsigned int Mode>
|
||||
typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Derived>::part()
|
||||
{
|
||||
return derived();
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \returns an expression of a triangular view extracted from the current matrix
|
||||
*
|
||||
|
||||
@@ -72,7 +72,6 @@ template<typename VectorType, int Size> class VectorBlock
|
||||
|
||||
/** Dynamic-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline VectorBlock(VectorType& vector, Index start, Index size)
|
||||
: Base(vector,
|
||||
IsColVector ? start : 0, IsColVector ? 0 : start,
|
||||
@@ -83,7 +82,6 @@ template<typename VectorType, int Size> class VectorBlock
|
||||
|
||||
/** Fixed-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline VectorBlock(VectorType& vector, Index start)
|
||||
: Base(vector, IsColVector ? start : 0, IsColVector ? 0 : start)
|
||||
{
|
||||
|
||||
@@ -302,7 +302,6 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** \returns a row (or column) vector expression of the squared norm
|
||||
* of each column (or row) of the referenced expression.
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* Example: \include PartialRedux_squaredNorm.cpp
|
||||
* Output: \verbinclude PartialRedux_squaredNorm.out
|
||||
@@ -313,7 +312,6 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** \returns a row (or column) vector expression of the norm
|
||||
* of each column (or row) of the referenced expression.
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* Example: \include PartialRedux_norm.cpp
|
||||
* Output: \verbinclude PartialRedux_norm.out
|
||||
@@ -325,8 +323,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** \returns a row (or column) vector expression of the norm
|
||||
* of each column (or row) of the referenced expression, using
|
||||
* Blue's algorithm.
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
* blue's algorithm.
|
||||
*
|
||||
* \sa DenseBase::blueNorm() */
|
||||
const typename ReturnType<internal::member_blueNorm,RealScalar>::Type blueNorm() const
|
||||
@@ -336,7 +333,6 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
/** \returns a row (or column) vector expression of the norm
|
||||
* of each column (or row) of the referenced expression, avoiding
|
||||
* underflow and overflow.
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* \sa DenseBase::stableNorm() */
|
||||
const typename ReturnType<internal::member_stableNorm,RealScalar>::Type stableNorm() const
|
||||
@@ -346,7 +342,6 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
/** \returns a row (or column) vector expression of the norm
|
||||
* of each column (or row) of the referenced expression, avoiding
|
||||
* underflow and overflow using a concatenation of hypot() calls.
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* \sa DenseBase::hypotNorm() */
|
||||
const typename ReturnType<internal::member_hypotNorm,RealScalar>::Type hypotNorm() const
|
||||
@@ -371,7 +366,6 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** \returns a row (or column) vector expression representing
|
||||
* whether \b all coefficients of each respective column (or row) are \c true.
|
||||
* This expression can be assigned to a vector with entries of type \c bool.
|
||||
*
|
||||
* \sa DenseBase::all() */
|
||||
const typename ReturnType<internal::member_all>::Type all() const
|
||||
@@ -379,7 +373,6 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** \returns a row (or column) vector expression representing
|
||||
* whether \b at \b least one coefficient of each respective column (or row) is \c true.
|
||||
* This expression can be assigned to a vector with entries of type \c bool.
|
||||
*
|
||||
* \sa DenseBase::any() */
|
||||
const typename ReturnType<internal::member_any>::Type any() const
|
||||
@@ -387,8 +380,6 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/** \returns a row (or column) vector expression representing
|
||||
* the number of \c true coefficients of each respective column (or row).
|
||||
* This expression can be assigned to a vector whose entries have the same type as is used to
|
||||
* index entries of the original matrix; for dense matrices, this is \c std::ptrdiff_t .
|
||||
*
|
||||
* Example: \include PartialRedux_count.cpp
|
||||
* Output: \verbinclude PartialRedux_count.out
|
||||
@@ -560,7 +551,9 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/////////// Geometry module ///////////
|
||||
|
||||
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
|
||||
Homogeneous<ExpressionType,Direction> homogeneous() const;
|
||||
#endif
|
||||
|
||||
typedef typename ExpressionType::PlainObject CrossReturnType;
|
||||
template<typename OtherDerived>
|
||||
|
||||
@@ -194,7 +194,7 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
internal::min_coeff_visitor<Derived> minVisitor;
|
||||
this->visit(minVisitor);
|
||||
*index = IndexType((RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row);
|
||||
*index = (RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row;
|
||||
return minVisitor.res;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
FILE(GLOB Eigen_Core_arch_AVX_SRCS "*.h")
|
||||
|
||||
INSTALL(FILES
|
||||
${Eigen_Core_arch_AVX_SRCS}
|
||||
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AVX COMPONENT Devel
|
||||
)
|
||||
@@ -1,463 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_COMPLEX_AVX_H
|
||||
#define EIGEN_COMPLEX_AVX_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
//---------- float ----------
|
||||
struct Packet4cf
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet4cf(const __m256& a) : v(a) {}
|
||||
__m256 v;
|
||||
};
|
||||
|
||||
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
{
|
||||
typedef Packet4cf type;
|
||||
typedef Packet2cf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 4,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4}; typedef Packet2cf half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pnegate(const Packet4cf& a)
|
||||
{
|
||||
return Packet4cf(pnegate(a.v));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a)
|
||||
{
|
||||
const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000));
|
||||
return Packet4cf(_mm256_xor_ps(a.v,mask));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pmul<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
|
||||
{
|
||||
__m256 tmp1 = _mm256_mul_ps(_mm256_moveldup_ps(a.v), b.v);
|
||||
__m256 tmp2 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2,3,0,1)));
|
||||
__m256 result = _mm256_addsub_ps(tmp1, tmp2);
|
||||
return Packet4cf(result);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pand <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_and_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf por <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_or_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pxor <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_xor_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_andnot_ps(a.v,b.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pload <Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(pload<Packet8f>(&numext::real_ref(*from))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(ploadu<Packet8f>(&numext::real_ref(*from))); }
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)
|
||||
{
|
||||
return Packet4cf(_mm256_castpd_ps(_mm256_broadcast_sd((const double*)(const void*)&from)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)
|
||||
{
|
||||
// FIXME The following might be optimized using _mm256_movedup_pd
|
||||
Packet2cf a = ploaddup<Packet2cf>(from);
|
||||
Packet2cf b = ploaddup<Packet2cf>(from+1);
|
||||
return Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(a.v), b.v, 1));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, DenseIndex stride)
|
||||
{
|
||||
return Packet4cf(_mm256_set_ps(std::imag(from[3*stride]), std::real(from[3*stride]),
|
||||
std::imag(from[2*stride]), std::real(from[2*stride]),
|
||||
std::imag(from[1*stride]), std::real(from[1*stride]),
|
||||
std::imag(from[0*stride]), std::real(from[0*stride])));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, DenseIndex stride)
|
||||
{
|
||||
__m128 low = _mm256_extractf128_ps(from.v, 0);
|
||||
to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(low, low, 1)));
|
||||
to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(low, low, 3)));
|
||||
|
||||
__m128 high = _mm256_extractf128_ps(from.v, 1);
|
||||
to[stride*2] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 0)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(high, high, 1)));
|
||||
to[stride*3] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)));
|
||||
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet4cf>(const Packet4cf& a)
|
||||
{
|
||||
return pfirst(Packet2cf(_mm256_castps256_ps128(a.v)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf preverse(const Packet4cf& a) {
|
||||
__m128 low = _mm256_extractf128_ps(a.v, 0);
|
||||
__m128 high = _mm256_extractf128_ps(a.v, 1);
|
||||
__m128d lowd = _mm_castps_pd(low);
|
||||
__m128d highd = _mm_castps_pd(high);
|
||||
low = _mm_castpd_ps(_mm_shuffle_pd(lowd,lowd,0x1));
|
||||
high = _mm_castpd_ps(_mm_shuffle_pd(highd,highd,0x1));
|
||||
__m256 result = _mm256_setzero_ps();
|
||||
result = _mm256_insertf128_ps(result, low, 1);
|
||||
result = _mm256_insertf128_ps(result, high, 0);
|
||||
return Packet4cf(result);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(const Packet4cf& a)
|
||||
{
|
||||
return predux(padd(Packet2cf(_mm256_extractf128_ps(a.v,0)),
|
||||
Packet2cf(_mm256_extractf128_ps(a.v,1))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf preduxp<Packet4cf>(const Packet4cf* vecs)
|
||||
{
|
||||
Packet8f t0 = _mm256_shuffle_ps(vecs[0].v, vecs[0].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
||||
Packet8f t1 = _mm256_shuffle_ps(vecs[1].v, vecs[1].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
||||
t0 = _mm256_hadd_ps(t0,t1);
|
||||
Packet8f t2 = _mm256_shuffle_ps(vecs[2].v, vecs[2].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
||||
Packet8f t3 = _mm256_shuffle_ps(vecs[3].v, vecs[3].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
||||
t2 = _mm256_hadd_ps(t2,t3);
|
||||
|
||||
t1 = _mm256_permute2f128_ps(t0,t2, 0 + (2<<4));
|
||||
t3 = _mm256_permute2f128_ps(t0,t2, 1 + (3<<4));
|
||||
|
||||
return Packet4cf(_mm256_add_ps(t1,t3));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a)
|
||||
{
|
||||
return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)),
|
||||
Packet2cf(_mm256_extractf128_ps(a.v, 1))));
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4cf>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet4cf& first, const Packet4cf& second)
|
||||
{
|
||||
if (Offset==0) return;
|
||||
palign_impl<Offset*2,Packet8f>::run(first.v, second.v);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4cf, Packet4cf, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
||||
{
|
||||
return internal::pmul(a, pconj(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4cf, Packet4cf, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
||||
{
|
||||
return internal::pmul(pconj(a), b);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
||||
{
|
||||
return pconj(internal::pmul(a, b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet8f, Packet4cf, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const
|
||||
{ return Packet4cf(Eigen::internal::pmul(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4cf, Packet8f, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const
|
||||
{ return Packet4cf(Eigen::internal::pmul(x.v, y)); }
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
|
||||
{
|
||||
Packet4cf num = pmul(a, pconj(b));
|
||||
__m256 tmp = _mm256_mul_ps(b.v, b.v);
|
||||
__m256 tmp2 = _mm256_shuffle_ps(tmp,tmp,0xB1);
|
||||
__m256 denom = _mm256_add_ps(tmp, tmp2);
|
||||
return Packet4cf(_mm256_div_ps(num.v, denom));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x)
|
||||
{
|
||||
return Packet4cf(_mm256_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0 ,1)));
|
||||
}
|
||||
|
||||
//---------- double ----------
|
||||
struct Packet2cd
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet2cd(const __m256d& a) : v(a) {}
|
||||
__m256d v;
|
||||
};
|
||||
|
||||
template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
{
|
||||
typedef Packet2cd type;
|
||||
typedef Packet1cd half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 0,
|
||||
size = 2,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2}; typedef Packet1cd half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pnegate(const Packet2cd& a) { return Packet2cd(pnegate(a.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a)
|
||||
{
|
||||
const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000,0x0,0x0,0x0,0x80000000,0x0,0x0,0x0));
|
||||
return Packet2cd(_mm256_xor_pd(a.v,mask));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pmul<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
|
||||
{
|
||||
__m256d tmp1 = _mm256_shuffle_pd(a.v,a.v,0x0);
|
||||
__m256d even = _mm256_mul_pd(tmp1, b.v);
|
||||
__m256d tmp2 = _mm256_shuffle_pd(a.v,a.v,0xF);
|
||||
__m256d tmp3 = _mm256_shuffle_pd(b.v,b.v,0x5);
|
||||
__m256d odd = _mm256_mul_pd(tmp2, tmp3);
|
||||
return Packet2cd(_mm256_addsub_pd(even, odd));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pand <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_and_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd por <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_or_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pxor <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_xor_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_andnot_pd(a.v,b.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pload <Packet2cd>(const std::complex<double>* from)
|
||||
{ EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(pload<Packet4d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<double>* from)
|
||||
{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(ploadu<Packet4d>((const double*)from)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from)
|
||||
{
|
||||
// in case casting to a __m128d* is really not safe, then we can still fallback to this version: (much slower though)
|
||||
// return Packet2cd(_mm256_loadu2_m128d((const double*)&from,(const double*)&from));
|
||||
return Packet2cd(_mm256_broadcast_pd((const __m128d*)(const void*)&from));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex<double>* from) { return pset1<Packet2cd>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, DenseIndex stride)
|
||||
{
|
||||
return Packet2cd(_mm256_set_pd(std::imag(from[1*stride]), std::real(from[1*stride]),
|
||||
std::imag(from[0*stride]), std::real(from[0*stride])));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, DenseIndex stride)
|
||||
{
|
||||
__m128d low = _mm256_extractf128_pd(from.v, 0);
|
||||
to[stride*0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));
|
||||
__m128d high = _mm256_extractf128_pd(from.v, 1);
|
||||
to[stride*1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet2cd>(const Packet2cd& a)
|
||||
{
|
||||
__m128d low = _mm256_extractf128_pd(a.v, 0);
|
||||
EIGEN_ALIGN16 double res[2];
|
||||
_mm_store_pd(res, low);
|
||||
return std::complex<double>(res[0],res[1]);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd preverse(const Packet2cd& a) {
|
||||
__m256d result = _mm256_permute2f128_pd(a.v, a.v, 1);
|
||||
return Packet2cd(result);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(const Packet2cd& a)
|
||||
{
|
||||
return predux(padd(Packet1cd(_mm256_extractf128_pd(a.v,0)),
|
||||
Packet1cd(_mm256_extractf128_pd(a.v,1))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd preduxp<Packet2cd>(const Packet2cd* vecs)
|
||||
{
|
||||
Packet4d t0 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 0 + (2<<4));
|
||||
Packet4d t1 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 1 + (3<<4));
|
||||
|
||||
return Packet2cd(_mm256_add_pd(t0,t1));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a)
|
||||
{
|
||||
return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v,0)),
|
||||
Packet1cd(_mm256_extractf128_pd(a.v,1))));
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2cd>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet2cd& first, const Packet2cd& second)
|
||||
{
|
||||
if (Offset==0) return;
|
||||
palign_impl<Offset*2,Packet4d>::run(first.v, second.v);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cd, Packet2cd, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
||||
{
|
||||
return internal::pmul(a, pconj(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cd, Packet2cd, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
||||
{
|
||||
return internal::pmul(pconj(a), b);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
||||
{
|
||||
return pconj(internal::pmul(a, b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4d, Packet2cd, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const
|
||||
{ return Packet2cd(Eigen::internal::pmul(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cd, Packet4d, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const
|
||||
{ return Packet2cd(Eigen::internal::pmul(x.v, y)); }
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
|
||||
{
|
||||
Packet2cd num = pmul(a, pconj(b));
|
||||
__m256d tmp = _mm256_mul_pd(b.v, b.v);
|
||||
__m256d denom = _mm256_hadd_pd(tmp, tmp);
|
||||
return Packet2cd(_mm256_div_pd(num.v, denom));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x)
|
||||
{
|
||||
return Packet2cd(_mm256_shuffle_pd(x.v, x.v, 0x5));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4cf,4>& kernel) {
|
||||
__m256d P0 = _mm256_castps_pd(kernel.packet[0].v);
|
||||
__m256d P1 = _mm256_castps_pd(kernel.packet[1].v);
|
||||
__m256d P2 = _mm256_castps_pd(kernel.packet[2].v);
|
||||
__m256d P3 = _mm256_castps_pd(kernel.packet[3].v);
|
||||
|
||||
__m256d T0 = _mm256_shuffle_pd(P0, P1, 15);
|
||||
__m256d T1 = _mm256_shuffle_pd(P0, P1, 0);
|
||||
__m256d T2 = _mm256_shuffle_pd(P2, P3, 15);
|
||||
__m256d T3 = _mm256_shuffle_pd(P2, P3, 0);
|
||||
|
||||
kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 32));
|
||||
kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 49));
|
||||
kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 32));
|
||||
kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet2cd,2>& kernel) {
|
||||
__m256d tmp = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0+(2<<4));
|
||||
kernel.packet[1].v = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1+(3<<4));
|
||||
kernel.packet[0].v = tmp;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_COMPLEX_AVX_H
|
||||
@@ -1,564 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_PACKET_MATH_AVX_H
|
||||
#define EIGEN_PACKET_MATH_AVX_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
#ifndef EIGEN_HAS_FUSED_MADD
|
||||
#define EIGEN_HAS_FUSED_MADD 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
typedef __m256 Packet8f;
|
||||
typedef __m256i Packet8i;
|
||||
typedef __m256d Packet4d;
|
||||
|
||||
template<> struct is_arithmetic<__m256> { enum { value = true }; };
|
||||
template<> struct is_arithmetic<__m256i> { enum { value = true }; };
|
||||
template<> struct is_arithmetic<__m256d> { enum { value = true }; };
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \
|
||||
const Packet8f p8f_##NAME = pset1<Packet8f>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \
|
||||
const Packet4d p4d_##NAME = pset1<Packet4d>(X)
|
||||
|
||||
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet8f type;
|
||||
typedef Packet4f half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=8,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasDiv = 1,
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
HasExp = 0,
|
||||
HasSqrt = 0
|
||||
};
|
||||
};
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
{
|
||||
typedef Packet4d type;
|
||||
typedef Packet2d half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasDiv = 1,
|
||||
HasExp = 0
|
||||
};
|
||||
};
|
||||
|
||||
/* Proper support for integers is only provided by AVX2. In the meantime, we'll
|
||||
use SSE instructions and packets to deal with integers.
|
||||
template<> struct packet_traits<int> : default_packet_traits
|
||||
{
|
||||
typedef Packet8i type;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=8
|
||||
};
|
||||
};
|
||||
*/
|
||||
|
||||
template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; enum {size=8}; };
|
||||
template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4}; };
|
||||
template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8}; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8i pset1<Packet8i>(const int& from) { return _mm256_set1_epi32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float* from) { return _mm256_broadcast_ss(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f plset<float>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d plset<double>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); }
|
||||
|
||||
// Negates every float of `a` by flipping its sign bit.
// Computing 0 - a instead would return +0.0 for an input of +0.0 (the correct
// negation is -0.0), so the sign bit is toggled directly with an XOR.
template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
{
  // -0.0f has only the sign bit set, which makes it the XOR mask we need.
  return _mm256_xor_ps(a, _mm256_set1_ps(-0.0f));
}
|
||||
// Negates every double of `a` by flipping its sign bit.
// Computing 0 - a instead would return +0.0 for an input of +0.0 (the correct
// negation is -0.0), so the sign bit is toggled directly with an XOR.
template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a)
{
  // -0.0 has only the sign bit set, which makes it the XOR mask we need.
  return _mm256_xor_pd(a, _mm256_set1_pd(-0.0));
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pconj(const Packet4d& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet8i pconj(const Packet8i& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmul<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_mul_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pmul<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_mul_pd(a,b); }
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pdiv<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_div_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pdiv<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_div_pd(a,b); }
|
||||
// Placeholder for packed integer division: it asserts unconditionally with a
// message saying the operation is unsupported by AVX, then returns a packet of
// zeros so that the function still has a return value.
template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, const Packet8i& /*b*/)
{
  eigen_assert(false && "packet integer division are not supported by AVX");
  const Packet8i zeros = pset1<Packet8i>(0);
  return zeros;
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
// clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
|
||||
// and gcc stupidly generates a vfmadd132ps instruction,
|
||||
// so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
|
||||
// the result of the product.
|
||||
Packet8f res = c;
|
||||
__asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
|
||||
return res;
|
||||
#else
|
||||
return _mm256_fmadd_ps(a,b,c);
|
||||
#endif
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
// see above
|
||||
Packet4d res = c;
|
||||
__asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
|
||||
return res;
|
||||
#else
|
||||
return _mm256_fmadd_pd(a,b,c);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f por<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_or_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d por<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_or_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pxor<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_xor_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pxor<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_xor_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pandnot<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_andnot_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pandnot<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_andnot_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pload<Packet8f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pload<Packet4d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8i pload<Packet8i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); }
|
||||
|
||||
// Loads 4 floats from memory and returns the packet {a0, a0, a1, a1, a2, a2, a3, a3}
|
||||
template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)
{
  // A variant built on _mm_loadu_ps + _mm256_insertf128_ps is not used because
  // _mm256_insertf128_ps is very slow on Haswell.
  // TODO try to find a way to avoid the need of a temporary register

  // Broadcast the four floats to both 128-bit lanes: {a0,a1,a2,a3, a0,a1,a2,a3}.
  const Packet8f both = _mm256_broadcast_ps((const __m128*)(const void*)from);

  // Build {a0,a1,a0,a1} from the lower lane and blend it back in; mask 15
  // replaces elements 0..3 only, which mimics an in-place permutation of the
  // lower 128 bits.
  const __m128 lowPairs = _mm_permute_ps(_mm256_castps256_ps128(both), _MM_SHUFFLE(1,0,1,0));
  const Packet8f mixed = _mm256_blend_ps(both, _mm256_castps128_ps256(lowPairs), 15);

  // The same per-lane permutation now duplicates elements 2 and 3 of each lane,
  // giving {a0,a0,a1,a1, a2,a2,a3,a3}.
  return _mm256_permute_ps(mixed, _MM_SHUFFLE(3,3,2,2));
}
|
||||
// Loads 2 doubles from memory and returns the packet {a0, a0, a1, a1}
|
||||
template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from)
{
  // Broadcast the two doubles to both 128-bit lanes: {a0,a1, a0,a1}.
  const Packet4d both = _mm256_broadcast_pd((const __m128d*)(const void*)from);
  // Control 0b1100: the lower lane keeps its first element twice and the upper
  // lane keeps its second element twice, giving {a0,a0, a1,a1}.
  const int kDupControl = 3<<2;
  return _mm256_permute_pd(both, kDupControl);
}
|
||||
|
||||
// Loads 2 floats from memory and returns the packet {a0, a0, a0, a0, a1, a1, a1, a1}
|
||||
template<> EIGEN_STRONG_INLINE Packet8f ploadquad<Packet8f>(const float* from)
{
  // Each 128-bit lane is one input value replicated four times.
  const __m128 lowLane  = _mm_broadcast_ss(from);      // {a0,a0,a0,a0}
  const __m128 highLane = _mm_broadcast_ss(from+1);    // {a1,a1,a1,a1}
  // Widen the lower lane to 256 bits, then place the second lane in the upper half.
  return _mm256_insertf128_ps(_mm256_castps128_ps256(lowLane), highLane, 1);
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd(to, from); }
|
||||
// Aligned store of 8 ints. Uses the aligned intrinsic so that it agrees with the EIGEN_DEBUG_ALIGNED_STORE tag, with pload<Packet8i>, and with the float/double pstore overloads.
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_si256(reinterpret_cast<__m256i*>(to), from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
|
||||
|
||||
// NOTE: leverage _mm256_i32gather_ps and _mm256_i32gather_pd if AVX2 instructions are available
|
||||
// NOTE: for the record the following seems to be slower: return _mm256_i32gather_ps(from, _mm256_set1_epi32(stride), 4);
|
||||
// Gathers 8 floats located `stride` elements apart, starting at `from`, into
// one packet; element i of the result is from[i*stride].
template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, DenseIndex stride)
{
  // _mm256_setr_ps takes its arguments lowest element first.
  return _mm256_setr_ps(from[0*stride], from[1*stride], from[2*stride], from[3*stride],
                        from[4*stride], from[5*stride], from[6*stride], from[7*stride]);
}
|
||||
// Gathers 4 doubles located `stride` elements apart, starting at `from`, into
// one packet; element i of the result is from[i*stride].
template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, DenseIndex stride)
{
  // _mm256_setr_pd takes its arguments lowest element first.
  return _mm256_setr_pd(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
}
|
||||
|
||||
// Scatters the 8 floats of `from` to memory: element i goes to to[stride*i].
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, DenseIndex stride)
{
  // Elements 0..3 live in the lower 128-bit lane. Each one is moved to
  // position 0 of a temporary so that _mm_cvtss_f32 can read it out.
  const __m128 lowerLane = _mm256_extractf128_ps(from, 0);
  to[stride*0] = _mm_cvtss_f32(lowerLane);
  to[stride*1] = _mm_cvtss_f32(_mm_permute_ps(lowerLane, 1));
  to[stride*2] = _mm_cvtss_f32(_mm_permute_ps(lowerLane, 2));
  to[stride*3] = _mm_cvtss_f32(_mm_permute_ps(lowerLane, 3));

  // Elements 4..7 live in the upper 128-bit lane.
  const __m128 upperLane = _mm256_extractf128_ps(from, 1);
  to[stride*4] = _mm_cvtss_f32(upperLane);
  to[stride*5] = _mm_cvtss_f32(_mm_permute_ps(upperLane, 1));
  to[stride*6] = _mm_cvtss_f32(_mm_permute_ps(upperLane, 2));
  to[stride*7] = _mm_cvtss_f32(_mm_permute_ps(upperLane, 3));
}
|
||||
// Scatters the 4 doubles of `from` to memory: element i goes to to[stride*i].
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, DenseIndex stride)
{
  // Elements 0 and 1 come from the lower 128-bit lane; the second one is moved
  // to position 0 so that _mm_cvtsd_f64 can read it out.
  const __m128d lowerLane = _mm256_extractf128_pd(from, 0);
  to[stride*0] = _mm_cvtsd_f64(lowerLane);
  to[stride*1] = _mm_cvtsd_f64(_mm_permute_pd(lowerLane, 1));

  // Elements 2 and 3 come from the upper 128-bit lane.
  const __m128d upperLane = _mm256_extractf128_pd(from, 1);
  to[stride*2] = _mm_cvtsd_f64(upperLane);
  to[stride*3] = _mm_cvtsd_f64(_mm_permute_pd(upperLane, 1));
}
|
||||
|
||||
// Fills the 8 floats starting at `to` with the scalar `a`, using the aligned
// packet store.
template<> EIGEN_STRONG_INLINE void pstore1<Packet8f>(float* to, const float& a)
{
  pstore(to, pset1<Packet8f>(a));
}
|
||||
// Fills the 4 doubles starting at `to` with the scalar `a`, using the aligned
// packet store.
template<> EIGEN_STRONG_INLINE void pstore1<Packet4d>(double* to, const double& a)
{
  pstore(to, pset1<Packet4d>(a));
}
|
||||
// Fills the 8 ints starting at `to` with the scalar `a`, using the packet
// store.
template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
{
  pstore(to, pset1<Packet8i>(a));
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
|
||||
// Returns element 0 of the packet.
template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
  // Reinterpret as the lower 128-bit lane, then read its lowest float.
  const __m128 lowerLane = _mm256_castps256_ps128(a);
  return _mm_cvtss_f32(lowerLane);
}
|
||||
// Returns element 0 of the packet.
template<> EIGEN_STRONG_INLINE double pfirst<Packet4d>(const Packet4d& a) {
  // Reinterpret as the lower 128-bit lane, then read its lowest double.
  const __m128d lowerLane = _mm256_castpd256_pd128(a);
  return _mm_cvtsd_f64(lowerLane);
}
|
||||
// Returns element 0 of the packet.
template<> EIGEN_STRONG_INLINE int pfirst<Packet8i>(const Packet8i& a) {
  // Reinterpret as the lower 128-bit lane, then read its lowest 32-bit integer.
  const __m128i lowerLane = _mm256_castsi256_si128(a);
  return _mm_cvtsi128_si32(lowerLane);
}
|
||||
|
||||
|
||||
// Reverses the element order: {a0,...,a7} -> {a7,...,a0}.
template<> EIGEN_STRONG_INLINE Packet8f preverse(const Packet8f& a)
{
  // Reverse the four floats inside each 128-bit lane...
  const __m256 reversedInLane = _mm256_shuffle_ps(a, a, _MM_SHUFFLE(0,1,2,3));
  // ...then exchange the two lanes.
  return _mm256_permute2f128_ps(reversedInLane, reversedInLane, 1);
}
|
||||
// Reverses the element order: {a0,a1,a2,a3} -> {a3,a2,a1,a0}.
template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
{
  // Swap the two doubles inside each 128-bit lane: {a1,a0, a3,a2}...
  const __m256d swappedInLane = _mm256_shuffle_pd(a,a,5);
  // ...then exchange the two lanes: {a3,a2, a1,a0}.
  return _mm256_permute2f128_pd(swappedInLane, swappedInLane, 1);
}
|
||||
|
||||
// pabs should be ok
|
||||
// Absolute value of each float, obtained by clearing the sign bit.
template<> EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a)
{
  // Every 32-bit element of the mask has all bits set except the sign bit.
  const __m256i keepMagnitude = _mm256_set1_epi32(0x7FFFFFFF);
  return _mm256_and_ps(a, _mm256_castsi256_ps(keepMagnitude));
}
|
||||
// Absolute value of each double, obtained by clearing the sign bit.
template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a)
{
  // Each 64-bit element is described by two 32-bit words, low word first; only
  // the top bit of the high word (the sign bit of the double) is cleared.
  const __m256i keepMagnitude = _mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,
                                                  0xFFFFFFFF,0x7FFFFFFF,
                                                  0xFFFFFFFF,0x7FFFFFFF,
                                                  0xFFFFFFFF,0x7FFFFFFF);
  return _mm256_and_pd(a, _mm256_castsi256_pd(keepMagnitude));
}
|
||||
|
||||
// preduxp should be ok
|
||||
// FIXME: why is this ok? why isn't the simple implementation working as expected?
|
||||
// Reduces eight packets at once: element i of the result is the sum of the
// eight floats of vecs[i].
template<> EIGEN_STRONG_INLINE Packet8f preduxp<Packet8f>(const Packet8f* vecs)
{
  // Round 1: horizontal adds over consecutive pairs of inputs (per 128-bit lane).
  const __m256 pair01 = _mm256_hadd_ps(vecs[0], vecs[1]);
  const __m256 pair23 = _mm256_hadd_ps(vecs[2], vecs[3]);
  const __m256 pair45 = _mm256_hadd_ps(vecs[4], vecs[5]);
  const __m256 pair67 = _mm256_hadd_ps(vecs[6], vecs[7]);

  // Round 2: a second horizontal add completes the per-lane sums.
  const __m256 lane01 = _mm256_hadd_ps(pair01, pair01);
  const __m256 lane23 = _mm256_hadd_ps(pair23, pair23);
  const __m256 lane45 = _mm256_hadd_ps(pair45, pair45);
  const __m256 lane67 = _mm256_hadd_ps(pair67, pair67);

  // Add each register to its lane-swapped copy so that both lanes hold the
  // full sums.
  const __m256 full01 = _mm256_add_ps(_mm256_permute2f128_ps(lane01, lane01, 0x23), lane01);
  const __m256 full23 = _mm256_add_ps(_mm256_permute2f128_ps(lane23, lane23, 0x23), lane23);
  const __m256 full45 = _mm256_add_ps(_mm256_permute2f128_ps(lane45, lane45, 0x23), lane45);
  const __m256 full67 = _mm256_add_ps(_mm256_permute2f128_ps(lane67, lane67, 0x23), lane67);

  // Pick the right elements from each partial result: 0xcc takes elements
  // 2,3,6,7 from the second operand, 0xf0 takes elements 4..7 from it.
  const __m256 lowerSource = _mm256_blend_ps(full01, full23, 0xcc);
  const __m256 upperSource = _mm256_blend_ps(full45, full67, 0xcc);
  return _mm256_blend_ps(lowerSource, upperSource, 0xf0);
}
|
||||
// Reduces four packets at once: element i of the result is the sum of the four
// doubles of vecs[i].
template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
{
  // Per-lane pair sums of vecs[0]/vecs[1], then added to the lane-swapped copy
  // so that both lanes hold the full sums.
  const Packet4d pair01 = _mm256_hadd_pd(vecs[0], vecs[1]);
  const Packet4d full01 = _mm256_add_pd(pair01, _mm256_permute2f128_pd(pair01, pair01, 1));

  // Same for vecs[2]/vecs[3].
  const Packet4d pair23 = _mm256_hadd_pd(vecs[2], vecs[3]);
  const Packet4d full23 = _mm256_add_pd(pair23, _mm256_permute2f128_pd(pair23, pair23, 1));

  // Elements 0,1 come from the first result, elements 2,3 from the second.
  return _mm256_blend_pd(full01, full23, 0xC);
}
|
||||
|
||||
// Returns the sum of the 8 floats of `a`.
template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
{
  // Pair the packet with its lane-swapped copy so that one horizontal add
  // covers all eight inputs.
  const Packet8f laneSwapped = _mm256_permute2f128_ps(a,a,1);
  const Packet8f sumsOf2 = _mm256_hadd_ps(a, laneSwapped);
  const Packet8f sumsOf4 = _mm256_hadd_ps(sumsOf2, sumsOf2);
  // After the third horizontal add, element 0 holds the total.
  return pfirst(_mm256_hadd_ps(sumsOf4, sumsOf4));
}
|
||||
// Returns the sum of the 4 doubles of `a`.
template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
{
  // Pair the packet with its lane-swapped copy so that one horizontal add
  // covers all four inputs.
  const Packet4d laneSwapped = _mm256_permute2f128_pd(a,a,1);
  const Packet4d sumsOf2 = _mm256_hadd_pd(a, laneSwapped);
  // After the second horizontal add, element 0 holds the total.
  return pfirst(_mm256_hadd_pd(sumsOf2, sumsOf2));
}
|
||||
|
||||
// Adds the upper 128-bit lane of `a` to its lower lane and returns the
// resulting 4-float packet.
template<> EIGEN_STRONG_INLINE Packet4f predux4<Packet8f>(const Packet8f& a)
{
  const __m128 lowerLane = _mm256_castps256_ps128(a);
  const __m128 upperLane = _mm256_extractf128_ps(a,1);
  return _mm_add_ps(lowerLane, upperLane);
}
|
||||
|
||||
// Returns the product of the 8 floats of `a`.
template<> EIGEN_STRONG_INLINE float predux_mul<Packet8f>(const Packet8f& a)
{
  // Combine the two 128-bit lanes.
  const Packet8f acrossLanes = _mm256_mul_ps(a, _mm256_permute2f128_ps(a,a,1));
  // Combine the two halves of each lane.
  const Packet8f acrossHalves =
      _mm256_mul_ps(acrossLanes, _mm256_shuffle_ps(acrossLanes,acrossLanes,_MM_SHUFFLE(1,0,3,2)));
  // Combine elements 0 and 1; element 0 then holds the full product.
  return pfirst(_mm256_mul_ps(acrossHalves, _mm256_shuffle_ps(acrossHalves,acrossHalves,1)));
}
|
||||
// Returns the product of the 4 doubles of `a`.
template<> EIGEN_STRONG_INLINE double predux_mul<Packet4d>(const Packet4d& a)
{
  // Combine the two 128-bit lanes.
  const Packet4d acrossLanes = _mm256_mul_pd(a, _mm256_permute2f128_pd(a,a,1));
  // Combine elements 0 and 1; element 0 then holds the full product.
  return pfirst(_mm256_mul_pd(acrossLanes, _mm256_shuffle_pd(acrossLanes,acrossLanes,1)));
}
|
||||
|
||||
// Returns the minimum of the 8 floats of `a`.
template<> EIGEN_STRONG_INLINE float predux_min<Packet8f>(const Packet8f& a)
{
  // Combine the two 128-bit lanes.
  const Packet8f acrossLanes = _mm256_min_ps(a, _mm256_permute2f128_ps(a,a,1));
  // Combine the two halves of each lane.
  const Packet8f acrossHalves =
      _mm256_min_ps(acrossLanes, _mm256_shuffle_ps(acrossLanes,acrossLanes,_MM_SHUFFLE(1,0,3,2)));
  // Combine elements 0 and 1; element 0 then holds the overall minimum.
  return pfirst(_mm256_min_ps(acrossHalves, _mm256_shuffle_ps(acrossHalves,acrossHalves,1)));
}
|
||||
// Returns the minimum of the 4 doubles of `a`.
template<> EIGEN_STRONG_INLINE double predux_min<Packet4d>(const Packet4d& a)
{
  // Combine the two 128-bit lanes.
  const Packet4d acrossLanes = _mm256_min_pd(a, _mm256_permute2f128_pd(a,a,1));
  // Combine elements 0 and 1; element 0 then holds the overall minimum.
  return pfirst(_mm256_min_pd(acrossLanes, _mm256_shuffle_pd(acrossLanes, acrossLanes, 1)));
}
|
||||
|
||||
// Returns the maximum of the 8 floats of `a`.
template<> EIGEN_STRONG_INLINE float predux_max<Packet8f>(const Packet8f& a)
{
  // Combine the two 128-bit lanes.
  const Packet8f acrossLanes = _mm256_max_ps(a, _mm256_permute2f128_ps(a,a,1));
  // Combine the two halves of each lane.
  const Packet8f acrossHalves =
      _mm256_max_ps(acrossLanes, _mm256_shuffle_ps(acrossLanes,acrossLanes,_MM_SHUFFLE(1,0,3,2)));
  // Combine elements 0 and 1; element 0 then holds the overall maximum.
  return pfirst(_mm256_max_ps(acrossHalves, _mm256_shuffle_ps(acrossHalves,acrossHalves,1)));
}
|
||||
|
||||
// Returns the maximum of the 4 doubles of `a`.
template<> EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a)
{
  // Combine the two 128-bit lanes.
  const Packet4d acrossLanes = _mm256_max_pd(a, _mm256_permute2f128_pd(a,a,1));
  // Combine elements 0 and 1; element 0 then holds the overall maximum.
  return pfirst(_mm256_max_pd(acrossLanes, _mm256_shuffle_pd(acrossLanes, acrossLanes, 1)));
}
|
||||
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet8f>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet8f& first, const Packet8f& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 1);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
|
||||
first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0x88);
|
||||
}
|
||||
else if (Offset==2)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 3);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
|
||||
first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0xcc);
|
||||
}
|
||||
else if (Offset==3)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 7);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
|
||||
first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0xee);
|
||||
}
|
||||
else if (Offset==4)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 15);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
|
||||
first = _mm256_permute_ps(_mm256_permute2f128_ps (tmp, tmp, 1), _MM_SHUFFLE(3,2,1,0));
|
||||
}
|
||||
else if (Offset==5)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 31);
|
||||
first = _mm256_permute2f128_ps(first, first, 1);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
|
||||
first = _mm256_permute2f128_ps(tmp, tmp, 1);
|
||||
first = _mm256_blend_ps(tmp, first, 0x88);
|
||||
}
|
||||
else if (Offset==6)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 63);
|
||||
first = _mm256_permute2f128_ps(first, first, 1);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
|
||||
first = _mm256_permute2f128_ps(tmp, tmp, 1);
|
||||
first = _mm256_blend_ps(tmp, first, 0xcc);
|
||||
}
|
||||
else if (Offset==7)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 127);
|
||||
first = _mm256_permute2f128_ps(first, first, 1);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
|
||||
first = _mm256_permute2f128_ps(tmp, tmp, 1);
|
||||
first = _mm256_blend_ps(tmp, first, 0xee);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4d>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet4d& first, const Packet4d& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
first = _mm256_blend_pd(first, second, 1);
|
||||
__m256d tmp = _mm256_permute_pd(first, 5);
|
||||
first = _mm256_permute2f128_pd(tmp, tmp, 1);
|
||||
first = _mm256_blend_pd(tmp, first, 0xA);
|
||||
}
|
||||
else if (Offset==2)
|
||||
{
|
||||
first = _mm256_blend_pd(first, second, 3);
|
||||
first = _mm256_permute2f128_pd(first, first, 1);
|
||||
}
|
||||
else if (Offset==3)
|
||||
{
|
||||
first = _mm256_blend_pd(first, second, 7);
|
||||
__m256d tmp = _mm256_permute_pd(first, 5);
|
||||
first = _mm256_permute2f128_pd(tmp, tmp, 1);
|
||||
first = _mm256_blend_pd(tmp, first, 5);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// In-place transpose of an 8x8 block of floats held in eight packets.
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet8f,8>& kernel) {
  // Stage 1: interleave neighbouring rows (per 128-bit lane).
  const __m256 lo01 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
  const __m256 hi01 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
  const __m256 lo23 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
  const __m256 hi23 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
  const __m256 lo45 = _mm256_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
  const __m256 hi45 = _mm256_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
  const __m256 lo67 = _mm256_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
  const __m256 hi67 = _mm256_unpackhi_ps(kernel.packet[6], kernel.packet[7]);

  // Stage 2: combine pairs of interleaved rows into groups of four rows.
  const __m256 top0 = _mm256_shuffle_ps(lo01,lo23,_MM_SHUFFLE(1,0,1,0));
  const __m256 top1 = _mm256_shuffle_ps(lo01,lo23,_MM_SHUFFLE(3,2,3,2));
  const __m256 top2 = _mm256_shuffle_ps(hi01,hi23,_MM_SHUFFLE(1,0,1,0));
  const __m256 top3 = _mm256_shuffle_ps(hi01,hi23,_MM_SHUFFLE(3,2,3,2));
  const __m256 bot0 = _mm256_shuffle_ps(lo45,lo67,_MM_SHUFFLE(1,0,1,0));
  const __m256 bot1 = _mm256_shuffle_ps(lo45,lo67,_MM_SHUFFLE(3,2,3,2));
  const __m256 bot2 = _mm256_shuffle_ps(hi45,hi67,_MM_SHUFFLE(1,0,1,0));
  const __m256 bot3 = _mm256_shuffle_ps(hi45,hi67,_MM_SHUFFLE(3,2,3,2));

  // Stage 3: 0x20 joins the lower lanes of both operands, 0x31 the upper lanes.
  kernel.packet[0] = _mm256_permute2f128_ps(top0, bot0, 0x20);
  kernel.packet[1] = _mm256_permute2f128_ps(top1, bot1, 0x20);
  kernel.packet[2] = _mm256_permute2f128_ps(top2, bot2, 0x20);
  kernel.packet[3] = _mm256_permute2f128_ps(top3, bot3, 0x20);
  kernel.packet[4] = _mm256_permute2f128_ps(top0, bot0, 0x31);
  kernel.packet[5] = _mm256_permute2f128_ps(top1, bot1, 0x31);
  kernel.packet[6] = _mm256_permute2f128_ps(top2, bot2, 0x31);
  kernel.packet[7] = _mm256_permute2f128_ps(top3, bot3, 0x31);
}
|
||||
|
||||
// In-place transpose-style rearrangement of a block of four 8-float packets.
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet8f,4>& kernel) {
  // Stage 1: interleave neighbouring rows (per 128-bit lane).
  const __m256 lo01 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
  const __m256 hi01 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
  const __m256 lo23 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
  const __m256 hi23 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);

  // Stage 2: combine the interleaved rows so each lane holds one 4-element column.
  const __m256 col0 = _mm256_shuffle_ps(lo01,lo23,_MM_SHUFFLE(1,0,1,0));
  const __m256 col1 = _mm256_shuffle_ps(lo01,lo23,_MM_SHUFFLE(3,2,3,2));
  const __m256 col2 = _mm256_shuffle_ps(hi01,hi23,_MM_SHUFFLE(1,0,1,0));
  const __m256 col3 = _mm256_shuffle_ps(hi01,hi23,_MM_SHUFFLE(3,2,3,2));

  // Stage 3: 0x20 joins the lower lanes of both operands, 0x31 the upper lanes.
  kernel.packet[0] = _mm256_permute2f128_ps(col0, col1, 0x20);
  kernel.packet[1] = _mm256_permute2f128_ps(col2, col3, 0x20);
  kernel.packet[2] = _mm256_permute2f128_ps(col0, col1, 0x31);
  kernel.packet[3] = _mm256_permute2f128_ps(col2, col3, 0x31);
}
|
||||
|
||||
// In-place transpose of a 4x4 block of doubles held in four packets.
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4d,4>& kernel) {
  // Control 0 picks the first double of every lane from both operands,
  // control 15 picks the second one.
  const __m256d even01 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);
  const __m256d odd01  = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 15);
  const __m256d even23 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);
  const __m256d odd23  = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 15);

  // 0x20 joins the lower lanes of both operands, 0x31 the upper lanes.
  kernel.packet[0] = _mm256_permute2f128_pd(even01, even23, 0x20);
  kernel.packet[1] = _mm256_permute2f128_pd(odd01,  odd23,  0x20);
  kernel.packet[2] = _mm256_permute2f128_pd(even01, even23, 0x31);
  kernel.packet[3] = _mm256_permute2f128_pd(odd01,  odd23,  0x31);
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PACKET_MATH_AVX_H
|
||||
@@ -16,14 +16,11 @@ namespace internal {
|
||||
|
||||
static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
||||
static Packet16uc p16uc_COMPLEX_RE = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
|
||||
static Packet16uc p16uc_COMPLEX_IM = vec_sld(p16uc_DUPLICATE, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
|
||||
static Packet16uc p16uc_COMPLEX_IM = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
|
||||
static Packet16uc p16uc_COMPLEX_REV = vec_sld(p16uc_REVERSE, p16uc_REVERSE, 8);//{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
|
||||
static Packet16uc p16uc_COMPLEX_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
|
||||
static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_COMPLEX_RE, (Packet4ui)p16uc_COMPLEX_IM);//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
|
||||
static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_COMPLEX_RE, (Packet4ui)p16uc_COMPLEX_IM);//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
|
||||
static Packet16uc p16uc_COMPLEX_MASK16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8);//{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
|
||||
static Packet16uc p16uc_COMPLEX_TRANSPOSE_0 = vec_add(p16uc_PSET_HI, p16uc_COMPLEX_MASK16);//{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
|
||||
static Packet16uc p16uc_COMPLEX_TRANSPOSE_1 = vec_add(p16uc_PSET_LO, p16uc_COMPLEX_MASK16);//{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};
|
||||
static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 1));//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
|
||||
static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 2), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 3));//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
|
||||
|
||||
//---------- float ----------
|
||||
struct Packet2cf
|
||||
@@ -36,7 +33,6 @@ struct Packet2cf
|
||||
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
{
|
||||
typedef Packet2cf type;
|
||||
typedef Packet2cf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
@@ -55,7 +51,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
@@ -69,22 +65,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||
af[0] = from[0*stride];
|
||||
af[1] = from[1*stride];
|
||||
return Packet2cf(vec_ld(0, (const float*)af));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||
vec_st(from.v, 0, (float*)af);
|
||||
to[0*stride] = af[0];
|
||||
to[1*stride] = af[1];
|
||||
}
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_add(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_sub(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
|
||||
@@ -230,13 +210,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x
|
||||
return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
|
||||
{
|
||||
Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_0);
|
||||
kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_1);
|
||||
kernel.packet[0].v = tmp;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
132
Eigen/src/Core/arch/AltiVec/PacketMath.h
Executable file → Normal file
132
Eigen/src/Core/arch/AltiVec/PacketMath.h
Executable file → Normal file
@@ -1,7 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2014 Konstantinos Margaritis <markos@freevec.org>
|
||||
// Copyright (C) 2008 Konstantinos Margaritis <markos@codex.gr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
@@ -18,10 +18,6 @@ namespace internal {
|
||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_FUSED_MADD
|
||||
#define EIGEN_HAS_FUSED_MADD 1
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_FUSE_CJMADD
|
||||
#define EIGEN_HAS_FUSE_CJMADD 1
|
||||
#endif
|
||||
@@ -60,32 +56,29 @@ typedef __vector unsigned char Packet16uc;
|
||||
#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
|
||||
|
||||
// Define global static constants:
|
||||
static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
|
||||
static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
|
||||
static Packet16uc p16uc_REVERSE = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
|
||||
static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0); //{ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}
|
||||
static Packet16uc p16uc_DUPLICATE = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7};
|
||||
static Packet4f p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 };
|
||||
static Packet4i p4i_COUNTDOWN = { 3, 2, 1, 0 };
|
||||
static Packet16uc p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
|
||||
static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
|
||||
static Packet16uc p16uc_DUPLICATE = {0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7};
|
||||
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}
|
||||
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}
|
||||
static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0);
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0);
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1);
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16);
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1);
|
||||
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0);
|
||||
static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1);
|
||||
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet4f type;
|
||||
typedef Packet4f half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
HasHalfPacket=0,
|
||||
|
||||
// FIXME check the Has*
|
||||
HasDiv = 1,
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
@@ -96,7 +89,6 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
template<> struct packet_traits<int> : default_packet_traits
|
||||
{
|
||||
typedef Packet4i type;
|
||||
typedef Packet4i half;
|
||||
enum {
|
||||
// FIXME check the Has*
|
||||
Vectorizable = 1,
|
||||
@@ -105,8 +97,8 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
|
||||
/*
|
||||
inline std::ostream & operator <<(std::ostream & s, const Packet4f & v)
|
||||
{
|
||||
@@ -152,7 +144,6 @@ inline std::ostream & operator <<(std::ostream & s, const Packetbi & v)
|
||||
return s;
|
||||
}
|
||||
*/
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
|
||||
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
|
||||
float EIGEN_ALIGN16 af[4];
|
||||
@@ -170,65 +161,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
|
||||
return vc;
|
||||
}
|
||||
|
||||
|
||||
// Loads one vector of four floats from `a` and broadcasts its elements:
// on return a0, a1, a2 and a3 hold element 0, 1, 2 and 3 respectively,
// each replicated across the packet by vec_splat.
// NOTE(review): the data is read with vec_ld, so `a` is presumably expected
// to be 16-byte aligned -- confirm against the callers.
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet4f>(const float *a,
|
||||
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
|
||||
{
|
||||
// a3 doubles as the temporary holding the loaded vector; it is overwritten last.
a3 = vec_ld(0,a);
|
||||
a0 = vec_splat(a3, 0);
|
||||
a1 = vec_splat(a3, 1);
|
||||
a2 = vec_splat(a3, 2);
|
||||
a3 = vec_splat(a3, 3);
|
||||
}
|
||||
// Integer counterpart of the float pbroadcast4: loads one vector of four ints
// from `a` and returns element 0, 1, 2 and 3 replicated in a0, a1, a2 and a3.
// NOTE(review): the data is read with vec_ld, so `a` is presumably expected
// to be 16-byte aligned -- confirm against the callers.
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet4i>(const int *a,
|
||||
Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
|
||||
{
|
||||
// a3 doubles as the temporary holding the loaded vector; it is overwritten last.
a3 = vec_ld(0,a);
|
||||
a0 = vec_splat(a3, 0);
|
||||
a1 = vec_splat(a3, 1);
|
||||
a2 = vec_splat(a3, 2);
|
||||
a3 = vec_splat(a3, 3);
|
||||
}
|
||||
|
||||
// Strided gather: reads from[0], from[stride], from[2*stride] and from[3*stride]
// and returns them as one Packet4f.
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride)
|
||||
{
|
||||
// Stage the four scalars in an EIGEN_ALIGN16 buffer so they can be loaded with vec_ld.
float EIGEN_ALIGN16 af[4];
|
||||
af[0] = from[0*stride];
|
||||
af[1] = from[1*stride];
|
||||
af[2] = from[2*stride];
|
||||
af[3] = from[3*stride];
|
||||
return vec_ld(0, af);
|
||||
}
|
||||
// Strided gather: reads from[0], from[stride], from[2*stride] and from[3*stride]
// and returns them as one Packet4i.
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride)
|
||||
{
|
||||
// Stage the four scalars in an EIGEN_ALIGN16 buffer so they can be loaded with vec_ld.
int EIGEN_ALIGN16 ai[4];
|
||||
ai[0] = from[0*stride];
|
||||
ai[1] = from[1*stride];
|
||||
ai[2] = from[2*stride];
|
||||
ai[3] = from[3*stride];
|
||||
return vec_ld(0, ai);
|
||||
}
|
||||
// Strided scatter: writes the four elements of `from` to to[0], to[stride],
// to[2*stride] and to[3*stride].
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride)
|
||||
{
|
||||
// Spill the packet to an EIGEN_ALIGN16 buffer with vec_st, then copy element by element.
float EIGEN_ALIGN16 af[4];
|
||||
vec_st(from, 0, af);
|
||||
to[0*stride] = af[0];
|
||||
to[1*stride] = af[1];
|
||||
to[2*stride] = af[2];
|
||||
to[3*stride] = af[3];
|
||||
}
|
||||
// Strided scatter: writes the four elements of `from` to to[0], to[stride],
// to[2*stride] and to[3*stride].
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride)
|
||||
{
|
||||
// Spill the packet to an EIGEN_ALIGN16 buffer with vec_st, then copy element by element.
int EIGEN_ALIGN16 ai[4];
|
||||
vec_st(from, 0, ai);
|
||||
to[0*stride] = ai[0];
|
||||
to[1*stride] = ai[1];
|
||||
to[2*stride] = ai[2];
|
||||
to[3*stride] = ai[3];
|
||||
}
|
||||
|
||||
// Returns the element-wise sum (vec_add) of `a` broadcast to all lanes and the constant packet p4f_COUNTDOWN.
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return vec_add(pset1<Packet4f>(a), p4f_COUNTDOWN); }
|
||||
// Returns the element-wise sum (vec_add) of `a` broadcast to all lanes and the constant packet p4i_COUNTDOWN.
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return vec_add(pset1<Packet4i>(a), p4i_COUNTDOWN); }
|
||||
|
||||
@@ -354,15 +286,15 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
|
||||
{
|
||||
Packet4f p;
|
||||
if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from);
|
||||
else p = ploadu<Packet4f>(from);
|
||||
if((ptrdiff_t(&from) % 16) == 0) p = pload<Packet4f>(from);
|
||||
else p = ploadu<Packet4f>(from);
|
||||
return vec_perm(p, p, p16uc_DUPLICATE);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
|
||||
{
|
||||
Packet4i p;
|
||||
if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from);
|
||||
else p = ploadu<Packet4i>(from);
|
||||
if((ptrdiff_t(&from) % 16) == 0) p = pload<Packet4i>(from);
|
||||
else p = ploadu<Packet4i>(from);
|
||||
return vec_perm(p, p, p16uc_DUPLICATE);
|
||||
}
|
||||
|
||||
@@ -562,32 +494,6 @@ struct palign_impl<Offset,Packet4i>
|
||||
}
|
||||
};
|
||||
|
||||
// Transposes, in place, the 4x4 block of floats stored as kernel.packet[0..3],
// using two rounds of vec_mergeh / vec_mergel interleaves.
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4f,4>& kernel) {
|
||||
Packet4f t0, t1, t2, t3;
|
||||
// Round 1: interleave rows 0/2 and rows 1/3 (high halves, then low halves).
t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
|
||||
t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
|
||||
t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
|
||||
t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
|
||||
// Round 2: interleave the intermediates to form the transposed rows.
kernel.packet[0] = vec_mergeh(t0, t2);
|
||||
kernel.packet[1] = vec_mergel(t0, t2);
|
||||
kernel.packet[2] = vec_mergeh(t1, t3);
|
||||
kernel.packet[3] = vec_mergel(t1, t3);
|
||||
}
|
||||
|
||||
// Transposes, in place, the 4x4 block of ints stored as kernel.packet[0..3],
// using two rounds of vec_mergeh / vec_mergel interleaves.
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4i,4>& kernel) {
|
||||
Packet4i t0, t1, t2, t3;
|
||||
// Round 1: interleave rows 0/2 and rows 1/3 (high halves, then low halves).
t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
|
||||
t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
|
||||
t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
|
||||
t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
|
||||
// Round 2: interleave the intermediates to form the transposed rows.
kernel.packet[0] = vec_mergeh(t0, t2);
|
||||
kernel.packet[1] = vec_mergel(t0, t2);
|
||||
kernel.packet[2] = vec_mergeh(t1, t3);
|
||||
kernel.packet[3] = vec_mergel(t1, t3);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
ADD_SUBDIRECTORY(SSE)
|
||||
ADD_SUBDIRECTORY(AltiVec)
|
||||
ADD_SUBDIRECTORY(NEON)
|
||||
ADD_SUBDIRECTORY(AVX)
|
||||
ADD_SUBDIRECTORY(Default)
|
||||
|
||||
@@ -28,7 +28,6 @@ struct Packet2cf
|
||||
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
{
|
||||
typedef Packet2cf type;
|
||||
typedef Packet2cf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
@@ -47,7 +46,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
@@ -111,22 +110,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
|
||||
|
||||
// Strided gather of two complex<float> values, from[0] and from[stride].
// The result lanes are filled as: re(from[0]), im(from[0]), re(from[stride]), im(from[stride]).
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride)
|
||||
{
|
||||
// res is declared without an initializer; all four lanes are assigned below before it is used.
Packet4f res;
|
||||
res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
|
||||
res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
|
||||
res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
|
||||
res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
|
||||
return Packet2cf(res);
|
||||
}
|
||||
|
||||
// Strided scatter of the two complex<float> values held in `from`:
// lanes 0/1 (real/imaginary) go to to[0], lanes 2/3 go to to[stride].
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride)
|
||||
{
|
||||
to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
|
||||
to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
||||
@@ -263,14 +246,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
|
||||
return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
|
||||
}
|
||||
|
||||
// Transposes, in place, the 2x2 block of complex<float> stored as kernel.packet[0..1]:
// packet[0] receives the low halves of both inputs, packet[1] the high halves.
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet2cf,2>& kernel) {
|
||||
// Build the new packet[1] first, because the next statement overwrites packet[0].
float32x4_t tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
|
||||
kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
|
||||
kernel.packet[1].v = tmp;
|
||||
}
|
||||
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -49,7 +49,6 @@ typedef uint32x4_t Packet4ui;
|
||||
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
|
||||
#endif
|
||||
|
||||
|
||||
// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function
|
||||
// which is available on LLVM and GCC (at least)
|
||||
#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
|
||||
@@ -66,7 +65,6 @@ typedef uint32x4_t Packet4ui;
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet4f type;
|
||||
typedef Packet4f half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
@@ -84,7 +82,6 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
template<> struct packet_traits<int> : default_packet_traits
|
||||
{
|
||||
typedef Packet4i type;
|
||||
typedef Packet4i half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
@@ -97,13 +94,12 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
// workaround gcc 4.2, 4.3 and 4.4 compilation issue
|
||||
EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
|
||||
EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
|
||||
EIGEN_STRONG_INLINE float32x2_t vld1_dup_f32 (const float* x) { return ::vld1_dup_f32 ((const float32_t*)x); }
|
||||
EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q_f32((float32_t*)to,from); }
|
||||
EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
|
||||
#endif
|
||||
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); }
|
||||
@@ -222,40 +218,6 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& f
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
|
||||
|
||||
// Strided gather: lane i of the result is from[i*stride], for i = 0..3.
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride)
|
||||
{
|
||||
// res is declared without an initializer; all four lanes are assigned below before it is returned.
Packet4f res;
|
||||
res = vsetq_lane_f32(from[0*stride], res, 0);
|
||||
res = vsetq_lane_f32(from[1*stride], res, 1);
|
||||
res = vsetq_lane_f32(from[2*stride], res, 2);
|
||||
res = vsetq_lane_f32(from[3*stride], res, 3);
|
||||
return res;
|
||||
}
|
||||
// Strided gather: lane i of the result is from[i*stride], for i = 0..3.
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride)
|
||||
{
|
||||
// res is declared without an initializer; all four lanes are assigned below before it is returned.
Packet4i res;
|
||||
res = vsetq_lane_s32(from[0*stride], res, 0);
|
||||
res = vsetq_lane_s32(from[1*stride], res, 1);
|
||||
res = vsetq_lane_s32(from[2*stride], res, 2);
|
||||
res = vsetq_lane_s32(from[3*stride], res, 3);
|
||||
return res;
|
||||
}
|
||||
|
||||
// Strided scatter: writes lane i of `from` to to[i*stride], for i = 0..3.
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride)
|
||||
{
|
||||
to[stride*0] = vgetq_lane_f32(from, 0);
|
||||
to[stride*1] = vgetq_lane_f32(from, 1);
|
||||
to[stride*2] = vgetq_lane_f32(from, 2);
|
||||
to[stride*3] = vgetq_lane_f32(from, 3);
|
||||
}
|
||||
// Strided scatter: writes lane i of `from` to to[i*stride], for i = 0..3.
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride)
|
||||
{
|
||||
to[stride*0] = vgetq_lane_s32(from, 0);
|
||||
to[stride*1] = vgetq_lane_s32(from, 1);
|
||||
to[stride*2] = vgetq_lane_s32(from, 2);
|
||||
to[stride*3] = vgetq_lane_s32(from, 3);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ARM_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ARM_PREFETCH(addr); }
|
||||
|
||||
@@ -448,30 +410,9 @@ PALIGN_NEON(0,Packet4i,vextq_s32)
|
||||
PALIGN_NEON(1,Packet4i,vextq_s32)
|
||||
PALIGN_NEON(2,Packet4i,vextq_s32)
|
||||
PALIGN_NEON(3,Packet4i,vextq_s32)
|
||||
|
||||
|
||||
#undef PALIGN_NEON
|
||||
|
||||
// Transposes, in place, the 4x4 block of floats stored as kernel.packet[0..3].
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4f,4>& kernel) {
|
||||
// Interleave rows 0/1 and rows 2/3; each vzipq result carries two vectors (val[0], val[1]).
float32x4x2_t tmp1 = vzipq_f32(kernel.packet[0], kernel.packet[1]);
|
||||
float32x4x2_t tmp2 = vzipq_f32(kernel.packet[2], kernel.packet[3]);
|
||||
|
||||
// Each output row is one 64-bit half of tmp1 followed by the matching half of tmp2.
kernel.packet[0] = vcombine_f32(vget_low_f32(tmp1.val[0]), vget_low_f32(tmp2.val[0]));
|
||||
kernel.packet[1] = vcombine_f32(vget_high_f32(tmp1.val[0]), vget_high_f32(tmp2.val[0]));
|
||||
kernel.packet[2] = vcombine_f32(vget_low_f32(tmp1.val[1]), vget_low_f32(tmp2.val[1]));
|
||||
kernel.packet[3] = vcombine_f32(vget_high_f32(tmp1.val[1]), vget_high_f32(tmp2.val[1]));
|
||||
}
|
||||
|
||||
// Transposes, in place, the 4x4 block of ints stored as kernel.packet[0..3].
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4i,4>& kernel) {
|
||||
// Interleave rows 0/1 and rows 2/3; each vzipq result carries two vectors (val[0], val[1]).
int32x4x2_t tmp1 = vzipq_s32(kernel.packet[0], kernel.packet[1]);
|
||||
int32x4x2_t tmp2 = vzipq_s32(kernel.packet[2], kernel.packet[3]);
|
||||
// Each output row is one 64-bit half of tmp1 followed by the matching half of tmp2.
kernel.packet[0] = vcombine_s32(vget_low_s32(tmp1.val[0]), vget_low_s32(tmp2.val[0]));
|
||||
kernel.packet[1] = vcombine_s32(vget_high_s32(tmp1.val[0]), vget_high_s32(tmp2.val[0]));
|
||||
kernel.packet[2] = vcombine_s32(vget_low_s32(tmp1.val[1]), vget_low_s32(tmp2.val[1]));
|
||||
kernel.packet[3] = vcombine_s32(vget_high_s32(tmp1.val[1]), vget_high_s32(tmp2.val[1]));
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -22,18 +22,13 @@ struct Packet2cf
|
||||
__m128 v;
|
||||
};
|
||||
|
||||
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
|
||||
// to leverage AVX instructions.
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
{
|
||||
typedef Packet2cf type;
|
||||
typedef Packet2cf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 2,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
@@ -47,9 +42,8 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
|
||||
@@ -110,23 +104,8 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); }
|
||||
|
||||
|
||||
// Strided gather of two complex<float> values, from[0] and from[stride].
// _mm_set_ps takes its arguments from the highest lane down to the lowest, so the
// resulting lane order is re(from[0]), im(from[0]), re(from[stride]), im(from[stride]).
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride)
|
||||
{
|
||||
return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]),
|
||||
std::imag(from[0*stride]), std::real(from[0*stride])));
|
||||
}
|
||||
|
||||
// Strided scatter of the two complex<float> values held in `from`:
// lanes 0/1 (real/imaginary) go to to[0], lanes 2/3 go to to[stride].
// Each scalar is extracted by shuffling lane k (k = 0..3) into lane 0 with
// _mm_shuffle_ps and then reading lane 0 with _mm_cvtss_f32.
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride)
|
||||
{
|
||||
to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
|
||||
to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
|
||||
@@ -145,7 +124,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Pack
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v))))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(_mm_castps_pd(a.v)))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
@@ -235,7 +214,7 @@ template<> struct conj_helper<Packet4f, Packet2cf, false,false>
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
|
||||
{ return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }
|
||||
{ return Packet2cf(Eigen::internal::pmul(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet4f, false,false>
|
||||
@@ -244,7 +223,7 @@ template<> struct conj_helper<Packet2cf, Packet4f, false,false>
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
|
||||
{ return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); }
|
||||
{ return Packet2cf(Eigen::internal::pmul(x.v, y)); }
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
@@ -269,18 +248,13 @@ struct Packet1cd
|
||||
__m128d v;
|
||||
};
|
||||
|
||||
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
|
||||
// to leverage AVX instructions.
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
{
|
||||
typedef Packet1cd type;
|
||||
typedef Packet1cd half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 0,
|
||||
size = 1,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
@@ -294,13 +268,12 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; };
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(a.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
|
||||
{
|
||||
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
|
||||
@@ -338,8 +311,8 @@ template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<dou
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
|
||||
|
||||
// FIXME force unaligned store, this is a temporary fix
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
|
||||
@@ -437,7 +410,7 @@ template<> struct conj_helper<Packet2d, Packet1cd, false,false>
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
|
||||
{ return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); }
|
||||
{ return Packet1cd(Eigen::internal::pmul(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet2d, false,false>
|
||||
@@ -446,7 +419,7 @@ template<> struct conj_helper<Packet1cd, Packet2d, false,false>
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
|
||||
{ return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); }
|
||||
{ return Packet1cd(Eigen::internal::pmul(x.v, y)); }
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
@@ -459,17 +432,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, con
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
|
||||
{
|
||||
return Packet1cd(preverse(Packet2d(x.v)));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet2cf,2>& kernel) {
|
||||
__m128d w1 = _mm_castps_pd(kernel.packet[0].v);
|
||||
__m128d w2 = _mm_castps_pd(kernel.packet[1].v);
|
||||
|
||||
__m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
|
||||
kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
|
||||
kernel.packet[1].v = tmp;
|
||||
return Packet1cd(preverse(x.v));
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
@@ -52,7 +52,7 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
|
||||
|
||||
Packet4i emm0;
|
||||
|
||||
Packet4f invalid_mask = _mm_cmplt_ps(x, _mm_setzero_ps());
|
||||
Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps()); // not greater equal is true if x is NaN
|
||||
Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
|
||||
|
||||
x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
|
||||
@@ -63,7 +63,7 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
|
||||
x = _mm_or_ps(x, p4f_half);
|
||||
|
||||
emm0 = _mm_sub_epi32(emm0, p4i_0x7f);
|
||||
Packet4f e = padd(Packet4f(_mm_cvtepi32_ps(emm0)), p4f_1);
|
||||
Packet4f e = padd(_mm_cvtepi32_ps(emm0), p4f_1);
|
||||
|
||||
/* part2:
|
||||
if( x < SQRTHF ) {
|
||||
@@ -72,9 +72,9 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
|
||||
} else { x = x - 1.0; }
|
||||
*/
|
||||
Packet4f mask = _mm_cmplt_ps(x, p4f_cephes_SQRTHF);
|
||||
Packet4f tmp = pand(x, mask);
|
||||
Packet4f tmp = _mm_and_ps(x, mask);
|
||||
x = psub(x, p4f_1);
|
||||
e = psub(e, pand(p4f_1, mask));
|
||||
e = psub(e, _mm_and_ps(p4f_1, mask));
|
||||
x = padd(x, tmp);
|
||||
|
||||
Packet4f x2 = pmul(x,x);
|
||||
@@ -138,7 +138,6 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
fx = _mm_floor_ps(fx);
|
||||
#else
|
||||
tmp = _mm_setzero_ps();
|
||||
emm0 = _mm_cvttps_epi32(fx);
|
||||
tmp = _mm_cvtepi32_ps(emm0);
|
||||
/* if greater, subtract 1 */
|
||||
@@ -167,7 +166,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
|
||||
emm0 = _mm_cvttps_epi32(fx);
|
||||
emm0 = _mm_add_epi32(emm0, p4i_0x7f);
|
||||
emm0 = _mm_slli_epi32(emm0, 23);
|
||||
return pmul(y, Packet4f(_mm_castsi128_ps(emm0)));
|
||||
return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x);
|
||||
}
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
@@ -207,7 +206,6 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
fx = _mm_floor_pd(fx);
|
||||
#else
|
||||
tmp = _mm_setzero_pd();
|
||||
emm0 = _mm_cvttpd_epi32(fx);
|
||||
tmp = _mm_cvtepi32_pd(emm0);
|
||||
/* if greater, subtract 1 */
|
||||
@@ -241,7 +239,7 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
emm0 = _mm_add_epi32(emm0, p4i_1023_0);
|
||||
emm0 = _mm_slli_epi32(emm0, 20);
|
||||
emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
|
||||
return pmul(x, Packet2d(_mm_castsi128_pd(emm0)));
|
||||
return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x);
|
||||
}
|
||||
|
||||
/* evaluation of 4 sines at once, using SSE2 intrinsics.
|
||||
|
||||
215
Eigen/src/Core/arch/SSE/PacketMath.h
Executable file → Normal file
215
Eigen/src/Core/arch/SSE/PacketMath.h
Executable file → Normal file
@@ -22,41 +22,9 @@ namespace internal {
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
#ifndef EIGEN_HAS_FUSED_MADD
|
||||
#define EIGEN_HAS_FUSED_MADD 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined EIGEN_VECTORIZE_AVX && defined __GNUC__ && !(defined __clang__ || defined __INTEL_COMPILER)
|
||||
// With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
|
||||
// have overloads for both types without linking error.
|
||||
// One solution is to increase ABI version using -fabi-version=4 (or greater).
|
||||
// To work around this inconvenience, we instead wrap 128-bit types into the following helper
|
||||
// structure:
|
||||
// TODO disable this wrapper if abi-version>=4, but how to detect that without asking the user to define a macro?
|
||||
// Thin wrapper around a single value of type T (stored in m_val).
// It converts implicitly to T& / const T&, and can be constructed from and
// assigned a T, so a wrapped value can be used where a T is expected while
// remaining a distinct C++ type.
template<typename T>
|
||||
struct eigen_packet_wrapper
|
||||
{
|
||||
// Implicit access to the wrapped value.
EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
|
||||
EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
|
||||
// The default constructor leaves m_val without an explicit initializer.
EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
|
||||
EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
|
||||
// Assignment from a raw T replaces the wrapped value.
EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
|
||||
m_val = v;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// The wrapped value.
T m_val;
|
||||
};
|
||||
typedef eigen_packet_wrapper<__m128> Packet4f;
|
||||
typedef eigen_packet_wrapper<__m128i> Packet4i;
|
||||
typedef eigen_packet_wrapper<__m128d> Packet2d;
|
||||
#else
|
||||
typedef __m128 Packet4f;
|
||||
typedef __m128i Packet4i;
|
||||
typedef __m128d Packet2d;
|
||||
#endif
|
||||
|
||||
template<> struct is_arithmetic<__m128> { enum { value = true }; };
|
||||
template<> struct is_arithmetic<__m128i> { enum { value = true }; };
|
||||
@@ -90,18 +58,13 @@ template<> struct is_arithmetic<__m128d> { enum { value = true }; };
|
||||
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
|
||||
|
||||
|
||||
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
|
||||
// to leverage AVX instructions.
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet4f type;
|
||||
typedef Packet4f half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasDiv = 1,
|
||||
HasSin = EIGEN_FAST_MATH,
|
||||
@@ -114,23 +77,19 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
{
|
||||
typedef Packet2d type;
|
||||
typedef Packet2d half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=2,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasDiv = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1
|
||||
};
|
||||
};
|
||||
#endif
|
||||
template<> struct packet_traits<int> : default_packet_traits
|
||||
{
|
||||
typedef Packet4i type;
|
||||
typedef Packet4i half;
|
||||
enum {
|
||||
// FIXME check the Has*
|
||||
Vectorizable = 1,
|
||||
@@ -139,9 +98,9 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER==1500)
|
||||
// Workaround MSVC 9 internal compiler error.
|
||||
@@ -151,26 +110,13 @@ template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { re
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
|
||||
#else
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
|
||||
#endif
|
||||
|
||||
// GCC generates a shufps instruction for _mm_set1_ps/_mm_load1_ps instead of the more efficient pshufd instruction.
|
||||
// However, using intrinsics for pset1 makes gcc generate crappy code in some cases (see bug 203)
|
||||
// Using inline assembly is also not an option because then gcc fails to reorder properly the instructions.
|
||||
// Therefore, we introduced the pload1 functions to be used in product kernels for which bug 203 does not apply.
|
||||
// Also note that with AVX, we want it to generate a vbroadcastss.
|
||||
#if (defined __GNUC__) && (!defined __INTEL_COMPILER) && (!defined __clang__) && (!defined __AVX__)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
|
||||
return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
|
||||
#endif
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
|
||||
@@ -193,7 +139,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
|
||||
{
|
||||
return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a);
|
||||
return psub(_mm_setr_epi32(0,0,0,0), a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
|
||||
@@ -227,10 +173,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
|
||||
|
||||
// for some weird raisons, it has to be overloaded for packet of integers
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
|
||||
@@ -276,7 +218,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, con
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); }
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
|
||||
@@ -294,7 +236,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { E
|
||||
#endif
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); }
|
||||
#else
|
||||
// Fast unaligned loads. Note that here we cannot directly use intrinsics: this would
|
||||
// require pointer casting to incompatible pointer types and leads to invalid code
|
||||
@@ -303,17 +245,14 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { E
|
||||
// TODO: do the same for MSVC (ICC is compatible)
|
||||
// NOTE: with the code below, MSVC's compiler crashes!
|
||||
|
||||
#if defined(__GNUC__) && (defined(__i386__) || (defined(__x86_64) && EIGEN_GNUC_AT_LEAST(4, 8)))
|
||||
#if defined(__GNUC__) && defined(__i386__)
|
||||
// bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd
|
||||
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
|
||||
#define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 1
|
||||
#elif defined(__clang__)
|
||||
// bug 201: Segfaults in __mm_loadh_pd with clang 2.8
|
||||
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
|
||||
#define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0
|
||||
#else
|
||||
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 0
|
||||
#define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
|
||||
@@ -344,7 +283,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
|
||||
{
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD
|
||||
#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
|
||||
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
|
||||
return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from));
|
||||
#else
|
||||
__m128d res;
|
||||
res = _mm_load_sd((const double*)(from)) ;
|
||||
@@ -363,77 +302,38 @@ template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
|
||||
{
|
||||
Packet4i tmp;
|
||||
tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
|
||||
tmp = _mm_loadl_epi64(reinterpret_cast<const Packet4i*>(from));
|
||||
return vec4i_swizzle1(tmp, 0, 0, 1, 1);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<Packet4i*>(to), from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE
|
||||
#if EIGEN_AVOID_CUSTOM_UNALIGNED_STORES
|
||||
_mm_storeu_pd(to, from);
|
||||
#else
|
||||
_mm_storel_pd((to), from);
|
||||
_mm_storeh_pd((to+1), from);
|
||||
#endif
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), Packet2d(_mm_castps_pd(from))); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), Packet2d(_mm_castsi128_pd(from))); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride)
|
||||
{
|
||||
return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, DenseIndex stride)
|
||||
{
|
||||
return _mm_set_pd(from[1*stride], from[0*stride]);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride)
|
||||
{
|
||||
return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride)
|
||||
{
|
||||
to[stride*0] = _mm_cvtss_f32(from);
|
||||
to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
|
||||
to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
|
||||
to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, DenseIndex stride)
|
||||
{
|
||||
to[stride*0] = _mm_cvtsd_f64(from);
|
||||
to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride)
|
||||
{
|
||||
to[stride*0] = _mm_cvtsi128_si32(from);
|
||||
to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
|
||||
to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
|
||||
to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castps_pd(from)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castsi128_pd(from)); }
|
||||
|
||||
// some compilers might be tempted to perform multiple moves instead of using a vector path.
|
||||
template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
|
||||
{
|
||||
Packet4f pa = _mm_set_ss(a);
|
||||
pstore(to, Packet4f(vec4f_swizzle1(pa,0,0,0,0)));
|
||||
pstore(to, vec4f_swizzle1(pa,0,0,0,0));
|
||||
}
|
||||
// some compilers might be tempted to perform multiple moves instead of using a vector path.
|
||||
template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
|
||||
{
|
||||
Packet2d pa = _mm_set_sd(a);
|
||||
pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
|
||||
pstore(to, vec2d_swizzle1(pa,0,0));
|
||||
}
|
||||
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && defined(_WIN64) && !defined(__INTEL_COMPILER)
|
||||
// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
|
||||
@@ -480,38 +380,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
|
||||
#endif
|
||||
}
|
||||
|
||||
// with AVX, the default implementations based on pload1 are faster
|
||||
#ifndef __AVX__
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet4f>(const float *a,
|
||||
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
|
||||
{
|
||||
a3 = pload<Packet4f>(a);
|
||||
a0 = vec4f_swizzle1(a3, 0,0,0,0);
|
||||
a1 = vec4f_swizzle1(a3, 1,1,1,1);
|
||||
a2 = vec4f_swizzle1(a3, 2,2,2,2);
|
||||
a3 = vec4f_swizzle1(a3, 3,3,3,3);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet2d>(const double *a,
|
||||
Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
a0 = _mm_loaddup_pd(a+0);
|
||||
a1 = _mm_loaddup_pd(a+1);
|
||||
a2 = _mm_loaddup_pd(a+2);
|
||||
a3 = _mm_loaddup_pd(a+3);
|
||||
#else
|
||||
a1 = pload<Packet2d>(a);
|
||||
a0 = vec2d_swizzle1(a1, 0,0);
|
||||
a1 = vec2d_swizzle1(a1, 1,1);
|
||||
a3 = pload<Packet2d>(a+2);
|
||||
a2 = vec2d_swizzle1(a3, 0,0);
|
||||
a3 = vec2d_swizzle1(a3, 1,1);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
|
||||
{
|
||||
vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
|
||||
@@ -539,10 +407,10 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f tmp0 = _mm_hadd_ps(a,a);
|
||||
return pfirst<Packet4f>(_mm_hadd_ps(tmp0, tmp0));
|
||||
return pfirst(_mm_hadd_ps(tmp0, tmp0));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return pfirst<Packet2d>(_mm_hadd_pd(a, a)); }
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return pfirst(_mm_hadd_pd(a, a)); }
|
||||
|
||||
// SSSE3 version:
|
||||
// EIGEN_STRONG_INLINE float predux(const Packet4i& a)
|
||||
@@ -585,7 +453,7 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
|
||||
return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
|
||||
return pfirst(tmp) + pfirst(_mm_shuffle_epi32(tmp, 1));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
|
||||
@@ -608,11 +476,11 @@ template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
|
||||
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
|
||||
return pfirst<Packet4f>(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
return pfirst(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
return pfirst(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
@@ -628,18 +496,14 @@ template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
|
||||
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
|
||||
return pfirst<Packet4f>(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
return pfirst(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
return pfirst(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
|
||||
return pfirst<Packet4i>(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
|
||||
#else
|
||||
// after some experiments, it is seems this is the fastest way to implement it
|
||||
// for GCC (eg., it does not like using std::min after the pstore !!)
|
||||
EIGEN_ALIGN16 int aux[4];
|
||||
@@ -647,25 +511,20 @@ template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
|
||||
int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
|
||||
int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
|
||||
return aux0<aux2 ? aux0 : aux2;
|
||||
#endif // EIGEN_VECTORIZE_SSE4_1
|
||||
}
|
||||
|
||||
// max
|
||||
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
|
||||
return pfirst<Packet4f>(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
return pfirst(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
return pfirst(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
|
||||
return pfirst<Packet4i>(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
|
||||
#else
|
||||
// after some experiments, it is seems this is the fastest way to implement it
|
||||
// for GCC (eg., it does not like using std::min after the pstore !!)
|
||||
EIGEN_ALIGN16 int aux[4];
|
||||
@@ -673,7 +532,6 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
|
||||
int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
|
||||
int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
|
||||
return aux0>aux2 ? aux0 : aux2;
|
||||
#endif // EIGEN_VECTORIZE_SSE4_1
|
||||
}
|
||||
|
||||
#if (defined __GNUC__)
|
||||
@@ -784,31 +642,6 @@ struct palign_impl<Offset,Packet2d>
|
||||
};
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4f,4>& kernel) {
|
||||
_MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet2d,2>& kernel) {
|
||||
__m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
|
||||
kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
|
||||
kernel.packet[1] = tmp;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4i,4>& kernel) {
|
||||
__m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
|
||||
__m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
|
||||
__m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
|
||||
__m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);
|
||||
|
||||
kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);
|
||||
kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);
|
||||
kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
|
||||
kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -1,167 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_ASSIGNMENT_FUNCTORS_H
|
||||
#define EIGEN_ASSIGNMENT_FUNCTORS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment
|
||||
*
|
||||
*/
|
||||
template<typename Scalar> struct assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a = b; }
|
||||
|
||||
template<int Alignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
|
||||
{ internal::pstoret<Scalar,Packet,Alignment>(a,b); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<assign_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::ReadCost,
|
||||
PacketAccess = packet_traits<Scalar>::IsVectorized
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment with addition
|
||||
*
|
||||
*/
|
||||
template<typename Scalar> struct add_assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(add_assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a += b; }
|
||||
|
||||
template<int Alignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
|
||||
{ internal::pstoret<Scalar,Packet,Alignment>(a,internal::padd(internal::ploadt<Packet,Alignment>(a),b)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<add_assign_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasAdd
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment with subtraction
|
||||
*
|
||||
*/
|
||||
template<typename Scalar> struct sub_assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a -= b; }
|
||||
|
||||
template<int Alignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
|
||||
{ internal::pstoret<Scalar,Packet,Alignment>(a,internal::psub(internal::ploadt<Packet,Alignment>(a),b)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<sub_assign_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasAdd
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment with multiplication
|
||||
*
|
||||
*/
|
||||
template<typename Scalar> struct mul_assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a *= b; }
|
||||
|
||||
template<int Alignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
|
||||
{ internal::pstoret<Scalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<mul_assign_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasMul
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment with diviving
|
||||
*
|
||||
*/
|
||||
template<typename Scalar> struct div_assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(div_assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a /= b; }
|
||||
|
||||
template<int Alignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
|
||||
{ internal::pstoret<Scalar,Packet,Alignment>(a,internal::pdiv(internal::ploadt<Packet,Alignment>(a),b)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<div_assign_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasMul
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment with swaping
|
||||
*
|
||||
* It works as follow. For a non-vectorized evaluation loop, we have:
|
||||
* for(i) func(A.coeffRef(i), B.coeff(i));
|
||||
* where B is a SwapWrapper expression. The trick is to make SwapWrapper::coeff behaves like a non-const coeffRef.
|
||||
* Actually, SwapWrapper might not even be needed since even if B is a plain expression, since it has to be writable
|
||||
* B.coeff already returns a const reference to the underlying scalar value.
|
||||
*
|
||||
* The case of a vectorized loop is more tricky:
|
||||
* for(i,j) func.assignPacket<A_Align>(&A.coeffRef(i,j), B.packet<B_Align>(i,j));
|
||||
* Here, B must be a SwapWrapper whose packet function actually returns a proxy object holding a Scalar*,
|
||||
* the actual alignment and Packet type.
|
||||
*
|
||||
*/
|
||||
template<typename Scalar> struct swap_assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
|
||||
{
|
||||
using std::swap;
|
||||
swap(a,const_cast<Scalar&>(b));
|
||||
}
|
||||
|
||||
template<int LhsAlignment, int RhsAlignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void swapPacket(Scalar* a, Scalar* b) const
|
||||
{
|
||||
Packet tmp = internal::ploadt<Packet,RhsAlignment>(b);
|
||||
internal::pstoret<Scalar,Packet,RhsAlignment>(b, internal::ploadt<Packet,LhsAlignment>(a));
|
||||
internal::pstoret<Scalar,Packet,LhsAlignment>(a, tmp);
|
||||
}
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<swap_assign_op<Scalar> > {
|
||||
enum {
|
||||
Cost = 3 * NumTraits<Scalar>::ReadCost,
|
||||
PacketAccess = packet_traits<Scalar>::IsVectorized
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#endif // EIGEN_ASSIGNMENT_FUNCTORS_H
|
||||
@@ -1,456 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_BINARY_FUNCTORS_H
|
||||
#define EIGEN_BINARY_FUNCTORS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
//---------- associative binary functors ----------
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the sum of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, DenseBase::sum()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_sum_op {
|
||||
// typedef Scalar result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::padd(a,b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
|
||||
{ return internal::predux(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_sum_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasAdd
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template specialization to deprecate the summation of boolean expressions.
|
||||
* This is required to solve Bug 426.
|
||||
* \sa DenseBase::count(), DenseBase::any(), ArrayBase::cast(), MatrixBase::cast()
|
||||
*/
|
||||
template<> struct scalar_sum_op<bool> : scalar_sum_op<int> {
|
||||
EIGEN_DEPRECATED
|
||||
scalar_sum_op() {}
|
||||
};
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the product of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux()
|
||||
*/
|
||||
template<typename LhsScalar,typename RhsScalar> struct scalar_product_op {
|
||||
enum {
|
||||
// TODO vectorize mixed product
|
||||
Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
|
||||
};
|
||||
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::pmul(a,b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
|
||||
{ return internal::predux_mul(a); }
|
||||
};
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
|
||||
enum {
|
||||
Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
|
||||
PacketAccess = scalar_product_op<LhsScalar,RhsScalar>::Vectorizable
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the conjugate product of two scalars
|
||||
*
|
||||
* This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y)
|
||||
*/
|
||||
template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op {
|
||||
|
||||
enum {
|
||||
Conj = NumTraits<LhsScalar>::IsComplex
|
||||
};
|
||||
|
||||
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
|
||||
{ return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }
|
||||
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
|
||||
};
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<LhsScalar>::MulCost,
|
||||
PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMul
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the min of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_min_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { EIGEN_USING_STD_MATH(min); return (min)(a, b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::pmin(a,b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
|
||||
{ return internal::predux_min(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_min_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasMin
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the max of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_max_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { EIGEN_USING_STD_MATH(max); return (max)(a, b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::pmax(a,b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
|
||||
{ return internal::predux_max(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_max_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasMax
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the hypot of two scalars
|
||||
*
|
||||
* \sa MatrixBase::stableNorm(), class Redux
|
||||
*/
|
||||
template<typename Scalar> struct scalar_hypot_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
|
||||
// typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
|
||||
{
|
||||
EIGEN_USING_STD_MATH(max);
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
using std::sqrt;
|
||||
Scalar p, qp;
|
||||
if(_x>_y)
|
||||
{
|
||||
p = _x;
|
||||
qp = _y / p;
|
||||
}
|
||||
else
|
||||
{
|
||||
p = _y;
|
||||
qp = _x / p;
|
||||
}
|
||||
return p * sqrt(Scalar(1) + qp*qp);
|
||||
}
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_hypot_op<Scalar> > {
|
||||
enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess=0 };
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the pow of two scalars
|
||||
*/
|
||||
template<typename Scalar, typename OtherScalar> struct scalar_binary_pow_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op)
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); }
|
||||
};
|
||||
template<typename Scalar, typename OtherScalar>
|
||||
struct functor_traits<scalar_binary_pow_op<Scalar,OtherScalar> > {
|
||||
enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
|
||||
};
|
||||
|
||||
|
||||
|
||||
//---------- non associative binary functors ----------
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the difference of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, MatrixBase::operator-
|
||||
*/
|
||||
template<typename Scalar> struct scalar_difference_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::psub(a,b); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_difference_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasSub
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the quotient of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, Cwise::operator/()
|
||||
*/
|
||||
template<typename LhsScalar,typename RhsScalar> struct scalar_quotient_op {
|
||||
enum {
|
||||
// TODO vectorize mixed product
|
||||
Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv
|
||||
};
|
||||
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::pdiv(a,b); }
|
||||
};
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
|
||||
enum {
|
||||
Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost), // rough estimate!
|
||||
PacketAccess = scalar_quotient_op<LhsScalar,RhsScalar>::Vectorizable
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the and of two booleans
|
||||
*
|
||||
* \sa class CwiseBinaryOp, ArrayBase::operator&&
|
||||
*/
|
||||
struct scalar_boolean_and_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
|
||||
};
|
||||
template<> struct functor_traits<scalar_boolean_and_op> {
|
||||
enum {
|
||||
Cost = NumTraits<bool>::AddCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the or of two booleans
|
||||
*
|
||||
* \sa class CwiseBinaryOp, ArrayBase::operator||
|
||||
*/
|
||||
struct scalar_boolean_or_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
|
||||
};
|
||||
template<> struct functor_traits<scalar_boolean_or_op> {
|
||||
enum {
|
||||
Cost = NumTraits<bool>::AddCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
//---------- binary functors bound to a constant, thus appearing as a unary functor ----------
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to multiply a scalar by a fixed other one
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/
|
||||
*/
|
||||
/* NOTE why doing the pset1() in packetOp *is* an optimization ?
|
||||
* indeed it seems better to declare m_other as a Packet and do the pset1() once
|
||||
* in the constructor. However, in practice:
|
||||
* - GCC does not like m_other as a Packet and generate a load every time it needs it
|
||||
* - on the other hand GCC is able to moves the pset1() outside the loop :)
|
||||
* - simpler code ;)
|
||||
* (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y)
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_multiple_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pmul(a, pset1<Packet>(m_other)); }
|
||||
typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_multiple_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
|
||||
|
||||
template<typename Scalar1, typename Scalar2>
|
||||
struct scalar_multiple2_op {
|
||||
typedef typename scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; }
|
||||
typename add_const_on_value_type<typename NumTraits<Scalar2>::Nested>::type m_other;
|
||||
};
|
||||
template<typename Scalar1,typename Scalar2>
|
||||
struct functor_traits<scalar_multiple2_op<Scalar1,Scalar2> >
|
||||
{ enum { Cost = NumTraits<Scalar1>::MulCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to divide a scalar by a fixed other one
|
||||
*
|
||||
* This functor is used to implement the quotient of a matrix by
|
||||
* a scalar where the scalar type is not necessarily a floating point type.
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::operator/
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_quotient1_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; }
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pdiv(a, pset1<Packet>(m_other)); }
|
||||
typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_quotient1_op<Scalar> >
|
||||
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
|
||||
|
||||
// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication
|
||||
// where the mixing of different types is handled by scalar_product_traits
|
||||
// In particular, real * complex<real> is allowed.
|
||||
// FIXME move this to functor_traits adding a functor_default
|
||||
template<typename Functor> struct functor_is_product_like { enum { ret = 0 }; };
|
||||
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
|
||||
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
|
||||
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_quotient_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to add a scalar to a fixed other one
|
||||
* \sa class CwiseUnaryOp, Array::operator+
|
||||
*/
|
||||
/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */
|
||||
template<typename Scalar>
|
||||
struct scalar_add_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
EIGEN_DEVICE_FUNC inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_DEVICE_FUNC inline scalar_add_op(const Scalar& other) : m_other(other) { }
|
||||
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a + m_other; }
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::padd(a, pset1<Packet>(m_other)); }
|
||||
const Scalar m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_add_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to subtract a fixed scalar to another one
|
||||
* \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_rsub_op
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_sub_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline scalar_sub_op(const scalar_sub_op& other) : m_other(other.m_other) { }
|
||||
inline scalar_sub_op(const Scalar& other) : m_other(other) { }
|
||||
inline Scalar operator() (const Scalar& a) const { return a - m_other; }
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::psub(a, pset1<Packet>(m_other)); }
|
||||
const Scalar m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_sub_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to subtract a scalar to fixed another one
|
||||
* \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_sub_op
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_rsub_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline scalar_rsub_op(const scalar_rsub_op& other) : m_other(other.m_other) { }
|
||||
inline scalar_rsub_op(const Scalar& other) : m_other(other) { }
|
||||
inline Scalar operator() (const Scalar& a) const { return m_other - a; }
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::psub(pset1<Packet>(m_other), a); }
|
||||
const Scalar m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_rsub_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to raise a scalar to a power
|
||||
* \sa class CwiseUnaryOp, Cwise::pow
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_pow_op {
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { }
|
||||
inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {}
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); }
|
||||
const Scalar m_exponent;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_pow_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the quotient between a scalar and array entries.
|
||||
* \sa class CwiseUnaryOp, Cwise::inverse()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_inverse_mult_op {
|
||||
scalar_inverse_mult_op(const Scalar& other) : m_other(other) {}
|
||||
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other / a; }
|
||||
template<typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pdiv(pset1<Packet>(m_other),a); }
|
||||
Scalar m_other;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_BINARY_FUNCTORS_H
|
||||
@@ -1,6 +0,0 @@
|
||||
# Gather every header of this directory ...
file(GLOB Eigen_Core_Functor_SRCS "*.h")

# ... and install them under Eigen/src/Core/functors as part of the Devel component.
install(FILES
  ${Eigen_Core_Functor_SRCS}
  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/functors COMPONENT Devel
  )
|
||||
@@ -1,158 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_NULLARY_FUNCTORS_H
|
||||
#define EIGEN_NULLARY_FUNCTORS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Scalar>
|
||||
struct scalar_constant_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; }
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return internal::pset1<Packet>(m_other); }
|
||||
const Scalar m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_constant_op<Scalar> >
|
||||
// FIXME replace this packet test by a safe one
|
||||
{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };
|
||||
|
||||
template<typename Scalar> struct scalar_identity_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? Scalar(1) : Scalar(0); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_identity_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
|
||||
|
||||
template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
|
||||
|
||||
// linear access for packet ops:
|
||||
// 1) initialization
|
||||
// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0])
|
||||
// 2) each step (where size is 1 for coeff access or PacketSize for packet access)
|
||||
// base += [size*step, ..., size*step]
|
||||
//
|
||||
// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
|
||||
// in order to avoid the padd() in operator() ?
|
||||
template <typename Scalar>
|
||||
struct linspaced_op_impl<Scalar,false>
|
||||
{
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
|
||||
linspaced_op_impl(const Scalar& low, const Scalar& step) :
|
||||
m_low(low), m_step(step),
|
||||
m_packetStep(pset1<Packet>(packet_traits<Scalar>::size*step)),
|
||||
m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const
|
||||
{
|
||||
m_base = padd(m_base, pset1<Packet>(m_step));
|
||||
return m_low+Scalar(i)*m_step;
|
||||
}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); }
|
||||
|
||||
const Scalar m_low;
|
||||
const Scalar m_step;
|
||||
const Packet m_packetStep;
|
||||
mutable Packet m_base;
|
||||
};
|
||||
|
||||
// random access for packet ops:
|
||||
// 1) each step
|
||||
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
|
||||
template <typename Scalar>
|
||||
struct linspaced_op_impl<Scalar,true>
|
||||
{
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
|
||||
linspaced_op_impl(const Scalar& low, const Scalar& step) :
|
||||
m_low(low), m_step(step),
|
||||
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Scalar>(0)) {}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index i) const
|
||||
{ return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(Scalar(i)),m_interPacket))); }
|
||||
|
||||
const Scalar m_low;
|
||||
const Scalar m_step;
|
||||
const Packet m_lowPacket;
|
||||
const Packet m_stepPacket;
|
||||
const Packet m_interPacket;
|
||||
};
|
||||
|
||||
// ----- Linspace functor ----------------------------------------------------------------
|
||||
|
||||
// Forward declaration (we default to random access which does not really give
|
||||
// us a speed gain when using packet access but it allows to use the functor in
|
||||
// nested expressions).
|
||||
template <typename Scalar, bool RandomAccess = true> struct linspaced_op;
|
||||
template <typename Scalar, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,RandomAccess> >
|
||||
{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
|
||||
template <typename Scalar, bool RandomAccess> struct linspaced_op
|
||||
{
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
|
||||
|
||||
// We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
|
||||
// there row==0 and col is used for the actual iteration.
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const
|
||||
{
|
||||
eigen_assert(col==0 || row==0);
|
||||
return impl(col + row);
|
||||
}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); }
|
||||
|
||||
// We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
|
||||
// there row==0 and col is used for the actual iteration.
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
|
||||
{
|
||||
eigen_assert(col==0 || row==0);
|
||||
return impl.packetOp(col + row);
|
||||
}
|
||||
|
||||
// This proxy object handles the actual required temporaries, the different
|
||||
// implementations (random vs. sequential access) as well as the
|
||||
// correct piping to size 2/4 packet operations.
|
||||
const linspaced_op_impl<Scalar,RandomAccess> impl;
|
||||
};
|
||||
|
||||
// all functors allow linear access, except scalar_identity_op. So we fix here a quick meta
|
||||
// to indicate whether a functor allows linear access, just always answering 'yes' except for
|
||||
// scalar_identity_op.
|
||||
// FIXME move this to functor_traits adding a functor_default
|
||||
template<typename Functor> struct functor_has_linear_access { enum { ret = 1 }; };
|
||||
template<typename Scalar> struct functor_has_linear_access<scalar_identity_op<Scalar> > { enum { ret = 0 }; };
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_NULLARY_FUNCTORS_H
|
||||
@@ -1,129 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_STL_FUNCTORS_H
|
||||
#define EIGEN_STL_FUNCTORS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// default functor traits for STL functors:
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::multiplies<T> >
|
||||
{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::divides<T> >
|
||||
{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::plus<T> >
|
||||
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::minus<T> >
|
||||
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::negate<T> >
|
||||
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::logical_or<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::logical_and<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::logical_not<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::greater<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::less<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::greater_equal<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::less_equal<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::equal_to<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::not_equal_to<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::binder2nd<T> >
|
||||
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::binder1st<T> >
|
||||
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::unary_negate<T> >
|
||||
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct functor_traits<std::binary_negate<T> >
|
||||
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
|
||||
#ifdef EIGEN_STDEXT_SUPPORT

// Functor traits for the extension functors, only compiled when
// EIGEN_STDEXT_SUPPORT is defined. None of them offers packet access.

// --- projections and selections: zero cost ---

template<typename T0,typename T1>
struct functor_traits<std::project1st<T0,T1> >
{
  enum { Cost = 0, PacketAccess = false };
};

template<typename T0,typename T1>
struct functor_traits<std::project2nd<T0,T1> >
{
  enum { Cost = 0, PacketAccess = false };
};

template<typename T0,typename T1>
struct functor_traits<std::select2nd<std::pair<T0,T1> > >
{
  enum { Cost = 0, PacketAccess = false };
};

template<typename T0,typename T1>
struct functor_traits<std::select1st<std::pair<T0,T1> > >
{
  enum { Cost = 0, PacketAccess = false };
};

// --- compositions: sum of the costs of the composed functors ---

template<typename T0,typename T1>
struct functor_traits<std::unary_compose<T0,T1> >
{
  enum {
    Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost,
    PacketAccess = false
  };
};

template<typename T0,typename T1,typename T2>
struct functor_traits<std::binary_compose<T0,T1,T2> >
{
  enum {
    Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost + functor_traits<T2>::Cost,
    PacketAccess = false
  };
};

#endif // EIGEN_STDEXT_SUPPORT
|
||||
|
||||
// allow to add new functors and specializations of functor_traits from outside Eigen.
|
||||
// this macro is really needed because functor_traits must be specialized after it is declared but before it is used...
|
||||
#ifdef EIGEN_FUNCTORS_PLUGIN
|
||||
#include EIGEN_FUNCTORS_PLUGIN
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_STL_FUNCTORS_H
|
||||
@@ -1,396 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_UNARY_FUNCTORS_H
|
||||
#define EIGEN_UNARY_FUNCTORS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the opposite of a scalar
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::operator-
|
||||
*/
|
||||
template<typename Scalar> struct scalar_opposite_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pnegate(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_opposite_op<Scalar> >
|
||||
{ enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasNegate };
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the absolute value of a scalar
|
||||
*
|
||||
* \sa class CwiseUnaryOp, Cwise::abs
|
||||
*/
|
||||
template<typename Scalar> struct scalar_abs_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pabs(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_abs_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasAbs
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the squared absolute value of a scalar
|
||||
*
|
||||
* \sa class CwiseUnaryOp, Cwise::abs2
|
||||
*/
|
||||
template<typename Scalar> struct scalar_abs2_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pmul(a,a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_abs2_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasAbs2 }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the conjugate of a complex value
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::conjugate()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_conjugate_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_conjugate_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0,
|
||||
PacketAccess = packet_traits<Scalar>::HasConj
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to cast a scalar to another type
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::cast()
|
||||
*/
|
||||
template<typename Scalar, typename NewType>
|
||||
struct scalar_cast_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
||||
typedef NewType result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }
|
||||
};
|
||||
template<typename Scalar, typename NewType>
|
||||
struct functor_traits<scalar_cast_op<Scalar,NewType> >
|
||||
{ enum { Cost = is_same<Scalar, NewType>::value ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to extract the real part of a complex
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::real()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_real_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_real_op<Scalar> >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to extract the imaginary part of a complex
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::imag()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_imag_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_imag_op<Scalar> >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to extract the real part of a complex as a reference
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::real()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_real_ref_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast<Scalar*>(&a)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_real_ref_op<Scalar> >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to extract the imaginary part of a complex as a reference
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::imag()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_imag_ref_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast<Scalar*>(&a)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_imag_ref_op<Scalar> >
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
*
|
||||
* \brief Template functor to compute the exponential of a scalar
|
||||
*
|
||||
* \sa class CwiseUnaryOp, Cwise::exp()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_exp_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_exp_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasExp }; };
|
||||
|
||||
/** \internal
|
||||
*
|
||||
* \brief Template functor to compute the logarithm of a scalar
|
||||
*
|
||||
* \sa class CwiseUnaryOp, Cwise::log()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_log_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_log_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog }; };
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the square root of a scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::sqrt()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_sqrt_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_sqrt_op<Scalar> >
|
||||
{ enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasSqrt
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the cosine of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::cos()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_cos_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
|
||||
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_cos_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasCos
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the sine of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::sin()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_sin_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_sin_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasSin
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the tan of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::tan()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_tan_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_tan_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasTan
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the arc cosine of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::acos()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_acos_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_acos_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasACos
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the arc sine of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::asin()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_asin_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_asin_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasASin
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the atan of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::atan()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_atan_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op)
|
||||
inline const Scalar operator() (const Scalar& a) const { using std::atan; return atan(a); }
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::patan(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_atan_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasATan
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the inverse of a scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::inverse()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_inverse_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op)
|
||||
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
|
||||
template<typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pdiv(pset1<Packet>(Scalar(1)),a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_inverse_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the square of a scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::square()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_square_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op)
|
||||
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a; }
|
||||
template<typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pmul(a,a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_square_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the cube of a scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::cube()
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_cube_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op)
|
||||
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a*a; }
|
||||
template<typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return internal::pmul(a,pmul(a,a)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_cube_op<Scalar> >
|
||||
{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
|
||||
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_FUNCTORS_H
|
||||
@@ -85,12 +85,12 @@ struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
|
||||
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
|
||||
| (EvalToRowMajor ? RowMajorBit : 0)
|
||||
| NestingFlags
|
||||
| (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0)
|
||||
| (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0)
|
||||
| (LhsFlags & RhsFlags & AlignedBit)
|
||||
// TODO enable vectorization for mixed types
|
||||
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
|
||||
|
||||
CoeffReadCost = InnerSize == Dynamic ? Dynamic
|
||||
: InnerSize == 0 ? 0
|
||||
: InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
|
||||
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
|
||||
|
||||
@@ -134,20 +134,18 @@ class CoeffBasedProduct
|
||||
};
|
||||
|
||||
typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
|
||||
Unroll ? InnerSize-1 : Dynamic,
|
||||
Unroll ? InnerSize : Dynamic,
|
||||
_LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
|
||||
|
||||
typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
|
||||
|
||||
public:
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffBasedProduct(const CoeffBasedProduct& other)
|
||||
: Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
|
||||
{}
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
|
||||
: m_lhs(lhs), m_rhs(rhs)
|
||||
{
|
||||
@@ -160,10 +158,9 @@ class CoeffBasedProduct
|
||||
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
|
||||
EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
|
||||
EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
|
||||
{
|
||||
Scalar res;
|
||||
@@ -174,7 +171,6 @@ class CoeffBasedProduct
|
||||
/* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
|
||||
* which is why we don't set the LinearAccessBit.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
|
||||
{
|
||||
Scalar res;
|
||||
@@ -189,33 +185,29 @@ class CoeffBasedProduct
|
||||
{
|
||||
PacketScalar res;
|
||||
internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
|
||||
Unroll ? InnerSize-1 : Dynamic,
|
||||
Unroll ? InnerSize : Dynamic,
|
||||
_LhsNested, _RhsNested, PacketScalar, LoadMode>
|
||||
::run(row, col, m_lhs, m_rhs, res);
|
||||
return res;
|
||||
}
|
||||
|
||||
// Implicit conversion to the nested type (trigger the evaluation of the product)
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE operator const PlainObject& () const
|
||||
{
|
||||
m_result.lazyAssign(*this);
|
||||
return m_result;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC const _LhsNested& lhs() const { return m_lhs; }
|
||||
EIGEN_DEVICE_FUNC const _RhsNested& rhs() const { return m_rhs; }
|
||||
const _LhsNested& lhs() const { return m_lhs; }
|
||||
const _RhsNested& rhs() const { return m_rhs; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
|
||||
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
|
||||
|
||||
template<int DiagonalIndex>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
|
||||
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
|
||||
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
|
||||
|
||||
@@ -248,11 +240,20 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
||||
{
|
||||
product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
|
||||
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
|
||||
res += lhs.coeff(row, UnrollingIndex-1) * rhs.coeff(UnrollingIndex-1, col);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct product_coeff_impl<DefaultTraversal, 1, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
||||
{
|
||||
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -260,10 +261,9 @@ template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
||||
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res)
|
||||
{
|
||||
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
|
||||
res = RetScalar(0);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -271,13 +271,9 @@ template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
|
||||
{
|
||||
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
|
||||
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
|
||||
for(Index i = 1; i < lhs.cols(); ++i)
|
||||
res += lhs.coeff(row, i) * rhs.coeff(i, col);
|
||||
res = (lhs.row(row).transpose().cwiseProduct( rhs.col(col) )).sum();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -307,6 +303,16 @@ struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct product_coeff_impl<InnerVectorizedTraversal, 0, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res)
|
||||
{
|
||||
res = 0;
|
||||
}
|
||||
};
|
||||
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
@@ -316,7 +322,7 @@ struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, Re
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
||||
{
|
||||
Packet pres;
|
||||
product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
|
||||
product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
|
||||
res = predux(pres);
|
||||
}
|
||||
};
|
||||
@@ -384,7 +390,7 @@ struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
||||
{
|
||||
product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode>(UnrollingIndex-1, col), res);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -395,12 +401,12 @@ struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
||||
{
|
||||
product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
|
||||
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
|
||||
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
struct product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
||||
@@ -410,7 +416,7 @@ struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
struct product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
||||
@@ -419,16 +425,35 @@ struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res)
|
||||
{
|
||||
res = pset1<Packet>(0);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res)
|
||||
{
|
||||
res = pset1<Packet>(0);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
|
||||
{
|
||||
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
|
||||
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
|
||||
for(Index i = 1; i < lhs.cols(); ++i)
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
|
||||
res = pset1<Packet>(0);
|
||||
for(Index i = 0; i < lhs.cols(); ++i)
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -438,10 +463,9 @@ struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
typedef typename Lhs::Index Index;
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
|
||||
{
|
||||
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
|
||||
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
|
||||
for(Index i = 1; i < lhs.cols(); ++i)
|
||||
res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
|
||||
res = pset1<Packet>(0);
|
||||
for(Index i = 0; i < lhs.cols(); ++i)
|
||||
res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -23,8 +23,6 @@ template<
|
||||
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
|
||||
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
|
||||
{
|
||||
typedef gebp_traits<RhsScalar,LhsScalar> Traits;
|
||||
|
||||
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
static EIGEN_STRONG_INLINE void run(
|
||||
Index rows, Index cols, Index depth,
|
||||
@@ -53,8 +51,6 @@ template<
|
||||
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
|
||||
{
|
||||
|
||||
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
|
||||
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
static void run(Index rows, Index cols, Index depth,
|
||||
const LhsScalar* _lhs, Index lhsStride,
|
||||
@@ -67,9 +63,11 @@ static void run(Index rows, Index cols, Index depth,
|
||||
const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
|
||||
const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
|
||||
|
||||
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
|
||||
Index kc = blocking.kc(); // cache block size along the K direction
|
||||
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
||||
Index nc = (std::min)(cols,blocking.nc()); // cache block size along the N direction
|
||||
//Index nc = blocking.nc(); // cache block size along the N direction
|
||||
|
||||
gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
||||
gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
|
||||
@@ -82,68 +80,68 @@ static void run(Index rows, Index cols, Index depth,
|
||||
Index tid = omp_get_thread_num();
|
||||
Index threads = omp_get_num_threads();
|
||||
|
||||
LhsScalar* blockA = blocking.blockA();
|
||||
eigen_internal_assert(blockA!=0);
|
||||
std::size_t sizeA = kc*mc;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, 0);
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar, w, sizeW, 0);
|
||||
|
||||
std::size_t sizeB = kc*nc;
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);
|
||||
|
||||
RhsScalar* blockB = blocking.blockB();
|
||||
eigen_internal_assert(blockB!=0);
|
||||
|
||||
// For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
|
||||
for(Index k=0; k<depth; k+=kc)
|
||||
{
|
||||
const Index actual_kc = (std::min)(k+kc,depth)-k; // => rows of B', and cols of the A'
|
||||
|
||||
// In order to reduce the chance that a thread has to wait for the other,
|
||||
// let's start by packing B'.
|
||||
pack_rhs(blockB, &rhs(k,0), rhsStride, actual_kc, nc);
|
||||
// let's start by packing A'.
|
||||
pack_lhs(blockA, &lhs(0,k), lhsStride, actual_kc, mc);
|
||||
|
||||
// Pack A_k to A' in a parallel fashion:
|
||||
// each thread packs the sub block A_k,i to A'_i where i is the thread id.
|
||||
// Pack B_k to B' in a parallel fashion:
|
||||
// each thread packs the sub block B_k,j to B'_j where j is the thread id.
|
||||
|
||||
// However, before copying to A'_i, we have to make sure that no other thread is still using it,
|
||||
// However, before copying to B'_j, we have to make sure that no other thread is still using it,
|
||||
// i.e., we test that info[tid].users equals 0.
|
||||
// Then, we set info[tid].users to the number of threads to mark that all other threads are going to use it.
|
||||
while(info[tid].users!=0) {}
|
||||
info[tid].users += threads;
|
||||
|
||||
pack_lhs(blockA+info[tid].lhs_start*actual_kc, &lhs(info[tid].lhs_start,k), lhsStride, actual_kc, info[tid].lhs_length);
|
||||
|
||||
// Notify the other threads that the part A'_i is ready to go.
|
||||
pack_rhs(blockB+info[tid].rhs_start*actual_kc, &rhs(k,info[tid].rhs_start), rhsStride, actual_kc, info[tid].rhs_length);
|
||||
|
||||
// Notify the other threads that the part B'_j is ready to go.
|
||||
info[tid].sync = k;
|
||||
|
||||
// Computes C_i += A' * B' per A'_i
|
||||
|
||||
// Computes C_i += A' * B' per B'_j
|
||||
for(Index shift=0; shift<threads; ++shift)
|
||||
{
|
||||
Index i = (tid+shift)%threads;
|
||||
Index j = (tid+shift)%threads;
|
||||
|
||||
// At this point we have to make sure that A'_i has been updated by the thread i,
|
||||
// At this point we have to make sure that B'_j has been updated by the thread j,
|
||||
// we use testAndSetOrdered to mimic a volatile access.
|
||||
// However, no need to wait for the B' part which has been updated by the current thread!
|
||||
if(shift>0)
|
||||
while(info[i].sync!=k) {}
|
||||
gebp(res+info[i].lhs_start, resStride, blockA+info[i].lhs_start*actual_kc, blockB, info[i].lhs_length, actual_kc, nc, alpha);
|
||||
while(info[j].sync!=k) {}
|
||||
|
||||
gebp(res+info[j].rhs_start*resStride, resStride, blockA, blockB+info[j].rhs_start*actual_kc, mc, actual_kc, info[j].rhs_length, alpha, -1,-1,0,0, w);
|
||||
}
|
||||
|
||||
// Then keep going as usual with the remaining B'
|
||||
for(Index j=nc; j<cols; j+=nc)
|
||||
// Then keep going as usual with the remaining A'
|
||||
for(Index i=mc; i<rows; i+=mc)
|
||||
{
|
||||
const Index actual_nc = (std::min)(j+nc,cols)-j;
|
||||
const Index actual_mc = (std::min)(i+mc,rows)-i;
|
||||
|
||||
// pack B_k,j to B'
|
||||
pack_rhs(blockB, &rhs(k,j), rhsStride, actual_kc, actual_nc);
|
||||
// pack A_i,k to A'
|
||||
pack_lhs(blockA, &lhs(i,k), lhsStride, actual_kc, actual_mc);
|
||||
|
||||
// C_j += A' * B'
|
||||
gebp(res+j*resStride, resStride, blockA, blockB, rows, actual_kc, actual_nc, alpha);
|
||||
// C_i += A' * B'
|
||||
gebp(res+i, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1,-1,0,0, w);
|
||||
}
|
||||
|
||||
// Release all the sub blocks A'_i of A' for the current thread,
|
||||
// Release all the sub blocks B'_j of B' for the current thread,
|
||||
// i.e., we simply decrement the number of users by 1
|
||||
#pragma omp critical
|
||||
{
|
||||
for(Index i=0; i<threads; ++i)
|
||||
for(Index j=0; j<threads; ++j)
|
||||
#pragma omp atomic
|
||||
--(info[i].users);
|
||||
}
|
||||
--(info[j].users);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -153,34 +151,38 @@ static void run(Index rows, Index cols, Index depth,
|
||||
|
||||
// this is the sequential version!
|
||||
std::size_t sizeA = kc*mc;
|
||||
std::size_t sizeB = kc*nc;
|
||||
std::size_t sizeB = kc*cols;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockW, sizeW, blocking.blockW());
|
||||
|
||||
// For each horizontal panel of the rhs, and corresponding panel of the lhs...
|
||||
// (==GEMM_VAR1)
|
||||
for(Index k2=0; k2<depth; k2+=kc)
|
||||
{
|
||||
const Index actual_kc = (std::min)(k2+kc,depth)-k2;
|
||||
|
||||
// OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
|
||||
// => Pack lhs's panel into a sequential chunk of memory (L2/L3 caching)
|
||||
// Note that this panel will be read as many times as the number of blocks in the rhs's
|
||||
// horizontal panel which is, in practice, a very low number.
|
||||
pack_lhs(blockA, &lhs(0,k2), lhsStride, actual_kc, rows);
|
||||
// => Pack rhs's panel into a sequential chunk of memory (L2 caching)
|
||||
// Note that this panel will be read as many times as the number of blocks in the lhs's
|
||||
// vertical panel which is, in practice, a very low number.
|
||||
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
|
||||
|
||||
// For each kc x nc block of the rhs's horizontal panel...
|
||||
for(Index j2=0; j2<cols; j2+=nc)
|
||||
// For each mc x kc block of the lhs's vertical panel...
|
||||
// (==GEPP_VAR1)
|
||||
for(Index i2=0; i2<rows; i2+=mc)
|
||||
{
|
||||
const Index actual_nc = (std::min)(j2+nc,cols)-j2;
|
||||
const Index actual_mc = (std::min)(i2+mc,rows)-i2;
|
||||
|
||||
// We pack the rhs's block into a sequential chunk of memory (L2 caching)
|
||||
// We pack the lhs's block into a sequential chunk of memory (L1 caching)
|
||||
// Note that this block will be read a very high number of times, which is equal to the number of
|
||||
// micro horizontal panel of the large rhs's panel (e.g., rows/12 times).
|
||||
pack_rhs(blockB, &rhs(k2,j2), rhsStride, actual_kc, actual_nc);
|
||||
// micro vertical panel of the large rhs's panel (e.g., cols/4 times).
|
||||
pack_lhs(blockA, &lhs(i2,k2), lhsStride, actual_kc, actual_mc);
|
||||
|
||||
// Everything is packed, we can now call the panel * block kernel:
|
||||
gebp(res+j2*resStride, resStride, blockA, blockB, rows, actual_kc, actual_nc, alpha);
|
||||
// Everything is packed, we can now call the block * panel kernel:
|
||||
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -201,13 +203,14 @@ struct traits<GeneralProduct<Lhs,Rhs,GemmProduct> >
|
||||
template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
|
||||
struct gemm_functor
|
||||
{
|
||||
gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha, BlockingType& blocking)
|
||||
gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha,
|
||||
BlockingType& blocking)
|
||||
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
|
||||
{}
|
||||
|
||||
void initParallelSession() const
|
||||
{
|
||||
m_blocking.allocateA();
|
||||
m_blocking.allocateB();
|
||||
}
|
||||
|
||||
void operator() (Index row, Index rows, Index col=0, Index cols=-1, GemmParallelInfo<Index>* info=0) const
|
||||
@@ -221,8 +224,6 @@ struct gemm_functor
|
||||
(Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
|
||||
m_actualAlpha, m_blocking, info);
|
||||
}
|
||||
|
||||
typedef typename Gemm::Traits Traits;
|
||||
|
||||
protected:
|
||||
const Lhs& m_lhs;
|
||||
@@ -244,6 +245,7 @@ class level3_blocking
|
||||
protected:
|
||||
LhsScalar* m_blockA;
|
||||
RhsScalar* m_blockB;
|
||||
RhsScalar* m_blockW;
|
||||
|
||||
DenseIndex m_mc;
|
||||
DenseIndex m_nc;
|
||||
@@ -252,7 +254,7 @@ class level3_blocking
|
||||
public:
|
||||
|
||||
level3_blocking()
|
||||
: m_blockA(0), m_blockB(0), m_mc(0), m_nc(0), m_kc(0)
|
||||
: m_blockA(0), m_blockB(0), m_blockW(0), m_mc(0), m_nc(0), m_kc(0)
|
||||
{}
|
||||
|
||||
inline DenseIndex mc() const { return m_mc; }
|
||||
@@ -261,6 +263,7 @@ class level3_blocking
|
||||
|
||||
inline LhsScalar* blockA() { return m_blockA; }
|
||||
inline RhsScalar* blockB() { return m_blockB; }
|
||||
inline RhsScalar* blockW() { return m_blockW; }
|
||||
};
|
||||
|
||||
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
|
||||
@@ -279,25 +282,29 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
||||
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
enum {
|
||||
SizeA = ActualRows * MaxDepth,
|
||||
SizeB = ActualCols * MaxDepth
|
||||
SizeB = ActualCols * MaxDepth,
|
||||
SizeW = MaxDepth * Traits::WorkSpaceFactor
|
||||
};
|
||||
|
||||
EIGEN_ALIGN_DEFAULT LhsScalar m_staticA[SizeA];
|
||||
EIGEN_ALIGN_DEFAULT RhsScalar m_staticB[SizeB];
|
||||
EIGEN_ALIGN16 LhsScalar m_staticA[SizeA];
|
||||
EIGEN_ALIGN16 RhsScalar m_staticB[SizeB];
|
||||
EIGEN_ALIGN16 RhsScalar m_staticW[SizeW];
|
||||
|
||||
public:
|
||||
|
||||
gemm_blocking_space(DenseIndex /*rows*/, DenseIndex /*cols*/, DenseIndex /*depth*/, bool /*full_rows*/ = false)
|
||||
gemm_blocking_space(DenseIndex /*rows*/, DenseIndex /*cols*/, DenseIndex /*depth*/)
|
||||
{
|
||||
this->m_mc = ActualRows;
|
||||
this->m_nc = ActualCols;
|
||||
this->m_kc = MaxDepth;
|
||||
this->m_blockA = m_staticA;
|
||||
this->m_blockB = m_staticB;
|
||||
this->m_blockW = m_staticW;
|
||||
}
|
||||
|
||||
inline void allocateA() {}
|
||||
inline void allocateB() {}
|
||||
inline void allocateW() {}
|
||||
inline void allocateAll() {}
|
||||
};
|
||||
|
||||
@@ -316,28 +323,20 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
||||
|
||||
DenseIndex m_sizeA;
|
||||
DenseIndex m_sizeB;
|
||||
DenseIndex m_sizeW;
|
||||
|
||||
public:
|
||||
|
||||
gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth, bool full_rows = false)
|
||||
gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth)
|
||||
{
|
||||
this->m_mc = Transpose ? cols : rows;
|
||||
this->m_nc = Transpose ? rows : cols;
|
||||
this->m_kc = depth;
|
||||
|
||||
if(full_rows)
|
||||
{
|
||||
DenseIndex m = this->m_mc;
|
||||
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc);
|
||||
}
|
||||
else // full columns
|
||||
{
|
||||
DenseIndex n = this->m_nc;
|
||||
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, n);
|
||||
}
|
||||
|
||||
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc);
|
||||
m_sizeA = this->m_mc * this->m_kc;
|
||||
m_sizeB = this->m_kc * this->m_nc;
|
||||
m_sizeW = this->m_kc*Traits::WorkSpaceFactor;
|
||||
}
|
||||
|
||||
void allocateA()
|
||||
@@ -352,16 +351,24 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
||||
this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
|
||||
}
|
||||
|
||||
void allocateW()
|
||||
{
|
||||
if(this->m_blockW==0)
|
||||
this->m_blockW = aligned_new<RhsScalar>(m_sizeW);
|
||||
}
|
||||
|
||||
void allocateAll()
|
||||
{
|
||||
allocateA();
|
||||
allocateB();
|
||||
allocateW();
|
||||
}
|
||||
|
||||
~gemm_blocking_space()
|
||||
{
|
||||
aligned_delete(this->m_blockA, m_sizeA);
|
||||
aligned_delete(this->m_blockB, m_sizeB);
|
||||
aligned_delete(this->m_blockW, m_sizeW);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -386,37 +393,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
|
||||
typedef internal::scalar_product_op<LhsScalar,RhsScalar> BinOp;
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
|
||||
}
|
||||
|
||||
template<typename Dest>
|
||||
inline void evalTo(Dest& dst) const
|
||||
{
|
||||
if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
|
||||
dst.noalias() = m_lhs .lazyProduct( m_rhs );
|
||||
else
|
||||
{
|
||||
dst.setZero();
|
||||
scaleAndAddTo(dst,Scalar(1));
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Dest>
|
||||
inline void addTo(Dest& dst) const
|
||||
{
|
||||
if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
|
||||
dst.noalias() += m_lhs .lazyProduct( m_rhs );
|
||||
else
|
||||
scaleAndAddTo(dst,Scalar(1));
|
||||
}
|
||||
|
||||
template<typename Dest>
|
||||
inline void subTo(Dest& dst) const
|
||||
{
|
||||
if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
|
||||
dst.noalias() -= m_lhs .lazyProduct( m_rhs );
|
||||
else
|
||||
scaleAndAddTo(dst,Scalar(-1));
|
||||
}
|
||||
|
||||
template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
|
||||
{
|
||||
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
|
||||
@@ -439,7 +416,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
|
||||
(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
|
||||
_ActualLhsType, _ActualRhsType, Dest, BlockingType> GemmFunctor;
|
||||
|
||||
BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), true);
|
||||
BlockingType blocking(dst.rows(), dst.cols(), lhs.cols());
|
||||
|
||||
internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit);
|
||||
}
|
||||
|
||||
@@ -73,8 +73,11 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
|
||||
if(mc > Traits::nr)
|
||||
mc = (mc/Traits::nr)*Traits::nr;
|
||||
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = sizeW + kc*size;
|
||||
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0);
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, kc*size, 0);
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar, allocatedBlockB, sizeB, 0);
|
||||
RhsScalar* blockB = allocatedBlockB + sizeW;
|
||||
|
||||
gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
||||
gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
|
||||
@@ -100,15 +103,15 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
|
||||
// 3 - after the diagonal => processed with gebp or skipped
|
||||
if (UpLo==Lower)
|
||||
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, (std::min)(size,i2), alpha,
|
||||
-1, -1, 0, 0);
|
||||
-1, -1, 0, 0, allocatedBlockB);
|
||||
|
||||
sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha);
|
||||
sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB);
|
||||
|
||||
if (UpLo==Upper)
|
||||
{
|
||||
Index j2 = i2+actual_mc;
|
||||
gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, (std::max)(Index(0), size-j2), alpha,
|
||||
-1, -1, 0, 0);
|
||||
-1, -1, 0, 0, allocatedBlockB);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -133,7 +136,7 @@ struct tribb_kernel
|
||||
enum {
|
||||
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
|
||||
};
|
||||
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha)
|
||||
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha, RhsScalar* workspace)
|
||||
{
|
||||
gebp_kernel<LhsScalar, RhsScalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
|
||||
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer;
|
||||
@@ -147,7 +150,7 @@ struct tribb_kernel
|
||||
|
||||
if(UpLo==Upper)
|
||||
gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha,
|
||||
-1, -1, 0, 0);
|
||||
-1, -1, 0, 0, workspace);
|
||||
|
||||
// selfadjoint micro block
|
||||
{
|
||||
@@ -155,7 +158,7 @@ struct tribb_kernel
|
||||
buffer.setZero();
|
||||
// 1 - apply the kernel on the temporary buffer
|
||||
gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha,
|
||||
-1, -1, 0, 0);
|
||||
-1, -1, 0, 0, workspace);
|
||||
// 2 - triangular accumulation
|
||||
for(Index j1=0; j1<actualBlockSize; ++j1)
|
||||
{
|
||||
@@ -170,7 +173,7 @@ struct tribb_kernel
|
||||
{
|
||||
Index i = j+actualBlockSize;
|
||||
gebp_kernel(res+j*resStride+i, resStride, blockA+depth*i, actual_b, size-i, depth, actualBlockSize, alpha,
|
||||
-1, -1, 0, 0);
|
||||
-1, -1, 0, 0, workspace);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -265,8 +268,6 @@ template<typename MatrixType, unsigned int UpLo>
|
||||
template<typename ProductDerived, typename _Lhs, typename _Rhs>
|
||||
TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha)
|
||||
{
|
||||
eigen_assert(m_matrix.rows() == prod.rows() && m_matrix.cols() == prod.cols());
|
||||
|
||||
general_product_to_triangular_selector<MatrixType, ProductDerived, UpLo, (_Lhs::ColsAtCompileTime==1) || (_Rhs::RowsAtCompileTime==1)>::run(m_matrix.const_cast_derived(), prod.derived(), alpha);
|
||||
|
||||
return *this;
|
||||
|
||||
@@ -53,8 +53,6 @@ template< \
|
||||
int RhsStorageOrder, bool ConjugateRhs> \
|
||||
struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \
|
||||
{ \
|
||||
typedef gebp_traits<EIGTYPE,EIGTYPE> Traits; \
|
||||
\
|
||||
static void run(Index rows, Index cols, Index depth, \
|
||||
const EIGTYPE* _lhs, Index lhsStride, \
|
||||
const EIGTYPE* _rhs, Index rhsStride, \
|
||||
|
||||
@@ -26,34 +26,6 @@ namespace internal {
|
||||
* |real |cplx |real | alpha is converted to a cplx when calling the run function, no vectorization
|
||||
* |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp
|
||||
* |cplx |real |real | optimal case, vectorization possible via real-cplx mul
|
||||
*
|
||||
* Accesses to the matrix coefficients follow the following logic:
|
||||
*
|
||||
* - if all columns have the same alignment then
|
||||
* - if the columns have the same alignment as the result vector, then easy! (-> AllAligned case)
|
||||
* - otherwise perform unaligned loads only (-> NoneAligned case)
|
||||
* - otherwise
|
||||
* - if even columns have the same alignment then
|
||||
* // odd columns are guaranteed to have the same alignment too
|
||||
* - if even or odd columns have the same alignment as the result, then
|
||||
* // for a register size of 2 scalars, this is guarantee to be the case (e.g., SSE with double)
|
||||
* - perform half aligned and half unaligned loads (-> EvenAligned case)
|
||||
* - otherwise perform unaligned loads only (-> NoneAligned case)
|
||||
* - otherwise, if the register size is 4 scalars (e.g., SSE with float) then
|
||||
* - one over 4 consecutive columns is guaranteed to be aligned with the result vector,
|
||||
* perform simple aligned loads for this column and aligned loads plus re-alignment for the other. (-> FirstAligned case)
|
||||
* // this re-alignment is done by the palign function implemented for SSE in Eigen/src/Core/arch/SSE/PacketMath.h
|
||||
* - otherwise,
|
||||
* // if we get here, this means the register size is greater than 4 (e.g., AVX with floats),
|
||||
* // we currently fall back to the NoneAligned case
|
||||
*
|
||||
* The same reasoning apply for the transposed case.
|
||||
*
|
||||
* The last case (PacketSize>4) could probably be improved by generalizing the FirstAligned case, but since we do not support AVX yet...
|
||||
* One might also wonder why in the EvenAligned case we perform unaligned loads instead of using the aligned-loads plus re-alignment
|
||||
* strategy as in the FirstAligned case. The reason is that we observed that unaligned loads on a 8 byte boundary are not too slow
|
||||
* compared to unaligned loads on a 4 byte boundary.
|
||||
*
|
||||
*/
|
||||
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
|
||||
struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
|
||||
@@ -80,8 +52,7 @@ EIGEN_DONT_INLINE static void run(
|
||||
Index rows, Index cols,
|
||||
const LhsScalar* lhs, Index lhsStride,
|
||||
const RhsScalar* rhs, Index rhsIncr,
|
||||
ResScalar* res, Index resIncr,
|
||||
RhsScalar alpha);
|
||||
ResScalar* res, Index resIncr, RhsScalar alpha);
|
||||
};
|
||||
|
||||
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
|
||||
@@ -89,10 +60,9 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,Co
|
||||
Index rows, Index cols,
|
||||
const LhsScalar* lhs, Index lhsStride,
|
||||
const RhsScalar* rhs, Index rhsIncr,
|
||||
ResScalar* res, Index resIncr,
|
||||
RhsScalar alpha)
|
||||
ResScalar* res, Index resIncr, RhsScalar alpha)
|
||||
{
|
||||
EIGEN_UNUSED_VARIABLE(resIncr);
|
||||
EIGEN_UNUSED_VARIABLE(resIncr)
|
||||
eigen_internal_assert(resIncr==1);
|
||||
#ifdef _EIGEN_ACCUMULATE_PACKETS
|
||||
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
|
||||
@@ -141,12 +111,6 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,Co
|
||||
alignedSize = 0;
|
||||
alignedStart = 0;
|
||||
}
|
||||
else if(LhsPacketSize > 4)
|
||||
{
|
||||
// TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
|
||||
// Currently, it seems to be better to perform unaligned loads anyway
|
||||
alignmentPattern = NoneAligned;
|
||||
}
|
||||
else if (LhsPacketSize>1)
|
||||
{
|
||||
eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize);
|
||||
@@ -351,7 +315,7 @@ EIGEN_DONT_INLINE static void run(
|
||||
Index rows, Index cols,
|
||||
const LhsScalar* lhs, Index lhsStride,
|
||||
const RhsScalar* rhs, Index rhsIncr,
|
||||
ResScalar* res, Index resIncr,
|
||||
ResScalar* res, Index resIncr,
|
||||
ResScalar alpha);
|
||||
};
|
||||
|
||||
@@ -365,7 +329,6 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,Co
|
||||
{
|
||||
EIGEN_UNUSED_VARIABLE(rhsIncr);
|
||||
eigen_internal_assert(rhsIncr==1);
|
||||
|
||||
#ifdef _EIGEN_ACCUMULATE_PACKETS
|
||||
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
|
||||
#endif
|
||||
@@ -411,11 +374,6 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,Co
|
||||
alignedSize = 0;
|
||||
alignedStart = 0;
|
||||
}
|
||||
else if(LhsPacketSize > 4)
|
||||
{
|
||||
// TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
|
||||
alignmentPattern = NoneAligned;
|
||||
}
|
||||
else if (LhsPacketSize>1)
|
||||
{
|
||||
eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize);
|
||||
@@ -453,7 +411,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,Co
|
||||
Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
|
||||
for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
|
||||
{
|
||||
EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
|
||||
EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0);
|
||||
ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
|
||||
|
||||
// this helps the compiler generating good binary code
|
||||
@@ -562,7 +520,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,Co
|
||||
{
|
||||
for (Index i=start; i<end; ++i)
|
||||
{
|
||||
EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
|
||||
EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0);
|
||||
ResPacket ptmp0 = pset1<ResPacket>(tmp0);
|
||||
const LhsScalar* lhs0 = lhs + i*lhsStride;
|
||||
// process first unaligned result's coeffs
|
||||
|
||||
@@ -73,13 +73,13 @@ namespace internal {
|
||||
|
||||
template<typename Index> struct GemmParallelInfo
|
||||
{
|
||||
GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
|
||||
GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {}
|
||||
|
||||
int volatile sync;
|
||||
int volatile users;
|
||||
|
||||
Index lhs_start;
|
||||
Index lhs_length;
|
||||
Index rhs_start;
|
||||
Index rhs_length;
|
||||
};
|
||||
|
||||
template<bool Condition, typename Functor, typename Index>
|
||||
@@ -107,7 +107,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
|
||||
if((!Condition) || (omp_get_num_threads()>1))
|
||||
return func(0,rows, 0,cols);
|
||||
|
||||
Index size = transpose ? rows : cols;
|
||||
Index size = transpose ? cols : rows;
|
||||
|
||||
// 2- compute the maximal number of threads from the size of the product:
|
||||
// FIXME this has to be fine tuned
|
||||
@@ -125,26 +125,30 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
|
||||
if(transpose)
|
||||
std::swap(rows,cols);
|
||||
|
||||
Index blockCols = (cols / threads) & ~Index(0x3);
|
||||
Index blockRows = (rows / threads);
|
||||
blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
|
||||
|
||||
GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads];
|
||||
|
||||
#pragma omp parallel num_threads(threads)
|
||||
{
|
||||
Index i = omp_get_thread_num();
|
||||
// Note that the actual number of threads might be lower than the number of request ones.
|
||||
Index actual_threads = omp_get_num_threads();
|
||||
|
||||
Index blockCols = (cols / actual_threads) & ~Index(0x3);
|
||||
Index blockRows = (rows / actual_threads) & ~Index(0x7);
|
||||
|
||||
Index r0 = i*blockRows;
|
||||
Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows;
|
||||
Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
|
||||
|
||||
Index c0 = i*blockCols;
|
||||
Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols;
|
||||
Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
|
||||
|
||||
info[i].lhs_start = r0;
|
||||
info[i].lhs_length = actualBlockRows;
|
||||
info[i].rhs_start = c0;
|
||||
info[i].rhs_length = actualBlockCols;
|
||||
|
||||
if(transpose) func(c0, actualBlockCols, 0, rows, info);
|
||||
else func(0, rows, c0, actualBlockCols, info);
|
||||
if(transpose)
|
||||
func(0, cols, r0, actualBlockRows, info);
|
||||
else
|
||||
func(r0, actualBlockRows, 0,cols, info);
|
||||
}
|
||||
|
||||
delete[] info;
|
||||
|
||||
@@ -15,7 +15,7 @@ namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
// pack a selfadjoint block diagonal for use with the gebp_kernel
|
||||
template<typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
|
||||
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
|
||||
struct symm_pack_lhs
|
||||
{
|
||||
template<int BlockRows> inline
|
||||
@@ -45,32 +45,25 @@ struct symm_pack_lhs
|
||||
}
|
||||
void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
|
||||
{
|
||||
enum { PacketSize = packet_traits<Scalar>::size };
|
||||
const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
|
||||
Index count = 0;
|
||||
//Index peeled_mc3 = (rows/Pack1)*Pack1;
|
||||
|
||||
const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
|
||||
const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
|
||||
const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
|
||||
|
||||
if(Pack1>=3*PacketSize)
|
||||
for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
|
||||
pack<3*PacketSize>(blockA, lhs, cols, i, count);
|
||||
|
||||
if(Pack1>=2*PacketSize)
|
||||
for(Index i=peeled_mc3; i<peeled_mc2; i+=2*PacketSize)
|
||||
pack<2*PacketSize>(blockA, lhs, cols, i, count);
|
||||
|
||||
if(Pack1>=1*PacketSize)
|
||||
for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
|
||||
pack<1*PacketSize>(blockA, lhs, cols, i, count);
|
||||
Index peeled_mc = (rows/Pack1)*Pack1;
|
||||
for(Index i=0; i<peeled_mc; i+=Pack1)
|
||||
{
|
||||
pack<Pack1>(blockA, lhs, cols, i, count);
|
||||
}
|
||||
|
||||
if(rows-peeled_mc>=Pack2)
|
||||
{
|
||||
pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
|
||||
peeled_mc += Pack2;
|
||||
}
|
||||
|
||||
// do the same with mr==1
|
||||
for(Index i=peeled_mc1; i<rows; i++)
|
||||
for(Index i=peeled_mc; i<rows; i++)
|
||||
{
|
||||
for(Index k=0; k<i; k++)
|
||||
blockA[count++] = lhs(i, k); // normal
|
||||
blockA[count++] = lhs(i, k); // normal
|
||||
|
||||
blockA[count++] = numext::real(lhs(i, i)); // real (diagonal)
|
||||
|
||||
@@ -89,8 +82,7 @@ struct symm_pack_rhs
|
||||
Index end_k = k2 + rows;
|
||||
Index count = 0;
|
||||
const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
|
||||
Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
|
||||
Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
|
||||
Index packet_cols = (cols/nr)*nr;
|
||||
|
||||
// first part: normal case
|
||||
for(Index j2=0; j2<k2; j2+=nr)
|
||||
@@ -99,151 +91,79 @@ struct symm_pack_rhs
|
||||
{
|
||||
blockB[count+0] = rhs(k,j2+0);
|
||||
blockB[count+1] = rhs(k,j2+1);
|
||||
if (nr>=4)
|
||||
if (nr==4)
|
||||
{
|
||||
blockB[count+2] = rhs(k,j2+2);
|
||||
blockB[count+3] = rhs(k,j2+3);
|
||||
}
|
||||
if (nr>=8)
|
||||
{
|
||||
blockB[count+4] = rhs(k,j2+4);
|
||||
blockB[count+5] = rhs(k,j2+5);
|
||||
blockB[count+6] = rhs(k,j2+6);
|
||||
blockB[count+7] = rhs(k,j2+7);
|
||||
}
|
||||
count += nr;
|
||||
}
|
||||
}
|
||||
|
||||
// second part: diagonal block
|
||||
Index end8 = nr>=8 ? (std::min)(k2+rows,packet_cols8) : k2;
|
||||
if(nr>=8)
|
||||
for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
|
||||
{
|
||||
for(Index j2=k2; j2<end8; j2+=8)
|
||||
// again we can split vertically in three different parts (transpose, symmetric, normal)
|
||||
// transpose
|
||||
for(Index k=k2; k<j2; k++)
|
||||
{
|
||||
// again we can split vertically in three different parts (transpose, symmetric, normal)
|
||||
// transpose
|
||||
for(Index k=k2; k<j2; k++)
|
||||
blockB[count+0] = numext::conj(rhs(j2+0,k));
|
||||
blockB[count+1] = numext::conj(rhs(j2+1,k));
|
||||
if (nr==4)
|
||||
{
|
||||
blockB[count+0] = numext::conj(rhs(j2+0,k));
|
||||
blockB[count+1] = numext::conj(rhs(j2+1,k));
|
||||
blockB[count+2] = numext::conj(rhs(j2+2,k));
|
||||
blockB[count+3] = numext::conj(rhs(j2+3,k));
|
||||
blockB[count+4] = numext::conj(rhs(j2+4,k));
|
||||
blockB[count+5] = numext::conj(rhs(j2+5,k));
|
||||
blockB[count+6] = numext::conj(rhs(j2+6,k));
|
||||
blockB[count+7] = numext::conj(rhs(j2+7,k));
|
||||
count += 8;
|
||||
}
|
||||
// symmetric
|
||||
Index h = 0;
|
||||
for(Index k=j2; k<j2+8; k++)
|
||||
{
|
||||
// normal
|
||||
for (Index w=0 ; w<h; ++w)
|
||||
blockB[count+w] = rhs(k,j2+w);
|
||||
|
||||
blockB[count+h] = numext::real(rhs(k,k));
|
||||
|
||||
// transpose
|
||||
for (Index w=h+1 ; w<8; ++w)
|
||||
blockB[count+w] = numext::conj(rhs(j2+w,k));
|
||||
count += 8;
|
||||
++h;
|
||||
}
|
||||
// normal
|
||||
for(Index k=j2+8; k<end_k; k++)
|
||||
{
|
||||
blockB[count+0] = rhs(k,j2+0);
|
||||
blockB[count+1] = rhs(k,j2+1);
|
||||
blockB[count+2] = rhs(k,j2+2);
|
||||
blockB[count+3] = rhs(k,j2+3);
|
||||
blockB[count+4] = rhs(k,j2+4);
|
||||
blockB[count+5] = rhs(k,j2+5);
|
||||
blockB[count+6] = rhs(k,j2+6);
|
||||
blockB[count+7] = rhs(k,j2+7);
|
||||
count += 8;
|
||||
}
|
||||
count += nr;
|
||||
}
|
||||
}
|
||||
if(nr>=4)
|
||||
{
|
||||
for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4)
|
||||
// symmetric
|
||||
Index h = 0;
|
||||
for(Index k=j2; k<j2+nr; k++)
|
||||
{
|
||||
// again we can split vertically in three different parts (transpose, symmetric, normal)
|
||||
// transpose
|
||||
for(Index k=k2; k<j2; k++)
|
||||
{
|
||||
blockB[count+0] = numext::conj(rhs(j2+0,k));
|
||||
blockB[count+1] = numext::conj(rhs(j2+1,k));
|
||||
blockB[count+2] = numext::conj(rhs(j2+2,k));
|
||||
blockB[count+3] = numext::conj(rhs(j2+3,k));
|
||||
count += 4;
|
||||
}
|
||||
// symmetric
|
||||
Index h = 0;
|
||||
for(Index k=j2; k<j2+4; k++)
|
||||
{
|
||||
// normal
|
||||
for (Index w=0 ; w<h; ++w)
|
||||
blockB[count+w] = rhs(k,j2+w);
|
||||
|
||||
blockB[count+h] = numext::real(rhs(k,k));
|
||||
|
||||
// transpose
|
||||
for (Index w=h+1 ; w<4; ++w)
|
||||
blockB[count+w] = numext::conj(rhs(j2+w,k));
|
||||
count += 4;
|
||||
++h;
|
||||
}
|
||||
// normal
|
||||
for(Index k=j2+4; k<end_k; k++)
|
||||
for (Index w=0 ; w<h; ++w)
|
||||
blockB[count+w] = rhs(k,j2+w);
|
||||
|
||||
blockB[count+h] = numext::real(rhs(k,k));
|
||||
|
||||
// transpose
|
||||
for (Index w=h+1 ; w<nr; ++w)
|
||||
blockB[count+w] = numext::conj(rhs(j2+w,k));
|
||||
count += nr;
|
||||
++h;
|
||||
}
|
||||
// normal
|
||||
for(Index k=j2+nr; k<end_k; k++)
|
||||
{
|
||||
blockB[count+0] = rhs(k,j2+0);
|
||||
blockB[count+1] = rhs(k,j2+1);
|
||||
if (nr==4)
|
||||
{
|
||||
blockB[count+0] = rhs(k,j2+0);
|
||||
blockB[count+1] = rhs(k,j2+1);
|
||||
blockB[count+2] = rhs(k,j2+2);
|
||||
blockB[count+3] = rhs(k,j2+3);
|
||||
count += 4;
|
||||
}
|
||||
count += nr;
|
||||
}
|
||||
}
|
||||
|
||||
// third part: transposed
|
||||
if(nr>=8)
|
||||
for(Index j2=k2+rows; j2<packet_cols; j2+=nr)
|
||||
{
|
||||
for(Index j2=k2+rows; j2<packet_cols8; j2+=8)
|
||||
for(Index k=k2; k<end_k; k++)
|
||||
{
|
||||
for(Index k=k2; k<end_k; k++)
|
||||
blockB[count+0] = numext::conj(rhs(j2+0,k));
|
||||
blockB[count+1] = numext::conj(rhs(j2+1,k));
|
||||
if (nr==4)
|
||||
{
|
||||
blockB[count+0] = numext::conj(rhs(j2+0,k));
|
||||
blockB[count+1] = numext::conj(rhs(j2+1,k));
|
||||
blockB[count+2] = numext::conj(rhs(j2+2,k));
|
||||
blockB[count+3] = numext::conj(rhs(j2+3,k));
|
||||
blockB[count+4] = numext::conj(rhs(j2+4,k));
|
||||
blockB[count+5] = numext::conj(rhs(j2+5,k));
|
||||
blockB[count+6] = numext::conj(rhs(j2+6,k));
|
||||
blockB[count+7] = numext::conj(rhs(j2+7,k));
|
||||
count += 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(nr>=4)
|
||||
{
|
||||
for(Index j2=(std::max)(packet_cols8,k2+rows); j2<packet_cols4; j2+=4)
|
||||
{
|
||||
for(Index k=k2; k<end_k; k++)
|
||||
{
|
||||
blockB[count+0] = numext::conj(rhs(j2+0,k));
|
||||
blockB[count+1] = numext::conj(rhs(j2+1,k));
|
||||
blockB[count+2] = numext::conj(rhs(j2+2,k));
|
||||
blockB[count+3] = numext::conj(rhs(j2+3,k));
|
||||
count += 4;
|
||||
}
|
||||
count += nr;
|
||||
}
|
||||
}
|
||||
|
||||
// copy the remaining columns one at a time (=> the same with nr==1)
|
||||
for(Index j2=packet_cols4; j2<cols; ++j2)
|
||||
for(Index j2=packet_cols; j2<cols; ++j2)
|
||||
{
|
||||
// transpose
|
||||
Index half = (std::min)(end_k,j2);
|
||||
@@ -341,10 +261,11 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
|
||||
// kc must smaller than mc
|
||||
kc = (std::min)(kc,mc);
|
||||
|
||||
std::size_t sizeB = kc*cols;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = sizeW + kc*cols;
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
|
||||
Scalar* blockB = allocatedBlockB;
|
||||
Scalar* blockB = allocatedBlockB + sizeW;
|
||||
|
||||
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
||||
symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
||||
@@ -427,10 +348,11 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
|
||||
Index mc = rows; // cache block size along the M direction
|
||||
Index nc = cols; // cache block size along the N direction
|
||||
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
|
||||
std::size_t sizeB = kc*cols;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = sizeW + kc*cols;
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
|
||||
Scalar* blockB = allocatedBlockB;
|
||||
Scalar* blockB = allocatedBlockB + sizeW;
|
||||
|
||||
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
||||
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
||||
@@ -500,11 +422,11 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
|
||||
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
|
||||
internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
|
||||
::run(
|
||||
lhs.rows(), rhs.cols(), // sizes
|
||||
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
|
||||
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
|
||||
&dst.coeffRef(0,0), dst.outerStride(), // result info
|
||||
actualAlpha // alpha
|
||||
lhs.rows(), rhs.cols(), // sizes
|
||||
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
|
||||
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
|
||||
&dst.coeffRef(0,0), dst.outerStride(), // result info
|
||||
actualAlpha // alpha
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -113,9 +113,9 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
|
||||
|
||||
for (size_t i=starti; i<alignedStart; ++i)
|
||||
{
|
||||
res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
|
||||
t2 += cj1.pmul(A0[i], rhs[i]);
|
||||
t3 += cj1.pmul(A1[i], rhs[i]);
|
||||
res[i] += t0 * A0[i] + t1 * A1[i];
|
||||
t2 += numext::conj(A0[i]) * rhs[i];
|
||||
t3 += numext::conj(A1[i]) * rhs[i];
|
||||
}
|
||||
// Yes this an optimization for gcc 4.3 and 4.4 (=> huge speed up)
|
||||
// gcc 4.2 does this optimization automatically.
|
||||
@@ -218,7 +218,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
|
||||
if(!EvalToDest)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
Index size = dest.size();
|
||||
int size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
MappedDest(actualDestPtr, dest.size()) = dest;
|
||||
@@ -227,7 +227,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
|
||||
if(!UseRhs)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
Index size = rhs.size();
|
||||
int size = rhs.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
|
||||
|
||||
@@ -125,9 +125,11 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
|
||||
std::size_t sizeA = kc*mc;
|
||||
std::size_t sizeB = kc*cols;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
|
||||
|
||||
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
|
||||
triangularBuffer.setZero();
|
||||
@@ -185,7 +187,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
pack_lhs(blockA, triangularBuffer.data(), triangularBuffer.outerStride(), actualPanelWidth, actualPanelWidth);
|
||||
|
||||
gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols, alpha,
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset);
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
|
||||
|
||||
// GEBP with remaining micro panel
|
||||
if (lengthTarget>0)
|
||||
@@ -195,7 +197,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget);
|
||||
|
||||
gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha,
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset);
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -209,7 +211,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
|
||||
(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
|
||||
|
||||
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0);
|
||||
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -263,10 +265,12 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
||||
|
||||
std::size_t sizeA = kc*mc;
|
||||
std::size_t sizeB = kc*cols+EIGEN_ALIGN_BYTES/sizeof(Scalar);
|
||||
std::size_t sizeB = kc*cols;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
|
||||
|
||||
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
|
||||
triangularBuffer.setZero();
|
||||
@@ -300,7 +304,6 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
|
||||
|
||||
Scalar* geb = blockB+ts*ts;
|
||||
geb = geb + internal::first_aligned(geb,EIGEN_ALIGN_BYTES/sizeof(Scalar));
|
||||
|
||||
pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, actual_kc, rs);
|
||||
|
||||
@@ -354,13 +357,14 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
actual_mc, panelLength, actualPanelWidth,
|
||||
alpha,
|
||||
actual_kc, actual_kc, // strides
|
||||
blockOffset, blockOffset);// offsets
|
||||
blockOffset, blockOffset,// offsets
|
||||
blockW); // workspace
|
||||
}
|
||||
}
|
||||
gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride,
|
||||
blockA, geb, actual_mc, actual_kc, rs,
|
||||
alpha,
|
||||
-1, -1, 0, 0);
|
||||
-1, -1, 0, 0, blockW);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user