mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Compare commits
913 Commits
before-git
...
3.4.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d71c30c478 | ||
|
|
79488684e1 | ||
|
|
b66188b5df | ||
|
|
5c81034fc1 | ||
|
|
c3f9707824 | ||
|
|
4fc1cfeda5 | ||
|
|
cd7263e7f6 | ||
|
|
eb57d4bdf1 | ||
|
|
4a2c4901ce | ||
|
|
68f4e58cfa | ||
|
|
0e607fd350 | ||
|
|
13507d1efd | ||
|
|
85ffda9539 | ||
|
|
72f77ccb3e | ||
|
|
526a6328e2 | ||
|
|
7b378c2d91 | ||
|
|
129e003cdf | ||
|
|
6161ce5cde | ||
|
|
be62728876 | ||
|
|
1426855b68 | ||
|
|
b2deb94e4a | ||
|
|
c23abcf25c | ||
|
|
f23b8c0d78 | ||
|
|
d60c3a3341 | ||
|
|
57c8d7c93f | ||
|
|
ab92609cad | ||
|
|
551e95a409 | ||
|
|
2924f58188 | ||
|
|
f1922b6dac | ||
|
|
052d91349a | ||
|
|
72e38684c1 | ||
|
|
bb1dbb4df6 | ||
|
|
0a5abc042e | ||
|
|
42d9cc0b1d | ||
|
|
7312765992 | ||
|
|
88cd53774e | ||
|
|
c0378fedd8 | ||
|
|
414f0a1756 | ||
|
|
3adc78e39c | ||
|
|
e67c494cba | ||
|
|
3e7bcf54f7 | ||
|
|
9df21dc8b4 | ||
|
|
157756130a | ||
|
|
7893285e59 | ||
|
|
3ee06ec52f | ||
|
|
287c801780 | ||
|
|
42b04a08c4 | ||
|
|
b86ac5f1e7 | ||
|
|
380a9483e0 | ||
|
|
25270e35db | ||
|
|
ebf968b272 | ||
|
|
bd57b99f44 | ||
|
|
b8f894947a | ||
|
|
4be2870267 | ||
|
|
f7085a1096 | ||
|
|
63291e34bf | ||
|
|
23886fd7db | ||
|
|
7c6020e424 | ||
|
|
2e3f1d8044 | ||
|
|
fc5575264f | ||
|
|
bae907b8f6 | ||
|
|
cf207eacd5 | ||
|
|
e734787bb7 | ||
|
|
1217390db4 | ||
|
|
7176ae1623 | ||
|
|
0db5928f00 | ||
|
|
764b132a79 | ||
|
|
d0bfdc1658 | ||
|
|
75ebef26b6 | ||
|
|
208e44c979 | ||
|
|
17d57fb168 | ||
|
|
6973687c70 | ||
|
|
ac561cd038 | ||
|
|
554982beef | ||
|
|
89a71f3126 | ||
|
|
a605d6b996 | ||
|
|
dfcd6de20a | ||
|
|
1ec1b16d36 | ||
|
|
0f39c851a5 | ||
|
|
daa0b70a65 | ||
|
|
33ba98b641 | ||
|
|
e6e921f0e3 | ||
|
|
ebfdd6bdea | ||
|
|
357bb11066 | ||
|
|
9b3d104c02 | ||
|
|
af3ca50f0b | ||
|
|
26b8fabd80 | ||
|
|
385a0b38f8 | ||
|
|
a4ecfd8ead | ||
|
|
f296720d7d | ||
|
|
f04d02dbf6 | ||
|
|
6f9bffe8dd | ||
|
|
d4c24eca96 | ||
|
|
72b0759451 | ||
|
|
34d0d83278 | ||
|
|
63e8b31c94 | ||
|
|
99473f255b | ||
|
|
2ce5dc428f | ||
|
|
8f1b6198c2 | ||
|
|
dae8c6d7ad | ||
|
|
2dfdaa2abf | ||
|
|
a659b5dbb2 | ||
|
|
879854382c | ||
|
|
90dce8dfa3 | ||
|
|
b26ada1e03 | ||
|
|
f5593b4baa | ||
|
|
3eb0c8b69e | ||
|
|
26adb0e5af | ||
|
|
5547205092 | ||
|
|
771e91860b | ||
|
|
4786edba26 | ||
|
|
15e23ab849 | ||
|
|
af6e7cc66a | ||
|
|
3fbb1c1b48 | ||
|
|
28cd280726 | ||
|
|
8cc3ec8e47 | ||
|
|
d641062a05 | ||
|
|
1000cf9fbc | ||
|
|
fd2817e3d6 | ||
|
|
11dacc4802 | ||
|
|
ab6f39e1e3 | ||
|
|
6576ee4fb1 | ||
|
|
68f35d76b8 | ||
|
|
d0e2b3e58d | ||
|
|
669dc8fadf | ||
|
|
33a602eb37 | ||
|
|
a9490cd3c5 | ||
|
|
61efca2e90 | ||
|
|
a5469a6f0f | ||
|
|
6aaa45db5f | ||
|
|
ea57f9b78f | ||
|
|
f55a112cb1 | ||
|
|
a11bdf3965 | ||
|
|
b9ac284e52 | ||
|
|
3df7d7fec9 | ||
|
|
80c5b8b3c3 | ||
|
|
848db4ed2d | ||
|
|
5cb7505a44 | ||
|
|
af912a7b5c | ||
|
|
86d958e8f2 | ||
|
|
8e7bd5397f | ||
|
|
8a21df2d9c | ||
|
|
ac78f84b72 | ||
|
|
973b04f3e1 | ||
|
|
16844d7529 | ||
|
|
5cb2dfec1d | ||
|
|
c473d69d22 | ||
|
|
6469fbf93a | ||
|
|
3a4a4e9fde | ||
|
|
fab848d4f7 | ||
|
|
b158fcaa74 | ||
|
|
b6d9b6f48d | ||
|
|
b1f06aac61 | ||
|
|
f6954e4485 | ||
|
|
b30a2a527e | ||
|
|
bc1b354b32 | ||
|
|
bd0d873b16 | ||
|
|
e0fe006915 | ||
|
|
d259104c8d | ||
|
|
cd543434bf | ||
|
|
36be6747e0 | ||
|
|
d1ed3fe5c9 | ||
|
|
21e0ad056e | ||
|
|
709d704819 | ||
|
|
995714142d | ||
|
|
730a781221 | ||
|
|
77b2807322 | ||
|
|
52e545324e | ||
|
|
0cd4719f3e | ||
|
|
770ed0794e | ||
|
|
e7248b26a1 | ||
|
|
a1e1612c28 | ||
|
|
f3aaba8705 | ||
|
|
34e5f34b39 | ||
|
|
4612627355 | ||
|
|
3e71c621c9 | ||
|
|
b5d218d857 | ||
|
|
bd72e4a8c4 | ||
|
|
3af8c262ac | ||
|
|
7e3bc4177e | ||
|
|
c379a21191 | ||
|
|
6f57470bcc | ||
|
|
df53e28179 | ||
|
|
fbdaff81bd | ||
|
|
71320af66a | ||
|
|
962a596d21 | ||
|
|
0ab1f8ec03 | ||
|
|
b0fe14213e | ||
|
|
23469c3cda | ||
|
|
18824d10ea | ||
|
|
f9b2e92040 | ||
|
|
9c193db5c7 | ||
|
|
6b6ba41269 | ||
|
|
96007cae8c | ||
|
|
5d918b82a8 | ||
|
|
05c9d7ce20 | ||
|
|
943ef50a2d | ||
|
|
7ea4adb5f0 | ||
|
|
71498b32c9 | ||
|
|
ebd5c6d44b | ||
|
|
a8eb797a43 | ||
|
|
929bc0e191 | ||
|
|
f046e326d9 | ||
|
|
3335e0767c | ||
|
|
3395f4e604 | ||
|
|
f03d3e7072 | ||
|
|
b8cf1ed753 | ||
|
|
9263475740 | ||
|
|
0fdc99c65e | ||
|
|
07cc362238 | ||
|
|
4ef67cbfb2 | ||
|
|
c2b6df6e60 | ||
|
|
277d369060 | ||
|
|
7aee90b8d3 | ||
|
|
3147391d94 | ||
|
|
115591b9e3 | ||
|
|
fd100138dd | ||
|
|
1ec173b54e | ||
|
|
aef926abf6 | ||
|
|
f1032255d3 | ||
|
|
f57dec64ef | ||
|
|
926e1a8226 | ||
|
|
cd474d4cd0 | ||
|
|
0b56b62f30 | ||
|
|
44cc96e1a1 | ||
|
|
576e451b10 | ||
|
|
0d89012708 | ||
|
|
6d2506040c | ||
|
|
cb44a003de | ||
|
|
13d7658c5d | ||
|
|
338924602d | ||
|
|
93bff85a42 | ||
|
|
4e0357c6dd | ||
|
|
1e9f623f3e | ||
|
|
4240b480e0 | ||
|
|
5b83d3c4bc | ||
|
|
46ecdcd745 | ||
|
|
9a1691a14e | ||
|
|
bb33880e57 | ||
|
|
237c59a2aa | ||
|
|
3dc42eeaec | ||
|
|
7adc1545b4 | ||
|
|
c0c7b695cd | ||
|
|
c334eece44 | ||
|
|
5ccb72b2e4 | ||
|
|
9c90d5d832 | ||
|
|
5d37114fc0 | ||
|
|
930696fc53 | ||
|
|
56966fd2e6 | ||
|
|
5a3c9eddb4 | ||
|
|
69ec4907da | ||
|
|
7571704a43 | ||
|
|
84955d109f | ||
|
|
601814b575 | ||
|
|
05bab8139a | ||
|
|
eebde572d9 | ||
|
|
8190739f12 | ||
|
|
b6db013435 | ||
|
|
1f6b1c1a1f | ||
|
|
517294d6e1 | ||
|
|
94e2250b36 | ||
|
|
d82d915047 | ||
|
|
380d0e4916 | ||
|
|
e83af2cc24 | ||
|
|
413ff2b531 | ||
|
|
a235ddef39 | ||
|
|
4780d8dfb2 | ||
|
|
fd5d23fdf3 | ||
|
|
a2040ef796 | ||
|
|
c2c0f6f64b | ||
|
|
ee4e099aa2 | ||
|
|
9fc93ce31a | ||
|
|
1374f49f28 | ||
|
|
2d6eaaf687 | ||
|
|
47722a66f2 | ||
|
|
5e75331b9f | ||
|
|
b5fc69bdd8 | ||
|
|
4b683b65df | ||
|
|
1cb1ffd5b2 | ||
|
|
4b502a7215 | ||
|
|
85868564df | ||
|
|
cbb6ae6296 | ||
|
|
573570b6c9 | ||
|
|
98cf1e076f | ||
|
|
ee2a8f7139 | ||
|
|
3835046309 | ||
|
|
4fbd01cd4b | ||
|
|
a883a8797c | ||
|
|
0bd9e9bc45 | ||
|
|
77c66e368c | ||
|
|
2f908f8255 | ||
|
|
82f13830e6 | ||
|
|
d1825cbb68 | ||
|
|
d9288f078d | ||
|
|
85ebd6aff8 | ||
|
|
2947c0cc84 | ||
|
|
25424f4cf1 | ||
|
|
42acbd5700 | ||
|
|
9e0dc8f09b | ||
|
|
da19f7a910 | ||
|
|
fc2cc10842 | ||
|
|
a33855f6ee | ||
|
|
83df5df61b | ||
|
|
ac3c5aad31 | ||
|
|
63abb10000 | ||
|
|
baf601a0e3 | ||
|
|
587a691516 | ||
|
|
8830d66c02 | ||
|
|
54425a39b2 | ||
|
|
34d0be9ec1 | ||
|
|
42a8bdd4d7 | ||
|
|
28564957ac | ||
|
|
ab7fe215f9 | ||
|
|
1f4c0311cd | ||
|
|
3e819d83bf | ||
|
|
69adf26aa3 | ||
|
|
9357feedc7 | ||
|
|
a2c0542010 | ||
|
|
dfd6720d82 | ||
|
|
1e1c8a735c | ||
|
|
f6fc66aa75 | ||
|
|
d58678069c | ||
|
|
2859db0220 | ||
|
|
fcb5106c6e | ||
|
|
6197ce1a35 | ||
|
|
22edb46823 | ||
|
|
ace7f132ed | ||
|
|
90187a33e1 | ||
|
|
3ddc0974ce | ||
|
|
c24bee6120 | ||
|
|
e4233b6e3d | ||
|
|
ae95b74af9 | ||
|
|
5bbc9cea93 | ||
|
|
b5a926a0f6 | ||
|
|
78ee3d6261 | ||
|
|
af1247fbc1 | ||
|
|
87729ea39f | ||
|
|
748489ef9c | ||
|
|
d59ef212e1 | ||
|
|
e7b8643d70 | ||
|
|
5521c65afb | ||
|
|
69a4f70956 | ||
|
|
824272cde8 | ||
|
|
4811e81966 | ||
|
|
f019b97aca | ||
|
|
0cc9b5eb40 | ||
|
|
c3fbc6cec7 | ||
|
|
ed964ba3f1 | ||
|
|
8dfe1029a5 | ||
|
|
eb71e5db98 | ||
|
|
df4bc2731c | ||
|
|
75ce9cd2a7 | ||
|
|
9fb7062440 | ||
|
|
b8502a9dd6 | ||
|
|
2e83cbbba9 | ||
|
|
c0a889890f | ||
|
|
f612df2736 | ||
|
|
14b7ebea11 | ||
|
|
c9d4367fa4 | ||
|
|
d24f9f9b55 | ||
|
|
14487ed14e | ||
|
|
b271110788 | ||
|
|
d098c4d64c | ||
|
|
543e34ab9d | ||
|
|
b8d1857f0d | ||
|
|
853a5c4b84 | ||
|
|
94327dbfba | ||
|
|
1296abdf82 | ||
|
|
6045243141 | ||
|
|
1a96d49afe | ||
|
|
2468253c9a | ||
|
|
82d61af3a4 | ||
|
|
5f0b4a4010 | ||
|
|
6cbb3038ac | ||
|
|
5bfc67f9e7 | ||
|
|
a6601070f2 | ||
|
|
9a663973b4 | ||
|
|
e72dfeb8b9 | ||
|
|
199c5f2b47 | ||
|
|
1e0c7d4f49 | ||
|
|
976ae0ca6f | ||
|
|
c65c2b31d4 | ||
|
|
39a590dfb6 | ||
|
|
8f686ac4ec | ||
|
|
2660d01fa7 | ||
|
|
a3521d743c | ||
|
|
ca528593f4 | ||
|
|
81b5fe2f0a | ||
|
|
4fb3459a23 | ||
|
|
4bfcee47b9 | ||
|
|
29ebd84cb7 | ||
|
|
fe19714f80 | ||
|
|
e67672024d | ||
|
|
5e7d4c33d6 | ||
|
|
fb5b59641a | ||
|
|
e19829c3b0 | ||
|
|
5529db7524 | ||
|
|
51eba8c3e2 | ||
|
|
5297b7162a | ||
|
|
ecb7b19dfa | ||
|
|
6eebe97bab | ||
|
|
f284c8592b | ||
|
|
4cb0592af7 | ||
|
|
6b34568c74 | ||
|
|
0065f9d322 | ||
|
|
841c8986f8 | ||
|
|
113e61f364 | ||
|
|
98ca58b02c | ||
|
|
c31ead8a15 | ||
|
|
f44197fabd | ||
|
|
a31effc3bc | ||
|
|
8523d447a1 | ||
|
|
5908aeeaba | ||
|
|
119763cf38 | ||
|
|
6cf0ab5e99 | ||
|
|
aba3998278 | ||
|
|
db5691ff2b | ||
|
|
88d4c6d4c8 | ||
|
|
2ac0b78739 | ||
|
|
10c77b0ff4 | ||
|
|
73922b0174 | ||
|
|
5f9cfb2529 | ||
|
|
ce4af0b38f | ||
|
|
a7749c09bc | ||
|
|
128eebf05e | ||
|
|
33e0af0130 | ||
|
|
7f09d3487d | ||
|
|
12fd3dd655 | ||
|
|
aa8b22e776 | ||
|
|
5336ad8591 | ||
|
|
0845df7f77 | ||
|
|
9b51dc7972 | ||
|
|
be0574e215 | ||
|
|
7ff0b7a980 | ||
|
|
9ad4096ccb | ||
|
|
f702792a7c | ||
|
|
db61b8d478 | ||
|
|
622c598944 | ||
|
|
90ee821c56 | ||
|
|
9fde9cce5d | ||
|
|
4cb563a01e | ||
|
|
7eb07da538 | ||
|
|
36200b7855 | ||
|
|
54589635ad | ||
|
|
984d010b7b | ||
|
|
b578930657 | ||
|
|
66841ea070 | ||
|
|
6e3b795f81 | ||
|
|
abcde69a79 | ||
|
|
f85038b7f3 | ||
|
|
56c8b14d87 | ||
|
|
fb4548e27b | ||
|
|
1615a27993 | ||
|
|
1414e2212c | ||
|
|
170a504c2f | ||
|
|
598e1b6e54 | ||
|
|
0668c68b03 | ||
|
|
288d456c29 | ||
|
|
3f4684f87d | ||
|
|
0784d9f87b | ||
|
|
a4edb1079c | ||
|
|
4c42d5ee41 | ||
|
|
e0d13ead90 | ||
|
|
c35965b381 | ||
|
|
f0e46ed5d4 | ||
|
|
f19bcffee6 | ||
|
|
65e2169c45 | ||
|
|
b2126fd6b5 | ||
|
|
25d8498f8b | ||
|
|
660c6b857c | ||
|
|
d5b7981119 | ||
|
|
e409795d6b | ||
|
|
cdd8fdc32e | ||
|
|
bde6741641 | ||
|
|
21a8a2487c | ||
|
|
38ae5353ab | ||
|
|
352f1422d3 | ||
|
|
2044084979 | ||
|
|
3daf92c7a5 | ||
|
|
587fd6ab70 | ||
|
|
2a6addb4f9 | ||
|
|
f149e0ebc3 | ||
|
|
8d9cfba799 | ||
|
|
e741b43668 | ||
|
|
0bdc0dba20 | ||
|
|
cb654b1c45 | ||
|
|
52d1dd979a | ||
|
|
166fcdecdb | ||
|
|
bb1de9dbde | ||
|
|
12dda34b15 | ||
|
|
070d303d56 | ||
|
|
fdf2ee62c5 | ||
|
|
05754100fe | ||
|
|
3bee9422d6 | ||
|
|
19e6496ce0 | ||
|
|
6cee8d347e | ||
|
|
bc7d1599fb | ||
|
|
cf0b5b0344 | ||
|
|
751f18f2c0 | ||
|
|
5dc2fbabee | ||
|
|
55967f87d1 | ||
|
|
839aa505c3 | ||
|
|
536c8a79f2 | ||
|
|
8c9976d7f0 | ||
|
|
c6efc4e0ba | ||
|
|
e82722a4a7 | ||
|
|
f3d2ea48f5 | ||
|
|
c7eb3a74cb | ||
|
|
bccf055a7c | ||
|
|
82c0c18a83 | ||
|
|
00be0a7ff3 | ||
|
|
8eb461a431 | ||
|
|
2e8f850c78 | ||
|
|
125cc9a5df | ||
|
|
8cfe0db108 | ||
|
|
baf9d762b7 | ||
|
|
634bd79b0e | ||
|
|
655c3a4042 | ||
|
|
5ec4907434 | ||
|
|
f9fac1d5b0 | ||
|
|
2dbac2f99f | ||
|
|
e2f21465fe | ||
|
|
305b8bd277 | ||
|
|
9ee9ac81de | ||
|
|
a9a2f2bebf | ||
|
|
f23dc5b971 | ||
|
|
4d91519a9b | ||
|
|
25d8ae7465 | ||
|
|
eb4d4ae070 | ||
|
|
71c85df4c1 | ||
|
|
70fbcf82ed | ||
|
|
2627e2f2e6 | ||
|
|
ddd48b242c | ||
|
|
e57281a741 | ||
|
|
1992af3de2 | ||
|
|
7b80609d49 | ||
|
|
89f90b585d | ||
|
|
c5985c46f5 | ||
|
|
68f69414f7 | ||
|
|
a7170f2aca | ||
|
|
550e8f8f57 | ||
|
|
9842366bba | ||
|
|
aa56e1d980 | ||
|
|
1e74f93d55 | ||
|
|
79818216ed | ||
|
|
c770746d70 | ||
|
|
22f67b5958 | ||
|
|
a3b300f1af | ||
|
|
38abf2be42 | ||
|
|
4cf01d2cf5 | ||
|
|
fd1dcb6b45 | ||
|
|
6c9c3f9a1a | ||
|
|
a8fdcae55d | ||
|
|
11e4056f6b | ||
|
|
17268b155d | ||
|
|
41d5d5334b | ||
|
|
3669498f5a | ||
|
|
60218829b7 | ||
|
|
2d63706545 | ||
|
|
6bba58f109 | ||
|
|
e9b55c4db8 | ||
|
|
117a4c0617 | ||
|
|
394f564055 | ||
|
|
8e9cc5b10a | ||
|
|
9175f50d6f | ||
|
|
280f4f2407 | ||
|
|
bb69a8db5d | ||
|
|
90f6d9d23e | ||
|
|
8324e5e049 | ||
|
|
852513e7a6 | ||
|
|
bec72345d6 | ||
|
|
276db21f26 | ||
|
|
cf12474a8b | ||
|
|
c29935b323 | ||
|
|
b714dd9701 | ||
|
|
e24a1f57e3 | ||
|
|
6961468915 | ||
|
|
348a48682e | ||
|
|
82fe059f35 | ||
|
|
9d11e2c03e | ||
|
|
39a038f2e4 | ||
|
|
f895755c0e | ||
|
|
09f015852b | ||
|
|
e265f7ed8e | ||
|
|
a725a3233c | ||
|
|
b9ff791fed | ||
|
|
61461d682a | ||
|
|
ecb7bc9514 | ||
|
|
09f595a269 | ||
|
|
28aef8e816 | ||
|
|
4a77eda1fd | ||
|
|
d9f0d9eb76 | ||
|
|
21edea5edd | ||
|
|
011e0db31d | ||
|
|
6ea8091705 | ||
|
|
4700713faf | ||
|
|
af6f43d7ff | ||
|
|
274ef12b61 | ||
|
|
208b3626d1 | ||
|
|
e3e2cf9d24 | ||
|
|
61fc78bbda | ||
|
|
c6953f799b | ||
|
|
807e51528d | ||
|
|
9a4d04c05f | ||
|
|
4e4d3f32d1 | ||
|
|
7a8d3d5b81 | ||
|
|
9022f5aa8a | ||
|
|
d199c17b14 | ||
|
|
4091f6b25c | ||
|
|
183a208212 | ||
|
|
8f8d77b516 | ||
|
|
2279f2c62f | ||
|
|
b431024404 | ||
|
|
f66f3393e3 | ||
|
|
22c971a225 | ||
|
|
f93841b53e | ||
|
|
ee714f79f7 | ||
|
|
f7b185a8b1 | ||
|
|
9078f47cd6 | ||
|
|
3b445d9bf2 | ||
|
|
44b9d4e412 | ||
|
|
d5a0d89491 | ||
|
|
30960d485e | ||
|
|
f9d1500f74 | ||
|
|
068121ec02 | ||
|
|
74ff5719b3 | ||
|
|
3a0b23e473 | ||
|
|
6b0c0b587e | ||
|
|
6425e875a1 | ||
|
|
a967fadb21 | ||
|
|
e4b24e7fb2 | ||
|
|
ce5c59729d | ||
|
|
b8a13f13ca | ||
|
|
821702e771 | ||
|
|
493a7c773c | ||
|
|
38e4a67394 | ||
|
|
c4b99f78c7 | ||
|
|
9aad16b443 | ||
|
|
c4aa8e0db2 | ||
|
|
e55182ac09 | ||
|
|
14022f5eb5 | ||
|
|
a5b226920f | ||
|
|
3af744b023 | ||
|
|
31a6b88ff3 | ||
|
|
880fa43b2b | ||
|
|
6f0f6f792e | ||
|
|
cc0c38ace8 | ||
|
|
bb56a62582 | ||
|
|
3012e755e9 | ||
|
|
e4fb0ddf78 | ||
|
|
65e400896b | ||
|
|
5636f80d11 | ||
|
|
7c5d48f313 | ||
|
|
71e08c702b | ||
|
|
adc861cabd | ||
|
|
5328c9be43 | ||
|
|
35d149e34c | ||
|
|
85428a3440 | ||
|
|
5272106826 | ||
|
|
5f25bcf7d6 | ||
|
|
6fe88a3c9d | ||
|
|
6568856275 | ||
|
|
27e6648074 | ||
|
|
5b9bfc892a | ||
|
|
e5886457c8 | ||
|
|
25424d91f6 | ||
|
|
8bb0febaf9 | ||
|
|
1b1082334b | ||
|
|
603e213d13 | ||
|
|
c060114a25 | ||
|
|
fe8c3ef3cb | ||
|
|
d10b27fe37 | ||
|
|
d4a727d092 | ||
|
|
d2bb6cf396 | ||
|
|
c6820a6316 | ||
|
|
8ba1b0f41a | ||
|
|
704798d1df | ||
|
|
46f8a18567 | ||
|
|
21122498ec | ||
|
|
23b7f0572b | ||
|
|
d0f5d4bc50 | ||
|
|
5e484fa11d | ||
|
|
3ec4f0b641 | ||
|
|
b92206676c | ||
|
|
99da2e1a8d | ||
|
|
649fd1c2ae | ||
|
|
e48d8e4725 | ||
|
|
b8ca93842c | ||
|
|
fb0c6868ad | ||
|
|
c1ffe452fc | ||
|
|
2ce2f51989 | ||
|
|
1b84f21e32 | ||
|
|
38b91f256b | ||
|
|
bed7fbe854 | ||
|
|
0e1a33a461 | ||
|
|
acab22c205 | ||
|
|
ac2eca6b11 | ||
|
|
0aeaf5f451 | ||
|
|
60faa9f897 | ||
|
|
b11f817bcf | ||
|
|
56b3e3f3f8 | ||
|
|
4ab32e2de2 | ||
|
|
dcf7655b3d | ||
|
|
ed00df445d | ||
|
|
fb77b7288c | ||
|
|
ee4715ff48 | ||
|
|
8889a2c1c6 | ||
|
|
6964ae8d52 | ||
|
|
cb63153183 | ||
|
|
116c5235ac | ||
|
|
8731452b97 | ||
|
|
9cb8771e9c | ||
|
|
145e51516f | ||
|
|
689b57070d | ||
|
|
f3b8d441f6 | ||
|
|
dc0b81fb1d | ||
|
|
13d25f5ed8 | ||
|
|
7222f0b6b5 | ||
|
|
14f84978e8 | ||
|
|
ff4e7a0820 | ||
|
|
03ebdf6acb | ||
|
|
386d809bde | ||
|
|
6b9c92fe7e | ||
|
|
cf7adf3a5d | ||
|
|
231ce21535 | ||
|
|
a475bf14d4 | ||
|
|
c6c84ed961 | ||
|
|
6228f27234 | ||
|
|
39cbd6578f | ||
|
|
a7d2552af8 | ||
|
|
463ec86648 | ||
|
|
b5d66b5e73 | ||
|
|
c4059ffcb6 | ||
|
|
1fcaaf460f | ||
|
|
3ce18d3c8f | ||
|
|
c2ab36f47a | ||
|
|
537e2b322f | ||
|
|
fdc1cbdce3 | ||
|
|
daf9bbeca2 | ||
|
|
6d2a9a524b | ||
|
|
029a76e115 | ||
|
|
99b7f7cb9c | ||
|
|
72782d13e0 | ||
|
|
867a756509 | ||
|
|
ab615e4114 | ||
|
|
95177362ed | ||
|
|
8d1302f566 | ||
|
|
8719b9c5bc | ||
|
|
8e1df5b082 | ||
|
|
4e7046063b | ||
|
|
2d67af2d2b | ||
|
|
5328cd62b3 | ||
|
|
cc86a31e20 | ||
|
|
8a7f360ec3 | ||
|
|
a145e4adf5 | ||
|
|
8ce9630ddb | ||
|
|
9b411757ab | ||
|
|
d640276d31 | ||
|
|
fa8fd4b4d5 | ||
|
|
a187ffea28 | ||
|
|
ba9d18b938 | ||
|
|
5fdc179241 | ||
|
|
d3e81db6c5 | ||
|
|
c1d944dd91 | ||
|
|
5c4e19fbe7 | ||
|
|
225ab040e0 | ||
|
|
74ec8e6618 | ||
|
|
49f1aeb60d | ||
|
|
2fd8a5a08f | ||
|
|
0e59f786e1 | ||
|
|
7b76c85daf | ||
|
|
a74a278abd | ||
|
|
923ee9aba3 | ||
|
|
a32923a439 | ||
|
|
1e41406c36 | ||
|
|
fbe7916c55 | ||
|
|
82f54ad144 | ||
|
|
ab773c7e91 | ||
|
|
b47c777993 | ||
|
|
29f0917a43 | ||
|
|
e80ec24357 | ||
|
|
0aebe19aca | ||
|
|
3c02fefec5 | ||
|
|
0c67b855d2 | ||
|
|
e8f40e4670 | ||
|
|
2f6ddaa25c | ||
|
|
00f6340153 | ||
|
|
5ab87d8aba | ||
|
|
4aae8ac693 | ||
|
|
1d674003b2 | ||
|
|
b1aa07a8d3 | ||
|
|
d46d726e9d | ||
|
|
c854e189e6 | ||
|
|
39142904cc | ||
|
|
f0577a2bfd | ||
|
|
8e875719b3 | ||
|
|
9dda5eb7d2 | ||
|
|
bcc0e9e15c | ||
|
|
54a0a9c9dd | ||
|
|
4fd5d1477b | ||
|
|
393dbd8ee9 | ||
|
|
55c8fe8d0f | ||
|
|
6d2dbfc453 | ||
|
|
52d54278be | ||
|
|
deb93ed1bf | ||
|
|
5c22c7a7de | ||
|
|
5afdaa473a | ||
|
|
96cd1ff718 | ||
|
|
cc954777f2 | ||
|
|
55ecd58a3c | ||
|
|
4da2c6b197 | ||
|
|
eda90baf35 | ||
|
|
d5c665742b | ||
|
|
6ff5a14091 | ||
|
|
232f904082 | ||
|
|
54aa8fa186 | ||
|
|
37ccb86916 | ||
|
|
7158ed4e0e | ||
|
|
d53ae40f7b | ||
|
|
4b9ecf2924 | ||
|
|
ceaabd4e16 | ||
|
|
d5d3cf9339 | ||
|
|
eacf97f727 | ||
|
|
5f411b729e | ||
|
|
88337acae2 | ||
|
|
9e68977578 | ||
|
|
b733b8b680 | ||
|
|
a45d28256d | ||
|
|
98bfc5aaa8 | ||
|
|
52a2fbbb00 | ||
|
|
235bcfe08d | ||
|
|
d7a42eade6 | ||
|
|
6ac37768a9 | ||
|
|
87cfa4862f | ||
|
|
b625adffd8 | ||
|
|
f0ce88cff7 | ||
|
|
eb6cc29583 | ||
|
|
7769600245 | ||
|
|
e9cc0cd353 | ||
|
|
44df2109c8 | ||
|
|
5ca10480b0 | ||
|
|
f584bd9b30 | ||
|
|
3fda850c46 | ||
|
|
b5df8cabd7 | ||
|
|
6d284bb1b7 | ||
|
|
f6c6de5d63 | ||
|
|
6601abce86 | ||
|
|
b9362fb8f7 | ||
|
|
5a8b97b401 | ||
|
|
42838c28b8 | ||
|
|
1d0c45122a | ||
|
|
35219cea68 | ||
|
|
2e099e8d8f | ||
|
|
e1ecfc162d | ||
|
|
da5a7afed0 | ||
|
|
452371cead | ||
|
|
601f89dfd0 | ||
|
|
2ea5a715cf | ||
|
|
9254974115 | ||
|
|
a3ec89b5bd | ||
|
|
8333e03590 | ||
|
|
e6fcee995b | ||
|
|
4217a9f090 | ||
|
|
9623c0c4b9 | ||
|
|
19876ced76 | ||
|
|
d0ae052da4 | ||
|
|
eedb7eeacf | ||
|
|
bcbaad6d87 | ||
|
|
00de570793 | ||
|
|
636e2bb3fa | ||
|
|
1e9664b147 | ||
|
|
d86544d654 | ||
|
|
dde279f57d | ||
|
|
c21771ac04 | ||
|
|
a3273aeff8 | ||
|
|
870e53c0f2 | ||
|
|
6965f6de7f | ||
|
|
7a65219a2e | ||
|
|
73e55525e5 | ||
|
|
ae07801dd8 | ||
|
|
72166d0e6e | ||
|
|
5a3eaf88ac | ||
|
|
de07c4d1c2 | ||
|
|
788bef6ab5 | ||
|
|
7252163335 | ||
|
|
a566074480 | ||
|
|
8e5da71466 | ||
|
|
9b7a2b43c2 | ||
|
|
06e99aaf40 | ||
|
|
73a8d572f5 | ||
|
|
88062b7fed | ||
|
|
381f8f3139 | ||
|
|
64272c7f40 | ||
|
|
963ba1015b | ||
|
|
1b6e0395e6 | ||
|
|
3c0ef9f394 | ||
|
|
e87af0ed37 | ||
|
|
15b3bcfca0 | ||
|
|
c9220c035f | ||
|
|
1c879eb010 | ||
|
|
dbca11e880 | ||
|
|
c49f0d851a | ||
|
|
2918f85ba9 | ||
|
|
8056a05b54 | ||
|
|
dbb703d44e | ||
|
|
11d6465326 | ||
|
|
bb7ccac3af | ||
|
|
25230d1862 | ||
|
|
08eeb648ea | ||
|
|
366cf005b0 | ||
|
|
c488b8b32f | ||
|
|
8fbe0e4699 | ||
|
|
114a15c66a |
19
.clang-format
Normal file
19
.clang-format
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
---
|
||||||
|
BasedOnStyle: Google
|
||||||
|
ColumnLimit: 120
|
||||||
|
---
|
||||||
|
Language: Cpp
|
||||||
|
BasedOnStyle: Google
|
||||||
|
ColumnLimit: 120
|
||||||
|
StatementMacros:
|
||||||
|
- EIGEN_STATIC_ASSERT
|
||||||
|
- EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
|
||||||
|
- EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||||
|
SortIncludes: false
|
||||||
|
AttributeMacros:
|
||||||
|
- EIGEN_STRONG_INLINE
|
||||||
|
- EIGEN_ALWAYS_INLINE
|
||||||
|
- EIGEN_DEVICE_FUNC
|
||||||
|
- EIGEN_DONT_INLINE
|
||||||
|
- EIGEN_DEPRECATED
|
||||||
|
- EIGEN_UNUSED
|
||||||
5
.hgignore → .gitignore
vendored
5
.hgignore → .gitignore
vendored
@@ -1,4 +1,3 @@
|
|||||||
syntax: glob
|
|
||||||
qrc_*cxx
|
qrc_*cxx
|
||||||
*.orig
|
*.orig
|
||||||
*.pyc
|
*.pyc
|
||||||
@@ -36,3 +35,7 @@ lapack/reference
|
|||||||
.*project
|
.*project
|
||||||
.settings
|
.settings
|
||||||
Makefile
|
Makefile
|
||||||
|
!ci/build.gitlab-ci.yml
|
||||||
|
!scripts/buildtests.in
|
||||||
|
!Eigen/Core
|
||||||
|
!Eigen/src/Core
|
||||||
34
.gitlab-ci.yml
Normal file
34
.gitlab-ci.yml
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
# This file is part of Eigen, a lightweight C++ template library
|
||||||
|
# for linear algebra.
|
||||||
|
#
|
||||||
|
# Copyright (C) 2023, The Eigen Authors
|
||||||
|
#
|
||||||
|
# This Source Code Form is subject to the terms of the Mozilla
|
||||||
|
# Public License v. 2.0. If a copy of the MPL was not distributed
|
||||||
|
# with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
|
stages:
|
||||||
|
- checkformat
|
||||||
|
- build
|
||||||
|
- test
|
||||||
|
- deploy
|
||||||
|
|
||||||
|
variables:
|
||||||
|
# CMake build directory.
|
||||||
|
EIGEN_CI_BUILDDIR: .build
|
||||||
|
# Specify the CMake build target.
|
||||||
|
EIGEN_CI_BUILD_TARGET: ""
|
||||||
|
# If a test regex is specified, that will be selected.
|
||||||
|
# Otherwise, we will try a label if specified.
|
||||||
|
EIGEN_CI_CTEST_REGEX: ""
|
||||||
|
EIGEN_CI_CTEST_LABEL: ""
|
||||||
|
EIGEN_CI_CTEST_ARGS: ""
|
||||||
|
|
||||||
|
include:
|
||||||
|
- "/ci/checkformat.gitlab-ci.yml"
|
||||||
|
- "/ci/common.gitlab-ci.yml"
|
||||||
|
- "/ci/build.linux.gitlab-ci.yml"
|
||||||
|
- "/ci/build.windows.gitlab-ci.yml"
|
||||||
|
- "/ci/test.linux.gitlab-ci.yml"
|
||||||
|
- "/ci/test.windows.gitlab-ci.yml"
|
||||||
|
- "/ci/deploy.gitlab-ci.yml"
|
||||||
69
.gitlab/issue_templates/Bug Report.md
Normal file
69
.gitlab/issue_templates/Bug Report.md
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
<!--
|
||||||
|
Please read this!
|
||||||
|
|
||||||
|
Before opening a new issue, make sure to search for keywords in the issues
|
||||||
|
filtered by "bug::confirmed" or "bug::unconfirmed" and "bugzilla" label:
|
||||||
|
|
||||||
|
- https://gitlab.com/libeigen/eigen/-/issues?scope=all&utf8=%E2%9C%93&state=opened&label_name[]=bug%3A%3Aconfirmed
|
||||||
|
- https://gitlab.com/libeigen/eigen/-/issues?scope=all&utf8=%E2%9C%93&state=opened&label_name[]=bug%3A%3Aunconfirmed
|
||||||
|
- https://gitlab.com/libeigen/eigen/-/issues?scope=all&utf8=%E2%9C%93&state=opened&label_name[]=bugzilla
|
||||||
|
|
||||||
|
and verify the issue you're about to submit isn't a duplicate. -->
|
||||||
|
|
||||||
|
### Summary
|
||||||
|
<!-- Summarize the bug encountered concisely. -->
|
||||||
|
|
||||||
|
### Environment
|
||||||
|
<!-- Please provide your development environment here -->
|
||||||
|
- **Operating System** : Windows/Linux
|
||||||
|
- **Architecture** : x64/Arm64/PowerPC ...
|
||||||
|
- **Eigen Version** : 3.3.9
|
||||||
|
- **Compiler Version** : Gcc7.0
|
||||||
|
- **Compile Flags** : -O3 -march=native
|
||||||
|
- **Vector Extension** : SSE/AVX/NEON ...
|
||||||
|
|
||||||
|
### Minimal Example
|
||||||
|
<!-- If possible, please create a minimal example here that exhibits the problematic behavior.
|
||||||
|
You can also link to [godbolt](https://godbolt.org). But please note that you need to click
|
||||||
|
the "Share" button in the top right-hand corner of the godbolt page where you reproduce the sample
|
||||||
|
code to get the share link instead of in your browser address bar.
|
||||||
|
|
||||||
|
You can read [the guidelines on stackoverflow](https://stackoverflow.com/help/minimal-reproducible-example)
|
||||||
|
on how to create a good minimal example. -->
|
||||||
|
|
||||||
|
```cpp
|
||||||
|
//show your code here
|
||||||
|
```
|
||||||
|
|
||||||
|
### Steps to reproduce
|
||||||
|
<!-- Describe how one can reproduce the issue - this is very important. Please use an ordered list. -->
|
||||||
|
|
||||||
|
1. first step
|
||||||
|
2. second step
|
||||||
|
3. ...
|
||||||
|
|
||||||
|
### What is the current *bug* behavior?
|
||||||
|
<!-- Describe what actually happens. -->
|
||||||
|
|
||||||
|
### What is the expected *correct* behavior?
|
||||||
|
<!-- Describe what you should see instead. -->
|
||||||
|
|
||||||
|
### Relevant logs
|
||||||
|
<!-- Add relevant code snippets or program output within blocks marked by " ``` " -->
|
||||||
|
|
||||||
|
<!-- OPTIONAL: remove this section if you are not reporting a compilation warning issue.-->
|
||||||
|
### Warning Messages
|
||||||
|
<!-- Show us the warning messages you got! -->
|
||||||
|
|
||||||
|
<!-- OPTIONAL: remove this section if you are not reporting a performance issue. -->
|
||||||
|
### Benchmark scripts and results
|
||||||
|
<!-- Please share any benchmark scripts - either standalone, or using [Google Benchmark](https://github.com/google/benchmark). -->
|
||||||
|
|
||||||
|
### Anything else that might help
|
||||||
|
<!-- It will be better to provide us more information to help narrow down the cause.
|
||||||
|
Including but not limited to the following:
|
||||||
|
- lines of code that might help us diagnose the problem.
|
||||||
|
- potential ways to address the issue.
|
||||||
|
- last known working/first broken version (release number or commit hash). -->
|
||||||
|
|
||||||
|
- [ ] Have a plan to fix this issue.
|
||||||
7
.gitlab/issue_templates/Feature Request.md
Normal file
7
.gitlab/issue_templates/Feature Request.md
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
### Describe the feature you would like to be implemented.
|
||||||
|
|
||||||
|
### Would such a feature be useful for other users? Why?
|
||||||
|
|
||||||
|
### Any hints on how to implement the requested feature?
|
||||||
|
|
||||||
|
### Additional resources
|
||||||
26
.gitlab/merge_request_templates/Merge Request Template.md
Normal file
26
.gitlab/merge_request_templates/Merge Request Template.md
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
<!--
|
||||||
|
Thanks for contributing a merge request! Please name and fully describe your MR as you would for a commit message.
|
||||||
|
If the MR fixes an issue, please include "Fixes #issue" in the commit message and the MR description.
|
||||||
|
|
||||||
|
In addition, we recommend that first-time contributors read our [contribution guidelines](https://eigen.tuxfamily.org/index.php?title=Contributing_to_Eigen) and [git page](https://eigen.tuxfamily.org/index.php?title=Git), which will help you submit a more standardized MR.
|
||||||
|
|
||||||
|
Before submitting the MR, you also need to complete the following checks:
|
||||||
|
- Make one PR per feature/bugfix (don't mix multiple changes into one PR). Avoid committing unrelated changes.
|
||||||
|
- Rebase before committing
|
||||||
|
- For code changes, run the test suite (at least the tests that are likely affected by the change).
|
||||||
|
See our [test guidelines](https://eigen.tuxfamily.org/index.php?title=Tests).
|
||||||
|
- If possible, add a test (both for bug-fixes as well as new features)
|
||||||
|
- Make sure new features are documented
|
||||||
|
|
||||||
|
Note that we are a team of volunteers; we appreciate your patience during the review process.
|
||||||
|
|
||||||
|
Again, thanks for contributing! -->
|
||||||
|
|
||||||
|
### Reference issue
|
||||||
|
<!-- You can link to a specific issue using the gitlab syntax #<issue number> -->
|
||||||
|
|
||||||
|
### What does this implement/fix?
|
||||||
|
<!--Please explain your changes.-->
|
||||||
|
|
||||||
|
### Additional information
|
||||||
|
<!--Any additional information you think is important.-->
|
||||||
720
CMakeLists.txt
720
CMakeLists.txt
@@ -1,38 +1,101 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.10.0)
|
||||||
|
|
||||||
|
#==============================================================================
|
||||||
|
# CMake Policy issues.
|
||||||
|
#==============================================================================
|
||||||
|
# Allow overriding options in a parent project via `set` before including Eigen.
|
||||||
|
if (POLICY CMP0077)
|
||||||
|
cmake_policy (SET CMP0077 NEW)
|
||||||
|
endif (POLICY CMP0077)
|
||||||
|
|
||||||
|
# NOTE Remove setting the policy once the minimum required CMake version is
|
||||||
|
# increased to at least 3.15. Retain enabling the export to package registry.
|
||||||
|
if (POLICY CMP0090)
|
||||||
|
# The export command does not populate package registry by default
|
||||||
|
cmake_policy (SET CMP0090 NEW)
|
||||||
|
# Unless otherwise specified, always export to package registry to ensure
|
||||||
|
# backwards compatibility.
|
||||||
|
if (NOT DEFINED CMAKE_EXPORT_PACKAGE_REGISTRY)
|
||||||
|
set (CMAKE_EXPORT_PACKAGE_REGISTRY ON)
|
||||||
|
endif (NOT DEFINED CMAKE_EXPORT_PACKAGE_REGISTRY)
|
||||||
|
endif (POLICY CMP0090)
|
||||||
|
|
||||||
|
# Disable warning about find_package(CUDA).
|
||||||
|
# CUDA language support is lacking for clang as the CUDA compiler
|
||||||
|
# until at least cmake version 3.18. Even then, there seems to be
|
||||||
|
# issues on Windows+Ninja in passing build flags. Continue using
|
||||||
|
# the "old" way for now.
|
||||||
|
if (POLICY CMP0146)
|
||||||
|
cmake_policy(SET CMP0146 OLD)
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
#==============================================================================
|
||||||
|
# CMake Project.
|
||||||
|
#==============================================================================
|
||||||
|
|
||||||
project(Eigen3)
|
project(Eigen3)
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 2.8.11)
|
# Remove this block after bumping CMake to v3.21.0
|
||||||
|
# PROJECT_IS_TOP_LEVEL is defined then by default
|
||||||
# guard against in-source builds
|
if(CMAKE_VERSION VERSION_LESS 3.21.0)
|
||||||
|
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
||||||
if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
|
set(PROJECT_IS_TOP_LEVEL ON)
|
||||||
message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. You may need to remove CMakeCache.txt. ")
|
else()
|
||||||
|
set(PROJECT_IS_TOP_LEVEL OFF)
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
#==============================================================================
|
||||||
|
# Build ON/OFF Settings.
|
||||||
|
#==============================================================================
|
||||||
|
# Determine if we should build tests.
|
||||||
|
include(CMakeDependentOption)
|
||||||
|
cmake_dependent_option(BUILD_TESTING "Enable creation of tests." ON "PROJECT_IS_TOP_LEVEL" OFF)
|
||||||
|
option(EIGEN_BUILD_TESTING "Enable creation of Eigen tests." ${BUILD_TESTING})
|
||||||
|
option(EIGEN_LEAVE_TEST_IN_ALL_TARGET "Leaves tests in the all target, needed by ctest for automatic building." OFF)
|
||||||
|
|
||||||
# Alias Eigen_*_DIR to Eigen3_*_DIR:
|
# Determine if we should build BLAS/LAPACK implementations.
|
||||||
|
option(EIGEN_BUILD_BLAS "Toggles the building of the Eigen Blas library" ${PROJECT_IS_TOP_LEVEL})
|
||||||
set(Eigen_SOURCE_DIR ${Eigen3_SOURCE_DIR})
|
option(EIGEN_BUILD_LAPACK "Toggles the building of the included Eigen LAPACK library" ${PROJECT_IS_TOP_LEVEL})
|
||||||
set(Eigen_BINARY_DIR ${Eigen3_BINARY_DIR})
|
if (EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK)
|
||||||
|
# BLAS and LAPACK currently need a fortran compiler.
|
||||||
# guard against bad build-type strings
|
include(CMakeDetermineFortranCompiler)
|
||||||
|
if (NOT CMAKE_Fortran_COMPILER)
|
||||||
if (NOT CMAKE_BUILD_TYPE)
|
set(EIGEN_BUILD_BLAS OFF)
|
||||||
set(CMAKE_BUILD_TYPE "Release")
|
set(EIGEN_BUILD_LAPACK OFF)
|
||||||
|
else()
|
||||||
|
# Determine if we should build shared libraries for BLAS/LAPACK on this platform.
|
||||||
|
get_cmake_property(EIGEN_BUILD_SHARED_LIBS TARGET_SUPPORTS_SHARED_LIBS)
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
string(TOLOWER "${CMAKE_BUILD_TYPE}" cmake_build_type_tolower)
|
option(EIGEN_BUILD_BTL "Build benchmark suite" OFF)
|
||||||
if( NOT cmake_build_type_tolower STREQUAL "debug"
|
option(EIGEN_BUILD_SPBENCH "Build sparse benchmark suite" OFF)
|
||||||
AND NOT cmake_build_type_tolower STREQUAL "release"
|
# Avoid building docs if included from another project.
|
||||||
AND NOT cmake_build_type_tolower STREQUAL "relwithdebinfo")
|
# Building documentation requires creating and running executables on the host
|
||||||
message(FATAL_ERROR "Unknown build type \"${CMAKE_BUILD_TYPE}\". Allowed values are Debug, Release, RelWithDebInfo (case-insensitive).")
|
# platform. We shouldn't do this if cross-compiling.
|
||||||
|
if (PROJECT_IS_TOP_LEVEL AND NOT CMAKE_CROSSCOMPILING)
|
||||||
|
set(EIGEN_BUILD_DOC_DEFAULT ON)
|
||||||
|
endif()
|
||||||
|
option(EIGEN_BUILD_DOC "Enable creation of Eigen documentation" ${EIGEN_BUILD_DOC_DEFAULT})
|
||||||
|
|
||||||
|
option(EIGEN_BUILD_DEMOS "Toggles the building of the Eigen demos" ${PROJECT_IS_TOP_LEVEL})
|
||||||
|
|
||||||
|
# Disable pkgconfig only for native Windows builds
|
||||||
|
if(NOT WIN32 OR NOT CMAKE_HOST_SYSTEM_NAME MATCHES Windows)
|
||||||
|
option(EIGEN_BUILD_PKGCONFIG "Build pkg-config .pc file for Eigen" ${PROJECT_IS_TOP_LEVEL})
|
||||||
|
endif()
|
||||||
|
option(EIGEN_BUILD_CMAKE_PACKAGE "Enables the creation of EigenConfig.cmake and related files" ${PROJECT_IS_TOP_LEVEL})
|
||||||
|
|
||||||
|
if (EIGEN_BUILD_TESTING OR EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK OR EIGEN_BUILT_BTL OR EIGEN_BUILD_BTL OR EIGEN_BUILD_SPBENCH OR EIGEN_BUILD_DOC OR EIGEN_BUILD_DEMOS)
|
||||||
|
set(EIGEN_IS_BUILDING_ ON)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
#==============================================================================
|
||||||
|
# Version Info.
|
||||||
|
#==============================================================================
|
||||||
|
|
||||||
#############################################################################
|
# Automatically parse the version number from header files.
|
||||||
# retrieve version information #
|
|
||||||
#############################################################################
|
|
||||||
|
|
||||||
# automatically parse the version number
|
|
||||||
file(READ "${PROJECT_SOURCE_DIR}/Eigen/src/Core/util/Macros.h" _eigen_version_header)
|
file(READ "${PROJECT_SOURCE_DIR}/Eigen/src/Core/util/Macros.h" _eigen_version_header)
|
||||||
string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen_world_version_match "${_eigen_version_header}")
|
string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen_world_version_match "${_eigen_version_header}")
|
||||||
set(EIGEN_WORLD_VERSION "${CMAKE_MATCH_1}")
|
set(EIGEN_WORLD_VERSION "${CMAKE_MATCH_1}")
|
||||||
@@ -42,36 +105,218 @@ string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen_minor_
|
|||||||
set(EIGEN_MINOR_VERSION "${CMAKE_MATCH_1}")
|
set(EIGEN_MINOR_VERSION "${CMAKE_MATCH_1}")
|
||||||
set(EIGEN_VERSION_NUMBER ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION})
|
set(EIGEN_VERSION_NUMBER ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION})
|
||||||
|
|
||||||
# if we are not in a mercurial clone
|
# If we are in a git repo, extract a changeset.
|
||||||
if(IS_DIRECTORY ${CMAKE_SOURCE_DIR}/.hg)
|
if(IS_DIRECTORY ${CMAKE_SOURCE_DIR}/.git)
|
||||||
# if the mercurial program is absent or this will leave the EIGEN_HG_CHANGESET string empty,
|
# if the git program is absent or this will leave the EIGEN_GIT_REVNUM string empty,
|
||||||
# but won't stop CMake.
|
# but won't stop CMake.
|
||||||
execute_process(COMMAND hg tip -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_HGTIP_OUTPUT)
|
execute_process(COMMAND git ls-remote -q ${CMAKE_SOURCE_DIR} HEAD OUTPUT_VARIABLE EIGEN_GIT_OUTPUT)
|
||||||
execute_process(COMMAND hg branch -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_BRANCH_OUTPUT)
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# if this is the default (aka development) branch, extract the mercurial changeset number from the hg tip output...
|
# extract the git rev number from the git output...
|
||||||
if(EIGEN_BRANCH_OUTPUT MATCHES "default")
|
if(EIGEN_GIT_OUTPUT)
|
||||||
string(REGEX MATCH "^changeset: *[0-9]*:([0-9;a-f]+).*" EIGEN_HG_CHANGESET_MATCH "${EIGEN_HGTIP_OUTPUT}")
|
string(REGEX MATCH "^([0-9;a-f]+).*" EIGEN_GIT_CHANGESET_MATCH "${EIGEN_GIT_OUTPUT}")
|
||||||
set(EIGEN_HG_CHANGESET "${CMAKE_MATCH_1}")
|
set(EIGEN_GIT_REVNUM "${CMAKE_MATCH_1}")
|
||||||
endif()
|
endif()
|
||||||
#...and show it next to the version number
|
#...and show it next to the version number
|
||||||
if(EIGEN_HG_CHANGESET)
|
if(EIGEN_GIT_REVNUM)
|
||||||
set(EIGEN_VERSION "${EIGEN_VERSION_NUMBER} (mercurial changeset ${EIGEN_HG_CHANGESET})")
|
set(EIGEN_VERSION "${EIGEN_VERSION_NUMBER} (git rev ${EIGEN_GIT_REVNUM})")
|
||||||
else()
|
else()
|
||||||
set(EIGEN_VERSION "${EIGEN_VERSION_NUMBER}")
|
set(EIGEN_VERSION "${EIGEN_VERSION_NUMBER}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
#==============================================================================
|
||||||
|
# Install Path Configuration.
|
||||||
|
#==============================================================================
|
||||||
|
|
||||||
|
# Unconditionally allow install of targets to support nested dependency
|
||||||
|
# installations.
|
||||||
|
#
|
||||||
|
# Note: projects that depend on Eigen should _probably_ exclude installing
|
||||||
|
# Eigen by default (e.g. by using EXCLUDE_FROM_ALL when using
|
||||||
|
# FetchContent_Declare or add_subdirectory) to avoid overwriting a previous
|
||||||
|
# installation.
|
||||||
|
|
||||||
include(CheckCXXCompilerFlag)
|
|
||||||
include(GNUInstallDirs)
|
include(GNUInstallDirs)
|
||||||
|
# Backward compatibility support for EIGEN_INCLUDE_INSTALL_DIR
|
||||||
|
if(EIGEN_INCLUDE_INSTALL_DIR)
|
||||||
|
message(WARNING "EIGEN_INCLUDE_INSTALL_DIR is deprecated. Use INCLUDE_INSTALL_DIR instead.")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(EIGEN_INCLUDE_INSTALL_DIR AND NOT INCLUDE_INSTALL_DIR)
|
||||||
|
set(INCLUDE_INSTALL_DIR ${EIGEN_INCLUDE_INSTALL_DIR}
|
||||||
|
CACHE PATH "The directory relative to CMAKE_INSTALL_PREFIX where Eigen header files are installed")
|
||||||
|
else()
|
||||||
|
set(INCLUDE_INSTALL_DIR
|
||||||
|
"${CMAKE_INSTALL_INCLUDEDIR}/eigen3"
|
||||||
|
CACHE PATH "The directory relative to CMAKE_INSTALL_PREFIX where Eigen header files are installed"
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
set(CMAKEPACKAGE_INSTALL_DIR
|
||||||
|
"${CMAKE_INSTALL_DATADIR}/eigen3/cmake"
|
||||||
|
CACHE PATH "The directory relative to CMAKE_INSTALL_PREFIX where Eigen3Config.cmake is installed"
|
||||||
|
)
|
||||||
|
set(PKGCONFIG_INSTALL_DIR
|
||||||
|
"${CMAKE_INSTALL_DATADIR}/pkgconfig"
|
||||||
|
CACHE PATH "The directory relative to CMAKE_INSTALL_PREFIX where eigen3.pc is installed"
|
||||||
|
)
|
||||||
|
|
||||||
|
foreach(var INCLUDE_INSTALL_DIR CMAKEPACKAGE_INSTALL_DIR PKGCONFIG_INSTALL_DIR)
|
||||||
|
# If an absolute path is specified, make it relative to "{CMAKE_INSTALL_PREFIX}".
|
||||||
|
if(IS_ABSOLUTE "${${var}}")
|
||||||
|
file(RELATIVE_PATH "${var}" "${CMAKE_INSTALL_PREFIX}" "${${var}}")
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
#==============================================================================
|
||||||
|
# Eigen Library.
|
||||||
|
#==============================================================================
|
||||||
|
|
||||||
|
set ( EIGEN_VERSION_STRING ${EIGEN_VERSION_NUMBER} )
|
||||||
|
set ( EIGEN_VERSION_MAJOR ${EIGEN_WORLD_VERSION} )
|
||||||
|
set ( EIGEN_VERSION_MINOR ${EIGEN_MAJOR_VERSION} )
|
||||||
|
set ( EIGEN_VERSION_PATCH ${EIGEN_MINOR_VERSION} )
|
||||||
|
set ( EIGEN_DEFINITIONS "")
|
||||||
|
set ( EIGEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}" )
|
||||||
|
set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} )
|
||||||
|
|
||||||
|
# Alias Eigen_*_DIR to Eigen3_*_DIR:
|
||||||
|
set(Eigen_SOURCE_DIR ${Eigen3_SOURCE_DIR})
|
||||||
|
set(Eigen_BINARY_DIR ${Eigen3_BINARY_DIR})
|
||||||
|
|
||||||
|
# Imported target support
|
||||||
|
add_library (eigen INTERFACE)
|
||||||
|
add_library (Eigen3::Eigen ALIAS eigen)
|
||||||
|
target_include_directories (eigen INTERFACE
|
||||||
|
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
|
||||||
|
$<INSTALL_INTERFACE:${INCLUDE_INSTALL_DIR}>
|
||||||
|
)
|
||||||
|
|
||||||
|
# Export as title case Eigen
|
||||||
|
set_target_properties (eigen PROPERTIES EXPORT_NAME Eigen)
|
||||||
|
|
||||||
|
#==============================================================================
|
||||||
|
# Install Rule Configuration.
|
||||||
|
#==============================================================================
|
||||||
|
|
||||||
|
install(FILES
|
||||||
|
signature_of_eigen3_matrix_library
|
||||||
|
DESTINATION ${INCLUDE_INSTALL_DIR} COMPONENT Devel
|
||||||
|
)
|
||||||
|
|
||||||
|
if(EIGEN_BUILD_PKGCONFIG)
|
||||||
|
configure_file(eigen3.pc.in eigen3.pc @ONLY)
|
||||||
|
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc
|
||||||
|
DESTINATION ${PKGCONFIG_INSTALL_DIR})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
install(DIRECTORY Eigen DESTINATION ${INCLUDE_INSTALL_DIR} COMPONENT Devel)
|
||||||
|
|
||||||
|
install(TARGETS eigen EXPORT Eigen3Targets)
|
||||||
|
|
||||||
|
if(EIGEN_BUILD_CMAKE_PACKAGE)
|
||||||
|
include (CMakePackageConfigHelpers)
|
||||||
|
configure_package_config_file (
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||||
|
PATH_VARS EIGEN_INCLUDE_DIR EIGEN_ROOT_DIR
|
||||||
|
INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
|
||||||
|
NO_SET_AND_CHECK_MACRO # Eigen does not provide legacy style defines
|
||||||
|
NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components
|
||||||
|
)
|
||||||
|
|
||||||
|
set(CVF_VERSION "${EIGEN_VERSION_NUMBER}")
|
||||||
|
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigVersion.cmake.in"
|
||||||
|
"Eigen3ConfigVersion.cmake"
|
||||||
|
@ONLY)
|
||||||
|
|
||||||
|
# The Eigen target will be located in the Eigen3 namespace. Other CMake
|
||||||
|
# targets can refer to it using Eigen3::Eigen.
|
||||||
|
export (TARGETS eigen NAMESPACE Eigen3:: FILE Eigen3Targets.cmake)
|
||||||
|
# Export Eigen3 package to CMake registry such that it can be easily found by
|
||||||
|
# CMake even if it has not been installed to a standard directory.
|
||||||
|
export (PACKAGE Eigen3)
|
||||||
|
|
||||||
|
install (EXPORT Eigen3Targets NAMESPACE Eigen3:: DESTINATION ${CMAKEPACKAGE_INSTALL_DIR})
|
||||||
|
|
||||||
|
install (FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/Eigen3ConfigVersion.cmake
|
||||||
|
DESTINATION ${CMAKEPACKAGE_INSTALL_DIR})
|
||||||
|
|
||||||
|
# Add uninstall target
|
||||||
|
if(NOT TARGET uninstall)
|
||||||
|
add_custom_target ( uninstall
|
||||||
|
COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/EigenUninstall.cmake)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
#==============================================================================
|
||||||
|
# General Build Configuration.
|
||||||
|
#==============================================================================
|
||||||
|
|
||||||
|
# Guard against in-source builds
|
||||||
|
if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
|
||||||
|
message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. You may need to remove CMakeCache.txt. ")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Guard against bad build-type strings
|
||||||
|
if (PROJECT_IS_TOP_LEVEL AND NOT CMAKE_BUILD_TYPE)
|
||||||
|
set(CMAKE_BUILD_TYPE "Release")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Only try to figure out how to link the math library if we are building something.
|
||||||
|
# Otherwise, let the parent project deal with dependencies.
|
||||||
|
if (EIGEN_IS_BUILDING_)
|
||||||
|
# Use Eigen's cmake files.
|
||||||
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||||
|
|
||||||
|
set(CMAKE_INCLUDE_CURRENT_DIR OFF)
|
||||||
|
|
||||||
option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tensor module)." OFF)
|
find_package(StandardMathLibrary)
|
||||||
|
set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "")
|
||||||
|
if(NOT STANDARD_MATH_LIBRARY_FOUND)
|
||||||
|
message(FATAL_ERROR
|
||||||
|
"Can't link to the standard math library. Please report to the Eigen developers, telling them about your platform.")
|
||||||
|
else()
|
||||||
|
if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO)
|
||||||
|
set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO} ${STANDARD_MATH_LIBRARY}")
|
||||||
|
else()
|
||||||
|
set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${STANDARD_MATH_LIBRARY}")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO)
|
||||||
|
message(STATUS "Standard libraries to link to explicitly: ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}")
|
||||||
|
else()
|
||||||
|
message(STATUS "Standard libraries to link to explicitly: none")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Default tests/examples/libraries to row-major.
|
||||||
|
option(EIGEN_DEFAULT_TO_ROW_MAJOR "Use row-major as default matrix storage order" OFF)
|
||||||
|
if(EIGEN_DEFAULT_TO_ROW_MAJOR)
|
||||||
|
add_definitions("-DEIGEN_DEFAULT_TO_ROW_MAJOR")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
#==============================================================================
|
||||||
|
# Test Configuration.
|
||||||
|
#==============================================================================
|
||||||
|
|
||||||
|
if (EIGEN_BUILD_TESTING)
|
||||||
|
function(ei_maybe_separate_arguments variable mode args)
|
||||||
|
# Use separate_arguments if the input is a single string containing a space.
|
||||||
|
# Otherwise, if it is already a list or doesn't have a space, just propagate
|
||||||
|
# the original value. This is to better support multi-argument lists.
|
||||||
|
list(LENGTH args list_length)
|
||||||
|
if (${list_length} EQUAL 1)
|
||||||
|
string(FIND "${args}" " " has_space)
|
||||||
|
if (${has_space} GREATER -1)
|
||||||
|
separate_arguments(args ${mode} "${args}")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
set(${variable} ${args} PARENT_SCOPE)
|
||||||
|
endfunction(ei_maybe_separate_arguments)
|
||||||
|
|
||||||
|
include(CheckCXXCompilerFlag)
|
||||||
macro(ei_add_cxx_compiler_flag FLAG)
|
macro(ei_add_cxx_compiler_flag FLAG)
|
||||||
string(REGEX REPLACE "-" "" SFLAG1 ${FLAG})
|
string(REGEX REPLACE "-" "" SFLAG1 ${FLAG})
|
||||||
string(REGEX REPLACE "\\+" "p" SFLAG ${SFLAG1})
|
string(REGEX REPLACE "\\+" "p" SFLAG ${SFLAG1})
|
||||||
@@ -83,6 +328,7 @@ endmacro()
|
|||||||
|
|
||||||
check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11)
|
check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11)
|
||||||
|
|
||||||
|
option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tensor module)." ${EIGEN_COMPILER_SUPPORT_CPP11})
|
||||||
if(EIGEN_TEST_CXX11)
|
if(EIGEN_TEST_CXX11)
|
||||||
set(CMAKE_CXX_STANDARD 11)
|
set(CMAKE_CXX_STANDARD 11)
|
||||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||||
@@ -90,62 +336,19 @@ if(EIGEN_TEST_CXX11)
|
|||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
|
||||||
endif()
|
endif()
|
||||||
else()
|
else()
|
||||||
#set(CMAKE_CXX_STANDARD 03)
|
|
||||||
#set(CMAKE_CXX_EXTENSIONS OFF)
|
|
||||||
ei_add_cxx_compiler_flag("-std=c++03")
|
ei_add_cxx_compiler_flag("-std=c++03")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
#############################################################################
|
|
||||||
# find how to link to the standard libraries #
|
|
||||||
#############################################################################
|
|
||||||
|
|
||||||
find_package(StandardMathLibrary)
|
|
||||||
|
|
||||||
|
|
||||||
set(EIGEN_TEST_CUSTOM_LINKER_FLAGS "" CACHE STRING "Additional linker flags when linking unit tests.")
|
set(EIGEN_TEST_CUSTOM_LINKER_FLAGS "" CACHE STRING "Additional linker flags when linking unit tests.")
|
||||||
set(EIGEN_TEST_CUSTOM_CXX_FLAGS "" CACHE STRING "Additional compiler flags when compiling unit tests.")
|
set(EIGEN_TEST_CUSTOM_CXX_FLAGS "" CACHE STRING "Additional compiler flags when compiling unit tests.")
|
||||||
|
# Convert space-separated arguments into CMake lists for downstream consumption.
|
||||||
set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "")
|
ei_maybe_separate_arguments(EIGEN_TEST_CUSTOM_LINKER_FLAGS NATIVE_COMMAND "${EIGEN_TEST_CUSTOM_LINKER_FLAGS}")
|
||||||
|
ei_maybe_separate_arguments(EIGEN_TEST_CUSTOM_CXX_FLAGS NATIVE_COMMAND "${EIGEN_TEST_CUSTOM_CXX_FLAGS}")
|
||||||
if(NOT STANDARD_MATH_LIBRARY_FOUND)
|
|
||||||
|
|
||||||
message(FATAL_ERROR
|
|
||||||
"Can't link to the standard math library. Please report to the Eigen developers, telling them about your platform.")
|
|
||||||
|
|
||||||
else()
|
|
||||||
|
|
||||||
if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO)
|
|
||||||
set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO} ${STANDARD_MATH_LIBRARY}")
|
|
||||||
else()
|
|
||||||
set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${STANDARD_MATH_LIBRARY}")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO)
|
|
||||||
message(STATUS "Standard libraries to link to explicitly: ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}")
|
|
||||||
else()
|
|
||||||
message(STATUS "Standard libraries to link to explicitly: none")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
option(EIGEN_BUILD_BTL "Build benchmark suite" OFF)
|
|
||||||
|
|
||||||
# Disable pkgconfig only for native Windows builds
|
|
||||||
if(NOT WIN32 OR NOT CMAKE_HOST_SYSTEM_NAME MATCHES Windows)
|
|
||||||
option(EIGEN_BUILD_PKGCONFIG "Build pkg-config .pc file for Eigen" ON)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(CMAKE_INCLUDE_CURRENT_DIR OFF)
|
|
||||||
|
|
||||||
option(EIGEN_SPLIT_LARGE_TESTS "Split large tests into smaller executables" ON)
|
option(EIGEN_SPLIT_LARGE_TESTS "Split large tests into smaller executables" ON)
|
||||||
|
|
||||||
option(EIGEN_DEFAULT_TO_ROW_MAJOR "Use row-major as default matrix storage order" OFF)
|
|
||||||
if(EIGEN_DEFAULT_TO_ROW_MAJOR)
|
|
||||||
add_definitions("-DEIGEN_DEFAULT_TO_ROW_MAJOR")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320")
|
set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320")
|
||||||
|
|
||||||
|
# Flags for tests.
|
||||||
if(NOT MSVC)
|
if(NOT MSVC)
|
||||||
# We assume that other compilers are partly compatible with GNUCC
|
# We assume that other compilers are partly compatible with GNUCC
|
||||||
|
|
||||||
@@ -159,7 +362,6 @@ if(NOT MSVC)
|
|||||||
ei_add_cxx_compiler_flag("-Wall")
|
ei_add_cxx_compiler_flag("-Wall")
|
||||||
ei_add_cxx_compiler_flag("-Wextra")
|
ei_add_cxx_compiler_flag("-Wextra")
|
||||||
# ei_add_cxx_compiler_flag("-Weverything") # clang
|
# ei_add_cxx_compiler_flag("-Weverything") # clang
|
||||||
|
|
||||||
ei_add_cxx_compiler_flag("-Wundef")
|
ei_add_cxx_compiler_flag("-Wundef")
|
||||||
ei_add_cxx_compiler_flag("-Wcast-align")
|
ei_add_cxx_compiler_flag("-Wcast-align")
|
||||||
ei_add_cxx_compiler_flag("-Wchar-subscripts")
|
ei_add_cxx_compiler_flag("-Wchar-subscripts")
|
||||||
@@ -174,31 +376,16 @@ if(NOT MSVC)
|
|||||||
ei_add_cxx_compiler_flag("-Wc++11-extensions")
|
ei_add_cxx_compiler_flag("-Wc++11-extensions")
|
||||||
ei_add_cxx_compiler_flag("-Wdouble-promotion")
|
ei_add_cxx_compiler_flag("-Wdouble-promotion")
|
||||||
# ei_add_cxx_compiler_flag("-Wconversion")
|
# ei_add_cxx_compiler_flag("-Wconversion")
|
||||||
|
|
||||||
ei_add_cxx_compiler_flag("-Wshadow")
|
ei_add_cxx_compiler_flag("-Wshadow")
|
||||||
|
|
||||||
ei_add_cxx_compiler_flag("-Wno-psabi")
|
ei_add_cxx_compiler_flag("-Wno-psabi")
|
||||||
ei_add_cxx_compiler_flag("-Wno-variadic-macros")
|
ei_add_cxx_compiler_flag("-Wno-variadic-macros")
|
||||||
ei_add_cxx_compiler_flag("-Wno-long-long")
|
ei_add_cxx_compiler_flag("-Wno-long-long")
|
||||||
|
|
||||||
ei_add_cxx_compiler_flag("-fno-check-new")
|
ei_add_cxx_compiler_flag("-fno-check-new")
|
||||||
ei_add_cxx_compiler_flag("-fno-common")
|
ei_add_cxx_compiler_flag("-fno-common")
|
||||||
ei_add_cxx_compiler_flag("-fstrict-aliasing")
|
ei_add_cxx_compiler_flag("-fstrict-aliasing")
|
||||||
ei_add_cxx_compiler_flag("-wd981") # disable ICC's "operands are evaluated in unspecified order" remark
|
ei_add_cxx_compiler_flag("-wd981") # disable ICC's "operands are evaluated in unspecified order" remark
|
||||||
ei_add_cxx_compiler_flag("-wd2304") # disable ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor
|
ei_add_cxx_compiler_flag("-wd2304") # disable ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor
|
||||||
|
|
||||||
|
|
||||||
# The -ansi flag must be added last, otherwise it is also used as a linker flag by check_cxx_compiler_flag making it fails
|
|
||||||
# Moreover we should not set both -strict-ansi and -ansi
|
|
||||||
check_cxx_compiler_flag("-strict-ansi" COMPILER_SUPPORT_STRICTANSI)
|
|
||||||
ei_add_cxx_compiler_flag("-Qunused-arguments") # disable clang warning: argument unused during compilation: '-ansi'
|
|
||||||
|
|
||||||
if(COMPILER_SUPPORT_STRICTANSI)
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -strict-ansi")
|
|
||||||
else()
|
|
||||||
ei_add_cxx_compiler_flag("-ansi")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(ANDROID_NDK)
|
if(ANDROID_NDK)
|
||||||
ei_add_cxx_compiler_flag("-pie")
|
ei_add_cxx_compiler_flag("-pie")
|
||||||
ei_add_cxx_compiler_flag("-fPIE")
|
ei_add_cxx_compiler_flag("-fPIE")
|
||||||
@@ -248,15 +435,30 @@ if(NOT MSVC)
|
|||||||
message(STATUS "Enabling FMA in tests/examples")
|
message(STATUS "Enabling FMA in tests/examples")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
option(EIGEN_TEST_AVX2 "Enable/Disable AVX2 in tests/examples" OFF)
|
||||||
|
if(EIGEN_TEST_AVX2)
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mfma")
|
||||||
|
message(STATUS "Enabling AVX2 in tests/examples")
|
||||||
|
endif()
|
||||||
|
|
||||||
option(EIGEN_TEST_AVX512 "Enable/Disable AVX512 in tests/examples" OFF)
|
option(EIGEN_TEST_AVX512 "Enable/Disable AVX512 in tests/examples" OFF)
|
||||||
if(EIGEN_TEST_AVX512)
|
if(EIGEN_TEST_AVX512)
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mfma -DEIGEN_ENABLE_AVX512")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mfma")
|
||||||
if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fabi-version=6")
|
|
||||||
endif()
|
|
||||||
message(STATUS "Enabling AVX512 in tests/examples")
|
message(STATUS "Enabling AVX512 in tests/examples")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
option(EIGEN_TEST_AVX512DQ "Enable/Disable AVX512DQ in tests/examples" OFF)
|
||||||
|
if(EIGEN_TEST_AVX512DQ)
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512dq -mfma")
|
||||||
|
message(STATUS "Enabling AVX512DQ in tests/examples")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
option(EIGEN_TEST_AVX512FP16 "Enable/Disable AVX512-FP16 in tests/examples" OFF)
|
||||||
|
if(EIGEN_TEST_AVX512FP16)
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mfma -mavx512vl -mavx512fp16")
|
||||||
|
message(STATUS "Enabling AVX512-FP16 in tests/examples")
|
||||||
|
endif()
|
||||||
|
|
||||||
option(EIGEN_TEST_F16C "Enable/Disable F16C in tests/examples" OFF)
|
option(EIGEN_TEST_F16C "Enable/Disable F16C in tests/examples" OFF)
|
||||||
if(EIGEN_TEST_F16C)
|
if(EIGEN_TEST_F16C)
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c")
|
||||||
@@ -320,7 +522,6 @@ if(NOT MSVC)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
else()
|
else()
|
||||||
|
|
||||||
# C4127 - conditional expression is constant
|
# C4127 - conditional expression is constant
|
||||||
# C4714 - marked as __forceinline not inlined (I failed to deactivate it selectively)
|
# C4714 - marked as __forceinline not inlined (I failed to deactivate it selectively)
|
||||||
# We can disable this warning in the unit tests since it is clear that it occurs
|
# We can disable this warning in the unit tests since it is clear that it occurs
|
||||||
@@ -358,13 +559,21 @@ else()
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
option(EIGEN_TEST_FMA "Enable/Disable FMA/AVX2 in tests/examples" OFF)
|
option(EIGEN_TEST_FMA "Enable/Disable FMA/AVX2 in tests/examples" OFF)
|
||||||
if(EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON)
|
option(EIGEN_TEST_AVX2 "Enable/Disable FMA/AVX2 in tests/examples" OFF)
|
||||||
|
if((EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON) OR EIGEN_TEST_AVX2)
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
|
||||||
message(STATUS "Enabling FMA/AVX2 in tests/examples")
|
message(STATUS "Enabling FMA/AVX2 in tests/examples")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
option(EIGEN_TEST_AVX512 "Enable/Disable AVX512 in tests/examples" OFF)
|
||||||
|
option(EIGEN_TEST_AVX512DQ "Enable/Disable AVX512DQ in tests/examples" OFF)
|
||||||
|
if(EIGEN_TEST_AVX512 OR EIGEN_TEST_AVX512DQ)
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX512")
|
||||||
|
message(STATUS "Enabling AVX512 in tests/examples")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
endif(NOT MSVC)
|
||||||
|
|
||||||
option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF)
|
option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF)
|
||||||
option(EIGEN_TEST_X87 "Force using X87 instructions. Implies no vectorization." OFF)
|
option(EIGEN_TEST_X87 "Force using X87 instructions. Implies no vectorization." OFF)
|
||||||
option(EIGEN_TEST_32BIT "Force generating 32bit code." OFF)
|
option(EIGEN_TEST_32BIT "Force generating 32bit code." OFF)
|
||||||
@@ -405,91 +614,38 @@ if(EIGEN_TEST_NO_EXCEPTIONS)
|
|||||||
message(STATUS "Disabling exceptions in tests/examples")
|
message(STATUS "Disabling exceptions in tests/examples")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture level to target when compiling CUDA code")
|
set(EIGEN_CUDA_CXX_FLAGS "" CACHE STRING "Additional flags to pass to the cuda compiler.")
|
||||||
|
set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture(s) to target when compiling CUDA code")
|
||||||
|
|
||||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
|
||||||
|
|
||||||
# Backward compatibility support for EIGEN_INCLUDE_INSTALL_DIR
|
|
||||||
if(EIGEN_INCLUDE_INSTALL_DIR)
|
|
||||||
message(WARNING "EIGEN_INCLUDE_INSTALL_DIR is deprecated. Use INCLUDE_INSTALL_DIR instead.")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(EIGEN_INCLUDE_INSTALL_DIR AND NOT INCLUDE_INSTALL_DIR)
|
|
||||||
set(INCLUDE_INSTALL_DIR ${EIGEN_INCLUDE_INSTALL_DIR}
|
|
||||||
CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed")
|
|
||||||
else()
|
|
||||||
set(INCLUDE_INSTALL_DIR
|
|
||||||
"${CMAKE_INSTALL_INCLUDEDIR}/eigen3"
|
|
||||||
CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed"
|
|
||||||
)
|
|
||||||
endif()
|
|
||||||
set(CMAKEPACKAGE_INSTALL_DIR
|
|
||||||
"${CMAKE_INSTALL_DATADIR}/eigen3/cmake"
|
|
||||||
CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen3Config.cmake is installed"
|
|
||||||
)
|
|
||||||
set(PKGCONFIG_INSTALL_DIR
|
|
||||||
"${CMAKE_INSTALL_DATADIR}/pkgconfig"
|
|
||||||
CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where eigen3.pc is installed"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# similar to set_target_properties but append the property instead of overwriting it
|
|
||||||
macro(ei_add_target_property target prop value)
|
|
||||||
|
|
||||||
get_target_property(previous ${target} ${prop})
|
|
||||||
# if the property wasn't previously set, ${previous} is now "previous-NOTFOUND" which cmake allows catching with plain if()
|
|
||||||
if(NOT previous)
|
|
||||||
set(previous "")
|
|
||||||
endif()
|
|
||||||
set_target_properties(${target} PROPERTIES ${prop} "${previous} ${value}")
|
|
||||||
endmacro()
|
|
||||||
|
|
||||||
install(FILES
|
|
||||||
signature_of_eigen3_matrix_library
|
|
||||||
DESTINATION ${INCLUDE_INSTALL_DIR} COMPONENT Devel
|
|
||||||
)
|
|
||||||
|
|
||||||
if(EIGEN_BUILD_PKGCONFIG)
|
|
||||||
configure_file(eigen3.pc.in eigen3.pc @ONLY)
|
|
||||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc
|
|
||||||
DESTINATION ${PKGCONFIG_INSTALL_DIR}
|
|
||||||
)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
install(DIRECTORY Eigen DESTINATION ${INCLUDE_INSTALL_DIR} COMPONENT Devel)
|
|
||||||
|
|
||||||
add_subdirectory(doc EXCLUDE_FROM_ALL)
|
|
||||||
|
|
||||||
option(BUILD_TESTING "Enable creation of Eigen tests." ON)
|
|
||||||
if(BUILD_TESTING)
|
|
||||||
include(EigenConfigureTesting)
|
|
||||||
|
|
||||||
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
|
|
||||||
add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest
|
|
||||||
else()
|
|
||||||
add_subdirectory(test EXCLUDE_FROM_ALL)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
add_subdirectory(failtest)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
|
|
||||||
add_subdirectory(blas)
|
|
||||||
add_subdirectory(lapack)
|
|
||||||
else()
|
|
||||||
add_subdirectory(blas EXCLUDE_FROM_ALL)
|
|
||||||
add_subdirectory(lapack EXCLUDE_FROM_ALL)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# add SYCL
|
|
||||||
option(EIGEN_TEST_SYCL "Add Sycl support." OFF)
|
option(EIGEN_TEST_SYCL "Add Sycl support." OFF)
|
||||||
option(EIGEN_SYCL_TRISYCL "Use the triSYCL Sycl implementation (ComputeCPP by default)." OFF)
|
|
||||||
if(EIGEN_TEST_SYCL)
|
if(EIGEN_TEST_SYCL)
|
||||||
|
option(EIGEN_SYCL_DPCPP "Use the DPCPP Sycl implementation (DPCPP is default SYCL-Compiler)." ON)
|
||||||
|
option(EIGEN_SYCL_TRISYCL "Use the triSYCL Sycl implementation." OFF)
|
||||||
|
option(EIGEN_SYCL_ComputeCpp "Use the ComputeCPP Sycl implementation." OFF)
|
||||||
|
|
||||||
|
# Building options
|
||||||
|
# https://developer.codeplay.com/products/computecpp/ce/2.11.0/guides/eigen-overview/options-for-building-eigen-sycl
|
||||||
|
option(EIGEN_SYCL_USE_DEFAULT_SELECTOR "Use sycl default selector to select the preferred device." OFF)
|
||||||
|
option(EIGEN_SYCL_NO_LOCAL_MEM "Build for devices without dedicated shared memory." OFF)
|
||||||
|
option(EIGEN_SYCL_LOCAL_MEM "Allow the use of local memory (enabled by default)." ON)
|
||||||
|
option(EIGEN_SYCL_LOCAL_THREAD_DIM0 "Set work group size for dimension 0." 16)
|
||||||
|
option(EIGEN_SYCL_LOCAL_THREAD_DIM1 "Set work group size for dimension 1." 16)
|
||||||
|
option(EIGEN_SYCL_ASYNC_EXECUTION "Allow asynchronous execution (enabled by default)." ON)
|
||||||
|
option(EIGEN_SYCL_DISABLE_SKINNY "Disable optimization for tall/skinny matrices." OFF)
|
||||||
|
option(EIGEN_SYCL_DISABLE_DOUBLE_BUFFER "Disable double buffer." OFF)
|
||||||
|
option(EIGEN_SYCL_DISABLE_SCALAR "Disable scalar contraction." OFF)
|
||||||
|
option(EIGEN_SYCL_DISABLE_GEMV "Disable GEMV and create a single kernel to calculate contraction instead." OFF)
|
||||||
|
|
||||||
|
set(EIGEN_SYCL ON)
|
||||||
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations -Wno-shorten-64-to-32 -Wno-cast-align")
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-copy-with-user-provided-copy -Wno-unused-variable")
|
||||||
set (CMAKE_MODULE_PATH "${CMAKE_ROOT}/Modules" "cmake/Modules/" "${CMAKE_MODULE_PATH}")
|
set (CMAKE_MODULE_PATH "${CMAKE_ROOT}/Modules" "cmake/Modules/" "${CMAKE_MODULE_PATH}")
|
||||||
|
find_package(Threads REQUIRED)
|
||||||
if(EIGEN_SYCL_TRISYCL)
|
if(EIGEN_SYCL_TRISYCL)
|
||||||
message(STATUS "Using triSYCL")
|
message(STATUS "Using triSYCL")
|
||||||
include(FindTriSYCL)
|
include(FindTriSYCL)
|
||||||
else()
|
elseif(EIGEN_SYCL_ComputeCpp)
|
||||||
message(STATUS "Using ComputeCPP SYCL")
|
message(STATUS "Using ComputeCPP SYCL")
|
||||||
include(FindComputeCpp)
|
include(FindComputeCpp)
|
||||||
set(COMPUTECPP_DRIVER_DEFAULT_VALUE OFF)
|
set(COMPUTECPP_DRIVER_DEFAULT_VALUE OFF)
|
||||||
@@ -500,8 +656,12 @@ if(EIGEN_TEST_SYCL)
|
|||||||
"Use ComputeCpp driver instead of a 2 steps compilation"
|
"Use ComputeCpp driver instead of a 2 steps compilation"
|
||||||
${COMPUTECPP_DRIVER_DEFAULT_VALUE}
|
${COMPUTECPP_DRIVER_DEFAULT_VALUE}
|
||||||
)
|
)
|
||||||
|
else() #Default SYCL compiler is DPCPP (EIGEN_SYCL_DPCPP)
|
||||||
|
set(DPCPP_SYCL_TARGET "spir64" CACHE STRING "Default target for Intel CPU/GPU")
|
||||||
|
message(STATUS "Using DPCPP")
|
||||||
|
find_package(DPCPP)
|
||||||
|
add_definitions(-DSYCL_COMPILER_IS_DPCPP)
|
||||||
endif(EIGEN_SYCL_TRISYCL)
|
endif(EIGEN_SYCL_TRISYCL)
|
||||||
option(EIGEN_DONT_VECTORIZE_SYCL "Don't use vectorisation in the SYCL tests." OFF)
|
|
||||||
if(EIGEN_DONT_VECTORIZE_SYCL)
|
if(EIGEN_DONT_VECTORIZE_SYCL)
|
||||||
message(STATUS "Disabling SYCL vectorization in tests/examples")
|
message(STATUS "Disabling SYCL vectorization in tests/examples")
|
||||||
# When disabling SYCL vectorization, also disable Eigen default vectorization
|
# When disabling SYCL vectorization, also disable Eigen default vectorization
|
||||||
@@ -510,39 +670,75 @@ if(EIGEN_TEST_SYCL)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
include(EigenConfigureTesting)
|
||||||
|
|
||||||
|
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
|
||||||
|
# CTest automatic test building relies on the "all" target.
|
||||||
|
add_subdirectory(test)
|
||||||
|
add_subdirectory(failtest)
|
||||||
|
else()
|
||||||
|
add_subdirectory(test EXCLUDE_FROM_ALL)
|
||||||
|
add_subdirectory(failtest EXCLUDE_FROM_ALL)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
ei_testing_print_summary()
|
||||||
|
|
||||||
|
if (EIGEN_SPLIT_TESTSUITE)
|
||||||
|
ei_split_testsuite("${EIGEN_SPLIT_TESTSUITE}")
|
||||||
|
endif()
|
||||||
|
endif(EIGEN_BUILD_TESTING)
|
||||||
|
|
||||||
|
#==============================================================================
|
||||||
|
# Other Build Configurations.
|
||||||
|
#==============================================================================
|
||||||
add_subdirectory(unsupported)
|
add_subdirectory(unsupported)
|
||||||
|
|
||||||
add_subdirectory(demos EXCLUDE_FROM_ALL)
|
if(EIGEN_BUILD_BLAS)
|
||||||
|
add_subdirectory(blas)
|
||||||
|
endif()
|
||||||
|
|
||||||
# must be after test and unsupported, for configuring buildtests.in
|
if (EIGEN_BUILD_LAPACK)
|
||||||
add_subdirectory(scripts EXCLUDE_FROM_ALL)
|
add_subdirectory(lapack)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(EIGEN_BUILD_DOC)
|
||||||
|
add_subdirectory(doc EXCLUDE_FROM_ALL)
|
||||||
|
endif()
|
||||||
|
|
||||||
# TODO: consider also replacing EIGEN_BUILD_BTL by a custom target "make btl"?
|
# TODO: consider also replacing EIGEN_BUILD_BTL by a custom target "make btl"?
|
||||||
if(EIGEN_BUILD_BTL)
|
if(EIGEN_BUILD_BTL)
|
||||||
add_subdirectory(bench/btl EXCLUDE_FROM_ALL)
|
add_subdirectory(bench/btl EXCLUDE_FROM_ALL)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(NOT WIN32)
|
if(NOT WIN32 AND EIGEN_BUILD_SPBENCH)
|
||||||
add_subdirectory(bench/spbench EXCLUDE_FROM_ALL)
|
add_subdirectory(bench/spbench EXCLUDE_FROM_ALL)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
configure_file(scripts/cdashtesting.cmake.in cdashtesting.cmake @ONLY)
|
if (EIGEN_BUILD_DEMOS)
|
||||||
|
add_subdirectory(demos EXCLUDE_FROM_ALL)
|
||||||
if(BUILD_TESTING)
|
|
||||||
ei_testing_print_summary()
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
message(STATUS "")
|
if (PROJECT_IS_TOP_LEVEL)
|
||||||
message(STATUS "Configured Eigen ${EIGEN_VERSION_NUMBER}")
|
# must be after test and unsupported, for configuring buildtests.in
|
||||||
message(STATUS "")
|
add_subdirectory(scripts EXCLUDE_FROM_ALL)
|
||||||
|
configure_file(scripts/cdashtesting.cmake.in cdashtesting.cmake @ONLY)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
#==============================================================================
|
||||||
|
# Summary.
|
||||||
|
#==============================================================================
|
||||||
|
|
||||||
|
if(PROJECT_IS_TOP_LEVEL)
|
||||||
string(TOLOWER "${CMAKE_GENERATOR}" cmake_generator_tolower)
|
string(TOLOWER "${CMAKE_GENERATOR}" cmake_generator_tolower)
|
||||||
if(cmake_generator_tolower MATCHES "makefile")
|
if(cmake_generator_tolower MATCHES "makefile")
|
||||||
message(STATUS "Some things you can do now:")
|
message(STATUS "Available targets (use: make TARGET):")
|
||||||
message(STATUS "--------------+--------------------------------------------------------------")
|
else()
|
||||||
message(STATUS "Command | Description")
|
message(STATUS "Available targets (use: cmake --build . --target TARGET):")
|
||||||
message(STATUS "--------------+--------------------------------------------------------------")
|
endif()
|
||||||
message(STATUS "make install | Install Eigen. Headers will be installed to:")
|
message(STATUS "---------+--------------------------------------------------------------")
|
||||||
|
message(STATUS "Target | Description")
|
||||||
|
message(STATUS "---------+--------------------------------------------------------------")
|
||||||
|
message(STATUS "install | Install Eigen. Headers will be installed to:")
|
||||||
message(STATUS " | <CMAKE_INSTALL_PREFIX>/<INCLUDE_INSTALL_DIR>")
|
message(STATUS " | <CMAKE_INSTALL_PREFIX>/<INCLUDE_INSTALL_DIR>")
|
||||||
message(STATUS " | Using the following values:")
|
message(STATUS " | Using the following values:")
|
||||||
message(STATUS " | CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
|
message(STATUS " | CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
|
||||||
@@ -551,112 +747,24 @@ if(cmake_generator_tolower MATCHES "makefile")
|
|||||||
message(STATUS " | cmake . -DCMAKE_INSTALL_PREFIX=yourprefix")
|
message(STATUS " | cmake . -DCMAKE_INSTALL_PREFIX=yourprefix")
|
||||||
message(STATUS " | Or:")
|
message(STATUS " | Or:")
|
||||||
message(STATUS " | cmake . -DINCLUDE_INSTALL_DIR=yourdir")
|
message(STATUS " | cmake . -DINCLUDE_INSTALL_DIR=yourdir")
|
||||||
message(STATUS "make doc | Generate the API documentation, requires Doxygen & LaTeX")
|
message(STATUS "uninstall| Remove files installed by the install target")
|
||||||
if(BUILD_TESTING)
|
if (EIGEN_BUILD_DOC)
|
||||||
message(STATUS "make check | Build and run the unit-tests. Read this page:")
|
message(STATUS "doc | Generate the API documentation, requires Doxygen & LaTeX")
|
||||||
|
endif()
|
||||||
|
if(EIGEN_BUILD_TESTING)
|
||||||
|
message(STATUS "check | Build and run the unit-tests. Read this page:")
|
||||||
message(STATUS " | http://eigen.tuxfamily.org/index.php?title=Tests")
|
message(STATUS " | http://eigen.tuxfamily.org/index.php?title=Tests")
|
||||||
endif()
|
endif()
|
||||||
message(STATUS "make blas | Build BLAS library (not the same thing as Eigen)")
|
if (EIGEN_BUILD_BLAS)
|
||||||
message(STATUS "make uninstall| Removes files installed by make install")
|
message(STATUS "blas | Build BLAS library (not the same thing as Eigen)")
|
||||||
message(STATUS "--------------+--------------------------------------------------------------")
|
endif()
|
||||||
else()
|
if (EIGEN_BUILD_LAPACK)
|
||||||
message(STATUS "To build/run the unit tests, read this page:")
|
message(STATUS "lapack | Build LAPACK subset library (not the same thing as Eigen)")
|
||||||
message(STATUS " http://eigen.tuxfamily.org/index.php?title=Tests")
|
endif()
|
||||||
|
message(STATUS "---------+--------------------------------------------------------------")
|
||||||
|
message(STATUS "")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
message(STATUS "")
|
message(STATUS "")
|
||||||
|
message(STATUS "Configured Eigen ${EIGEN_VERSION_NUMBER}")
|
||||||
|
message(STATUS "")
|
||||||
set ( EIGEN_VERSION_STRING ${EIGEN_VERSION_NUMBER} )
|
|
||||||
set ( EIGEN_VERSION_MAJOR ${EIGEN_WORLD_VERSION} )
|
|
||||||
set ( EIGEN_VERSION_MINOR ${EIGEN_MAJOR_VERSION} )
|
|
||||||
set ( EIGEN_VERSION_PATCH ${EIGEN_MINOR_VERSION} )
|
|
||||||
set ( EIGEN_DEFINITIONS "")
|
|
||||||
set ( EIGEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}" )
|
|
||||||
set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} )
|
|
||||||
|
|
||||||
# Interface libraries require at least CMake 3.0
|
|
||||||
if (NOT CMAKE_VERSION VERSION_LESS 3.0)
|
|
||||||
include (CMakePackageConfigHelpers)
|
|
||||||
|
|
||||||
# Imported target support
|
|
||||||
add_library (eigen INTERFACE)
|
|
||||||
add_library (Eigen3::Eigen ALIAS eigen)
|
|
||||||
target_compile_definitions (eigen INTERFACE ${EIGEN_DEFINITIONS})
|
|
||||||
target_include_directories (eigen INTERFACE
|
|
||||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
|
|
||||||
$<INSTALL_INTERFACE:${INCLUDE_INSTALL_DIR}>
|
|
||||||
)
|
|
||||||
|
|
||||||
# Export as title case Eigen
|
|
||||||
set_target_properties (eigen PROPERTIES EXPORT_NAME Eigen)
|
|
||||||
|
|
||||||
install (TARGETS eigen EXPORT Eigen3Targets)
|
|
||||||
|
|
||||||
configure_package_config_file (
|
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in
|
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
|
||||||
PATH_VARS EIGEN_INCLUDE_DIR EIGEN_ROOT_DIR
|
|
||||||
INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
|
|
||||||
NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components
|
|
||||||
)
|
|
||||||
# Remove CMAKE_SIZEOF_VOID_P from Eigen3ConfigVersion.cmake since Eigen does
|
|
||||||
# not depend on architecture specific settings or libraries. More
|
|
||||||
# specifically, an Eigen3Config.cmake generated from a 64 bit target can be
|
|
||||||
# used for 32 bit targets as well (and vice versa).
|
|
||||||
set (_Eigen3_CMAKE_SIZEOF_VOID_P ${CMAKE_SIZEOF_VOID_P})
|
|
||||||
unset (CMAKE_SIZEOF_VOID_P)
|
|
||||||
write_basic_package_version_file (Eigen3ConfigVersion.cmake
|
|
||||||
VERSION ${EIGEN_VERSION_NUMBER}
|
|
||||||
COMPATIBILITY SameMajorVersion)
|
|
||||||
set (CMAKE_SIZEOF_VOID_P ${_Eigen3_CMAKE_SIZEOF_VOID_P})
|
|
||||||
|
|
||||||
# The Eigen target will be located in the Eigen3 namespace. Other CMake
|
|
||||||
# targets can refer to it using Eigen3::Eigen.
|
|
||||||
export (TARGETS eigen NAMESPACE Eigen3:: FILE Eigen3Targets.cmake)
|
|
||||||
# Export Eigen3 package to CMake registry such that it can be easily found by
|
|
||||||
# CMake even if it has not been installed to a standard directory.
|
|
||||||
export (PACKAGE Eigen3)
|
|
||||||
|
|
||||||
install (EXPORT Eigen3Targets NAMESPACE Eigen3:: DESTINATION ${CMAKEPACKAGE_INSTALL_DIR})
|
|
||||||
|
|
||||||
else ()
|
|
||||||
# Fallback to legacy Eigen3Config.cmake without the imported target
|
|
||||||
|
|
||||||
# If CMakePackageConfigHelpers module is available (CMake >= 2.8.8)
|
|
||||||
# create a relocatable Config file, otherwise leave the hardcoded paths
|
|
||||||
include(CMakePackageConfigHelpers OPTIONAL RESULT_VARIABLE CPCH_PATH)
|
|
||||||
|
|
||||||
if(CPCH_PATH)
|
|
||||||
configure_package_config_file (
|
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigLegacy.cmake.in
|
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
|
||||||
PATH_VARS EIGEN_INCLUDE_DIR EIGEN_ROOT_DIR
|
|
||||||
INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
|
|
||||||
NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components
|
|
||||||
)
|
|
||||||
else()
|
|
||||||
# The PACKAGE_* variables are defined by the configure_package_config_file
|
|
||||||
# but without it we define them manually to the hardcoded paths
|
|
||||||
set(PACKAGE_INIT "")
|
|
||||||
set(PACKAGE_EIGEN_INCLUDE_DIR ${EIGEN_INCLUDE_DIR})
|
|
||||||
set(PACKAGE_EIGEN_ROOT_DIR ${EIGEN_ROOT_DIR})
|
|
||||||
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigLegacy.cmake.in
|
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
|
||||||
@ONLY ESCAPE_QUOTES )
|
|
||||||
endif()
|
|
||||||
|
|
||||||
write_basic_package_version_file( Eigen3ConfigVersion.cmake
|
|
||||||
VERSION ${EIGEN_VERSION_NUMBER}
|
|
||||||
COMPATIBILITY SameMajorVersion )
|
|
||||||
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
|
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3ConfigVersion.cmake
|
|
||||||
DESTINATION ${CMAKEPACKAGE_INSTALL_DIR} )
|
|
||||||
|
|
||||||
# Add uninstall target
|
|
||||||
add_custom_target ( uninstall
|
|
||||||
COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/EigenUninstall.cmake)
|
|
||||||
|
|||||||
203
COPYING.APACHE
Normal file
203
COPYING.APACHE
Normal file
@@ -0,0 +1,203 @@
|
|||||||
|
/*
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
@@ -49,4 +49,3 @@ SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT
|
|||||||
(INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE,
|
(INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE,
|
||||||
EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE
|
EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE
|
||||||
POSSIBILITY OF SUCH LOSS OR DAMAGES.
|
POSSIBILITY OF SUCH LOSS OR DAMAGES.
|
||||||
|
|
||||||
|
|||||||
@@ -43,4 +43,3 @@
|
|||||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||||
|
|
||||||
#endif // EIGEN_CHOLESKY_MODULE_H
|
#endif // EIGEN_CHOLESKY_MODULE_H
|
||||||
/* vim: set filetype=cpp et sw=2 ts=2 ai: */
|
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ extern "C" {
|
|||||||
* This module provides an interface to the Cholmod library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
|
* This module provides an interface to the Cholmod library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
|
||||||
* It provides the two following main factorization classes:
|
* It provides the two following main factorization classes:
|
||||||
* - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization.
|
* - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization.
|
||||||
* - class CholmodDecomposiiton: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial).
|
* - class CholmodDecomposition: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial).
|
||||||
*
|
*
|
||||||
* For the sake of completeness, this module also propose the two following classes:
|
* For the sake of completeness, this module also propose the two following classes:
|
||||||
* - class CholmodSimplicialLLT
|
* - class CholmodSimplicialLLT
|
||||||
|
|||||||
34
Eigen/Core
34
Eigen/Core
@@ -11,7 +11,7 @@
|
|||||||
#ifndef EIGEN_CORE_H
|
#ifndef EIGEN_CORE_H
|
||||||
#define EIGEN_CORE_H
|
#define EIGEN_CORE_H
|
||||||
|
|
||||||
// first thing Eigen does: stop the compiler from committing suicide
|
// first thing Eigen does: stop the compiler from reporting useless warnings.
|
||||||
#include "src/Core/util/DisableStupidWarnings.h"
|
#include "src/Core/util/DisableStupidWarnings.h"
|
||||||
|
|
||||||
// then include this file where all our macros are defined. It's really important to do it first because
|
// then include this file where all our macros are defined. It's really important to do it first because
|
||||||
@@ -22,7 +22,7 @@
|
|||||||
#include "src/Core/util/ConfigureVectorization.h"
|
#include "src/Core/util/ConfigureVectorization.h"
|
||||||
|
|
||||||
// We need cuda_runtime.h/hip_runtime.h to ensure that
|
// We need cuda_runtime.h/hip_runtime.h to ensure that
|
||||||
// the EIGEN_USING_STD_MATH macro works properly on the device side
|
// the EIGEN_USING_STD macro works properly on the device side
|
||||||
#if defined(EIGEN_CUDACC)
|
#if defined(EIGEN_CUDACC)
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
#elif defined(EIGEN_HIPCC)
|
#elif defined(EIGEN_HIPCC)
|
||||||
@@ -36,10 +36,17 @@
|
|||||||
|
|
||||||
// Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3)
|
// Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3)
|
||||||
// See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details.
|
// See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details.
|
||||||
#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6)
|
#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6) && EIGEN_GNUC_AT_MOST(5,5)
|
||||||
#pragma GCC optimize ("-fno-ipa-cp-clone")
|
#pragma GCC optimize ("-fno-ipa-cp-clone")
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Prevent ICC from specializing std::complex operators that silently fail
|
||||||
|
// on device. This allows us to use our own device-compatible specializations
|
||||||
|
// instead.
|
||||||
|
#if defined(EIGEN_COMP_ICC) && defined(EIGEN_GPU_COMPILE_PHASE) \
|
||||||
|
&& !defined(_OVERRIDE_COMPLEX_SPECIALIZATION_)
|
||||||
|
#define _OVERRIDE_COMPLEX_SPECIALIZATION_ 1
|
||||||
|
#endif
|
||||||
#include <complex>
|
#include <complex>
|
||||||
|
|
||||||
// this include file manages BLAS and MKL related macros
|
// this include file manages BLAS and MKL related macros
|
||||||
@@ -51,6 +58,10 @@
|
|||||||
#define EIGEN_HAS_GPU_FP16
|
#define EIGEN_HAS_GPU_FP16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(EIGEN_HAS_CUDA_BF16) || defined(EIGEN_HAS_HIP_BF16)
|
||||||
|
#define EIGEN_HAS_GPU_BF16
|
||||||
|
#endif
|
||||||
|
|
||||||
#if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE)
|
#if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE)
|
||||||
#define EIGEN_HAS_OPENMP
|
#define EIGEN_HAS_OPENMP
|
||||||
#endif
|
#endif
|
||||||
@@ -73,6 +84,7 @@
|
|||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#ifndef EIGEN_NO_IO
|
#ifndef EIGEN_NO_IO
|
||||||
|
#include <sstream>
|
||||||
#include <iosfwd>
|
#include <iosfwd>
|
||||||
#endif
|
#endif
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
@@ -97,7 +109,8 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// required for __cpuid, needs to be included after cmath
|
// required for __cpuid, needs to be included after cmath
|
||||||
#if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE
|
// also required for _BitScanReverse on Windows on ARM
|
||||||
|
#if EIGEN_COMP_MSVC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM64) && !EIGEN_OS_WINCE
|
||||||
#include <intrin.h>
|
#include <intrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -107,7 +120,7 @@
|
|||||||
#undef isnan
|
#undef isnan
|
||||||
#undef isinf
|
#undef isinf
|
||||||
#undef isfinite
|
#undef isfinite
|
||||||
#include <SYCL/sycl.hpp>
|
#include <CL/sycl.hpp>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
@@ -162,6 +175,7 @@ using std::ptrdiff_t;
|
|||||||
#include "src/Core/arch/Default/ConjHelper.h"
|
#include "src/Core/arch/Default/ConjHelper.h"
|
||||||
// Generic half float support
|
// Generic half float support
|
||||||
#include "src/Core/arch/Default/Half.h"
|
#include "src/Core/arch/Default/Half.h"
|
||||||
|
#include "src/Core/arch/Default/BFloat16.h"
|
||||||
#include "src/Core/arch/Default/TypeCasting.h"
|
#include "src/Core/arch/Default/TypeCasting.h"
|
||||||
#include "src/Core/arch/Default/GenericPacketMathFunctionsFwd.h"
|
#include "src/Core/arch/Default/GenericPacketMathFunctionsFwd.h"
|
||||||
|
|
||||||
@@ -202,6 +216,10 @@ using std::ptrdiff_t;
|
|||||||
#include "src/Core/arch/NEON/TypeCasting.h"
|
#include "src/Core/arch/NEON/TypeCasting.h"
|
||||||
#include "src/Core/arch/NEON/MathFunctions.h"
|
#include "src/Core/arch/NEON/MathFunctions.h"
|
||||||
#include "src/Core/arch/NEON/Complex.h"
|
#include "src/Core/arch/NEON/Complex.h"
|
||||||
|
#elif defined EIGEN_VECTORIZE_SVE
|
||||||
|
#include "src/Core/arch/SVE/PacketMath.h"
|
||||||
|
#include "src/Core/arch/SVE/TypeCasting.h"
|
||||||
|
#include "src/Core/arch/SVE/MathFunctions.h"
|
||||||
#elif defined EIGEN_VECTORIZE_ZVECTOR
|
#elif defined EIGEN_VECTORIZE_ZVECTOR
|
||||||
#include "src/Core/arch/ZVector/PacketMath.h"
|
#include "src/Core/arch/ZVector/PacketMath.h"
|
||||||
#include "src/Core/arch/ZVector/MathFunctions.h"
|
#include "src/Core/arch/ZVector/MathFunctions.h"
|
||||||
@@ -329,6 +347,12 @@ using std::ptrdiff_t;
|
|||||||
#include "src/Core/CoreIterators.h"
|
#include "src/Core/CoreIterators.h"
|
||||||
#include "src/Core/ConditionEstimator.h"
|
#include "src/Core/ConditionEstimator.h"
|
||||||
|
|
||||||
|
#if defined(EIGEN_VECTORIZE_VSX)
|
||||||
|
#include "src/Core/arch/AltiVec/MatrixProduct.h"
|
||||||
|
#elif defined EIGEN_VECTORIZE_NEON
|
||||||
|
#include "src/Core/arch/NEON/GeneralBlockPanelKernel.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "src/Core/BooleanRedux.h"
|
#include "src/Core/BooleanRedux.h"
|
||||||
#include "src/Core/Select.h"
|
#include "src/Core/Select.h"
|
||||||
#include "src/Core/VectorwiseOp.h"
|
#include "src/Core/VectorwiseOp.h"
|
||||||
|
|||||||
@@ -58,4 +58,3 @@
|
|||||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||||
|
|
||||||
#endif // EIGEN_EIGENVALUES_MODULE_H
|
#endif // EIGEN_EIGENVALUES_MODULE_H
|
||||||
/* vim: set filetype=cpp et sw=2 ts=2 ai: */
|
|
||||||
|
|||||||
@@ -50,11 +50,10 @@
|
|||||||
#include "src/Geometry/Umeyama.h"
|
#include "src/Geometry/Umeyama.h"
|
||||||
|
|
||||||
// Use the SSE optimized version whenever possible.
|
// Use the SSE optimized version whenever possible.
|
||||||
#if defined EIGEN_VECTORIZE_SSE
|
#if (defined EIGEN_VECTORIZE_SSE) || (defined EIGEN_VECTORIZE_NEON)
|
||||||
#include "src/Geometry/arch/Geometry_SSE.h"
|
#include "src/Geometry/arch/Geometry_SIMD.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||||
|
|
||||||
#endif // EIGEN_GEOMETRY_MODULE_H
|
#endif // EIGEN_GEOMETRY_MODULE_H
|
||||||
/* vim: set filetype=cpp et sw=2 ts=2 ai: */
|
|
||||||
|
|||||||
@@ -27,4 +27,3 @@
|
|||||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||||
|
|
||||||
#endif // EIGEN_HOUSEHOLDER_MODULE_H
|
#endif // EIGEN_HOUSEHOLDER_MODULE_H
|
||||||
/* vim: set filetype=cpp et sw=2 ts=2 ai: */
|
|
||||||
|
|||||||
@@ -29,5 +29,4 @@
|
|||||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||||
|
|
||||||
#endif // EIGEN_JACOBI_MODULE_H
|
#endif // EIGEN_JACOBI_MODULE_H
|
||||||
/* vim: set filetype=cpp et sw=2 ts=2 ai: */
|
|
||||||
|
|
||||||
|
|||||||
7
Eigen/LU
7
Eigen/LU
@@ -38,13 +38,10 @@
|
|||||||
#include "src/LU/Determinant.h"
|
#include "src/LU/Determinant.h"
|
||||||
#include "src/LU/InverseImpl.h"
|
#include "src/LU/InverseImpl.h"
|
||||||
|
|
||||||
// Use the SSE optimized version whenever possible. At the moment the
|
#if defined EIGEN_VECTORIZE_SSE || defined EIGEN_VECTORIZE_NEON
|
||||||
// SSE version doesn't compile when AVX is enabled
|
#include "src/LU/arch/InverseSize4.h"
|
||||||
#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX
|
|
||||||
#include "src/LU/arch/Inverse_SSE.h"
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||||
|
|
||||||
#endif // EIGEN_LU_MODULE_H
|
#endif // EIGEN_LU_MODULE_H
|
||||||
/* vim: set filetype=cpp et sw=2 ts=2 ai: */
|
|
||||||
|
|||||||
1
Eigen/QR
1
Eigen/QR
@@ -48,4 +48,3 @@
|
|||||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||||
|
|
||||||
#endif // EIGEN_QR_MODULE_H
|
#endif // EIGEN_QR_MODULE_H
|
||||||
/* vim: set filetype=cpp et sw=2 ts=2 ai: */
|
|
||||||
|
|||||||
@@ -37,4 +37,3 @@ void *qRealloc(void *ptr, std::size_t size)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif // EIGEN_QTMALLOC_MODULE_H
|
#endif // EIGEN_QTMALLOC_MODULE_H
|
||||||
/* vim: set filetype=cpp et sw=2 ts=2 ai: */
|
|
||||||
|
|||||||
@@ -48,4 +48,3 @@
|
|||||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||||
|
|
||||||
#endif // EIGEN_SVD_MODULE_H
|
#endif // EIGEN_SVD_MODULE_H
|
||||||
/* vim: set filetype=cpp et sw=2 ts=2 ai: */
|
|
||||||
|
|||||||
@@ -25,8 +25,6 @@
|
|||||||
|
|
||||||
#include "src/Core/util/DisableStupidWarnings.h"
|
#include "src/Core/util/DisableStupidWarnings.h"
|
||||||
|
|
||||||
#include "src/SparseLU/SparseLU_gemm_kernel.h"
|
|
||||||
|
|
||||||
#include "src/SparseLU/SparseLU_Structs.h"
|
#include "src/SparseLU/SparseLU_Structs.h"
|
||||||
#include "src/SparseLU/SparseLU_SupernodalMatrix.h"
|
#include "src/SparseLU/SparseLU_SupernodalMatrix.h"
|
||||||
#include "src/SparseLU/SparseLUImpl.h"
|
#include "src/SparseLU/SparseLUImpl.h"
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ namespace internal {
|
|||||||
* matrix \f$ A \f$ such that \f$ A = P^TLDL^*P \f$, where P is a permutation matrix, L
|
* matrix \f$ A \f$ such that \f$ A = P^TLDL^*P \f$, where P is a permutation matrix, L
|
||||||
* is lower triangular with a unit diagonal and D is a diagonal matrix.
|
* is lower triangular with a unit diagonal and D is a diagonal matrix.
|
||||||
*
|
*
|
||||||
* The decomposition uses pivoting to ensure stability, so that L will have
|
* The decomposition uses pivoting to ensure stability, so that D will have
|
||||||
* zeros in the bottom right rank(A) - n submatrix. Avoiding the square root
|
* zeros in the bottom right rank(A) - n submatrix. Avoiding the square root
|
||||||
* on D also stabilizes the computation.
|
* on D also stabilizes the computation.
|
||||||
*
|
*
|
||||||
@@ -200,7 +200,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
|||||||
* \f$ L^* y_4 = y_3 \f$ and \f$ P x = y_4 \f$ in succession. If the matrix \f$ A \f$ is singular, then
|
* \f$ L^* y_4 = y_3 \f$ and \f$ P x = y_4 \f$ in succession. If the matrix \f$ A \f$ is singular, then
|
||||||
* \f$ D \f$ will also be singular (all the other matrices are invertible). In that case, the
|
* \f$ D \f$ will also be singular (all the other matrices are invertible). In that case, the
|
||||||
* least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function
|
* least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function
|
||||||
* computes the least-square solution of \f$ A x = b \f$ is \f$ A \f$ is singular.
|
* computes the least-square solution of \f$ A x = b \f$ if \f$ A \f$ is singular.
|
||||||
*
|
*
|
||||||
* \sa MatrixBase::ldlt(), SelfAdjointView::ldlt()
|
* \sa MatrixBase::ldlt(), SelfAdjointView::ldlt()
|
||||||
*/
|
*/
|
||||||
@@ -246,8 +246,8 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
|||||||
*/
|
*/
|
||||||
const LDLT& adjoint() const { return *this; };
|
const LDLT& adjoint() const { return *this; };
|
||||||
|
|
||||||
inline Index rows() const { return m_matrix.rows(); }
|
EIGEN_DEVICE_FUNC inline EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); }
|
||||||
inline Index cols() const { return m_matrix.cols(); }
|
EIGEN_DEVICE_FUNC inline EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); }
|
||||||
|
|
||||||
/** \brief Reports whether previous computation was successful.
|
/** \brief Reports whether previous computation was successful.
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -199,10 +199,10 @@ template<typename _MatrixType, int _UpLo> class LLT
|
|||||||
* This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as:
|
* This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as:
|
||||||
* \code x = decomposition.adjoint().solve(b) \endcode
|
* \code x = decomposition.adjoint().solve(b) \endcode
|
||||||
*/
|
*/
|
||||||
const LLT& adjoint() const { return *this; };
|
const LLT& adjoint() const EIGEN_NOEXCEPT { return *this; };
|
||||||
|
|
||||||
inline Index rows() const { return m_matrix.rows(); }
|
inline EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); }
|
||||||
inline Index cols() const { return m_matrix.cols(); }
|
inline EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); }
|
||||||
|
|
||||||
template<typename VectorType>
|
template<typename VectorType>
|
||||||
LLT & rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);
|
LLT & rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);
|
||||||
|
|||||||
@@ -172,7 +172,8 @@ seqN(FirstType first, SizeType size) {
|
|||||||
return ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type>(first,size);
|
return ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type>(first,size);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
|
||||||
|
#if EIGEN_HAS_CXX11
|
||||||
|
|
||||||
/** \returns an ArithmeticSequence starting at \a f, up (or down) to \a l, and with positive (or negative) increment \a incr
|
/** \returns an ArithmeticSequence starting at \a f, up (or down) to \a l, and with positive (or negative) increment \a incr
|
||||||
*
|
*
|
||||||
@@ -183,24 +184,6 @@ seqN(FirstType first, SizeType size) {
|
|||||||
*
|
*
|
||||||
* \sa seqN(FirstType,SizeType,IncrType), seq(FirstType,LastType)
|
* \sa seqN(FirstType,SizeType,IncrType), seq(FirstType,LastType)
|
||||||
*/
|
*/
|
||||||
template<typename FirstType,typename LastType, typename IncrType>
|
|
||||||
auto seq(FirstType f, LastType l, IncrType incr);
|
|
||||||
|
|
||||||
/** \returns an ArithmeticSequence starting at \a f, up (or down) to \a l, and unit increment
|
|
||||||
*
|
|
||||||
* It is essentially an alias to:
|
|
||||||
* \code
|
|
||||||
* seqN(f,l-f+1);
|
|
||||||
* \endcode
|
|
||||||
*
|
|
||||||
* \sa seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType)
|
|
||||||
*/
|
|
||||||
template<typename FirstType,typename LastType>
|
|
||||||
auto seq(FirstType f, LastType l);
|
|
||||||
|
|
||||||
#else // EIGEN_PARSED_BY_DOXYGEN
|
|
||||||
|
|
||||||
#if EIGEN_HAS_CXX11
|
|
||||||
template<typename FirstType,typename LastType>
|
template<typename FirstType,typename LastType>
|
||||||
auto seq(FirstType f, LastType l) -> decltype(seqN(typename internal::cleanup_index_type<FirstType>::type(f),
|
auto seq(FirstType f, LastType l) -> decltype(seqN(typename internal::cleanup_index_type<FirstType>::type(f),
|
||||||
( typename internal::cleanup_index_type<LastType>::type(l)
|
( typename internal::cleanup_index_type<LastType>::type(l)
|
||||||
@@ -211,6 +194,15 @@ auto seq(FirstType f, LastType l) -> decltype(seqN(typename internal::cleanup_in
|
|||||||
-typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>()));
|
-typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \returns an ArithmeticSequence starting at \a f, up (or down) to \a l, and unit increment
|
||||||
|
*
|
||||||
|
* It is essentially an alias to:
|
||||||
|
* \code
|
||||||
|
* seqN(f,l-f+1);
|
||||||
|
* \endcode
|
||||||
|
*
|
||||||
|
* \sa seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType)
|
||||||
|
*/
|
||||||
template<typename FirstType,typename LastType, typename IncrType>
|
template<typename FirstType,typename LastType, typename IncrType>
|
||||||
auto seq(FirstType f, LastType l, IncrType incr)
|
auto seq(FirstType f, LastType l, IncrType incr)
|
||||||
-> decltype(seqN(typename internal::cleanup_index_type<FirstType>::type(f),
|
-> decltype(seqN(typename internal::cleanup_index_type<FirstType>::type(f),
|
||||||
@@ -317,26 +309,12 @@ seq(const symbolic::BaseExpr<FirstTypeDerived> &f, const symbolic::BaseExpr<Last
|
|||||||
}
|
}
|
||||||
#endif // EIGEN_HAS_CXX11
|
#endif // EIGEN_HAS_CXX11
|
||||||
|
|
||||||
#endif // EIGEN_PARSED_BY_DOXYGEN
|
#if EIGEN_HAS_CXX11
|
||||||
|
|
||||||
|
|
||||||
#if EIGEN_HAS_CXX11 || defined(EIGEN_PARSED_BY_DOXYGEN)
|
|
||||||
/** \cpp11
|
|
||||||
* \returns a symbolic ArithmeticSequence representing the last \a size elements with increment \a incr.
|
|
||||||
*
|
|
||||||
* It is a shortcut for: \code seqN(last-(size-fix<1>)*incr, size, incr) \endcode
|
|
||||||
*
|
|
||||||
* \sa lastN(SizeType), seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) */
|
|
||||||
template<typename SizeType,typename IncrType>
|
|
||||||
auto lastN(SizeType size, IncrType incr)
|
|
||||||
-> decltype(seqN(Eigen::last-(size-fix<1>())*incr, size, incr))
|
|
||||||
{
|
|
||||||
return seqN(Eigen::last-(size-fix<1>())*incr, size, incr);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \cpp11
|
/** \cpp11
|
||||||
* \returns a symbolic ArithmeticSequence representing the last \a size elements with a unit increment.
|
* \returns a symbolic ArithmeticSequence representing the last \a size elements with a unit increment.
|
||||||
*
|
*
|
||||||
|
* \anchor indexing_lastN
|
||||||
|
*
|
||||||
* It is a shortcut for: \code seq(last+fix<1>-size, last) \endcode
|
* It is a shortcut for: \code seq(last+fix<1>-size, last) \endcode
|
||||||
*
|
*
|
||||||
* \sa lastN(SizeType,IncrType), seqN(FirstType,SizeType), seq(FirstType,LastType) */
|
* \sa lastN(SizeType,IncrType), seqN(FirstType,SizeType), seq(FirstType,LastType) */
|
||||||
@@ -346,6 +324,21 @@ auto lastN(SizeType size)
|
|||||||
{
|
{
|
||||||
return seqN(Eigen::last+fix<1>()-size, size);
|
return seqN(Eigen::last+fix<1>()-size, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \cpp11
|
||||||
|
* \returns a symbolic ArithmeticSequence representing the last \a size elements with increment \a incr.
|
||||||
|
*
|
||||||
|
* \anchor indexing_lastN_with_incr
|
||||||
|
*
|
||||||
|
* It is a shortcut for: \code seqN(last-(size-fix<1>)*incr, size, incr) \endcode
|
||||||
|
*
|
||||||
|
* \sa lastN(SizeType), seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) */
|
||||||
|
template<typename SizeType,typename IncrType>
|
||||||
|
auto lastN(SizeType size, IncrType incr)
|
||||||
|
-> decltype(seqN(Eigen::last-(size-fix<1>())*incr, size, incr))
|
||||||
|
{
|
||||||
|
return seqN(Eigen::last-(size-fix<1>())*incr, size, incr);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|||||||
@@ -157,13 +157,21 @@ class Array
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
|
Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
|
||||||
{
|
{
|
||||||
other.swap(*this);
|
Base::operator=(std::move(other));
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if EIGEN_HAS_CXX11
|
#if EIGEN_HAS_CXX11
|
||||||
/** \copydoc PlainObjectBase(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args)
|
/** \brief Construct a row or column vector with fixed size from an arbitrary number of coefficients. \cpp11
|
||||||
|
*
|
||||||
|
* \only_for_vectors
|
||||||
|
*
|
||||||
|
* This constructor is for 1D array or vectors with more than 4 coefficients.
|
||||||
|
* There exist C++98 analogue constructors for fixed-size array/vector having 1, 2, 3, or 4 coefficients.
|
||||||
|
*
|
||||||
|
* \warning To construct a column (resp. row) vector of fixed length, the number of values passed to this
|
||||||
|
* constructor must match the fixed number of rows (resp. columns) of \c *this.
|
||||||
*
|
*
|
||||||
* Example: \include Array_variadic_ctor_cxx11.cpp
|
* Example: \include Array_variadic_ctor_cxx11.cpp
|
||||||
* Output: \verbinclude Array_variadic_ctor_cxx11.out
|
* Output: \verbinclude Array_variadic_ctor_cxx11.out
|
||||||
@@ -288,8 +296,10 @@ class Array
|
|||||||
: Base(other.derived())
|
: Base(other.derived())
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
|
inline Index innerStride() const EIGEN_NOEXCEPT{ return 1; }
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
|
inline Index outerStride() const EIGEN_NOEXCEPT { return this->innerSize(); }
|
||||||
|
|
||||||
#ifdef EIGEN_ARRAY_PLUGIN
|
#ifdef EIGEN_ARRAY_PLUGIN
|
||||||
#include EIGEN_ARRAY_PLUGIN
|
#include EIGEN_ARRAY_PLUGIN
|
||||||
|
|||||||
@@ -153,8 +153,8 @@ template<typename Derived> class ArrayBase
|
|||||||
// inline void evalTo(Dest& dst) const { dst = matrix(); }
|
// inline void evalTo(Dest& dst) const { dst = matrix(); }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEFAULT_COPY_CONSTRUCTOR(ArrayBase)
|
||||||
ArrayBase() : Base() {}
|
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(ArrayBase)
|
||||||
|
|
||||||
private:
|
private:
|
||||||
explicit ArrayBase(Index);
|
explicit ArrayBase(Index);
|
||||||
|
|||||||
@@ -60,14 +60,14 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index rows() const { return m_expression.rows(); }
|
inline Index rows() const EIGEN_NOEXCEPT { return m_expression.rows(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index cols() const { return m_expression.cols(); }
|
inline Index cols() const EIGEN_NOEXCEPT { return m_expression.cols(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index outerStride() const { return m_expression.outerStride(); }
|
inline Index outerStride() const EIGEN_NOEXCEPT { return m_expression.outerStride(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index innerStride() const { return m_expression.innerStride(); }
|
inline Index innerStride() const EIGEN_NOEXCEPT { return m_expression.innerStride(); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
||||||
@@ -158,14 +158,14 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index rows() const { return m_expression.rows(); }
|
inline Index rows() const EIGEN_NOEXCEPT { return m_expression.rows(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index cols() const { return m_expression.cols(); }
|
inline Index cols() const EIGEN_NOEXCEPT { return m_expression.cols(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index outerStride() const { return m_expression.outerStride(); }
|
inline Index outerStride() const EIGEN_NOEXCEPT { return m_expression.outerStride(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index innerStride() const { return m_expression.innerStride(); }
|
inline Index innerStride() const EIGEN_NOEXCEPT { return m_expression.innerStride(); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
||||||
|
|||||||
@@ -99,7 +99,8 @@ private:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
enum {
|
enum {
|
||||||
Traversal = (int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize)) ? int(LinearVectorizedTraversal)
|
Traversal = int(Dst::SizeAtCompileTime) == 0 ? int(AllAtOnceTraversal) // If compile-size is zero, traversing will fail at compile-time.
|
||||||
|
: (int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize)) ? int(LinearVectorizedTraversal)
|
||||||
: int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
|
: int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
|
||||||
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
|
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
|
||||||
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
|
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
|
||||||
@@ -316,6 +317,22 @@ template<typename Kernel,
|
|||||||
int Unrolling = Kernel::AssignmentTraits::Unrolling>
|
int Unrolling = Kernel::AssignmentTraits::Unrolling>
|
||||||
struct dense_assignment_loop;
|
struct dense_assignment_loop;
|
||||||
|
|
||||||
|
/************************
|
||||||
|
***** Special Cases *****
|
||||||
|
************************/
|
||||||
|
|
||||||
|
// Zero-sized assignment is a no-op.
|
||||||
|
template<typename Kernel, int Unrolling>
|
||||||
|
struct dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling>
|
||||||
|
{
|
||||||
|
EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel& /*kernel*/)
|
||||||
|
{
|
||||||
|
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||||
|
EIGEN_STATIC_ASSERT(int(DstXprType::SizeAtCompileTime) == 0,
|
||||||
|
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/************************
|
/************************
|
||||||
*** Default traversal ***
|
*** Default traversal ***
|
||||||
************************/
|
************************/
|
||||||
@@ -433,7 +450,7 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin
|
|||||||
|
|
||||||
enum { size = DstXprType::SizeAtCompileTime,
|
enum { size = DstXprType::SizeAtCompileTime,
|
||||||
packetSize =unpacket_traits<PacketType>::size,
|
packetSize =unpacket_traits<PacketType>::size,
|
||||||
alignedSize = (size/packetSize)*packetSize };
|
alignedSize = (int(size)/packetSize)*packetSize };
|
||||||
|
|
||||||
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
|
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
|
||||||
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
|
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
|
||||||
@@ -572,14 +589,15 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
|
|||||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||||
typedef typename Kernel::PacketType PacketType;
|
typedef typename Kernel::PacketType PacketType;
|
||||||
|
|
||||||
enum { size = DstXprType::InnerSizeAtCompileTime,
|
enum { innerSize = DstXprType::InnerSizeAtCompileTime,
|
||||||
packetSize =unpacket_traits<PacketType>::size,
|
packetSize =unpacket_traits<PacketType>::size,
|
||||||
vectorizableSize = (size/packetSize)*packetSize };
|
vectorizableSize = (int(innerSize) / int(packetSize)) * int(packetSize),
|
||||||
|
size = DstXprType::SizeAtCompileTime };
|
||||||
|
|
||||||
for(Index outer = 0; outer < kernel.outerSize(); ++outer)
|
for(Index outer = 0; outer < kernel.outerSize(); ++outer)
|
||||||
{
|
{
|
||||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
|
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
|
||||||
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
|
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, innerSize>::run(kernel, outer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -620,15 +638,15 @@ public:
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_dstExpr.size(); }
|
||||||
EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index innerSize() const EIGEN_NOEXCEPT { return m_dstExpr.innerSize(); }
|
||||||
EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerSize() const EIGEN_NOEXCEPT { return m_dstExpr.outerSize(); }
|
||||||
EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_dstExpr.rows(); }
|
||||||
EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_dstExpr.cols(); }
|
||||||
EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerStride() const EIGEN_NOEXCEPT { return m_dstExpr.outerStride(); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
|
EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() EIGEN_NOEXCEPT { return m_dst; }
|
||||||
EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
|
EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const EIGEN_NOEXCEPT { return m_src; }
|
||||||
|
|
||||||
/// Assign src(row,col) to dst(row,col) through the assignment functor.
|
/// Assign src(row,col) to dst(row,col) through the assignment functor.
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
|
||||||
@@ -767,6 +785,16 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType
|
|||||||
dense_assignment_loop<Kernel>::run(kernel);
|
dense_assignment_loop<Kernel>::run(kernel);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Specialization for filling the destination with a constant value.
|
||||||
|
#ifndef EIGEN_GPU_COMPILE_PHASE
|
||||||
|
template<typename DstXprType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const Eigen::CwiseNullaryOp<Eigen::internal::scalar_constant_op<typename DstXprType::Scalar>, DstXprType>& src, const internal::assign_op<typename DstXprType::Scalar,typename DstXprType::Scalar>& func)
|
||||||
|
{
|
||||||
|
resize_if_allowed(dst, src, func);
|
||||||
|
std::fill_n(dst.data(), dst.size(), src.functor()());
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
template<typename DstXprType, typename SrcXprType>
|
template<typename DstXprType, typename SrcXprType>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ class BandMatrixBase : public EigenBase<Derived>
|
|||||||
* \warning the internal storage must be column major. */
|
* \warning the internal storage must be column major. */
|
||||||
inline Block<CoefficientsType,Dynamic,1> col(Index i)
|
inline Block<CoefficientsType,Dynamic,1> col(Index i)
|
||||||
{
|
{
|
||||||
EIGEN_STATIC_ASSERT((Options&RowMajor)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
|
EIGEN_STATIC_ASSERT((int(Options) & int(RowMajor)) == 0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
|
||||||
Index start = 0;
|
Index start = 0;
|
||||||
Index len = coeffs().rows();
|
Index len = coeffs().rows();
|
||||||
if (i<=supers())
|
if (i<=supers())
|
||||||
@@ -90,7 +90,7 @@ class BandMatrixBase : public EigenBase<Derived>
|
|||||||
|
|
||||||
template<int Index> struct DiagonalIntReturnType {
|
template<int Index> struct DiagonalIntReturnType {
|
||||||
enum {
|
enum {
|
||||||
ReturnOpposite = (Options&SelfAdjoint) && (((Index)>0 && Supers==0) || ((Index)<0 && Subs==0)),
|
ReturnOpposite = (int(Options) & int(SelfAdjoint)) && (((Index) > 0 && Supers == 0) || ((Index) < 0 && Subs == 0)),
|
||||||
Conjugate = ReturnOpposite && NumTraits<Scalar>::IsComplex,
|
Conjugate = ReturnOpposite && NumTraits<Scalar>::IsComplex,
|
||||||
ActualIndex = ReturnOpposite ? -Index : Index,
|
ActualIndex = ReturnOpposite ? -Index : Index,
|
||||||
DiagonalSize = (RowsAtCompileTime==Dynamic || ColsAtCompileTime==Dynamic)
|
DiagonalSize = (RowsAtCompileTime==Dynamic || ColsAtCompileTime==Dynamic)
|
||||||
@@ -192,7 +192,7 @@ struct traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
|
|||||||
Options = _Options,
|
Options = _Options,
|
||||||
DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic
|
DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic
|
||||||
};
|
};
|
||||||
typedef Matrix<Scalar,DataRowsAtCompileTime,ColsAtCompileTime,Options&RowMajor?RowMajor:ColMajor> CoefficientsType;
|
typedef Matrix<Scalar, DataRowsAtCompileTime, ColsAtCompileTime, int(Options) & int(RowMajor) ? RowMajor : ColMajor> CoefficientsType;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options>
|
template<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options>
|
||||||
@@ -211,16 +211,16 @@ class BandMatrix : public BandMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Sub
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** \returns the number of rows */
|
/** \returns the number of rows */
|
||||||
inline Index rows() const { return m_rows.value(); }
|
inline EIGEN_CONSTEXPR Index rows() const { return m_rows.value(); }
|
||||||
|
|
||||||
/** \returns the number of columns */
|
/** \returns the number of columns */
|
||||||
inline Index cols() const { return m_coeffs.cols(); }
|
inline EIGEN_CONSTEXPR Index cols() const { return m_coeffs.cols(); }
|
||||||
|
|
||||||
/** \returns the number of super diagonals */
|
/** \returns the number of super diagonals */
|
||||||
inline Index supers() const { return m_supers.value(); }
|
inline EIGEN_CONSTEXPR Index supers() const { return m_supers.value(); }
|
||||||
|
|
||||||
/** \returns the number of sub diagonals */
|
/** \returns the number of sub diagonals */
|
||||||
inline Index subs() const { return m_subs.value(); }
|
inline EIGEN_CONSTEXPR Index subs() const { return m_subs.value(); }
|
||||||
|
|
||||||
inline const CoefficientsType& coeffs() const { return m_coeffs; }
|
inline const CoefficientsType& coeffs() const { return m_coeffs; }
|
||||||
inline CoefficientsType& coeffs() { return m_coeffs; }
|
inline CoefficientsType& coeffs() { return m_coeffs; }
|
||||||
@@ -275,16 +275,16 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** \returns the number of rows */
|
/** \returns the number of rows */
|
||||||
inline Index rows() const { return m_rows.value(); }
|
inline EIGEN_CONSTEXPR Index rows() const { return m_rows.value(); }
|
||||||
|
|
||||||
/** \returns the number of columns */
|
/** \returns the number of columns */
|
||||||
inline Index cols() const { return m_coeffs.cols(); }
|
inline EIGEN_CONSTEXPR Index cols() const { return m_coeffs.cols(); }
|
||||||
|
|
||||||
/** \returns the number of super diagonals */
|
/** \returns the number of super diagonals */
|
||||||
inline Index supers() const { return m_supers.value(); }
|
inline EIGEN_CONSTEXPR Index supers() const { return m_supers.value(); }
|
||||||
|
|
||||||
/** \returns the number of sub diagonals */
|
/** \returns the number of sub diagonals */
|
||||||
inline Index subs() const { return m_subs.value(); }
|
inline EIGEN_CONSTEXPR Index subs() const { return m_subs.value(); }
|
||||||
|
|
||||||
inline const CoefficientsType& coeffs() const { return m_coeffs; }
|
inline const CoefficientsType& coeffs() const { return m_coeffs; }
|
||||||
|
|
||||||
|
|||||||
@@ -260,19 +260,19 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<int LoadMode>
|
template<int LoadMode>
|
||||||
inline PacketScalar packet(Index rowId, Index colId) const
|
EIGEN_DEVICE_FUNC inline PacketScalar packet(Index rowId, Index colId) const
|
||||||
{
|
{
|
||||||
return m_xpr.template packet<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value());
|
return m_xpr.template packet<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value());
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int LoadMode>
|
template<int LoadMode>
|
||||||
inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
|
EIGEN_DEVICE_FUNC inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
|
||||||
{
|
{
|
||||||
m_xpr.template writePacket<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value(), val);
|
m_xpr.template writePacket<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value(), val);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int LoadMode>
|
template<int LoadMode>
|
||||||
inline PacketScalar packet(Index index) const
|
EIGEN_DEVICE_FUNC inline PacketScalar packet(Index index) const
|
||||||
{
|
{
|
||||||
return m_xpr.template packet<Unaligned>
|
return m_xpr.template packet<Unaligned>
|
||||||
(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
||||||
@@ -280,7 +280,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<int LoadMode>
|
template<int LoadMode>
|
||||||
inline void writePacket(Index index, const PacketScalar& val)
|
EIGEN_DEVICE_FUNC inline void writePacket(Index index, const PacketScalar& val)
|
||||||
{
|
{
|
||||||
m_xpr.template writePacket<Unaligned>
|
m_xpr.template writePacket<Unaligned>
|
||||||
(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
||||||
@@ -303,14 +303,14 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
|||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
XprType& nestedExpression() { return m_xpr; }
|
XprType& nestedExpression() { return m_xpr; }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
StorageIndex startRow() const
|
StorageIndex startRow() const EIGEN_NOEXCEPT
|
||||||
{
|
{
|
||||||
return m_startRow.value();
|
return m_startRow.value();
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
StorageIndex startCol() const
|
StorageIndex startCol() const EIGEN_NOEXCEPT
|
||||||
{
|
{
|
||||||
return m_startCol.value();
|
return m_startCol.value();
|
||||||
}
|
}
|
||||||
@@ -334,6 +334,17 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
|||||||
enum {
|
enum {
|
||||||
XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0
|
XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** \internal Returns base+offset (unless base is null, in which case returns null).
|
||||||
|
* Adding an offset to nullptr is undefined behavior, so we must avoid it.
|
||||||
|
*/
|
||||||
|
template <typename Scalar>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE
|
||||||
|
static Scalar* add_to_nullable_pointer(Scalar* base, Index offset)
|
||||||
|
{
|
||||||
|
return base != NULL ? base+offset : NULL;
|
||||||
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
typedef MapBase<BlockType> Base;
|
typedef MapBase<BlockType> Base;
|
||||||
@@ -344,8 +355,9 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
|||||||
*/
|
*/
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
BlockImpl_dense(XprType& xpr, Index i)
|
BlockImpl_dense(XprType& xpr, Index i)
|
||||||
: Base(xpr.data() + i * ( ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor))
|
: Base((BlockRows == 0 || BlockCols == 0) ? NULL : add_to_nullable_pointer(xpr.data(),
|
||||||
|| ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()),
|
i * ( ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor))
|
||||||
|
|| ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride())),
|
||||||
BlockRows==1 ? 1 : xpr.rows(),
|
BlockRows==1 ? 1 : xpr.rows(),
|
||||||
BlockCols==1 ? 1 : xpr.cols()),
|
BlockCols==1 ? 1 : xpr.cols()),
|
||||||
m_xpr(xpr),
|
m_xpr(xpr),
|
||||||
@@ -359,7 +371,8 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
|||||||
*/
|
*/
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
|
BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
|
||||||
: Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),
|
: Base((BlockRows == 0 || BlockCols == 0) ? NULL : add_to_nullable_pointer(xpr.data(),
|
||||||
|
xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol))),
|
||||||
m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
|
m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
|
||||||
{
|
{
|
||||||
init();
|
init();
|
||||||
@@ -371,14 +384,16 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
|||||||
BlockImpl_dense(XprType& xpr,
|
BlockImpl_dense(XprType& xpr,
|
||||||
Index startRow, Index startCol,
|
Index startRow, Index startCol,
|
||||||
Index blockRows, Index blockCols)
|
Index blockRows, Index blockCols)
|
||||||
: Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols),
|
: Base((blockRows == 0 || blockCols == 0) ? NULL : add_to_nullable_pointer(xpr.data(),
|
||||||
|
xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),
|
||||||
|
blockRows, blockCols),
|
||||||
m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
|
m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
|
||||||
{
|
{
|
||||||
init();
|
init();
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
|
const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const EIGEN_NOEXCEPT
|
||||||
{
|
{
|
||||||
return m_xpr;
|
return m_xpr;
|
||||||
}
|
}
|
||||||
@@ -387,8 +402,8 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
|||||||
XprType& nestedExpression() { return m_xpr; }
|
XprType& nestedExpression() { return m_xpr; }
|
||||||
|
|
||||||
/** \sa MapBase::innerStride() */
|
/** \sa MapBase::innerStride() */
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
Index innerStride() const
|
Index innerStride() const EIGEN_NOEXCEPT
|
||||||
{
|
{
|
||||||
return internal::traits<BlockType>::HasSameStorageOrderAsXprType
|
return internal::traits<BlockType>::HasSameStorageOrderAsXprType
|
||||||
? m_xpr.innerStride()
|
? m_xpr.innerStride()
|
||||||
@@ -396,23 +411,19 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** \sa MapBase::outerStride() */
|
/** \sa MapBase::outerStride() */
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
Index outerStride() const
|
Index outerStride() const EIGEN_NOEXCEPT
|
||||||
{
|
{
|
||||||
return m_outerStride;
|
return internal::traits<BlockType>::HasSameStorageOrderAsXprType
|
||||||
|
? m_xpr.outerStride()
|
||||||
|
: m_xpr.innerStride();
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
StorageIndex startRow() const
|
StorageIndex startRow() const EIGEN_NOEXCEPT { return m_startRow.value(); }
|
||||||
{
|
|
||||||
return m_startRow.value();
|
|
||||||
}
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
StorageIndex startCol() const
|
StorageIndex startCol() const EIGEN_NOEXCEPT { return m_startCol.value(); }
|
||||||
{
|
|
||||||
return m_startCol.value();
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifndef __SUNPRO_CC
|
#ifndef __SUNPRO_CC
|
||||||
// FIXME sunstudio is not friendly with the above friend...
|
// FIXME sunstudio is not friendly with the above friend...
|
||||||
|
|||||||
@@ -14,56 +14,58 @@ namespace Eigen {
|
|||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
template<typename Derived, int UnrollCount, int Rows>
|
template<typename Derived, int UnrollCount, int InnerSize>
|
||||||
struct all_unroller
|
struct all_unroller
|
||||||
{
|
{
|
||||||
enum {
|
enum {
|
||||||
col = (UnrollCount-1) / Rows,
|
IsRowMajor = (int(Derived::Flags) & int(RowMajor)),
|
||||||
row = (UnrollCount-1) % Rows
|
i = (UnrollCount-1) / InnerSize,
|
||||||
|
j = (UnrollCount-1) % InnerSize
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline bool run(const Derived &mat)
|
EIGEN_DEVICE_FUNC static inline bool run(const Derived &mat)
|
||||||
{
|
{
|
||||||
return all_unroller<Derived, UnrollCount-1, Rows>::run(mat) && mat.coeff(row, col);
|
return all_unroller<Derived, UnrollCount-1, InnerSize>::run(mat) && mat.coeff(IsRowMajor ? i : j, IsRowMajor ? j : i);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Derived, int Rows>
|
template<typename Derived, int InnerSize>
|
||||||
struct all_unroller<Derived, 0, Rows>
|
struct all_unroller<Derived, 0, InnerSize>
|
||||||
{
|
{
|
||||||
static inline bool run(const Derived &/*mat*/) { return true; }
|
EIGEN_DEVICE_FUNC static inline bool run(const Derived &/*mat*/) { return true; }
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Derived, int Rows>
|
template<typename Derived, int InnerSize>
|
||||||
struct all_unroller<Derived, Dynamic, Rows>
|
struct all_unroller<Derived, Dynamic, InnerSize>
|
||||||
{
|
{
|
||||||
static inline bool run(const Derived &) { return false; }
|
EIGEN_DEVICE_FUNC static inline bool run(const Derived &) { return false; }
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Derived, int UnrollCount, int Rows>
|
template<typename Derived, int UnrollCount, int InnerSize>
|
||||||
struct any_unroller
|
struct any_unroller
|
||||||
{
|
{
|
||||||
enum {
|
enum {
|
||||||
col = (UnrollCount-1) / Rows,
|
IsRowMajor = (int(Derived::Flags) & int(RowMajor)),
|
||||||
row = (UnrollCount-1) % Rows
|
i = (UnrollCount-1) / InnerSize,
|
||||||
|
j = (UnrollCount-1) % InnerSize
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline bool run(const Derived &mat)
|
EIGEN_DEVICE_FUNC static inline bool run(const Derived &mat)
|
||||||
{
|
{
|
||||||
return any_unroller<Derived, UnrollCount-1, Rows>::run(mat) || mat.coeff(row, col);
|
return any_unroller<Derived, UnrollCount-1, InnerSize>::run(mat) || mat.coeff(IsRowMajor ? i : j, IsRowMajor ? j : i);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Derived, int Rows>
|
template<typename Derived, int InnerSize>
|
||||||
struct any_unroller<Derived, 0, Rows>
|
struct any_unroller<Derived, 0, InnerSize>
|
||||||
{
|
{
|
||||||
static inline bool run(const Derived & /*mat*/) { return false; }
|
EIGEN_DEVICE_FUNC static inline bool run(const Derived & /*mat*/) { return false; }
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Derived, int Rows>
|
template<typename Derived, int InnerSize>
|
||||||
struct any_unroller<Derived, Dynamic, Rows>
|
struct any_unroller<Derived, Dynamic, InnerSize>
|
||||||
{
|
{
|
||||||
static inline bool run(const Derived &) { return false; }
|
EIGEN_DEVICE_FUNC static inline bool run(const Derived &) { return false; }
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
@@ -81,16 +83,16 @@ EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::all() const
|
|||||||
typedef internal::evaluator<Derived> Evaluator;
|
typedef internal::evaluator<Derived> Evaluator;
|
||||||
enum {
|
enum {
|
||||||
unroll = SizeAtCompileTime != Dynamic
|
unroll = SizeAtCompileTime != Dynamic
|
||||||
&& SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
|
&& SizeAtCompileTime * (int(Evaluator::CoeffReadCost) + int(NumTraits<Scalar>::AddCost)) <= EIGEN_UNROLLING_LIMIT
|
||||||
};
|
};
|
||||||
Evaluator evaluator(derived());
|
Evaluator evaluator(derived());
|
||||||
if(unroll)
|
if(unroll)
|
||||||
return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic, internal::traits<Derived>::RowsAtCompileTime>::run(evaluator);
|
return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic, InnerSizeAtCompileTime>::run(evaluator);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for(Index j = 0; j < cols(); ++j)
|
for(Index i = 0; i < derived().outerSize(); ++i)
|
||||||
for(Index i = 0; i < rows(); ++i)
|
for(Index j = 0; j < derived().innerSize(); ++j)
|
||||||
if (!evaluator.coeff(i, j)) return false;
|
if (!evaluator.coeff(IsRowMajor ? i : j, IsRowMajor ? j : i)) return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -105,16 +107,16 @@ EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::any() const
|
|||||||
typedef internal::evaluator<Derived> Evaluator;
|
typedef internal::evaluator<Derived> Evaluator;
|
||||||
enum {
|
enum {
|
||||||
unroll = SizeAtCompileTime != Dynamic
|
unroll = SizeAtCompileTime != Dynamic
|
||||||
&& SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
|
&& SizeAtCompileTime * (int(Evaluator::CoeffReadCost) + int(NumTraits<Scalar>::AddCost)) <= EIGEN_UNROLLING_LIMIT
|
||||||
};
|
};
|
||||||
Evaluator evaluator(derived());
|
Evaluator evaluator(derived());
|
||||||
if(unroll)
|
if(unroll)
|
||||||
return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic, internal::traits<Derived>::RowsAtCompileTime>::run(evaluator);
|
return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic, InnerSizeAtCompileTime>::run(evaluator);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for(Index j = 0; j < cols(); ++j)
|
for(Index i = 0; i < derived().outerSize(); ++i)
|
||||||
for(Index i = 0; i < rows(); ++i)
|
for(Index j = 0; j < derived().innerSize(); ++j)
|
||||||
if (evaluator.coeff(i, j)) return true;
|
if (evaluator.coeff(IsRowMajor ? i : j, IsRowMajor ? j : i)) return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -33,6 +33,8 @@ struct CommaInitializer
|
|||||||
inline CommaInitializer(XprType& xpr, const Scalar& s)
|
inline CommaInitializer(XprType& xpr, const Scalar& s)
|
||||||
: m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1)
|
: m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1)
|
||||||
{
|
{
|
||||||
|
eigen_assert(m_xpr.rows() > 0 && m_xpr.cols() > 0
|
||||||
|
&& "Cannot comma-initialize a 0x0 matrix (operator<<)");
|
||||||
m_xpr.coeffRef(0,0) = s;
|
m_xpr.coeffRef(0,0) = s;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -41,6 +43,8 @@ struct CommaInitializer
|
|||||||
inline CommaInitializer(XprType& xpr, const DenseBase<OtherDerived>& other)
|
inline CommaInitializer(XprType& xpr, const DenseBase<OtherDerived>& other)
|
||||||
: m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows())
|
: m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows())
|
||||||
{
|
{
|
||||||
|
eigen_assert(m_xpr.rows() >= other.rows() && m_xpr.cols() >= other.cols()
|
||||||
|
&& "Cannot comma-initialize a 0x0 matrix (operator<<)");
|
||||||
m_xpr.block(0, 0, other.rows(), other.cols()) = other;
|
m_xpr.block(0, 0, other.rows(), other.cols()) = other;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -143,8 +143,8 @@ public:
|
|||||||
#endif
|
#endif
|
||||||
eigen_internal_assert(outerStride==OuterStride);
|
eigen_internal_assert(outerStride==OuterStride);
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
Index outerStride() const { return OuterStride; }
|
Index outerStride() const EIGEN_NOEXCEPT { return OuterStride; }
|
||||||
const Scalar *data;
|
const Scalar *data;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -561,7 +561,7 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
|
|||||||
typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
|
typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
|
CoeffReadCost = int(evaluator<ArgType>::CoeffReadCost) + int(functor_traits<UnaryOp>::Cost),
|
||||||
|
|
||||||
Flags = evaluator<ArgType>::Flags
|
Flags = evaluator<ArgType>::Flags
|
||||||
& (HereditaryBits | LinearAccessBit | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
|
& (HereditaryBits | LinearAccessBit | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
|
||||||
@@ -606,13 +606,13 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
|
|||||||
protected:
|
protected:
|
||||||
|
|
||||||
// this helper permits to completely eliminate the functor if it is empty
|
// this helper permits to completely eliminate the functor if it is empty
|
||||||
class Data : private UnaryOp
|
struct Data
|
||||||
{
|
{
|
||||||
public:
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
Data(const XprType& xpr) : UnaryOp(xpr.functor()), argImpl(xpr.nestedExpression()) {}
|
Data(const XprType& xpr) : op(xpr.functor()), argImpl(xpr.nestedExpression()) {}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
const UnaryOp& func() const { return static_cast<const UnaryOp&>(*this); }
|
const UnaryOp& func() const { return op; }
|
||||||
|
UnaryOp op;
|
||||||
evaluator<ArgType> argImpl;
|
evaluator<ArgType> argImpl;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -639,7 +639,7 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased
|
|||||||
typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
|
typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
CoeffReadCost = evaluator<Arg1>::CoeffReadCost + evaluator<Arg2>::CoeffReadCost + evaluator<Arg3>::CoeffReadCost + functor_traits<TernaryOp>::Cost,
|
CoeffReadCost = int(evaluator<Arg1>::CoeffReadCost) + int(evaluator<Arg2>::CoeffReadCost) + int(evaluator<Arg3>::CoeffReadCost) + int(functor_traits<TernaryOp>::Cost),
|
||||||
|
|
||||||
Arg1Flags = evaluator<Arg1>::Flags,
|
Arg1Flags = evaluator<Arg1>::Flags,
|
||||||
Arg2Flags = evaluator<Arg2>::Flags,
|
Arg2Flags = evaluator<Arg2>::Flags,
|
||||||
@@ -700,12 +700,13 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
// this helper permits to completely eliminate the functor if it is empty
|
// this helper permits to completely eliminate the functor if it is empty
|
||||||
struct Data : private TernaryOp
|
struct Data
|
||||||
{
|
{
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
Data(const XprType& xpr) : TernaryOp(xpr.functor()), arg1Impl(xpr.arg1()), arg2Impl(xpr.arg2()), arg3Impl(xpr.arg3()) {}
|
Data(const XprType& xpr) : op(xpr.functor()), arg1Impl(xpr.arg1()), arg2Impl(xpr.arg2()), arg3Impl(xpr.arg3()) {}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
const TernaryOp& func() const { return static_cast<const TernaryOp&>(*this); }
|
const TernaryOp& func() const { return op; }
|
||||||
|
TernaryOp op;
|
||||||
evaluator<Arg1> arg1Impl;
|
evaluator<Arg1> arg1Impl;
|
||||||
evaluator<Arg2> arg2Impl;
|
evaluator<Arg2> arg2Impl;
|
||||||
evaluator<Arg3> arg3Impl;
|
evaluator<Arg3> arg3Impl;
|
||||||
@@ -735,7 +736,7 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
|
|||||||
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
|
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
|
CoeffReadCost = int(evaluator<Lhs>::CoeffReadCost) + int(evaluator<Rhs>::CoeffReadCost) + int(functor_traits<BinaryOp>::Cost),
|
||||||
|
|
||||||
LhsFlags = evaluator<Lhs>::Flags,
|
LhsFlags = evaluator<Lhs>::Flags,
|
||||||
RhsFlags = evaluator<Rhs>::Flags,
|
RhsFlags = evaluator<Rhs>::Flags,
|
||||||
@@ -793,12 +794,13 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
|
|||||||
protected:
|
protected:
|
||||||
|
|
||||||
// this helper permits to completely eliminate the functor if it is empty
|
// this helper permits to completely eliminate the functor if it is empty
|
||||||
struct Data : private BinaryOp
|
struct Data
|
||||||
{
|
{
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
Data(const XprType& xpr) : BinaryOp(xpr.functor()), lhsImpl(xpr.lhs()), rhsImpl(xpr.rhs()) {}
|
Data(const XprType& xpr) : op(xpr.functor()), lhsImpl(xpr.lhs()), rhsImpl(xpr.rhs()) {}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
const BinaryOp& func() const { return static_cast<const BinaryOp&>(*this); }
|
const BinaryOp& func() const { return op; }
|
||||||
|
BinaryOp op;
|
||||||
evaluator<Lhs> lhsImpl;
|
evaluator<Lhs> lhsImpl;
|
||||||
evaluator<Rhs> rhsImpl;
|
evaluator<Rhs> rhsImpl;
|
||||||
};
|
};
|
||||||
@@ -815,7 +817,7 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
|
|||||||
typedef CwiseUnaryView<UnaryOp, ArgType> XprType;
|
typedef CwiseUnaryView<UnaryOp, ArgType> XprType;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
|
CoeffReadCost = int(evaluator<ArgType>::CoeffReadCost) + int(functor_traits<UnaryOp>::Cost),
|
||||||
|
|
||||||
Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)),
|
Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)),
|
||||||
|
|
||||||
@@ -858,12 +860,13 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
|
|||||||
protected:
|
protected:
|
||||||
|
|
||||||
// this helper permits to completely eliminate the functor if it is empty
|
// this helper permits to completely eliminate the functor if it is empty
|
||||||
struct Data : private UnaryOp
|
struct Data
|
||||||
{
|
{
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
Data(const XprType& xpr) : UnaryOp(xpr.functor()), argImpl(xpr.nestedExpression()) {}
|
Data(const XprType& xpr) : op(xpr.functor()), argImpl(xpr.nestedExpression()) {}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
const UnaryOp& func() const { return static_cast<const UnaryOp&>(*this); }
|
const UnaryOp& func() const { return op; }
|
||||||
|
UnaryOp op;
|
||||||
evaluator<ArgType> argImpl;
|
evaluator<ArgType> argImpl;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -956,10 +959,14 @@ struct mapbase_evaluator : evaluator_base<Derived>
|
|||||||
internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x);
|
internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x);
|
||||||
}
|
}
|
||||||
protected:
|
protected:
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); }
|
Index rowStride() const EIGEN_NOEXCEPT {
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value();
|
||||||
Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); }
|
}
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
|
Index colStride() const EIGEN_NOEXCEPT {
|
||||||
|
return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value();
|
||||||
|
}
|
||||||
|
|
||||||
PointerType m_data;
|
PointerType m_data;
|
||||||
const internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_innerStride;
|
const internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_innerStride;
|
||||||
@@ -1648,8 +1655,10 @@ protected:
|
|||||||
const internal::variable_if_dynamicindex<Index, XprType::DiagIndex> m_index;
|
const internal::variable_if_dynamicindex<Index, XprType::DiagIndex> m_index;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; }
|
Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); }
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
|
Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -1686,12 +1695,12 @@ class EvalToTemp
|
|||||||
return m_arg;
|
return m_arg;
|
||||||
}
|
}
|
||||||
|
|
||||||
Index rows() const
|
EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT
|
||||||
{
|
{
|
||||||
return m_arg.rows();
|
return m_arg.rows();
|
||||||
}
|
}
|
||||||
|
|
||||||
Index cols() const
|
EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT
|
||||||
{
|
{
|
||||||
return m_arg.cols();
|
return m_arg.cols();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -102,7 +102,7 @@ class CwiseBinaryOp :
|
|||||||
|
|
||||||
#if EIGEN_COMP_MSVC && EIGEN_HAS_CXX11
|
#if EIGEN_COMP_MSVC && EIGEN_HAS_CXX11
|
||||||
//Required for Visual Studio or the Copy constructor will probably not get inlined!
|
//Required for Visual Studio or the Copy constructor will probably not get inlined!
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_STRONG_INLINE
|
||||||
CwiseBinaryOp(const CwiseBinaryOp<BinaryOp,LhsType,RhsType>&) = default;
|
CwiseBinaryOp(const CwiseBinaryOp<BinaryOp,LhsType,RhsType>&) = default;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -116,21 +116,15 @@ class CwiseBinaryOp :
|
|||||||
eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols());
|
eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols());
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
Index rows() const {
|
Index rows() const EIGEN_NOEXCEPT {
|
||||||
// return the fixed size type if available to enable compile time optimizations
|
// return the fixed size type if available to enable compile time optimizations
|
||||||
if (internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic)
|
return internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic ? m_rhs.rows() : m_lhs.rows();
|
||||||
return m_rhs.rows();
|
|
||||||
else
|
|
||||||
return m_lhs.rows();
|
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
Index cols() const {
|
Index cols() const EIGEN_NOEXCEPT {
|
||||||
// return the fixed size type if available to enable compile time optimizations
|
// return the fixed size type if available to enable compile time optimizations
|
||||||
if (internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic)
|
return internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic ? m_rhs.cols() : m_lhs.cols();
|
||||||
return m_rhs.cols();
|
|
||||||
else
|
|
||||||
return m_lhs.cols();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \returns the left hand side nested expression */
|
/** \returns the left hand side nested expression */
|
||||||
|
|||||||
@@ -74,10 +74,10 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
|
|||||||
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
|
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); }
|
Index rows() const { return m_rows.value(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); }
|
Index cols() const { return m_cols.value(); }
|
||||||
|
|
||||||
/** \returns the functor representing the nullary operation */
|
/** \returns the functor representing the nullary operation */
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
@@ -292,7 +292,7 @@ DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \copydoc DenseBase::LinSpaced(Index, const Scalar&, const Scalar&)
|
* \copydoc DenseBase::LinSpaced(Index, const DenseBase::Scalar&, const DenseBase::Scalar&)
|
||||||
* Special version for fixed size types which does not require the size parameter.
|
* Special version for fixed size types which does not require the size parameter.
|
||||||
*/
|
*/
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
@@ -383,6 +383,33 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
|
|||||||
return setConstant(val);
|
return setConstant(val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Resizes to the given size, changing only the number of columns, and sets all
|
||||||
|
* coefficients in this expression to the given value \a val. For the parameter
|
||||||
|
* of type NoChange_t, just pass the special value \c NoChange.
|
||||||
|
*
|
||||||
|
* \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
|
||||||
|
*/
|
||||||
|
template<typename Derived>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||||
|
PlainObjectBase<Derived>::setConstant(NoChange_t, Index cols, const Scalar& val)
|
||||||
|
{
|
||||||
|
return setConstant(rows(), cols, val);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Resizes to the given size, changing only the number of rows, and sets all
|
||||||
|
* coefficients in this expression to the given value \a val. For the parameter
|
||||||
|
* of type NoChange_t, just pass the special value \c NoChange.
|
||||||
|
*
|
||||||
|
* \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
|
||||||
|
*/
|
||||||
|
template<typename Derived>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||||
|
PlainObjectBase<Derived>::setConstant(Index rows, NoChange_t, const Scalar& val)
|
||||||
|
{
|
||||||
|
return setConstant(rows, cols(), val);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Sets a linearly spaced vector.
|
* \brief Sets a linearly spaced vector.
|
||||||
*
|
*
|
||||||
@@ -556,6 +583,32 @@ PlainObjectBase<Derived>::setZero(Index rows, Index cols)
|
|||||||
return setConstant(Scalar(0));
|
return setConstant(Scalar(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Resizes to the given size, changing only the number of columns, and sets all
|
||||||
|
* coefficients in this expression to zero. For the parameter of type NoChange_t,
|
||||||
|
* just pass the special value \c NoChange.
|
||||||
|
*
|
||||||
|
* \sa DenseBase::setZero(), setZero(Index), setZero(Index, Index), setZero(Index, NoChange_t), class CwiseNullaryOp, DenseBase::Zero()
|
||||||
|
*/
|
||||||
|
template<typename Derived>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||||
|
PlainObjectBase<Derived>::setZero(NoChange_t, Index cols)
|
||||||
|
{
|
||||||
|
return setZero(rows(), cols);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Resizes to the given size, changing only the number of rows, and sets all
|
||||||
|
* coefficients in this expression to zero. For the parameter of type NoChange_t,
|
||||||
|
* just pass the special value \c NoChange.
|
||||||
|
*
|
||||||
|
* \sa DenseBase::setZero(), setZero(Index), setZero(Index, Index), setZero(NoChange_t, Index), class CwiseNullaryOp, DenseBase::Zero()
|
||||||
|
*/
|
||||||
|
template<typename Derived>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||||
|
PlainObjectBase<Derived>::setZero(Index rows, NoChange_t)
|
||||||
|
{
|
||||||
|
return setZero(rows, cols());
|
||||||
|
}
|
||||||
|
|
||||||
// ones:
|
// ones:
|
||||||
|
|
||||||
/** \returns an expression of a matrix where all coefficients equal one.
|
/** \returns an expression of a matrix where all coefficients equal one.
|
||||||
@@ -682,6 +735,32 @@ PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
|
|||||||
return setConstant(Scalar(1));
|
return setConstant(Scalar(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Resizes to the given size, changing only the number of rows, and sets all
|
||||||
|
* coefficients in this expression to one. For the parameter of type NoChange_t,
|
||||||
|
* just pass the special value \c NoChange.
|
||||||
|
*
|
||||||
|
* \sa MatrixBase::setOnes(), setOnes(Index), setOnes(Index, Index), setOnes(NoChange_t, Index), class CwiseNullaryOp, MatrixBase::Ones()
|
||||||
|
*/
|
||||||
|
template<typename Derived>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||||
|
PlainObjectBase<Derived>::setOnes(Index rows, NoChange_t)
|
||||||
|
{
|
||||||
|
return setOnes(rows, cols());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Resizes to the given size, changing only the number of columns, and sets all
|
||||||
|
* coefficients in this expression to one. For the parameter of type NoChange_t,
|
||||||
|
* just pass the special value \c NoChange.
|
||||||
|
*
|
||||||
|
* \sa MatrixBase::setOnes(), setOnes(Index), setOnes(Index, Index), setOnes(Index, NoChange_t), class CwiseNullaryOp, MatrixBase::Ones()
|
||||||
|
*/
|
||||||
|
template<typename Derived>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||||
|
PlainObjectBase<Derived>::setOnes(NoChange_t, Index cols)
|
||||||
|
{
|
||||||
|
return setOnes(rows(), cols);
|
||||||
|
}
|
||||||
|
|
||||||
// Identity:
|
// Identity:
|
||||||
|
|
||||||
/** \returns an expression of the identity matrix (not necessarily square).
|
/** \returns an expression of the identity matrix (not necessarily square).
|
||||||
|
|||||||
@@ -65,10 +65,10 @@ class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal
|
|||||||
explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
|
explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
|
||||||
: m_xpr(xpr), m_functor(func) {}
|
: m_xpr(xpr), m_functor(func) {}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
Index rows() const { return m_xpr.rows(); }
|
Index rows() const EIGEN_NOEXCEPT { return m_xpr.rows(); }
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
Index cols() const { return m_xpr.cols(); }
|
Index cols() const EIGEN_NOEXCEPT { return m_xpr.cols(); }
|
||||||
|
|
||||||
/** \returns the functor representing the unary operation */
|
/** \returns the functor representing the unary operation */
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
|
|||||||
@@ -64,23 +64,25 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
|
|||||||
typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
|
typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
|
||||||
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
|
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
|
||||||
|
|
||||||
explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp())
|
explicit EIGEN_DEVICE_FUNC inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp())
|
||||||
: m_matrix(mat), m_functor(func) {}
|
: m_matrix(mat), m_functor(func) {}
|
||||||
|
|
||||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView)
|
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView)
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); }
|
Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); }
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
|
Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); }
|
||||||
|
|
||||||
/** \returns the functor representing the unary operation */
|
/** \returns the functor representing the unary operation */
|
||||||
const ViewOp& functor() const { return m_functor; }
|
EIGEN_DEVICE_FUNC const ViewOp& functor() const { return m_functor; }
|
||||||
|
|
||||||
/** \returns the nested expression */
|
/** \returns the nested expression */
|
||||||
const typename internal::remove_all<MatrixTypeNested>::type&
|
EIGEN_DEVICE_FUNC const typename internal::remove_all<MatrixTypeNested>::type&
|
||||||
nestedExpression() const { return m_matrix; }
|
nestedExpression() const { return m_matrix; }
|
||||||
|
|
||||||
/** \returns the nested expression */
|
/** \returns the nested expression */
|
||||||
typename internal::remove_reference<MatrixTypeNested>::type&
|
EIGEN_DEVICE_FUNC typename internal::remove_reference<MatrixTypeNested>::type&
|
||||||
nestedExpression() { return m_matrix; }
|
nestedExpression() { return m_matrix; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
@@ -112,15 +114,17 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
|
|||||||
EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); }
|
EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); }
|
||||||
EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); }
|
EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline Index innerStride() const
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const
|
||||||
{
|
{
|
||||||
return derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
|
return derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline Index outerStride() const
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const
|
||||||
{
|
{
|
||||||
return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
|
return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
|
||||||
}
|
}
|
||||||
|
protected:
|
||||||
|
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl)
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ namespace internal {
|
|||||||
// The index type defined by EIGEN_DEFAULT_DENSE_INDEX_TYPE must be a signed type.
|
// The index type defined by EIGEN_DEFAULT_DENSE_INDEX_TYPE must be a signed type.
|
||||||
// This dummy function simply aims at checking that at compile time.
|
// This dummy function simply aims at checking that at compile time.
|
||||||
static inline void check_DenseIndex_is_signed() {
|
static inline void check_DenseIndex_is_signed() {
|
||||||
EIGEN_STATIC_ASSERT(NumTraits<DenseIndex>::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE);
|
EIGEN_STATIC_ASSERT(NumTraits<DenseIndex>::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE)
|
||||||
}
|
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
@@ -211,7 +211,7 @@ template<typename Derived> class DenseBase
|
|||||||
|
|
||||||
/** \returns the number of nonzero coefficients which is in practice the number
|
/** \returns the number of nonzero coefficients which is in practice the number
|
||||||
* of stored coefficients. */
|
* of stored coefficients. */
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index nonZeros() const { return size(); }
|
inline Index nonZeros() const { return size(); }
|
||||||
|
|
||||||
/** \returns the outer size.
|
/** \returns the outer size.
|
||||||
@@ -219,7 +219,7 @@ template<typename Derived> class DenseBase
|
|||||||
* \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension
|
* \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension
|
||||||
* with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a
|
* with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a
|
||||||
* column-major matrix, and the number of rows for a row-major matrix. */
|
* column-major matrix, and the number of rows for a row-major matrix. */
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
Index outerSize() const
|
Index outerSize() const
|
||||||
{
|
{
|
||||||
return IsVectorAtCompileTime ? 1
|
return IsVectorAtCompileTime ? 1
|
||||||
@@ -231,7 +231,7 @@ template<typename Derived> class DenseBase
|
|||||||
* \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension
|
* \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension
|
||||||
* with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a
|
* with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a
|
||||||
* column-major matrix, and the number of columns for a row-major matrix. */
|
* column-major matrix, and the number of columns for a row-major matrix. */
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
Index innerSize() const
|
Index innerSize() const
|
||||||
{
|
{
|
||||||
return IsVectorAtCompileTime ? this->size()
|
return IsVectorAtCompileTime ? this->size()
|
||||||
@@ -324,9 +324,9 @@ template<typename Derived> class DenseBase
|
|||||||
typedef Transpose<Derived> TransposeReturnType;
|
typedef Transpose<Derived> TransposeReturnType;
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
TransposeReturnType transpose();
|
TransposeReturnType transpose();
|
||||||
typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
|
typedef Transpose<const Derived> ConstTransposeReturnType;
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
ConstTransposeReturnType transpose() const;
|
const ConstTransposeReturnType transpose() const;
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
void transposeInPlace();
|
void transposeInPlace();
|
||||||
|
|
||||||
@@ -449,18 +449,58 @@ template<typename Derived> class DenseBase
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC Scalar prod() const;
|
EIGEN_DEVICE_FUNC Scalar prod() const;
|
||||||
|
|
||||||
|
template<int NaNPropagation>
|
||||||
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;
|
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;
|
||||||
|
template<int NaNPropagation>
|
||||||
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
|
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
|
||||||
|
|
||||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
|
||||||
|
// By default, the fastest version with undefined NaN propagation semantics is
|
||||||
|
// used.
|
||||||
|
// TODO(rmlarsen): Replace with default template argument when we move to
|
||||||
|
// c++11 or beyond.
|
||||||
|
EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Scalar minCoeff() const {
|
||||||
|
return minCoeff<PropagateFast>();
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Scalar maxCoeff() const {
|
||||||
|
return maxCoeff<PropagateFast>();
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int NaNPropagation, typename IndexType>
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
|
typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
|
||||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
template<int NaNPropagation, typename IndexType>
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;
|
typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;
|
||||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
template<int NaNPropagation, typename IndexType>
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;
|
typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;
|
||||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
template<int NaNPropagation, typename IndexType>
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;
|
typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;
|
||||||
|
|
||||||
|
// TODO(rmlarsen): Replace these methods with a default template argument.
|
||||||
|
template<typename IndexType>
|
||||||
|
EIGEN_DEVICE_FUNC inline
|
||||||
|
typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const {
|
||||||
|
return minCoeff<PropagateFast>(row, col);
|
||||||
|
}
|
||||||
|
template<typename IndexType>
|
||||||
|
EIGEN_DEVICE_FUNC inline
|
||||||
|
typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const {
|
||||||
|
return maxCoeff<PropagateFast>(row, col);
|
||||||
|
}
|
||||||
|
template<typename IndexType>
|
||||||
|
EIGEN_DEVICE_FUNC inline
|
||||||
|
typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const {
|
||||||
|
return minCoeff<PropagateFast>(index);
|
||||||
|
}
|
||||||
|
template<typename IndexType>
|
||||||
|
EIGEN_DEVICE_FUNC inline
|
||||||
|
typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const {
|
||||||
|
return maxCoeff<PropagateFast>(index);
|
||||||
|
}
|
||||||
|
|
||||||
template<typename BinaryOp>
|
template<typename BinaryOp>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
Scalar redux(const BinaryOp& func) const;
|
Scalar redux(const BinaryOp& func) const;
|
||||||
@@ -530,16 +570,16 @@ template<typename Derived> class DenseBase
|
|||||||
static const RandomReturnType Random();
|
static const RandomReturnType Random();
|
||||||
|
|
||||||
template<typename ThenDerived,typename ElseDerived>
|
template<typename ThenDerived,typename ElseDerived>
|
||||||
const Select<Derived,ThenDerived,ElseDerived>
|
inline EIGEN_DEVICE_FUNC const Select<Derived,ThenDerived,ElseDerived>
|
||||||
select(const DenseBase<ThenDerived>& thenMatrix,
|
select(const DenseBase<ThenDerived>& thenMatrix,
|
||||||
const DenseBase<ElseDerived>& elseMatrix) const;
|
const DenseBase<ElseDerived>& elseMatrix) const;
|
||||||
|
|
||||||
template<typename ThenDerived>
|
template<typename ThenDerived>
|
||||||
inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
|
inline EIGEN_DEVICE_FUNC const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
|
||||||
select(const DenseBase<ThenDerived>& thenMatrix, const typename ThenDerived::Scalar& elseScalar) const;
|
select(const DenseBase<ThenDerived>& thenMatrix, const typename ThenDerived::Scalar& elseScalar) const;
|
||||||
|
|
||||||
template<typename ElseDerived>
|
template<typename ElseDerived>
|
||||||
inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
|
inline EIGEN_DEVICE_FUNC const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
|
||||||
select(const typename ElseDerived::Scalar& thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;
|
select(const typename ElseDerived::Scalar& thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;
|
||||||
|
|
||||||
template<int p> RealScalar lpNorm() const;
|
template<int p> RealScalar lpNorm() const;
|
||||||
@@ -636,11 +676,12 @@ template<typename Derived> class DenseBase
|
|||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
EIGEN_DEFAULT_COPY_CONSTRUCTOR(DenseBase)
|
||||||
/** Default constructor. Do nothing. */
|
/** Default constructor. Do nothing. */
|
||||||
EIGEN_DEVICE_FUNC DenseBase()
|
EIGEN_DEVICE_FUNC DenseBase()
|
||||||
{
|
{
|
||||||
/* Just checks for self-consistency of the flags.
|
/* Just checks for self-consistency of the flags.
|
||||||
* Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down
|
* Only do it when debugging Eigen, as this borders on paranoia and could slow compilation down
|
||||||
*/
|
*/
|
||||||
#ifdef EIGEN_INTERNAL_DEBUGGING
|
#ifdef EIGEN_INTERNAL_DEBUGGING
|
||||||
EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor))
|
EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor))
|
||||||
|
|||||||
@@ -495,7 +495,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
|
|||||||
*
|
*
|
||||||
* \sa outerStride(), rowStride(), colStride()
|
* \sa outerStride(), rowStride(), colStride()
|
||||||
*/
|
*/
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index innerStride() const
|
inline Index innerStride() const
|
||||||
{
|
{
|
||||||
return derived().innerStride();
|
return derived().innerStride();
|
||||||
@@ -506,14 +506,14 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
|
|||||||
*
|
*
|
||||||
* \sa innerStride(), rowStride(), colStride()
|
* \sa innerStride(), rowStride(), colStride()
|
||||||
*/
|
*/
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index outerStride() const
|
inline Index outerStride() const
|
||||||
{
|
{
|
||||||
return derived().outerStride();
|
return derived().outerStride();
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME shall we remove it ?
|
// FIXME shall we remove it ?
|
||||||
inline Index stride() const
|
EIGEN_CONSTEXPR inline Index stride() const
|
||||||
{
|
{
|
||||||
return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
|
return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
|
||||||
}
|
}
|
||||||
@@ -522,7 +522,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
|
|||||||
*
|
*
|
||||||
* \sa innerStride(), outerStride(), colStride()
|
* \sa innerStride(), outerStride(), colStride()
|
||||||
*/
|
*/
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index rowStride() const
|
inline Index rowStride() const
|
||||||
{
|
{
|
||||||
return Derived::IsRowMajor ? outerStride() : innerStride();
|
return Derived::IsRowMajor ? outerStride() : innerStride();
|
||||||
@@ -532,7 +532,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
|
|||||||
*
|
*
|
||||||
* \sa innerStride(), outerStride(), rowStride()
|
* \sa innerStride(), outerStride(), rowStride()
|
||||||
*/
|
*/
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index colStride() const
|
inline Index colStride() const
|
||||||
{
|
{
|
||||||
return Derived::IsRowMajor ? innerStride() : outerStride();
|
return Derived::IsRowMajor ? innerStride() : outerStride();
|
||||||
@@ -570,8 +570,8 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
|
|||||||
*
|
*
|
||||||
* \sa outerStride(), rowStride(), colStride()
|
* \sa outerStride(), rowStride(), colStride()
|
||||||
*/
|
*/
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index innerStride() const
|
inline Index innerStride() const EIGEN_NOEXCEPT
|
||||||
{
|
{
|
||||||
return derived().innerStride();
|
return derived().innerStride();
|
||||||
}
|
}
|
||||||
@@ -581,14 +581,14 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
|
|||||||
*
|
*
|
||||||
* \sa innerStride(), rowStride(), colStride()
|
* \sa innerStride(), rowStride(), colStride()
|
||||||
*/
|
*/
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index outerStride() const
|
inline Index outerStride() const EIGEN_NOEXCEPT
|
||||||
{
|
{
|
||||||
return derived().outerStride();
|
return derived().outerStride();
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME shall we remove it ?
|
// FIXME shall we remove it ?
|
||||||
inline Index stride() const
|
EIGEN_CONSTEXPR inline Index stride() const EIGEN_NOEXCEPT
|
||||||
{
|
{
|
||||||
return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
|
return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
|
||||||
}
|
}
|
||||||
@@ -597,8 +597,8 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
|
|||||||
*
|
*
|
||||||
* \sa innerStride(), outerStride(), colStride()
|
* \sa innerStride(), outerStride(), colStride()
|
||||||
*/
|
*/
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index rowStride() const
|
inline Index rowStride() const EIGEN_NOEXCEPT
|
||||||
{
|
{
|
||||||
return Derived::IsRowMajor ? outerStride() : innerStride();
|
return Derived::IsRowMajor ? outerStride() : innerStride();
|
||||||
}
|
}
|
||||||
@@ -607,8 +607,8 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
|
|||||||
*
|
*
|
||||||
* \sa innerStride(), outerStride(), rowStride()
|
* \sa innerStride(), outerStride(), rowStride()
|
||||||
*/
|
*/
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index colStride() const
|
inline Index colStride() const EIGEN_NOEXCEPT
|
||||||
{
|
{
|
||||||
return Derived::IsRowMajor ? innerStride() : outerStride();
|
return Derived::IsRowMajor ? innerStride() : outerStride();
|
||||||
}
|
}
|
||||||
@@ -619,7 +619,7 @@ namespace internal {
|
|||||||
template<int Alignment, typename Derived, bool JustReturnZero>
|
template<int Alignment, typename Derived, bool JustReturnZero>
|
||||||
struct first_aligned_impl
|
struct first_aligned_impl
|
||||||
{
|
{
|
||||||
static inline Index run(const Derived&)
|
static EIGEN_CONSTEXPR inline Index run(const Derived&) EIGEN_NOEXCEPT
|
||||||
{ return 0; }
|
{ return 0; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -163,6 +163,30 @@ struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
|
|||||||
EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
|
EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct plain_array_helper {
|
||||||
|
template<typename T, int Size, int MatrixOrArrayOptions, int Alignment>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
|
static void copy(const plain_array<T, Size, MatrixOrArrayOptions, Alignment>& src, const Eigen::Index size,
|
||||||
|
plain_array<T, Size, MatrixOrArrayOptions, Alignment>& dst) {
|
||||||
|
smart_copy(src.array, src.array + size, dst.array);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T, int Size, int MatrixOrArrayOptions, int Alignment>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
|
static void swap(plain_array<T, Size, MatrixOrArrayOptions, Alignment>& a, const Eigen::Index a_size,
|
||||||
|
plain_array<T, Size, MatrixOrArrayOptions, Alignment>& b, const Eigen::Index b_size) {
|
||||||
|
if (a_size < b_size) {
|
||||||
|
std::swap_ranges(b.array, b.array + a_size, a.array);
|
||||||
|
smart_move(b.array + a_size, b.array + b_size, a.array + a_size);
|
||||||
|
} else if (a_size > b_size) {
|
||||||
|
std::swap_ranges(a.array, a.array + b_size, b.array);
|
||||||
|
smart_move(a.array + b_size, a.array + a_size, b.array + b_size);
|
||||||
|
} else {
|
||||||
|
std::swap_ranges(a.array, a.array + a_size, b.array);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
/** \internal
|
/** \internal
|
||||||
@@ -190,16 +214,41 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||||
: m_data(internal::constructor_without_unaligned_array_assert()) {}
|
: m_data(internal::constructor_without_unaligned_array_assert()) {}
|
||||||
|
#if !EIGEN_HAS_CXX11 || defined(EIGEN_DENSE_STORAGE_CTOR_PLUGIN)
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
DenseStorage(const DenseStorage& other) : m_data(other.m_data) {
|
DenseStorage(const DenseStorage& other) : m_data(other.m_data) {
|
||||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
|
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) = default;
|
||||||
|
#endif
|
||||||
|
#if !EIGEN_HAS_CXX11
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
DenseStorage& operator=(const DenseStorage& other)
|
DenseStorage& operator=(const DenseStorage& other)
|
||||||
{
|
{
|
||||||
if (this != &other) m_data = other.m_data;
|
if (this != &other) m_data = other.m_data;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) = default;
|
||||||
|
#endif
|
||||||
|
#if EIGEN_HAS_RVALUE_REFERENCES
|
||||||
|
#if !EIGEN_HAS_CXX11
|
||||||
|
EIGEN_DEVICE_FUNC DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
|
||||||
|
: m_data(std::move(other.m_data))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
|
||||||
|
{
|
||||||
|
if (this != &other)
|
||||||
|
m_data = std::move(other.m_data);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
EIGEN_DEVICE_FUNC DenseStorage(DenseStorage&&) = default;
|
||||||
|
EIGEN_DEVICE_FUNC DenseStorage& operator=(DenseStorage&&) = default;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) {
|
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) {
|
||||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||||
eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols);
|
eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols);
|
||||||
@@ -210,8 +259,8 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
|
|||||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
|
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
|
||||||
numext::swap(m_data, other.m_data);
|
numext::swap(m_data, other.m_data);
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
|
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return _Rows;}
|
||||||
EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
|
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) EIGEN_NOEXCEPT {return _Cols;}
|
||||||
EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
|
EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
|
||||||
EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
|
EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
|
||||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||||
@@ -228,8 +277,8 @@ template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0
|
|||||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
|
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
|
||||||
EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {}
|
EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {}
|
||||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}
|
EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}
|
||||||
EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
|
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return _Rows;}
|
||||||
EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
|
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) EIGEN_NOEXCEPT {return _Cols;}
|
||||||
EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
|
EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
|
||||||
EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
|
EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
|
||||||
EIGEN_DEVICE_FUNC const T *data() const { return 0; }
|
EIGEN_DEVICE_FUNC const T *data() const { return 0; }
|
||||||
@@ -256,21 +305,25 @@ template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic
|
|||||||
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
|
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
|
||||||
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||||
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
|
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
|
||||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
|
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
|
||||||
|
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows), m_cols(other.m_cols)
|
||||||
|
{
|
||||||
|
internal::plain_array_helper::copy(other.m_data, m_rows * m_cols, m_data);
|
||||||
|
}
|
||||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||||
{
|
{
|
||||||
if (this != &other)
|
if (this != &other)
|
||||||
{
|
{
|
||||||
m_data = other.m_data;
|
|
||||||
m_rows = other.m_rows;
|
m_rows = other.m_rows;
|
||||||
m_cols = other.m_cols;
|
m_cols = other.m_cols;
|
||||||
|
internal::plain_array_helper::copy(other.m_data, m_rows * m_cols, m_data);
|
||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
|
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
|
||||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
|
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
|
||||||
{
|
{
|
||||||
numext::swap(m_data,other.m_data);
|
internal::plain_array_helper::swap(m_data, m_rows * m_cols, other.m_data, other.m_rows * other.m_cols);
|
||||||
numext::swap(m_rows,other.m_rows);
|
numext::swap(m_rows,other.m_rows);
|
||||||
numext::swap(m_cols,other.m_cols);
|
numext::swap(m_cols,other.m_cols);
|
||||||
}
|
}
|
||||||
@@ -291,24 +344,29 @@ template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Si
|
|||||||
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
|
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
|
||||||
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||||
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
|
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
|
||||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
|
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
|
||||||
|
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows)
|
||||||
|
{
|
||||||
|
internal::plain_array_helper::copy(other.m_data, m_rows * _Cols, m_data);
|
||||||
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||||
{
|
{
|
||||||
if (this != &other)
|
if (this != &other)
|
||||||
{
|
{
|
||||||
m_data = other.m_data;
|
|
||||||
m_rows = other.m_rows;
|
m_rows = other.m_rows;
|
||||||
|
internal::plain_array_helper::copy(other.m_data, m_rows * _Cols, m_data);
|
||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
|
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
|
||||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
|
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
|
||||||
{
|
{
|
||||||
numext::swap(m_data,other.m_data);
|
internal::plain_array_helper::swap(m_data, m_rows * _Cols, other.m_data, other.m_rows * _Cols);
|
||||||
numext::swap(m_rows, other.m_rows);
|
numext::swap(m_rows, other.m_rows);
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
|
EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT {return m_rows;}
|
||||||
EIGEN_DEVICE_FUNC Index cols(void) const {return _Cols;}
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols(void) const EIGEN_NOEXCEPT {return _Cols;}
|
||||||
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
|
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
|
||||||
EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; }
|
EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; }
|
||||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||||
@@ -324,23 +382,27 @@ template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Si
|
|||||||
EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
|
EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
|
||||||
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||||
: m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
|
: m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
|
||||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
|
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
|
||||||
|
: m_data(internal::constructor_without_unaligned_array_assert()), m_cols(other.m_cols)
|
||||||
|
{
|
||||||
|
internal::plain_array_helper::copy(other.m_data, _Rows * m_cols, m_data);
|
||||||
|
}
|
||||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||||
{
|
{
|
||||||
if (this != &other)
|
if (this != &other)
|
||||||
{
|
{
|
||||||
m_data = other.m_data;
|
|
||||||
m_cols = other.m_cols;
|
m_cols = other.m_cols;
|
||||||
|
internal::plain_array_helper::copy(other.m_data, _Rows * m_cols, m_data);
|
||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {}
|
EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {}
|
||||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
|
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
|
||||||
numext::swap(m_data,other.m_data);
|
internal::plain_array_helper::swap(m_data, _Rows * m_cols, other.m_data, _Rows * other.m_cols);
|
||||||
numext::swap(m_cols, other.m_cols);
|
numext::swap(m_cols, other.m_cols);
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC Index rows(void) const {return _Rows;}
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows(void) const EIGEN_NOEXCEPT {return _Rows;}
|
||||||
EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
|
EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT {return m_cols;}
|
||||||
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
|
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
|
||||||
EIGEN_DEVICE_FUNC void resize(Index, Index, Index cols) { m_cols = cols; }
|
EIGEN_DEVICE_FUNC void resize(Index, Index, Index cols) { m_cols = cols; }
|
||||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||||
@@ -407,8 +469,8 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
|||||||
numext::swap(m_rows,other.m_rows);
|
numext::swap(m_rows,other.m_rows);
|
||||||
numext::swap(m_cols,other.m_cols);
|
numext::swap(m_cols,other.m_cols);
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
|
EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT {return m_rows;}
|
||||||
EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
|
EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT {return m_cols;}
|
||||||
void conservativeResize(Index size, Index rows, Index cols)
|
void conservativeResize(Index size, Index rows, Index cols)
|
||||||
{
|
{
|
||||||
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
|
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
|
||||||
@@ -485,8 +547,8 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
|||||||
numext::swap(m_data,other.m_data);
|
numext::swap(m_data,other.m_data);
|
||||||
numext::swap(m_cols,other.m_cols);
|
numext::swap(m_cols,other.m_cols);
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
|
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return _Rows;}
|
||||||
EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
|
EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT {return m_cols;}
|
||||||
EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols)
|
EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols)
|
||||||
{
|
{
|
||||||
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
|
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
|
||||||
@@ -561,8 +623,8 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
|||||||
numext::swap(m_data,other.m_data);
|
numext::swap(m_data,other.m_data);
|
||||||
numext::swap(m_rows,other.m_rows);
|
numext::swap(m_rows,other.m_rows);
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
|
EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT {return m_rows;}
|
||||||
EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
|
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) {return _Cols;}
|
||||||
void conservativeResize(Index size, Index rows, Index)
|
void conservativeResize(Index size, Index rows, Index)
|
||||||
{
|
{
|
||||||
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
|
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
|
||||||
|
|||||||
@@ -84,20 +84,16 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
|
|||||||
: numext::mini<Index>(m_matrix.rows(),m_matrix.cols()-m_index.value());
|
: numext::mini<Index>(m_matrix.rows(),m_matrix.cols()-m_index.value());
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index cols() const { return 1; }
|
inline Index cols() const EIGEN_NOEXCEPT { return 1; }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index innerStride() const
|
inline Index innerStride() const EIGEN_NOEXCEPT {
|
||||||
{
|
|
||||||
return m_matrix.outerStride() + 1;
|
return m_matrix.outerStride() + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index outerStride() const
|
inline Index outerStride() const EIGEN_NOEXCEPT { return 0; }
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef typename internal::conditional<
|
typedef typename internal::conditional<
|
||||||
internal::is_lvalue<MatrixType>::value,
|
internal::is_lvalue<MatrixType>::value,
|
||||||
@@ -167,12 +163,12 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
// some compilers may fail to optimize std::max etc in case of compile-time constants...
|
// some compilers may fail to optimize std::max etc in case of compile-time constants...
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); }
|
Index absDiagIndex() const EIGEN_NOEXCEPT { return m_index.value()>0 ? m_index.value() : -m_index.value(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
|
Index rowOffset() const EIGEN_NOEXCEPT { return m_index.value()>0 ? 0 : -m_index.value(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
|
Index colOffset() const EIGEN_NOEXCEPT { return m_index.value()>0 ? m_index.value() : 0; }
|
||||||
// trigger a compile-time error if someone try to call packet
|
// trigger a compile-time error if someone try to call packet
|
||||||
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
|
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
|
||||||
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index,Index) const;
|
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index,Index) const;
|
||||||
@@ -195,7 +191,8 @@ MatrixBase<Derived>::diagonal()
|
|||||||
|
|
||||||
/** This is the const version of diagonal(). */
|
/** This is the const version of diagonal(). */
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::ConstDiagonalReturnType
|
EIGEN_DEVICE_FUNC inline
|
||||||
|
const typename MatrixBase<Derived>::ConstDiagonalReturnType
|
||||||
MatrixBase<Derived>::diagonal() const
|
MatrixBase<Derived>::diagonal() const
|
||||||
{
|
{
|
||||||
return ConstDiagonalReturnType(derived());
|
return ConstDiagonalReturnType(derived());
|
||||||
@@ -213,18 +210,18 @@ MatrixBase<Derived>::diagonal() const
|
|||||||
*
|
*
|
||||||
* \sa MatrixBase::diagonal(), class Diagonal */
|
* \sa MatrixBase::diagonal(), class Diagonal */
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType
|
EIGEN_DEVICE_FUNC inline Diagonal<Derived, DynamicIndex>
|
||||||
MatrixBase<Derived>::diagonal(Index index)
|
MatrixBase<Derived>::diagonal(Index index)
|
||||||
{
|
{
|
||||||
return DiagonalDynamicIndexReturnType(derived(), index);
|
return Diagonal<Derived, DynamicIndex>(derived(), index);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** This is the const version of diagonal(Index). */
|
/** This is the const version of diagonal(Index). */
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType
|
EIGEN_DEVICE_FUNC inline const Diagonal<const Derived, DynamicIndex>
|
||||||
MatrixBase<Derived>::diagonal(Index index) const
|
MatrixBase<Derived>::diagonal(Index index) const
|
||||||
{
|
{
|
||||||
return ConstDiagonalDynamicIndexReturnType(derived(), index);
|
return Diagonal<const Derived, DynamicIndex>(derived(), index);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
|
/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
|
||||||
@@ -241,20 +238,20 @@ MatrixBase<Derived>::diagonal(Index index) const
|
|||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
template<int Index_>
|
template<int Index_>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index_>::Type
|
inline Diagonal<Derived, Index_>
|
||||||
MatrixBase<Derived>::diagonal()
|
MatrixBase<Derived>::diagonal()
|
||||||
{
|
{
|
||||||
return typename DiagonalIndexReturnType<Index_>::Type(derived());
|
return Diagonal<Derived, Index_>(derived());
|
||||||
}
|
}
|
||||||
|
|
||||||
/** This is the const version of diagonal<int>(). */
|
/** This is the const version of diagonal<int>(). */
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
template<int Index_>
|
template<int Index_>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index_>::Type
|
inline const Diagonal<const Derived, Index_>
|
||||||
MatrixBase<Derived>::diagonal() const
|
MatrixBase<Derived>::diagonal() const
|
||||||
{
|
{
|
||||||
return typename ConstDiagonalIndexReturnType<Index_>::Type(derived());
|
return Diagonal<const Derived, Index_>(derived());
|
||||||
}
|
}
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|||||||
@@ -18,14 +18,9 @@ namespace internal {
|
|||||||
// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE
|
// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE
|
||||||
// looking at the static assertions. Thus this is a trick to get better compile errors.
|
// looking at the static assertions. Thus this is a trick to get better compile errors.
|
||||||
template<typename T, typename U,
|
template<typename T, typename U,
|
||||||
// the NeedToTranspose condition here is taken straight from Assign.h
|
bool NeedToTranspose = T::IsVectorAtCompileTime && U::IsVectorAtCompileTime &&
|
||||||
bool NeedToTranspose = T::IsVectorAtCompileTime
|
((int(T::RowsAtCompileTime) == 1 && int(U::ColsAtCompileTime) == 1) ||
|
||||||
&& U::IsVectorAtCompileTime
|
(int(T::ColsAtCompileTime) == 1 && int(U::RowsAtCompileTime) == 1))>
|
||||||
&& ((int(T::RowsAtCompileTime) == 1 && int(U::ColsAtCompileTime) == 1)
|
|
||||||
| // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
|
|
||||||
// revert to || as soon as not needed anymore.
|
|
||||||
(int(T::ColsAtCompileTime) == 1 && int(U::RowsAtCompileTime) == 1))
|
|
||||||
>
|
|
||||||
struct dot_nocheck
|
struct dot_nocheck
|
||||||
{
|
{
|
||||||
typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
|
typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
|
||||||
@@ -86,7 +81,7 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
|
|||||||
|
|
||||||
//---------- implementation of L2 norm and related functions ----------
|
//---------- implementation of L2 norm and related functions ----------
|
||||||
|
|
||||||
/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm.
|
/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the squared Frobenius norm.
|
||||||
* In both cases, it consists in the sum of the square of all the matrix entries.
|
* In both cases, it consists in the sum of the square of all the matrix entries.
|
||||||
* For vectors, this is also equals to the dot product of \c *this with itself.
|
* For vectors, this is also equals to the dot product of \c *this with itself.
|
||||||
*
|
*
|
||||||
@@ -207,7 +202,7 @@ struct lpNorm_selector
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
static inline RealScalar run(const MatrixBase<Derived>& m)
|
static inline RealScalar run(const MatrixBase<Derived>& m)
|
||||||
{
|
{
|
||||||
EIGEN_USING_STD_MATH(pow)
|
EIGEN_USING_STD(pow)
|
||||||
return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
|
return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -56,15 +56,15 @@ template<typename Derived> struct EigenBase
|
|||||||
{ return *static_cast<const Derived*>(this); }
|
{ return *static_cast<const Derived*>(this); }
|
||||||
|
|
||||||
/** \returns the number of rows. \sa cols(), RowsAtCompileTime */
|
/** \returns the number of rows. \sa cols(), RowsAtCompileTime */
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index rows() const { return derived().rows(); }
|
inline Index rows() const EIGEN_NOEXCEPT { return derived().rows(); }
|
||||||
/** \returns the number of columns. \sa rows(), ColsAtCompileTime*/
|
/** \returns the number of columns. \sa rows(), ColsAtCompileTime*/
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index cols() const { return derived().cols(); }
|
inline Index cols() const EIGEN_NOEXCEPT { return derived().cols(); }
|
||||||
/** \returns the number of coefficients, which is rows()*cols().
|
/** \returns the number of coefficients, which is rows()*cols().
|
||||||
* \sa rows(), cols(), SizeAtCompileTime. */
|
* \sa rows(), cols(), SizeAtCompileTime. */
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index size() const { return rows() * cols(); }
|
inline Index size() const EIGEN_NOEXCEPT { return rows() * cols(); }
|
||||||
|
|
||||||
/** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */
|
/** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */
|
||||||
template<typename Dest>
|
template<typename Dest>
|
||||||
|
|||||||
@@ -41,10 +41,14 @@ template<typename ExpressionType> class ForceAlignedAccess
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}
|
EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); }
|
inline Index rows() const EIGEN_NOEXCEPT { return m_expression.rows(); }
|
||||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); }
|
inline Index cols() const EIGEN_NOEXCEPT { return m_expression.cols(); }
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
|
inline Index outerStride() const EIGEN_NOEXCEPT { return m_expression.outerStride(); }
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
|
inline Index innerStride() const EIGEN_NOEXCEPT { return m_expression.innerStride(); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const
|
EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -228,8 +228,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
|
|||||||
ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
|
ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
|
||||||
ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
|
ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
|
||||||
|
|
||||||
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
|
ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs);
|
||||||
* RhsBlasTraits::extractScalarFactor(rhs);
|
|
||||||
|
|
||||||
// make sure Dest is a compile-time vector type (bug 1166)
|
// make sure Dest is a compile-time vector type (bug 1166)
|
||||||
typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;
|
typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;
|
||||||
@@ -320,8 +319,7 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
|
|||||||
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
|
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
|
||||||
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
|
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
|
||||||
|
|
||||||
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
|
ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs);
|
||||||
* RhsBlasTraits::extractScalarFactor(rhs);
|
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
|
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -81,14 +81,16 @@ namespace Eigen
|
|||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1,scalar_expm1_op,exponential of a value minus 1,\sa ArrayBase::expm1)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1,scalar_expm1_op,exponential of a value minus 1,\sa ArrayBase::expm1)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log10)
|
||||||
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log2,scalar_log2_op,base 2 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log2)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op,absolute value,\sa ArrayBase::abs DOXCOMMA MatrixBase::cwiseAbs)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op,absolute value,\sa ArrayBase::abs DOXCOMMA MatrixBase::cwiseAbs)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op,squared absolute value,\sa ArrayBase::abs2 DOXCOMMA MatrixBase::cwiseAbs2)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op,squared absolute value,\sa ArrayBase::abs2 DOXCOMMA MatrixBase::cwiseAbs2)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op,complex argument,\sa ArrayBase::arg)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op,complex argument,\sa ArrayBase::arg DOXCOMMA MatrixBase::cwiseArg)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op,square root,\sa ArrayBase::sqrt DOXCOMMA MatrixBase::cwiseSqrt)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op,square root,\sa ArrayBase::sqrt DOXCOMMA MatrixBase::cwiseSqrt)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rsqrt,scalar_rsqrt_op,reciprocal square root,\sa ArrayBase::rsqrt)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rsqrt,scalar_rsqrt_op,reciprocal square root,\sa ArrayBase::rsqrt)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op,square (power 2),\sa Eigen::abs2 DOXCOMMA Eigen::pow DOXCOMMA ArrayBase::square)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op,square (power 2),\sa Eigen::abs2 DOXCOMMA Eigen::pow DOXCOMMA ArrayBase::square)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op,cube (power 3),\sa Eigen::pow DOXCOMMA ArrayBase::cube)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op,cube (power 3),\sa Eigen::pow DOXCOMMA ArrayBase::cube)
|
||||||
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rint,scalar_rint_op,nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op,nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op,nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op,nearest integer not greater than the giben value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op,nearest integer not greater than the giben value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op,nearest integer not less than the giben value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op,nearest integer not less than the giben value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil)
|
||||||
|
|||||||
@@ -130,6 +130,9 @@ struct significant_decimals_impl
|
|||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt)
|
std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt)
|
||||||
{
|
{
|
||||||
|
using internal::is_same;
|
||||||
|
using internal::conditional;
|
||||||
|
|
||||||
if(_m.size() == 0)
|
if(_m.size() == 0)
|
||||||
{
|
{
|
||||||
s << fmt.matPrefix << fmt.matSuffix;
|
s << fmt.matPrefix << fmt.matSuffix;
|
||||||
@@ -138,6 +141,22 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
|
|||||||
|
|
||||||
typename Derived::Nested m = _m;
|
typename Derived::Nested m = _m;
|
||||||
typedef typename Derived::Scalar Scalar;
|
typedef typename Derived::Scalar Scalar;
|
||||||
|
typedef typename
|
||||||
|
conditional<
|
||||||
|
is_same<Scalar, char>::value ||
|
||||||
|
is_same<Scalar, unsigned char>::value ||
|
||||||
|
is_same<Scalar, numext::int8_t>::value ||
|
||||||
|
is_same<Scalar, numext::uint8_t>::value,
|
||||||
|
int,
|
||||||
|
typename conditional<
|
||||||
|
is_same<Scalar, std::complex<char> >::value ||
|
||||||
|
is_same<Scalar, std::complex<unsigned char> >::value ||
|
||||||
|
is_same<Scalar, std::complex<numext::int8_t> >::value ||
|
||||||
|
is_same<Scalar, std::complex<numext::uint8_t> >::value,
|
||||||
|
std::complex<int>,
|
||||||
|
const Scalar&
|
||||||
|
>::type
|
||||||
|
>::type PrintType;
|
||||||
|
|
||||||
Index width = 0;
|
Index width = 0;
|
||||||
|
|
||||||
@@ -174,7 +193,7 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
|
|||||||
{
|
{
|
||||||
std::stringstream sstr;
|
std::stringstream sstr;
|
||||||
sstr.copyfmt(s);
|
sstr.copyfmt(s);
|
||||||
sstr << m.coeff(i,j);
|
sstr << static_cast<PrintType>(m.coeff(i,j));
|
||||||
width = std::max<Index>(width, Index(sstr.str().length()));
|
width = std::max<Index>(width, Index(sstr.str().length()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -190,7 +209,7 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
|
|||||||
s.fill(fmt.fill);
|
s.fill(fmt.fill);
|
||||||
s.width(width);
|
s.width(width);
|
||||||
}
|
}
|
||||||
s << m.coeff(i, 0);
|
s << static_cast<PrintType>(m.coeff(i, 0));
|
||||||
for(Index j = 1; j < m.cols(); ++j)
|
for(Index j = 1; j < m.cols(); ++j)
|
||||||
{
|
{
|
||||||
s << fmt.coeffSeparator;
|
s << fmt.coeffSeparator;
|
||||||
@@ -198,7 +217,7 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
|
|||||||
s.fill(fmt.fill);
|
s.fill(fmt.fill);
|
||||||
s.width(width);
|
s.width(width);
|
||||||
}
|
}
|
||||||
s << m.coeff(i, j);
|
s << static_cast<PrintType>(m.coeff(i, j));
|
||||||
}
|
}
|
||||||
s << fmt.rowSuffix;
|
s << fmt.rowSuffix;
|
||||||
if( i < m.rows() - 1)
|
if( i < m.rows() - 1)
|
||||||
|
|||||||
@@ -54,7 +54,8 @@ struct traits<IndexedView<XprType, RowIndices, ColIndices> >
|
|||||||
DirectAccessMask = (int(InnerIncr)!=UndefinedIncr && int(OuterIncr)!=UndefinedIncr && InnerIncr>=0 && OuterIncr>=0) ? DirectAccessBit : 0,
|
DirectAccessMask = (int(InnerIncr)!=UndefinedIncr && int(OuterIncr)!=UndefinedIncr && InnerIncr>=0 && OuterIncr>=0) ? DirectAccessBit : 0,
|
||||||
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
|
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
|
||||||
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
|
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
|
||||||
Flags = (traits<XprType>::Flags & (HereditaryBits | DirectAccessMask)) | FlagsLvalueBit | FlagsRowMajorBit
|
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
|
||||||
|
Flags = (traits<XprType>::Flags & (HereditaryBits | DirectAccessMask )) | FlagsLvalueBit | FlagsRowMajorBit | FlagsLinearAccessBit
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef Block<XprType,RowsAtCompileTime,ColsAtCompileTime,IsInnerPannel> BlockType;
|
typedef Block<XprType,RowsAtCompileTime,ColsAtCompileTime,IsInnerPannel> BlockType;
|
||||||
@@ -121,10 +122,10 @@ public:
|
|||||||
{}
|
{}
|
||||||
|
|
||||||
/** \returns number of rows */
|
/** \returns number of rows */
|
||||||
Index rows() const { return internal::size(m_rowIndices); }
|
Index rows() const { return internal::index_list_size(m_rowIndices); }
|
||||||
|
|
||||||
/** \returns number of columns */
|
/** \returns number of columns */
|
||||||
Index cols() const { return internal::size(m_colIndices); }
|
Index cols() const { return internal::index_list_size(m_colIndices); }
|
||||||
|
|
||||||
/** \returns the nested expression */
|
/** \returns the nested expression */
|
||||||
const typename internal::remove_all<XprType>::type&
|
const typename internal::remove_all<XprType>::type&
|
||||||
@@ -168,7 +169,11 @@ struct unary_evaluator<IndexedView<ArgType, RowIndices, ColIndices>, IndexBased>
|
|||||||
enum {
|
enum {
|
||||||
CoeffReadCost = evaluator<ArgType>::CoeffReadCost /* TODO + cost of row/col index */,
|
CoeffReadCost = evaluator<ArgType>::CoeffReadCost /* TODO + cost of row/col index */,
|
||||||
|
|
||||||
Flags = (evaluator<ArgType>::Flags & (HereditaryBits /*| LinearAccessBit | DirectAccessBit*/)),
|
FlagsLinearAccessBit = (traits<XprType>::RowsAtCompileTime == 1 || traits<XprType>::ColsAtCompileTime == 1) ? LinearAccessBit : 0,
|
||||||
|
|
||||||
|
FlagsRowMajorBit = traits<XprType>::FlagsRowMajorBit,
|
||||||
|
|
||||||
|
Flags = (evaluator<ArgType>::Flags & (HereditaryBits & ~RowMajorBit /*| LinearAccessBit | DirectAccessBit*/)) | FlagsLinearAccessBit | FlagsRowMajorBit,
|
||||||
|
|
||||||
Alignment = 0
|
Alignment = 0
|
||||||
};
|
};
|
||||||
@@ -184,15 +189,50 @@ struct unary_evaluator<IndexedView<ArgType, RowIndices, ColIndices>, IndexBased>
|
|||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
CoeffReturnType coeff(Index row, Index col) const
|
CoeffReturnType coeff(Index row, Index col) const
|
||||||
{
|
{
|
||||||
|
eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows()
|
||||||
|
&& m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols());
|
||||||
return m_argImpl.coeff(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
|
return m_argImpl.coeff(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
Scalar& coeffRef(Index row, Index col)
|
Scalar& coeffRef(Index row, Index col)
|
||||||
{
|
{
|
||||||
|
eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows()
|
||||||
|
&& m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols());
|
||||||
return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
|
return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
|
Scalar& coeffRef(Index index)
|
||||||
|
{
|
||||||
|
EIGEN_STATIC_ASSERT_LVALUE(XprType)
|
||||||
|
Index row = XprType::RowsAtCompileTime == 1 ? 0 : index;
|
||||||
|
Index col = XprType::RowsAtCompileTime == 1 ? index : 0;
|
||||||
|
eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows()
|
||||||
|
&& m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols());
|
||||||
|
return m_argImpl.coeffRef( m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
|
const Scalar& coeffRef(Index index) const
|
||||||
|
{
|
||||||
|
Index row = XprType::RowsAtCompileTime == 1 ? 0 : index;
|
||||||
|
Index col = XprType::RowsAtCompileTime == 1 ? index : 0;
|
||||||
|
eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows()
|
||||||
|
&& m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols());
|
||||||
|
return m_argImpl.coeffRef( m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
|
const CoeffReturnType coeff(Index index) const
|
||||||
|
{
|
||||||
|
Index row = XprType::RowsAtCompileTime == 1 ? 0 : index;
|
||||||
|
Index col = XprType::RowsAtCompileTime == 1 ? index : 0;
|
||||||
|
eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows()
|
||||||
|
&& m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols());
|
||||||
|
return m_argImpl.coeff( m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
evaluator<ArgType> m_argImpl;
|
evaluator<ArgType> m_argImpl;
|
||||||
|
|||||||
@@ -54,8 +54,8 @@ public:
|
|||||||
: m_xpr(xpr)
|
: m_xpr(xpr)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.cols(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_xpr.cols(); }
|
||||||
EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.rows(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_xpr.rows(); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; }
|
EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; }
|
||||||
|
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ private:
|
|||||||
* \brief A matrix or vector expression mapping an existing array of data.
|
* \brief A matrix or vector expression mapping an existing array of data.
|
||||||
*
|
*
|
||||||
* \tparam PlainObjectType the equivalent matrix type of the mapped data
|
* \tparam PlainObjectType the equivalent matrix type of the mapped data
|
||||||
* \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
|
* \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
|
||||||
* The default is \c #Unaligned.
|
* The default is \c #Unaligned.
|
||||||
* \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout
|
* \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout
|
||||||
* of an ordinary, contiguous array. This can be overridden by specifying strides.
|
* of an ordinary, contiguous array. This can be overridden by specifying strides.
|
||||||
@@ -104,13 +104,13 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
|
inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index innerStride() const
|
inline Index innerStride() const
|
||||||
{
|
{
|
||||||
return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
|
return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index outerStride() const
|
inline Index outerStride() const
|
||||||
{
|
{
|
||||||
return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
|
return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
|
||||||
|
|||||||
@@ -87,9 +87,11 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
|||||||
typedef typename Base::CoeffReturnType CoeffReturnType;
|
typedef typename Base::CoeffReturnType CoeffReturnType;
|
||||||
|
|
||||||
/** \copydoc DenseBase::rows() */
|
/** \copydoc DenseBase::rows() */
|
||||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
|
inline Index rows() const EIGEN_NOEXCEPT { return m_rows.value(); }
|
||||||
/** \copydoc DenseBase::cols() */
|
/** \copydoc DenseBase::cols() */
|
||||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
|
inline Index cols() const EIGEN_NOEXCEPT { return m_cols.value(); }
|
||||||
|
|
||||||
/** Returns a pointer to the first coefficient of the matrix or vector.
|
/** Returns a pointer to the first coefficient of the matrix or vector.
|
||||||
*
|
*
|
||||||
@@ -182,6 +184,8 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
EIGEN_DEFAULT_COPY_CONSTRUCTOR(MapBase)
|
||||||
|
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MapBase)
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
@@ -294,6 +298,9 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
|
|||||||
// In theory we could simply refer to Base:Base::operator=, but MSVC does not like Base::Base,
|
// In theory we could simply refer to Base:Base::operator=, but MSVC does not like Base::Base,
|
||||||
// see bugs 821 and 920.
|
// see bugs 821 and 920.
|
||||||
using ReadOnlyMapBase::Base::operator=;
|
using ReadOnlyMapBase::Base::operator=;
|
||||||
|
protected:
|
||||||
|
EIGEN_DEFAULT_COPY_CONSTRUCTOR(MapBase)
|
||||||
|
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MapBase)
|
||||||
};
|
};
|
||||||
|
|
||||||
#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS
|
#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
// for linear algebra.
|
// for linear algebra.
|
||||||
//
|
//
|
||||||
// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||||
|
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||||
//
|
//
|
||||||
// This Source Code Form is subject to the terms of the Mozilla
|
// This Source Code Form is subject to the terms of the Mozilla
|
||||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||||
@@ -10,9 +11,11 @@
|
|||||||
#ifndef EIGEN_MATHFUNCTIONS_H
|
#ifndef EIGEN_MATHFUNCTIONS_H
|
||||||
#define EIGEN_MATHFUNCTIONS_H
|
#define EIGEN_MATHFUNCTIONS_H
|
||||||
|
|
||||||
// source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html
|
|
||||||
// TODO this should better be moved to NumTraits
|
// TODO this should better be moved to NumTraits
|
||||||
|
// Source: WolframAlpha
|
||||||
#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L
|
#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L
|
||||||
|
#define EIGEN_LOG2E 1.442695040888963407359924681001892137426645954152985934135449406931109219L
|
||||||
|
#define EIGEN_LN2 0.693147180559945309417232121458176568075500134360255254120680009493393621L
|
||||||
|
|
||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
|
|
||||||
@@ -212,12 +215,12 @@ struct imag_ref_default_impl
|
|||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct imag_ref_default_impl<Scalar, false>
|
struct imag_ref_default_impl<Scalar, false>
|
||||||
{
|
{
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static inline Scalar run(Scalar&)
|
static inline Scalar run(Scalar&)
|
||||||
{
|
{
|
||||||
return Scalar(0);
|
return Scalar(0);
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static inline const Scalar run(const Scalar&)
|
static inline const Scalar run(const Scalar&)
|
||||||
{
|
{
|
||||||
return Scalar(0);
|
return Scalar(0);
|
||||||
@@ -258,19 +261,8 @@ struct conj_default_impl<Scalar,true>
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Scalar> struct conj_impl : conj_default_impl<Scalar> {};
|
template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
|
||||||
|
struct conj_impl : conj_default_impl<Scalar, IsComplex> {};
|
||||||
#if defined(EIGEN_GPU_COMPILE_PHASE)
|
|
||||||
template<typename T>
|
|
||||||
struct conj_impl<std::complex<T> >
|
|
||||||
{
|
|
||||||
EIGEN_DEVICE_FUNC
|
|
||||||
static inline std::complex<T> run(const std::complex<T>& x)
|
|
||||||
{
|
|
||||||
return std::complex<T>(x.real(), -x.imag());
|
|
||||||
}
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct conj_retval
|
struct conj_retval
|
||||||
@@ -321,6 +313,65 @@ struct abs2_retval
|
|||||||
typedef typename NumTraits<Scalar>::Real type;
|
typedef typename NumTraits<Scalar>::Real type;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
* Implementation of sqrt/rsqrt *
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
template<typename Scalar>
|
||||||
|
struct sqrt_impl
|
||||||
|
{
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
static EIGEN_ALWAYS_INLINE Scalar run(const Scalar& x)
|
||||||
|
{
|
||||||
|
EIGEN_USING_STD(sqrt);
|
||||||
|
return sqrt(x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Complex sqrt defined in MathFunctionsImpl.h.
|
||||||
|
template<typename T> EIGEN_DEVICE_FUNC std::complex<T> complex_sqrt(const std::complex<T>& a_x);
|
||||||
|
|
||||||
|
// Custom implementation is faster than `std::sqrt`, works on
|
||||||
|
// GPU, and correctly handles special cases (unlike MSVC).
|
||||||
|
template<typename T>
|
||||||
|
struct sqrt_impl<std::complex<T> >
|
||||||
|
{
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& x)
|
||||||
|
{
|
||||||
|
return complex_sqrt<T>(x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Scalar>
|
||||||
|
struct sqrt_retval
|
||||||
|
{
|
||||||
|
typedef Scalar type;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Default implementation relies on numext::sqrt, at bottom of file.
|
||||||
|
template<typename T>
|
||||||
|
struct rsqrt_impl;
|
||||||
|
|
||||||
|
// Complex rsqrt defined in MathFunctionsImpl.h.
|
||||||
|
template<typename T> EIGEN_DEVICE_FUNC std::complex<T> complex_rsqrt(const std::complex<T>& a_x);
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
struct rsqrt_impl<std::complex<T> >
|
||||||
|
{
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& x)
|
||||||
|
{
|
||||||
|
return complex_rsqrt<T>(x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Scalar>
|
||||||
|
struct rsqrt_retval
|
||||||
|
{
|
||||||
|
typedef Scalar type;
|
||||||
|
};
|
||||||
|
|
||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
* Implementation of norm1 *
|
* Implementation of norm1 *
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
@@ -335,7 +386,7 @@ struct norm1_default_impl<Scalar,true>
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
static inline RealScalar run(const Scalar& x)
|
static inline RealScalar run(const Scalar& x)
|
||||||
{
|
{
|
||||||
EIGEN_USING_STD_MATH(abs);
|
EIGEN_USING_STD(abs);
|
||||||
return abs(x.real()) + abs(x.imag());
|
return abs(x.real()) + abs(x.imag());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -346,7 +397,7 @@ struct norm1_default_impl<Scalar, false>
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
static inline Scalar run(const Scalar& x)
|
static inline Scalar run(const Scalar& x)
|
||||||
{
|
{
|
||||||
EIGEN_USING_STD_MATH(abs);
|
EIGEN_USING_STD(abs);
|
||||||
return abs(x);
|
return abs(x);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -376,7 +427,7 @@ struct hypot_retval
|
|||||||
* Implementation of cast *
|
* Implementation of cast *
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
|
||||||
template<typename OldType, typename NewType>
|
template<typename OldType, typename NewType, typename EnableIf = void>
|
||||||
struct cast_impl
|
struct cast_impl
|
||||||
{
|
{
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
@@ -386,6 +437,22 @@ struct cast_impl
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Casting from S -> Complex<T> leads to an implicit conversion from S to T,
|
||||||
|
// generating warnings on clang. Here we explicitly cast the real component.
|
||||||
|
template<typename OldType, typename NewType>
|
||||||
|
struct cast_impl<OldType, NewType,
|
||||||
|
typename internal::enable_if<
|
||||||
|
!NumTraits<OldType>::IsComplex && NumTraits<NewType>::IsComplex
|
||||||
|
>::type>
|
||||||
|
{
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
static inline NewType run(const OldType& x)
|
||||||
|
{
|
||||||
|
typedef typename NumTraits<NewType>::Real NewReal;
|
||||||
|
return static_cast<NewType>(static_cast<NewReal>(x));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// here, for once, we're plainly returning NewType: we don't want cast to do weird things.
|
// here, for once, we're plainly returning NewType: we don't want cast to do weird things.
|
||||||
|
|
||||||
template<typename OldType, typename NewType>
|
template<typename OldType, typename NewType>
|
||||||
@@ -399,29 +466,59 @@ inline NewType cast(const OldType& x)
|
|||||||
* Implementation of round *
|
* Implementation of round *
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
|
||||||
#if EIGEN_HAS_CXX11_MATH
|
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct round_impl {
|
struct round_impl
|
||||||
|
{
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
static inline Scalar run(const Scalar& x)
|
static inline Scalar run(const Scalar& x)
|
||||||
{
|
{
|
||||||
EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
|
EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
|
||||||
EIGEN_USING_STD_MATH(round);
|
#if EIGEN_HAS_CXX11_MATH
|
||||||
return round(x);
|
EIGEN_USING_STD(round);
|
||||||
|
#endif
|
||||||
|
return Scalar(round(x));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#if !EIGEN_HAS_CXX11_MATH
|
||||||
|
#if EIGEN_HAS_C99_MATH
|
||||||
|
// Use ::roundf for float.
|
||||||
|
template<>
|
||||||
|
struct round_impl<float> {
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
static inline float run(const float& x)
|
||||||
|
{
|
||||||
|
return ::roundf(x);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
#else
|
#else
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct round_impl
|
struct round_using_floor_ceil_impl
|
||||||
{
|
{
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
static inline Scalar run(const Scalar& x)
|
static inline Scalar run(const Scalar& x)
|
||||||
{
|
{
|
||||||
EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
|
EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
|
||||||
EIGEN_USING_STD_MATH(floor);
|
// Without C99 round/roundf, resort to floor/ceil.
|
||||||
EIGEN_USING_STD_MATH(ceil);
|
EIGEN_USING_STD(floor);
|
||||||
return (x > Scalar(0)) ? floor(x + Scalar(0.5)) : ceil(x - Scalar(0.5));
|
EIGEN_USING_STD(ceil);
|
||||||
|
// If not enough precision to resolve a decimal at all, return the input.
|
||||||
|
// Otherwise, adding 0.5 can trigger an increment by 1.
|
||||||
|
const Scalar limit = Scalar(1ull << (NumTraits<Scalar>::digits() - 1));
|
||||||
|
if (x >= limit || x <= -limit) {
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
return (x > Scalar(0)) ? Scalar(floor(x + Scalar(0.5))) : Scalar(ceil(x - Scalar(0.5)));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
#endif
|
|
||||||
|
template<>
|
||||||
|
struct round_impl<float> : round_using_floor_ceil_impl<float> {};
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct round_impl<double> : round_using_floor_ceil_impl<double> {};
|
||||||
|
#endif // EIGEN_HAS_C99_MATH
|
||||||
|
#endif // !EIGEN_HAS_CXX11_MATH
|
||||||
|
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct round_retval
|
struct round_retval
|
||||||
@@ -429,22 +526,82 @@ struct round_retval
|
|||||||
typedef Scalar type;
|
typedef Scalar type;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
* Implementation of rint *
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
template<typename Scalar>
|
||||||
|
struct rint_impl {
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
static inline Scalar run(const Scalar& x)
|
||||||
|
{
|
||||||
|
EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
|
||||||
|
#if EIGEN_HAS_CXX11_MATH
|
||||||
|
EIGEN_USING_STD(rint);
|
||||||
|
#endif
|
||||||
|
return rint(x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#if !EIGEN_HAS_CXX11_MATH
|
||||||
|
template<>
|
||||||
|
struct rint_impl<double> {
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
static inline double run(const double& x)
|
||||||
|
{
|
||||||
|
return ::rint(x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template<>
|
||||||
|
struct rint_impl<float> {
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
static inline float run(const float& x)
|
||||||
|
{
|
||||||
|
return ::rintf(x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<typename Scalar>
|
||||||
|
struct rint_retval
|
||||||
|
{
|
||||||
|
typedef Scalar type;
|
||||||
|
};
|
||||||
|
|
||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
* Implementation of arg *
|
* Implementation of arg *
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
|
||||||
#if EIGEN_HAS_CXX11_MATH
|
// Visual Studio 2017 has a bug where arg(float) returns 0 for negative inputs.
|
||||||
|
// This seems to be fixed in VS 2019.
|
||||||
|
#if EIGEN_HAS_CXX11_MATH && (!EIGEN_COMP_MSVC || EIGEN_COMP_MSVC >= 1920)
|
||||||
|
// std::arg is only defined for types of std::complex, or integer types or float/double/long double
|
||||||
|
template<typename Scalar,
|
||||||
|
bool HasStdImpl = NumTraits<Scalar>::IsComplex || is_integral<Scalar>::value
|
||||||
|
|| is_same<Scalar, float>::value || is_same<Scalar, double>::value
|
||||||
|
|| is_same<Scalar, long double>::value >
|
||||||
|
struct arg_default_impl;
|
||||||
|
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct arg_impl {
|
struct arg_default_impl<Scalar, true> {
|
||||||
static inline Scalar run(const Scalar& x)
|
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
static inline RealScalar run(const Scalar& x)
|
||||||
{
|
{
|
||||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
// There is no official ::arg on device in CUDA/HIP, so we always need to use std::arg.
|
||||||
// HIP does not seem to have a native device side implementation for the math routine "arg"
|
|
||||||
using std::arg;
|
using std::arg;
|
||||||
#else
|
return static_cast<RealScalar>(arg(x));
|
||||||
EIGEN_USING_STD_MATH(arg);
|
}
|
||||||
#endif
|
};
|
||||||
return arg(x);
|
|
||||||
|
// Must be non-complex floating-point type (e.g. half/bfloat16).
|
||||||
|
template<typename Scalar>
|
||||||
|
struct arg_default_impl<Scalar, false> {
|
||||||
|
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
static inline RealScalar run(const Scalar& x)
|
||||||
|
{
|
||||||
|
return (x < Scalar(0)) ? RealScalar(EIGEN_PI) : RealScalar(0);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
#else
|
#else
|
||||||
@@ -455,7 +612,8 @@ struct round_retval
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
static inline RealScalar run(const Scalar& x)
|
static inline RealScalar run(const Scalar& x)
|
||||||
{
|
{
|
||||||
return (x < Scalar(0)) ? Scalar(EIGEN_PI) : Scalar(0); }
|
return (x < RealScalar(0)) ? RealScalar(EIGEN_PI) : RealScalar(0);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
@@ -465,13 +623,12 @@ struct round_retval
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
static inline RealScalar run(const Scalar& x)
|
static inline RealScalar run(const Scalar& x)
|
||||||
{
|
{
|
||||||
EIGEN_USING_STD_MATH(arg);
|
EIGEN_USING_STD(arg);
|
||||||
return arg(x);
|
return arg(x);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Scalar> struct arg_impl : arg_default_impl<Scalar> {};
|
|
||||||
#endif
|
#endif
|
||||||
|
template<typename Scalar> struct arg_impl : arg_default_impl<Scalar> {};
|
||||||
|
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct arg_retval
|
struct arg_retval
|
||||||
@@ -493,7 +650,7 @@ namespace std_fallback {
|
|||||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
|
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
|
||||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||||
|
|
||||||
EIGEN_USING_STD_MATH(exp);
|
EIGEN_USING_STD(exp);
|
||||||
Scalar u = exp(x);
|
Scalar u = exp(x);
|
||||||
if (numext::equal_strict(u, Scalar(1))) {
|
if (numext::equal_strict(u, Scalar(1))) {
|
||||||
return x;
|
return x;
|
||||||
@@ -503,7 +660,7 @@ namespace std_fallback {
|
|||||||
return RealScalar(-1);
|
return RealScalar(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_USING_STD_MATH(log);
|
EIGEN_USING_STD(log);
|
||||||
Scalar logu = log(u);
|
Scalar logu = log(u);
|
||||||
return numext::equal_strict(u, logu) ? u : (u - RealScalar(1)) * x / logu;
|
return numext::equal_strict(u, logu) ? u : (u - RealScalar(1)) * x / logu;
|
||||||
}
|
}
|
||||||
@@ -523,22 +680,36 @@ struct expm1_impl {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Specialization for complex types that are not supported by std::expm1.
|
|
||||||
template <typename RealScalar>
|
|
||||||
struct expm1_impl<std::complex<RealScalar> > {
|
|
||||||
EIGEN_DEVICE_FUNC static inline std::complex<RealScalar> run(
|
|
||||||
const std::complex<RealScalar>& x) {
|
|
||||||
EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)
|
|
||||||
return std_fallback::expm1(x);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct expm1_retval
|
struct expm1_retval
|
||||||
{
|
{
|
||||||
typedef Scalar type;
|
typedef Scalar type;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
* Implementation of log *
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
// Complex log defined in MathFunctionsImpl.h.
|
||||||
|
template<typename T> EIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z);
|
||||||
|
|
||||||
|
template<typename Scalar>
|
||||||
|
struct log_impl {
|
||||||
|
EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x)
|
||||||
|
{
|
||||||
|
EIGEN_USING_STD(log);
|
||||||
|
return static_cast<Scalar>(log(x));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Scalar>
|
||||||
|
struct log_impl<std::complex<Scalar> > {
|
||||||
|
EIGEN_DEVICE_FUNC static inline std::complex<Scalar> run(const std::complex<Scalar>& z)
|
||||||
|
{
|
||||||
|
return complex_log(z);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
* Implementation of log1p *
|
* Implementation of log1p *
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
@@ -550,9 +721,9 @@ namespace std_fallback {
|
|||||||
EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) {
|
EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) {
|
||||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
|
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
|
||||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||||
EIGEN_USING_STD_MATH(log);
|
EIGEN_USING_STD(log);
|
||||||
Scalar x1p = RealScalar(1) + x;
|
Scalar x1p = RealScalar(1) + x;
|
||||||
Scalar log_1p = log(x1p);
|
Scalar log_1p = log_impl<Scalar>::run(x1p);
|
||||||
const bool is_small = numext::equal_strict(x1p, Scalar(1));
|
const bool is_small = numext::equal_strict(x1p, Scalar(1));
|
||||||
const bool is_inf = numext::equal_strict(x1p, log_1p);
|
const bool is_inf = numext::equal_strict(x1p, log_1p);
|
||||||
return (is_small || is_inf) ? x : x * (log_1p / (x1p - RealScalar(1)));
|
return (is_small || is_inf) ? x : x * (log_1p / (x1p - RealScalar(1)));
|
||||||
@@ -600,7 +771,7 @@ struct pow_impl
|
|||||||
typedef typename ScalarBinaryOpTraits<ScalarX,ScalarY,internal::scalar_pow_op<ScalarX,ScalarY> >::ReturnType result_type;
|
typedef typename ScalarBinaryOpTraits<ScalarX,ScalarY,internal::scalar_pow_op<ScalarX,ScalarY> >::ReturnType result_type;
|
||||||
static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y)
|
static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y)
|
||||||
{
|
{
|
||||||
EIGEN_USING_STD_MATH(pow);
|
EIGEN_USING_STD(pow);
|
||||||
return pow(x, y);
|
return pow(x, y);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -706,13 +877,159 @@ struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
|
|||||||
// no value, error at compile time
|
// no value, error at compile time
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename BitsType, typename EnableIf = void>
|
||||||
|
struct count_bits_impl {
|
||||||
|
static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
|
||||||
|
EIGEN_STATIC_ASSERT(
|
||||||
|
is_integral<BitsType>::value && !NumTraits<BitsType>::IsSigned,
|
||||||
|
THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
|
int n = CHAR_BIT * sizeof(BitsType);
|
||||||
|
int shift = n / 2;
|
||||||
|
while (bits > 0 && shift > 0) {
|
||||||
|
BitsType y = bits >> shift;
|
||||||
|
if (y > 0) {
|
||||||
|
n -= shift;
|
||||||
|
bits = y;
|
||||||
|
}
|
||||||
|
shift /= 2;
|
||||||
|
}
|
||||||
|
if (shift == 0) {
|
||||||
|
--n;
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
|
||||||
|
EIGEN_STATIC_ASSERT(
|
||||||
|
is_integral<BitsType>::value && !NumTraits<BitsType>::IsSigned,
|
||||||
|
THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
|
int n = CHAR_BIT * sizeof(BitsType);
|
||||||
|
int shift = n / 2;
|
||||||
|
while (bits > 0 && shift > 0) {
|
||||||
|
BitsType y = bits << shift;
|
||||||
|
if (y > 0) {
|
||||||
|
n -= shift;
|
||||||
|
bits = y;
|
||||||
|
}
|
||||||
|
shift /= 2;
|
||||||
|
}
|
||||||
|
if (shift == 0) {
|
||||||
|
--n;
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Count leading zeros.
|
||||||
|
template <typename BitsType>
|
||||||
|
EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
|
||||||
|
return count_bits_impl<BitsType>::clz(bits);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count trailing zeros.
|
||||||
|
template <typename BitsType>
|
||||||
|
EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
|
||||||
|
return count_bits_impl<BitsType>::ctz(bits);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
|
||||||
|
|
||||||
|
template <typename BitsType>
|
||||||
|
struct count_bits_impl<BitsType, typename enable_if<sizeof(BitsType) <= sizeof(unsigned int)>::type> {
|
||||||
|
static const int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
|
||||||
|
static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
|
||||||
|
EIGEN_STATIC_ASSERT(is_integral<BitsType>::value, THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
|
static const int kLeadingBitsOffset = (sizeof(unsigned int) - sizeof(BitsType)) * CHAR_BIT;
|
||||||
|
return bits == 0 ? kNumBits : __builtin_clz(static_cast<unsigned int>(bits)) - kLeadingBitsOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
|
||||||
|
EIGEN_STATIC_ASSERT(is_integral<BitsType>::value, THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
|
return bits == 0 ? kNumBits : __builtin_ctz(static_cast<unsigned int>(bits));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename BitsType>
|
||||||
|
struct count_bits_impl<
|
||||||
|
BitsType, typename enable_if<sizeof(unsigned int) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(unsigned long)>::type> {
|
||||||
|
static const int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
|
||||||
|
static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
|
||||||
|
EIGEN_STATIC_ASSERT(is_integral<BitsType>::value, THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
|
static const int kLeadingBitsOffset = (sizeof(unsigned long) - sizeof(BitsType)) * CHAR_BIT;
|
||||||
|
return bits == 0 ? kNumBits : __builtin_clzl(static_cast<unsigned long>(bits)) - kLeadingBitsOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
|
||||||
|
EIGEN_STATIC_ASSERT(is_integral<BitsType>::value, THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
|
return bits == 0 ? kNumBits : __builtin_ctzl(static_cast<unsigned long>(bits));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename BitsType>
|
||||||
|
struct count_bits_impl<BitsType, typename enable_if<sizeof(unsigned long) < sizeof(BitsType) &&
|
||||||
|
sizeof(BitsType) <= sizeof(unsigned long long)>::type> {
|
||||||
|
static const int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
|
||||||
|
static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
|
||||||
|
EIGEN_STATIC_ASSERT(is_integral<BitsType>::value, THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
|
static const int kLeadingBitsOffset = (sizeof(unsigned long long) - sizeof(BitsType)) * CHAR_BIT;
|
||||||
|
return bits == 0 ? kNumBits : __builtin_clzll(static_cast<unsigned long long>(bits)) - kLeadingBitsOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
|
||||||
|
EIGEN_STATIC_ASSERT(is_integral<BitsType>::value, THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
|
return bits == 0 ? kNumBits : __builtin_ctzll(static_cast<unsigned long long>(bits));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#elif EIGEN_COMP_MSVC
|
||||||
|
|
||||||
|
template <typename BitsType>
|
||||||
|
struct count_bits_impl<BitsType, typename enable_if<sizeof(BitsType) <= sizeof(unsigned long)>::type> {
|
||||||
|
static const int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
|
||||||
|
static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
|
||||||
|
EIGEN_STATIC_ASSERT(is_integral<BitsType>::value, THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
|
unsigned long out;
|
||||||
|
_BitScanReverse(&out, static_cast<unsigned long>(bits));
|
||||||
|
return bits == 0 ? kNumBits : (kNumBits - 1) - static_cast<int>(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
|
||||||
|
EIGEN_STATIC_ASSERT(is_integral<BitsType>::value, THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
|
unsigned long out;
|
||||||
|
_BitScanForward(&out, static_cast<unsigned long>(bits));
|
||||||
|
return bits == 0 ? kNumBits : static_cast<int>(out);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef _WIN64
|
||||||
|
|
||||||
|
template <typename BitsType>
|
||||||
|
struct count_bits_impl<
|
||||||
|
BitsType, typename enable_if<sizeof(unsigned long) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(__int64)>::type> {
|
||||||
|
static const int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
|
||||||
|
static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
|
||||||
|
EIGEN_STATIC_ASSERT(is_integral<BitsType>::value, THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
|
unsigned long out;
|
||||||
|
_BitScanReverse64(&out, static_cast<unsigned __int64>(bits));
|
||||||
|
return bits == 0 ? kNumBits : (kNumBits - 1) - static_cast<int>(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
|
||||||
|
EIGEN_STATIC_ASSERT(is_integral<BitsType>::value, THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
|
unsigned long out;
|
||||||
|
_BitScanForward64(&out, static_cast<unsigned __int64>(bits));
|
||||||
|
return bits == 0 ? kNumBits : static_cast<int>(out);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // _WIN64
|
||||||
|
|
||||||
|
#endif // EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
|
||||||
|
|
||||||
template <typename Scalar>
|
template <typename Scalar>
|
||||||
struct random_default_impl<Scalar, false, true>
|
struct random_default_impl<Scalar, false, true> {
|
||||||
{
|
static inline Scalar run(const Scalar& x, const Scalar& y) {
|
||||||
static inline Scalar run(const Scalar& x, const Scalar& y)
|
if (y <= x) return x;
|
||||||
{
|
|
||||||
if (y <= x)
|
|
||||||
return x;
|
|
||||||
// ScalarU is the unsigned counterpart of Scalar, possibly Scalar itself.
|
// ScalarU is the unsigned counterpart of Scalar, possibly Scalar itself.
|
||||||
typedef typename make_unsigned<Scalar>::type ScalarU;
|
typedef typename make_unsigned<Scalar>::type ScalarU;
|
||||||
// ScalarX is the widest of ScalarU and unsigned int.
|
// ScalarX is the widest of ScalarU and unsigned int.
|
||||||
@@ -857,11 +1174,15 @@ template<typename T> EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x)
|
|||||||
}
|
}
|
||||||
|
|
||||||
//MSVC defines a _isnan builtin function, but for double only
|
//MSVC defines a _isnan builtin function, but for double only
|
||||||
|
#ifndef EIGEN_GPU_COMPILE_PHASE
|
||||||
EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x)!=0; }
|
EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x)!=0; }
|
||||||
|
#endif
|
||||||
EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x)!=0; }
|
EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x)!=0; }
|
||||||
EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x)!=0; }
|
EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x)!=0; }
|
||||||
|
|
||||||
|
#ifndef EIGEN_GPU_COMPILE_PHASE
|
||||||
EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); }
|
EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); }
|
||||||
|
#endif
|
||||||
EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); }
|
EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); }
|
||||||
EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_msvc_helper(x); }
|
EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_msvc_helper(x); }
|
||||||
|
|
||||||
@@ -875,12 +1196,16 @@ EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_ms
|
|||||||
#define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize("no-finite-math-only")))
|
#define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize("no-finite-math-only")))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef EIGEN_GPU_COMPILE_PHASE
|
||||||
template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const long double& x) { return __builtin_isnan(x); }
|
template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const long double& x) { return __builtin_isnan(x); }
|
||||||
|
#endif
|
||||||
template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const double& x) { return __builtin_isnan(x); }
|
template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const double& x) { return __builtin_isnan(x); }
|
||||||
template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const float& x) { return __builtin_isnan(x); }
|
template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const float& x) { return __builtin_isnan(x); }
|
||||||
template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const double& x) { return __builtin_isinf(x); }
|
template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const double& x) { return __builtin_isinf(x); }
|
||||||
template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const float& x) { return __builtin_isinf(x); }
|
template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const float& x) { return __builtin_isinf(x); }
|
||||||
|
#ifndef EIGEN_GPU_COMPILE_PHASE
|
||||||
template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return __builtin_isinf(x); }
|
template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return __builtin_isinf(x); }
|
||||||
|
#endif
|
||||||
|
|
||||||
#undef EIGEN_TMP_NOOPT_ATTRIB
|
#undef EIGEN_TMP_NOOPT_ATTRIB
|
||||||
|
|
||||||
@@ -907,7 +1232,7 @@ template<typename T>
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
|
EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
|
||||||
{
|
{
|
||||||
EIGEN_USING_STD_MATH(min);
|
EIGEN_USING_STD(min)
|
||||||
return min EIGEN_NOT_A_MACRO (x,y);
|
return min EIGEN_NOT_A_MACRO (x,y);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -915,7 +1240,7 @@ template<typename T>
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
|
EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
|
||||||
{
|
{
|
||||||
EIGEN_USING_STD_MATH(max);
|
EIGEN_USING_STD(max)
|
||||||
return max EIGEN_NOT_A_MACRO (x,y);
|
return max EIGEN_NOT_A_MACRO (x,y);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
@@ -937,6 +1262,8 @@ EIGEN_ALWAYS_INLINE double mini(const double& x, const double& y)
|
|||||||
{
|
{
|
||||||
return fmin(x, y);
|
return fmin(x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef EIGEN_GPU_COMPILE_PHASE
|
||||||
template<>
|
template<>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
EIGEN_ALWAYS_INLINE long double mini(const long double& x, const long double& y)
|
EIGEN_ALWAYS_INLINE long double mini(const long double& x, const long double& y)
|
||||||
@@ -948,6 +1275,7 @@ EIGEN_ALWAYS_INLINE long double mini(const long double& x, const long double& y)
|
|||||||
return fminl(x, y);
|
return fminl(x, y);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
@@ -967,6 +1295,7 @@ EIGEN_ALWAYS_INLINE double maxi(const double& x, const double& y)
|
|||||||
{
|
{
|
||||||
return fmax(x, y);
|
return fmax(x, y);
|
||||||
}
|
}
|
||||||
|
#ifndef EIGEN_GPU_COMPILE_PHASE
|
||||||
template<>
|
template<>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
EIGEN_ALWAYS_INLINE long double maxi(const long double& x, const long double& y)
|
EIGEN_ALWAYS_INLINE long double maxi(const long double& x, const long double& y)
|
||||||
@@ -979,6 +1308,7 @@ EIGEN_ALWAYS_INLINE long double maxi(const long double& x, const long double& y)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(SYCL_DEVICE_ONLY)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
|
|
||||||
@@ -1116,6 +1446,34 @@ inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x)
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
inline bool abs2(bool x) { return x; }
|
inline bool abs2(bool x) { return x; }
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
EIGEN_ALWAYS_INLINE T absdiff(const T& x, const T& y)
|
||||||
|
{
|
||||||
|
return x > y ? x - y : y - x;
|
||||||
|
}
|
||||||
|
template<>
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
EIGEN_ALWAYS_INLINE float absdiff(const float& x, const float& y)
|
||||||
|
{
|
||||||
|
return fabsf(x - y);
|
||||||
|
}
|
||||||
|
template<>
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
EIGEN_ALWAYS_INLINE double absdiff(const double& x, const double& y)
|
||||||
|
{
|
||||||
|
return fabs(x - y);
|
||||||
|
}
|
||||||
|
|
||||||
|
// HIP and CUDA do not support long double.
|
||||||
|
#ifndef EIGEN_GPU_COMPILE_PHASE
|
||||||
|
template<>
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
EIGEN_ALWAYS_INLINE long double absdiff(const long double& x, const long double& y) {
|
||||||
|
return fabsl(x - y);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x)
|
inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x)
|
||||||
@@ -1174,6 +1532,13 @@ SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isinf, isinf, bool)
|
|||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isfinite, isfinite, bool)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isfinite, isfinite, bool)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
template<typename Scalar>
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
inline EIGEN_MATHFUNC_RETVAL(rint, Scalar) rint(const Scalar& x)
|
||||||
|
{
|
||||||
|
return EIGEN_MATHFUNC_IMPL(rint, Scalar)::run(x);
|
||||||
|
}
|
||||||
|
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x)
|
inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x)
|
||||||
@@ -1189,7 +1554,7 @@ template<typename T>
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
T (floor)(const T& x)
|
T (floor)(const T& x)
|
||||||
{
|
{
|
||||||
EIGEN_USING_STD_MATH(floor);
|
EIGEN_USING_STD(floor)
|
||||||
return floor(x);
|
return floor(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1209,7 +1574,7 @@ template<typename T>
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
T (ceil)(const T& x)
|
T (ceil)(const T& x)
|
||||||
{
|
{
|
||||||
EIGEN_USING_STD_MATH(ceil);
|
EIGEN_USING_STD(ceil);
|
||||||
return ceil(x);
|
return ceil(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1250,23 +1615,34 @@ inline int log2(int x)
|
|||||||
*
|
*
|
||||||
* Its usage is justified in performance critical functions, like norm/normalize.
|
* Its usage is justified in performance critical functions, like norm/normalize.
|
||||||
*/
|
*/
|
||||||
template<typename T>
|
template<typename Scalar>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC
|
||||||
T sqrt(const T &x)
|
EIGEN_ALWAYS_INLINE EIGEN_MATHFUNC_RETVAL(sqrt, Scalar) sqrt(const Scalar& x)
|
||||||
{
|
{
|
||||||
EIGEN_USING_STD_MATH(sqrt);
|
return EIGEN_MATHFUNC_IMPL(sqrt, Scalar)::run(x);
|
||||||
return sqrt(x);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Boolean specialization, avoids implicit float to bool conversion (-Wimplicit-conversion-floating-point-to-bool).
|
||||||
|
template<>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC
|
||||||
|
bool sqrt<bool>(const bool &x) { return x; }
|
||||||
|
|
||||||
#if defined(SYCL_DEVICE_ONLY)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sqrt, sqrt)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sqrt, sqrt)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/** \returns the reciprocal square root of \a x. **/
|
||||||
|
template<typename T>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
|
T rsqrt(const T& x)
|
||||||
|
{
|
||||||
|
return internal::rsqrt_impl<T>::run(x);
|
||||||
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
T log(const T &x) {
|
T log(const T &x) {
|
||||||
EIGEN_USING_STD_MATH(log);
|
return internal::log_impl<T>::run(x);
|
||||||
return log(x);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(SYCL_DEVICE_ONLY)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
@@ -1286,7 +1662,7 @@ template<typename T>
|
|||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type
|
typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type
|
||||||
abs(const T &x) {
|
abs(const T &x) {
|
||||||
EIGEN_USING_STD_MATH(abs);
|
EIGEN_USING_STD(abs);
|
||||||
return abs(x);
|
return abs(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1323,7 +1699,7 @@ double abs(const std::complex<double>& x) {
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
T exp(const T &x) {
|
T exp(const T &x) {
|
||||||
EIGEN_USING_STD_MATH(exp);
|
EIGEN_USING_STD(exp);
|
||||||
return exp(x);
|
return exp(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1377,7 +1753,7 @@ double expm1(const double &x) { return ::expm1(x); }
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
T cos(const T &x) {
|
T cos(const T &x) {
|
||||||
EIGEN_USING_STD_MATH(cos);
|
EIGEN_USING_STD(cos);
|
||||||
return cos(x);
|
return cos(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1396,7 +1772,7 @@ double cos(const double &x) { return ::cos(x); }
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
T sin(const T &x) {
|
T sin(const T &x) {
|
||||||
EIGEN_USING_STD_MATH(sin);
|
EIGEN_USING_STD(sin);
|
||||||
return sin(x);
|
return sin(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1415,7 +1791,7 @@ double sin(const double &x) { return ::sin(x); }
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
T tan(const T &x) {
|
T tan(const T &x) {
|
||||||
EIGEN_USING_STD_MATH(tan);
|
EIGEN_USING_STD(tan);
|
||||||
return tan(x);
|
return tan(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1434,7 +1810,7 @@ double tan(const double &x) { return ::tan(x); }
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
T acos(const T &x) {
|
T acos(const T &x) {
|
||||||
EIGEN_USING_STD_MATH(acos);
|
EIGEN_USING_STD(acos);
|
||||||
return acos(x);
|
return acos(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1442,8 +1818,8 @@ T acos(const T &x) {
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
T acosh(const T &x) {
|
T acosh(const T &x) {
|
||||||
EIGEN_USING_STD_MATH(acosh);
|
EIGEN_USING_STD(acosh);
|
||||||
return acosh(x);
|
return static_cast<T>(acosh(x));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -1463,7 +1839,7 @@ double acos(const double &x) { return ::acos(x); }
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
T asin(const T &x) {
|
T asin(const T &x) {
|
||||||
EIGEN_USING_STD_MATH(asin);
|
EIGEN_USING_STD(asin);
|
||||||
return asin(x);
|
return asin(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1471,8 +1847,8 @@ T asin(const T &x) {
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
T asinh(const T &x) {
|
T asinh(const T &x) {
|
||||||
EIGEN_USING_STD_MATH(asinh);
|
EIGEN_USING_STD(asinh);
|
||||||
return asinh(x);
|
return static_cast<T>(asinh(x));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -1492,16 +1868,16 @@ double asin(const double &x) { return ::asin(x); }
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
T atan(const T &x) {
|
T atan(const T &x) {
|
||||||
EIGEN_USING_STD_MATH(atan);
|
EIGEN_USING_STD(atan);
|
||||||
return atan(x);
|
return static_cast<T>(atan(x));
|
||||||
}
|
}
|
||||||
|
|
||||||
#if EIGEN_HAS_CXX11_MATH
|
#if EIGEN_HAS_CXX11_MATH
|
||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
T atanh(const T &x) {
|
T atanh(const T &x) {
|
||||||
EIGEN_USING_STD_MATH(atanh);
|
EIGEN_USING_STD(atanh);
|
||||||
return atanh(x);
|
return static_cast<T>(atanh(x));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -1522,8 +1898,8 @@ double atan(const double &x) { return ::atan(x); }
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
T cosh(const T &x) {
|
T cosh(const T &x) {
|
||||||
EIGEN_USING_STD_MATH(cosh);
|
EIGEN_USING_STD(cosh);
|
||||||
return cosh(x);
|
return static_cast<T>(cosh(x));
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(SYCL_DEVICE_ONLY)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
@@ -1541,8 +1917,8 @@ double cosh(const double &x) { return ::cosh(x); }
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
T sinh(const T &x) {
|
T sinh(const T &x) {
|
||||||
EIGEN_USING_STD_MATH(sinh);
|
EIGEN_USING_STD(sinh);
|
||||||
return sinh(x);
|
return static_cast<T>(sinh(x));
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(SYCL_DEVICE_ONLY)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
@@ -1560,7 +1936,7 @@ double sinh(const double &x) { return ::sinh(x); }
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
T tanh(const T &x) {
|
T tanh(const T &x) {
|
||||||
EIGEN_USING_STD_MATH(tanh);
|
EIGEN_USING_STD(tanh);
|
||||||
return tanh(x);
|
return tanh(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1584,7 +1960,7 @@ double tanh(const double &x) { return ::tanh(x); }
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
T fmod(const T& a, const T& b) {
|
T fmod(const T& a, const T& b) {
|
||||||
EIGEN_USING_STD_MATH(fmod);
|
EIGEN_USING_STD(fmod);
|
||||||
return fmod(a, b);
|
return fmod(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1746,6 +2122,11 @@ template<> struct random_impl<bool>
|
|||||||
{
|
{
|
||||||
return random<int>(0,1)==0 ? false : true;
|
return random<int>(0,1)==0 ? false : true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool run(const bool& a, const bool& b)
|
||||||
|
{
|
||||||
|
return random<int>(a, b)==0 ? false : true;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<> struct scalar_fuzzy_impl<bool>
|
template<> struct scalar_fuzzy_impl<bool>
|
||||||
@@ -1772,6 +2153,57 @@ template<> struct scalar_fuzzy_impl<bool>
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
} // end namespace internal
|
||||||
|
|
||||||
|
// Default implementations that rely on other numext implementations
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
// Specialization for complex types that are not supported by std::expm1.
|
||||||
|
template <typename RealScalar>
|
||||||
|
struct expm1_impl<std::complex<RealScalar> > {
|
||||||
|
EIGEN_DEVICE_FUNC static inline std::complex<RealScalar> run(
|
||||||
|
const std::complex<RealScalar>& x) {
|
||||||
|
EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)
|
||||||
|
RealScalar xr = x.real();
|
||||||
|
RealScalar xi = x.imag();
|
||||||
|
// expm1(z) = exp(z) - 1
|
||||||
|
// = exp(x + i * y) - 1
|
||||||
|
// = exp(x) * (cos(y) + i * sin(y)) - 1
|
||||||
|
// = exp(x) * cos(y) - 1 + i * exp(x) * sin(y)
|
||||||
|
// Imag(expm1(z)) = exp(x) * sin(y)
|
||||||
|
// Real(expm1(z)) = exp(x) * cos(y) - 1
|
||||||
|
// = exp(x) * cos(y) - 1.
|
||||||
|
// = expm1(x) + exp(x) * (cos(y) - 1)
|
||||||
|
// = expm1(x) - exp(x) * (2 * sin(y / 2) ** 2)
|
||||||
|
RealScalar erm1 = numext::expm1<RealScalar>(xr);
|
||||||
|
RealScalar er = erm1 + RealScalar(1.);
|
||||||
|
RealScalar sin2 = numext::sin(xi / RealScalar(2.));
|
||||||
|
sin2 = sin2 * sin2;
|
||||||
|
RealScalar s = numext::sin(xi);
|
||||||
|
RealScalar real_part = erm1 - RealScalar(2.) * er * sin2;
|
||||||
|
return std::complex<RealScalar>(real_part, er * s);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
struct rsqrt_impl {
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
static EIGEN_ALWAYS_INLINE T run(const T& x) {
|
||||||
|
return T(1)/numext::sqrt(x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#if defined(EIGEN_GPU_COMPILE_PHASE)
|
||||||
|
template<typename T>
|
||||||
|
struct conj_impl<std::complex<T>, true>
|
||||||
|
{
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
static inline std::complex<T> run(const std::complex<T>& x)
|
||||||
|
{
|
||||||
|
return std::complex<T>(numext::real(x), -numext::imag(x));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
|
|||||||
@@ -17,19 +17,28 @@ namespace internal {
|
|||||||
|
|
||||||
/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
|
/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
|
||||||
Doesn't do anything fancy, just a 13/6-degree rational interpolant which
|
Doesn't do anything fancy, just a 13/6-degree rational interpolant which
|
||||||
is accurate up to a couple of ulp in the range [-9, 9], outside of which
|
is accurate up to a couple of ulps in the (approximate) range [-8, 8],
|
||||||
the tanh(x) = +/-1.
|
outside of which tanh(x) = +/-1 in single precision. The input is clamped
|
||||||
|
to the range [-c, c]. The value c is chosen as the smallest value where
|
||||||
|
the approximation evaluates to exactly 1. In the range [-0.0004, 0.0004]
|
||||||
|
the approximation tanh(x) ~= x is used for better accuracy as x tends to zero.
|
||||||
|
|
||||||
This implementation works on both scalars and packets.
|
This implementation works on both scalars and packets.
|
||||||
*/
|
*/
|
||||||
template<typename T>
|
template<typename T>
|
||||||
T generic_fast_tanh_float(const T& a_x)
|
T generic_fast_tanh_float(const T& a_x)
|
||||||
{
|
{
|
||||||
// Clamp the inputs to the range [-9, 9] since anything outside
|
// Clamp the inputs to the range [-c, c]
|
||||||
// this range is +/-1.0f in single-precision.
|
#ifdef EIGEN_VECTORIZE_FMA
|
||||||
const T plus_9 = pset1<T>(9.f);
|
const T plus_clamp = pset1<T>(7.99881172180175781f);
|
||||||
const T minus_9 = pset1<T>(-9.f);
|
const T minus_clamp = pset1<T>(-7.99881172180175781f);
|
||||||
const T x = pmax(pmin(a_x, plus_9), minus_9);
|
#else
|
||||||
|
const T plus_clamp = pset1<T>(7.90531110763549805f);
|
||||||
|
const T minus_clamp = pset1<T>(-7.90531110763549805f);
|
||||||
|
#endif
|
||||||
|
const T tiny = pset1<T>(0.0004f);
|
||||||
|
const T x = pmax(pmin(a_x, plus_clamp), minus_clamp);
|
||||||
|
const T tiny_mask = pcmp_lt(pabs(a_x), tiny);
|
||||||
// The monomial coefficients of the numerator polynomial (odd).
|
// The monomial coefficients of the numerator polynomial (odd).
|
||||||
const T alpha_1 = pset1<T>(4.89352455891786e-03f);
|
const T alpha_1 = pset1<T>(4.89352455891786e-03f);
|
||||||
const T alpha_3 = pset1<T>(6.37261928875436e-04f);
|
const T alpha_3 = pset1<T>(6.37261928875436e-04f);
|
||||||
@@ -57,20 +66,26 @@ T generic_fast_tanh_float(const T& a_x)
|
|||||||
p = pmadd(x2, p, alpha_1);
|
p = pmadd(x2, p, alpha_1);
|
||||||
p = pmul(x, p);
|
p = pmul(x, p);
|
||||||
|
|
||||||
// Evaluate the denominator polynomial p.
|
// Evaluate the denominator polynomial q.
|
||||||
T q = pmadd(x2, beta_6, beta_4);
|
T q = pmadd(x2, beta_6, beta_4);
|
||||||
q = pmadd(x2, q, beta_2);
|
q = pmadd(x2, q, beta_2);
|
||||||
q = pmadd(x2, q, beta_0);
|
q = pmadd(x2, q, beta_0);
|
||||||
|
|
||||||
// Divide the numerator by the denominator.
|
// Divide the numerator by the denominator.
|
||||||
return pdiv(p, q);
|
return pselect(tiny_mask, x, pdiv(p, q));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename RealScalar>
|
template<typename RealScalar>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
|
RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
|
||||||
{
|
{
|
||||||
EIGEN_USING_STD_MATH(sqrt);
|
// IEEE 754 / IEC 60559 special cases.
|
||||||
|
if ((numext::isinf)(x) || (numext::isinf)(y))
|
||||||
|
return NumTraits<RealScalar>::infinity();
|
||||||
|
if ((numext::isnan)(x) || (numext::isnan)(y))
|
||||||
|
return NumTraits<RealScalar>::quiet_NaN();
|
||||||
|
|
||||||
|
EIGEN_USING_STD(sqrt);
|
||||||
RealScalar p, qp;
|
RealScalar p, qp;
|
||||||
p = numext::maxi(x,y);
|
p = numext::maxi(x,y);
|
||||||
if(p==RealScalar(0)) return RealScalar(0);
|
if(p==RealScalar(0)) return RealScalar(0);
|
||||||
@@ -85,11 +100,99 @@ struct hypot_impl
|
|||||||
static EIGEN_DEVICE_FUNC
|
static EIGEN_DEVICE_FUNC
|
||||||
inline RealScalar run(const Scalar& x, const Scalar& y)
|
inline RealScalar run(const Scalar& x, const Scalar& y)
|
||||||
{
|
{
|
||||||
EIGEN_USING_STD_MATH(abs);
|
EIGEN_USING_STD(abs);
|
||||||
return positive_real_hypot<RealScalar>(abs(x), abs(y));
|
return positive_real_hypot<RealScalar>(abs(x), abs(y));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Generic complex sqrt implementation that correctly handles corner cases
|
||||||
|
// according to https://en.cppreference.com/w/cpp/numeric/complex/sqrt
|
||||||
|
template<typename T>
|
||||||
|
EIGEN_DEVICE_FUNC std::complex<T> complex_sqrt(const std::complex<T>& z) {
|
||||||
|
// Computes the principal sqrt of the input.
|
||||||
|
//
|
||||||
|
// For a complex square root of the number x + i*y, we want to find real
|
||||||
|
// numbers u and v such that
|
||||||
|
// (u + i*v)^2 = x + i*y <=>
|
||||||
|
// u^2 - v^2 + i*2*u*v = x + i*y.
|
||||||
|
// By equating the real and imaginary parts we get:
|
||||||
|
// u^2 - v^2 = x
|
||||||
|
// 2*u*v = y.
|
||||||
|
//
|
||||||
|
// For x >= 0, this has the numerically stable solution
|
||||||
|
// u = sqrt(0.5 * (x + sqrt(x^2 + y^2)))
|
||||||
|
// v = y / (2 * u)
|
||||||
|
// and for x < 0,
|
||||||
|
// v = sign(y) * sqrt(0.5 * (-x + sqrt(x^2 + y^2)))
|
||||||
|
// u = y / (2 * v)
|
||||||
|
//
|
||||||
|
// Letting w = sqrt(0.5 * (|x| + |z|)),
|
||||||
|
// if x == 0: u = w, v = sign(y) * w
|
||||||
|
// if x > 0: u = w, v = y / (2 * w)
|
||||||
|
// if x < 0: u = |y| / (2 * w), v = sign(y) * w
|
||||||
|
|
||||||
|
const T x = numext::real(z);
|
||||||
|
const T y = numext::imag(z);
|
||||||
|
const T zero = T(0);
|
||||||
|
const T w = numext::sqrt(T(0.5) * (numext::abs(x) + numext::hypot(x, y)));
|
||||||
|
|
||||||
|
return
|
||||||
|
(numext::isinf)(y) ? std::complex<T>(NumTraits<T>::infinity(), y)
|
||||||
|
: x == zero ? std::complex<T>(w, y < zero ? -w : w)
|
||||||
|
: x > zero ? std::complex<T>(w, y / (2 * w))
|
||||||
|
: std::complex<T>(numext::abs(y) / (2 * w), y < zero ? -w : w );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generic complex rsqrt implementation.
|
||||||
|
template<typename T>
|
||||||
|
EIGEN_DEVICE_FUNC std::complex<T> complex_rsqrt(const std::complex<T>& z) {
|
||||||
|
// Computes the principal reciprocal sqrt of the input.
|
||||||
|
//
|
||||||
|
// For a complex reciprocal square root of the number z = x + i*y, we want to
|
||||||
|
// find real numbers u and v such that
|
||||||
|
// (u + i*v)^2 = 1 / (x + i*y) <=>
|
||||||
|
// u^2 - v^2 + i*2*u*v = x/|z|^2 - i*y/|z|^2.
|
||||||
|
// By equating the real and imaginary parts we get:
|
||||||
|
// u^2 - v^2 = x/|z|^2
|
||||||
|
// 2*u*v = -y/|z|^2.
|
||||||
|
//
|
||||||
|
// For x >= 0, this has the numerically stable solution
|
||||||
|
// u = sqrt(0.5 * (x + |z|)) / |z|
|
||||||
|
// v = -y / (2 * u * |z|^2)
|
||||||
|
// and for x < 0,
|
||||||
|
// v = -sign(y) * sqrt(0.5 * (-x + |z|)) / |z|
|
||||||
|
// u = -y / (2 * v * |z|^2)
|
||||||
|
//
|
||||||
|
// Letting w = sqrt(0.5 * (|x| + |z|)),
|
||||||
|
// if x == 0: u = w / |z|, v = -sign(y) * w / |z|
|
||||||
|
// if x > 0: u = w / |z|, v = -y / (2 * w * |z|)
|
||||||
|
// if x < 0: u = |y| / (2 * w * |z|), v = -sign(y) * w / |z|
|
||||||
|
|
||||||
|
const T x = numext::real(z);
|
||||||
|
const T y = numext::imag(z);
|
||||||
|
const T zero = T(0);
|
||||||
|
|
||||||
|
const T abs_z = numext::hypot(x, y);
|
||||||
|
const T w = numext::sqrt(T(0.5) * (numext::abs(x) + abs_z));
|
||||||
|
const T woz = w / abs_z;
|
||||||
|
// Corner cases consistent with 1/sqrt(z) on gcc/clang.
|
||||||
|
return
|
||||||
|
abs_z == zero ? std::complex<T>(NumTraits<T>::infinity(), NumTraits<T>::quiet_NaN())
|
||||||
|
: ((numext::isinf)(x) || (numext::isinf)(y)) ? std::complex<T>(zero, zero)
|
||||||
|
: x == zero ? std::complex<T>(woz, y < zero ? woz : -woz)
|
||||||
|
: x > zero ? std::complex<T>(woz, -y / (2 * w * abs_z))
|
||||||
|
: std::complex<T>(numext::abs(y) / (2 * w * abs_z), y < zero ? woz : -woz );
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
EIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z) {
|
||||||
|
// Computes complex log.
|
||||||
|
T a = numext::abs(z);
|
||||||
|
EIGEN_USING_STD(atan2);
|
||||||
|
T b = atan2(z.imag(), z.real());
|
||||||
|
return std::complex<T>(numext::log(a), b);
|
||||||
|
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|||||||
@@ -225,8 +225,6 @@ class Matrix
|
|||||||
return Base::_set(other);
|
return Base::_set(other);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Here, doxygen failed to copy the brief information when using \copydoc */
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Copies the generic expression \a other into *this.
|
* \brief Copies the generic expression \a other into *this.
|
||||||
* \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
|
* \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
|
||||||
@@ -278,13 +276,21 @@ class Matrix
|
|||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
|
Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
|
||||||
{
|
{
|
||||||
other.swap(*this);
|
Base::operator=(std::move(other));
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if EIGEN_HAS_CXX11
|
#if EIGEN_HAS_CXX11
|
||||||
/** \copydoc PlainObjectBase(const Scalar&, const Scalar&, const Scalar&, const Scalar&, const ArgTypes&... args)
|
/** \brief Construct a row or column vector with fixed size from an arbitrary number of coefficients. \cpp11
|
||||||
|
*
|
||||||
|
* \only_for_vectors
|
||||||
|
*
|
||||||
|
* This constructor is for 1D arrays or vectors with more than 4 coefficients.
|
||||||
|
* There exist C++98 analogue constructors for fixed-size arrays/vectors having 1, 2, 3, or 4 coefficients.
|
||||||
|
*
|
||||||
|
* \warning To construct a column (resp. row) vector of fixed length, the number of values passed to this
|
||||||
|
* constructor must match the fixed number of rows (resp. columns) of \c *this.
|
||||||
*
|
*
|
||||||
* Example: \include Matrix_variadic_ctor_cxx11.cpp
|
* Example: \include Matrix_variadic_ctor_cxx11.cpp
|
||||||
* Output: \verbinclude Matrix_variadic_ctor_cxx11.out
|
* Output: \verbinclude Matrix_variadic_ctor_cxx11.out
|
||||||
@@ -297,6 +303,8 @@ class Matrix
|
|||||||
: Base(a0, a1, a2, a3, args...) {}
|
: Base(a0, a1, a2, a3, args...) {}
|
||||||
|
|
||||||
/** \brief Constructs a Matrix and initializes it from the coefficients given as initializer-lists grouped by row. \cpp11
|
/** \brief Constructs a Matrix and initializes it from the coefficients given as initializer-lists grouped by row. \cpp11
|
||||||
|
*
|
||||||
|
* \anchor matrix_constructor_initializer_list
|
||||||
*
|
*
|
||||||
* In the general case, the constructor takes a list of rows, each row being represented as a list of coefficients:
|
* In the general case, the constructor takes a list of rows, each row being represented as a list of coefficients:
|
||||||
*
|
*
|
||||||
@@ -423,8 +431,10 @@ class Matrix
|
|||||||
: Base(other.derived())
|
: Base(other.derived())
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
|
inline Index innerStride() const EIGEN_NOEXCEPT { return 1; }
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
|
inline Index outerStride() const EIGEN_NOEXCEPT { return this->innerSize(); }
|
||||||
|
|
||||||
/////////// Geometry module ///////////
|
/////////// Geometry module ///////////
|
||||||
|
|
||||||
@@ -478,16 +488,21 @@ class Matrix
|
|||||||
|
|
||||||
#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \
|
#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \
|
||||||
/** \ingroup matrixtypedefs */ \
|
/** \ingroup matrixtypedefs */ \
|
||||||
|
/** \brief \noop */ \
|
||||||
typedef Matrix<Type, Size, Size> Matrix##SizeSuffix##TypeSuffix; \
|
typedef Matrix<Type, Size, Size> Matrix##SizeSuffix##TypeSuffix; \
|
||||||
/** \ingroup matrixtypedefs */ \
|
/** \ingroup matrixtypedefs */ \
|
||||||
|
/** \brief \noop */ \
|
||||||
typedef Matrix<Type, Size, 1> Vector##SizeSuffix##TypeSuffix; \
|
typedef Matrix<Type, Size, 1> Vector##SizeSuffix##TypeSuffix; \
|
||||||
/** \ingroup matrixtypedefs */ \
|
/** \ingroup matrixtypedefs */ \
|
||||||
|
/** \brief \noop */ \
|
||||||
typedef Matrix<Type, 1, Size> RowVector##SizeSuffix##TypeSuffix;
|
typedef Matrix<Type, 1, Size> RowVector##SizeSuffix##TypeSuffix;
|
||||||
|
|
||||||
#define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \
|
#define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \
|
||||||
/** \ingroup matrixtypedefs */ \
|
/** \ingroup matrixtypedefs */ \
|
||||||
|
/** \brief \noop */ \
|
||||||
typedef Matrix<Type, Size, Dynamic> Matrix##Size##X##TypeSuffix; \
|
typedef Matrix<Type, Size, Dynamic> Matrix##Size##X##TypeSuffix; \
|
||||||
/** \ingroup matrixtypedefs */ \
|
/** \ingroup matrixtypedefs */ \
|
||||||
|
/** \brief \noop */ \
|
||||||
typedef Matrix<Type, Dynamic, Size> Matrix##X##Size##TypeSuffix;
|
typedef Matrix<Type, Dynamic, Size> Matrix##X##Size##TypeSuffix;
|
||||||
|
|
||||||
#define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
|
#define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
|
||||||
|
|||||||
@@ -206,28 +206,22 @@ template<typename Derived> class MatrixBase
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
DiagonalReturnType diagonal();
|
DiagonalReturnType diagonal();
|
||||||
|
|
||||||
typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
|
typedef Diagonal<const Derived> ConstDiagonalReturnType;
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
ConstDiagonalReturnType diagonal() const;
|
const ConstDiagonalReturnType diagonal() const;
|
||||||
|
|
||||||
template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
|
|
||||||
template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
|
|
||||||
|
|
||||||
template<int Index>
|
template<int Index>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
typename DiagonalIndexReturnType<Index>::Type diagonal();
|
Diagonal<Derived, Index> diagonal();
|
||||||
|
|
||||||
template<int Index>
|
template<int Index>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
|
const Diagonal<const Derived, Index> diagonal() const;
|
||||||
|
|
||||||
typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType;
|
|
||||||
typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType;
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
DiagonalDynamicIndexReturnType diagonal(Index index);
|
Diagonal<Derived, DynamicIndex> diagonal(Index index);
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
ConstDiagonalDynamicIndexReturnType diagonal(Index index) const;
|
const Diagonal<const Derived, DynamicIndex> diagonal(Index index) const;
|
||||||
|
|
||||||
template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
|
template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
|
||||||
template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
|
template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
|
||||||
@@ -481,7 +475,8 @@ template<typename Derived> class MatrixBase
|
|||||||
EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const std::complex<RealScalar>& p)
|
EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const std::complex<RealScalar>& p)
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
EIGEN_DEVICE_FUNC MatrixBase() : Base() {}
|
EIGEN_DEFAULT_COPY_CONSTRUCTOR(MatrixBase)
|
||||||
|
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MatrixBase)
|
||||||
|
|
||||||
private:
|
private:
|
||||||
EIGEN_DEVICE_FUNC explicit MatrixBase(int);
|
EIGEN_DEVICE_FUNC explicit MatrixBase(int);
|
||||||
|
|||||||
@@ -45,8 +45,8 @@ template<typename ExpressionType> class NestByValue
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}
|
EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index rows() const EIGEN_NOEXCEPT { return m_expression.rows(); }
|
||||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index cols() const EIGEN_NOEXCEPT { return m_expression.cols(); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; }
|
EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; }
|
||||||
|
|
||||||
|
|||||||
@@ -21,14 +21,14 @@ template< typename T,
|
|||||||
bool is_integer = NumTraits<T>::IsInteger>
|
bool is_integer = NumTraits<T>::IsInteger>
|
||||||
struct default_digits10_impl
|
struct default_digits10_impl
|
||||||
{
|
{
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static int run() { return std::numeric_limits<T>::digits10; }
|
static int run() { return std::numeric_limits<T>::digits10; }
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
struct default_digits10_impl<T,false,false> // Floating point
|
struct default_digits10_impl<T,false,false> // Floating point
|
||||||
{
|
{
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static int run() {
|
static int run() {
|
||||||
using std::log10;
|
using std::log10;
|
||||||
using std::ceil;
|
using std::ceil;
|
||||||
@@ -40,7 +40,7 @@ struct default_digits10_impl<T,false,false> // Floating point
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
struct default_digits10_impl<T,false,true> // Integer
|
struct default_digits10_impl<T,false,true> // Integer
|
||||||
{
|
{
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static int run() { return 0; }
|
static int run() { return 0; }
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -52,14 +52,14 @@ template< typename T,
|
|||||||
bool is_integer = NumTraits<T>::IsInteger>
|
bool is_integer = NumTraits<T>::IsInteger>
|
||||||
struct default_digits_impl
|
struct default_digits_impl
|
||||||
{
|
{
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static int run() { return std::numeric_limits<T>::digits; }
|
static int run() { return std::numeric_limits<T>::digits; }
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
struct default_digits_impl<T,false,false> // Floating point
|
struct default_digits_impl<T,false,false> // Floating point
|
||||||
{
|
{
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static int run() {
|
static int run() {
|
||||||
using std::log;
|
using std::log;
|
||||||
using std::ceil;
|
using std::ceil;
|
||||||
@@ -71,12 +71,34 @@ struct default_digits_impl<T,false,false> // Floating point
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
struct default_digits_impl<T,false,true> // Integer
|
struct default_digits_impl<T,false,true> // Integer
|
||||||
{
|
{
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static int run() { return 0; }
|
static int run() { return 0; }
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
|
namespace numext {
|
||||||
|
/** \internal bit-wise cast without changing the underlying bit representation. */
|
||||||
|
|
||||||
|
// TODO: Replace by std::bit_cast (available in C++20)
|
||||||
|
template <typename Tgt, typename Src>
|
||||||
|
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Tgt bit_cast(const Src& src) {
|
||||||
|
#if EIGEN_HAS_TYPE_TRAITS
|
||||||
|
// The behaviour of memcpy is not specified for non-trivially copyable types
|
||||||
|
EIGEN_STATIC_ASSERT(std::is_trivially_copyable<Src>::value, THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
|
EIGEN_STATIC_ASSERT(std::is_trivially_copyable<Tgt>::value && std::is_default_constructible<Tgt>::value,
|
||||||
|
THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
EIGEN_STATIC_ASSERT(sizeof(Src) == sizeof(Tgt), THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
|
Tgt tgt;
|
||||||
|
EIGEN_USING_STD(memcpy)
|
||||||
|
memcpy(&tgt, &src, sizeof(Tgt));
|
||||||
|
return tgt;
|
||||||
|
}
|
||||||
|
} // namespace numext
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
/** \class NumTraits
|
/** \class NumTraits
|
||||||
* \ingroup Core_Module
|
* \ingroup Core_Module
|
||||||
*
|
*
|
||||||
@@ -88,36 +110,47 @@ struct default_digits_impl<T,false,true> // Integer
|
|||||||
*
|
*
|
||||||
* The provided data consists of:
|
* The provided data consists of:
|
||||||
* \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real,
|
* \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real,
|
||||||
* then \c Real is just a typedef to \a T. If \a T is \c std::complex<U> then \c Real
|
* then \c Real is just a typedef to \a T. If \a T is `std::complex<U>` then \c Real
|
||||||
* is a typedef to \a U.
|
* is a typedef to \a U.
|
||||||
* \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values,
|
* \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values,
|
||||||
* such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives
|
* such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives
|
||||||
* \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to
|
* \a T again. Note however that many Eigen functions such as `internal::sqrt` simply refuse to
|
||||||
* take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is
|
* take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is
|
||||||
* only intended as a helper for code that needs to explicitly promote types.
|
* only intended as a helper for code that needs to explicitly promote types.
|
||||||
* \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for \c std::complex<U>, Literal is defined as \c U.
|
* \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for `std::complex<U>`,
|
||||||
|
* Literal is defined as \c U.
|
||||||
* Of course, this type must be fully compatible with \a T. In doubt, just use \a T here.
|
* Of course, this type must be fully compatible with \a T. In doubt, just use \a T here.
|
||||||
* \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what
|
* \li A typedef \c Nested giving the type to use to nest a value inside of the expression tree. If you don't know what
|
||||||
* this means, just use \a T here.
|
* this means, just use \a T here.
|
||||||
* \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex
|
* \li An enum value \c IsComplex. It is equal to 1 if \a T is a \c std::complex
|
||||||
* type, and to 0 otherwise.
|
* type, and to 0 otherwise.
|
||||||
* \li An enum value \a IsInteger. It is equal to \c 1 if \a T is an integer type such as \c int,
|
* \li An enum value \c IsInteger. It is equal to \c 1 if \a T is an integer type such as \c int,
|
||||||
* and to \c 0 otherwise.
|
* and to \c 0 otherwise.
|
||||||
* \li Enum values ReadCost, AddCost and MulCost representing a rough estimate of the number of CPU cycles needed
|
* \li Enum values \c ReadCost, \c AddCost and \c MulCost representing a rough estimate of the number of CPU cycles needed
|
||||||
* to by move / add / mul instructions respectively, assuming the data is already stored in CPU registers.
|
* by move / add / mul instructions respectively, assuming the data is already stored in CPU registers.
|
||||||
* Stay vague here. No need to do architecture-specific stuff. If you don't know what this means, just use \c Eigen::HugeCost.
|
* Stay vague here. No need to do architecture-specific stuff. If you don't know what this means, just use \c Eigen::HugeCost.
|
||||||
* \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned.
|
* \li An enum value \c IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned.
|
||||||
* \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must
|
* \li An enum value \c RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must
|
||||||
* be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise.
|
* be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise.
|
||||||
* \li An epsilon() function which, unlike <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/epsilon">std::numeric_limits::epsilon()</a>,
|
* \li An `epsilon()` function which, unlike <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/epsilon">`std::numeric_limits::epsilon()`</a>,
|
||||||
* it returns a \a Real instead of a \a T.
|
* returns a \c Real instead of a \a T.
|
||||||
* \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default
|
* \li A `dummy_precision()` function returning a weak epsilon value. It is mainly used as a default
|
||||||
* value by the fuzzy comparison operators.
|
* value by the fuzzy comparison operators.
|
||||||
* \li highest() and lowest() functions returning the highest and lowest possible values respectively.
|
* \li `highest()` and `lowest()` functions returning the highest and lowest possible values respectively.
|
||||||
* \li digits10() function returning the number of decimal digits that can be represented without change. This is
|
* \li `digits()` function returning the number of radix digits (non-sign digits for integers, mantissa for floating-point). This is
|
||||||
|
* the analogue of <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/digits">std::numeric_limits<T>::digits</a>
|
||||||
|
* which is used as the default implementation if specialized.
|
||||||
|
* \li `digits10()` function returning the number of decimal digits that can be represented without change. This is
|
||||||
* the analogue of <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/digits10">std::numeric_limits<T>::digits10</a>
|
* the analogue of <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/digits10">std::numeric_limits<T>::digits10</a>
|
||||||
* which is used as the default implementation if specialized.
|
* which is used as the default implementation if specialized.
|
||||||
|
* \li `min_exponent()` and `max_exponent()` functions returning the lowest and highest possible values, respectively,
|
||||||
|
* such that the radix raised to the power exponent-1 is a normalized floating-point number. These are equivalent to
|
||||||
|
* <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/min_exponent">`std::numeric_limits<T>::min_exponent`</a>/
|
||||||
|
* <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/max_exponent">`std::numeric_limits<T>::max_exponent`</a>.
|
||||||
|
* \li `infinity()` function returning a representation of positive infinity, if available.
|
||||||
|
* \li `quiet_NaN()` function returning a non-signaling "not-a-number", if available.
|
||||||
*/
|
*/
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
template<typename T> struct GenericNumTraits
|
template<typename T> struct GenericNumTraits
|
||||||
{
|
{
|
||||||
@@ -140,49 +173,60 @@ template<typename T> struct GenericNumTraits
|
|||||||
typedef T Nested;
|
typedef T Nested;
|
||||||
typedef T Literal;
|
typedef T Literal;
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static inline Real epsilon()
|
static inline Real epsilon()
|
||||||
{
|
{
|
||||||
return numext::numeric_limits<T>::epsilon();
|
return numext::numeric_limits<T>::epsilon();
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static inline int digits10()
|
static inline int digits10()
|
||||||
{
|
{
|
||||||
return internal::default_digits10_impl<T>::run();
|
return internal::default_digits10_impl<T>::run();
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static inline int digits()
|
static inline int digits()
|
||||||
{
|
{
|
||||||
return internal::default_digits_impl<T>::run();
|
return internal::default_digits_impl<T>::run();
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
|
static inline int min_exponent()
|
||||||
|
{
|
||||||
|
return numext::numeric_limits<T>::min_exponent;
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
|
static inline int max_exponent()
|
||||||
|
{
|
||||||
|
return numext::numeric_limits<T>::max_exponent;
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static inline Real dummy_precision()
|
static inline Real dummy_precision()
|
||||||
{
|
{
|
||||||
// make sure to override this for floating-point types
|
// make sure to override this for floating-point types
|
||||||
return Real(0);
|
return Real(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
EIGEN_DEVICE_FUNC
|
|
||||||
static inline T highest() {
|
static inline T highest() {
|
||||||
return (numext::numeric_limits<T>::max)();
|
return (numext::numeric_limits<T>::max)();
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static inline T lowest() {
|
static inline T lowest() {
|
||||||
return IsInteger ? (numext::numeric_limits<T>::min)()
|
return IsInteger ? (numext::numeric_limits<T>::min)()
|
||||||
: static_cast<T>(-(numext::numeric_limits<T>::max)());
|
: static_cast<T>(-(numext::numeric_limits<T>::max)());
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static inline T infinity() {
|
static inline T infinity() {
|
||||||
return numext::numeric_limits<T>::infinity();
|
return numext::numeric_limits<T>::infinity();
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static inline T quiet_NaN() {
|
static inline T quiet_NaN() {
|
||||||
return numext::numeric_limits<T>::quiet_NaN();
|
return numext::numeric_limits<T>::quiet_NaN();
|
||||||
}
|
}
|
||||||
@@ -194,21 +238,35 @@ template<typename T> struct NumTraits : GenericNumTraits<T>
|
|||||||
template<> struct NumTraits<float>
|
template<> struct NumTraits<float>
|
||||||
: GenericNumTraits<float>
|
: GenericNumTraits<float>
|
||||||
{
|
{
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static inline float dummy_precision() { return 1e-5f; }
|
static inline float dummy_precision() { return 1e-5f; }
|
||||||
};
|
};
|
||||||
|
|
||||||
template<> struct NumTraits<double> : GenericNumTraits<double>
|
template<> struct NumTraits<double> : GenericNumTraits<double>
|
||||||
{
|
{
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static inline double dummy_precision() { return 1e-12; }
|
static inline double dummy_precision() { return 1e-12; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// GPU devices treat `long double` as `double`.
|
||||||
|
#ifndef EIGEN_GPU_COMPILE_PHASE
|
||||||
template<> struct NumTraits<long double>
|
template<> struct NumTraits<long double>
|
||||||
: GenericNumTraits<long double>
|
: GenericNumTraits<long double>
|
||||||
{
|
{
|
||||||
static inline long double dummy_precision() { return 1e-15l; }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
|
static inline long double dummy_precision() { return static_cast<long double>(1e-15l); }
|
||||||
|
|
||||||
|
#if defined(EIGEN_ARCH_PPC) && (__LDBL_MANT_DIG__ == 106)
|
||||||
|
// PowerPC double double causes issues with some values
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
|
static inline long double epsilon()
|
||||||
|
{
|
||||||
|
// 2^(-(__LDBL_MANT_DIG__)+1)
|
||||||
|
return static_cast<long double>(2.4651903288156618919116517665087e-32l);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
template<typename _Real> struct NumTraits<std::complex<_Real> >
|
template<typename _Real> struct NumTraits<std::complex<_Real> >
|
||||||
: GenericNumTraits<std::complex<_Real> >
|
: GenericNumTraits<std::complex<_Real> >
|
||||||
@@ -223,11 +281,11 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
|
|||||||
MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
|
MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
|
||||||
};
|
};
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
|
static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
|
static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static inline int digits10() { return NumTraits<Real>::digits10(); }
|
static inline int digits10() { return NumTraits<Real>::digits10(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -247,16 +305,17 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
|
|||||||
IsInteger = NumTraits<Scalar>::IsInteger,
|
IsInteger = NumTraits<Scalar>::IsInteger,
|
||||||
IsSigned = NumTraits<Scalar>::IsSigned,
|
IsSigned = NumTraits<Scalar>::IsSigned,
|
||||||
RequireInitialization = 1,
|
RequireInitialization = 1,
|
||||||
ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::ReadCost,
|
ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * int(NumTraits<Scalar>::ReadCost),
|
||||||
AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
|
AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * int(NumTraits<Scalar>::AddCost),
|
||||||
MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
|
MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * int(NumTraits<Scalar>::MulCost)
|
||||||
};
|
};
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
|
static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
|
static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
|
||||||
|
|
||||||
|
EIGEN_CONSTEXPR
|
||||||
static inline int digits10() { return NumTraits<Scalar>::digits10(); }
|
static inline int digits10() { return NumTraits<Scalar>::digits10(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -270,6 +329,7 @@ template<> struct NumTraits<std::string>
|
|||||||
MulCost = HugeCost
|
MulCost = HugeCost
|
||||||
};
|
};
|
||||||
|
|
||||||
|
EIGEN_CONSTEXPR
|
||||||
static inline int digits10() { return 0; }
|
static inline int digits10() { return 0; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@@ -284,6 +344,8 @@ private:
|
|||||||
// Empty specialization for void to allow template specialization based on NumTraits<T>::Real with T==void and SFINAE.
|
// Empty specialization for void to allow template specialization based on NumTraits<T>::Real with T==void and SFINAE.
|
||||||
template<> struct NumTraits<void> {};
|
template<> struct NumTraits<void> {};
|
||||||
|
|
||||||
|
template<> struct NumTraits<bool> : GenericNumTraits<bool> {};
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|
||||||
#endif // EIGEN_NUMTRAITS_H
|
#endif // EIGEN_NUMTRAITS_H
|
||||||
|
|||||||
@@ -54,12 +54,17 @@ struct packetwise_redux_traits
|
|||||||
/* Value to be returned when size==0 , by default let's return 0 */
|
/* Value to be returned when size==0 , by default let's return 0 */
|
||||||
template<typename PacketType,typename Func>
|
template<typename PacketType,typename Func>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
PacketType packetwise_redux_empty_value(const Func& ) { return pset1<PacketType>(0); }
|
PacketType packetwise_redux_empty_value(const Func& ) {
|
||||||
|
const typename unpacket_traits<PacketType>::type zero(0);
|
||||||
|
return pset1<PacketType>(zero);
|
||||||
|
}
|
||||||
|
|
||||||
/* For products the default is 1 */
|
/* For products the default is 1 */
|
||||||
template<typename PacketType,typename Scalar>
|
template<typename PacketType,typename Scalar>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
PacketType packetwise_redux_empty_value(const scalar_product_op<Scalar,Scalar>& ) { return pset1<PacketType>(1); }
|
PacketType packetwise_redux_empty_value(const scalar_product_op<Scalar,Scalar>& ) {
|
||||||
|
return pset1<PacketType>(Scalar(1));
|
||||||
|
}
|
||||||
|
|
||||||
/* Perform the actual reduction */
|
/* Perform the actual reduction */
|
||||||
template<typename Func, typename Evaluator,
|
template<typename Func, typename Evaluator,
|
||||||
@@ -145,7 +150,7 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
|
|||||||
enum {
|
enum {
|
||||||
CoeffReadCost = TraversalSize==Dynamic ? HugeCost
|
CoeffReadCost = TraversalSize==Dynamic ? HugeCost
|
||||||
: TraversalSize==0 ? 1
|
: TraversalSize==0 ? 1
|
||||||
: TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
|
: int(TraversalSize) * int(evaluator<ArgType>::CoeffReadCost) + int(CostOpType::value),
|
||||||
|
|
||||||
_ArgFlags = evaluator<ArgType>::Flags,
|
_ArgFlags = evaluator<ArgType>::Flags,
|
||||||
|
|
||||||
|
|||||||
@@ -13,10 +13,10 @@
|
|||||||
|
|
||||||
#if defined(EIGEN_INITIALIZE_MATRICES_BY_ZERO)
|
#if defined(EIGEN_INITIALIZE_MATRICES_BY_ZERO)
|
||||||
# define EIGEN_INITIALIZE_COEFFS
|
# define EIGEN_INITIALIZE_COEFFS
|
||||||
# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=Scalar(0);
|
# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(Index i=0;i<base().size();++i) coeffRef(i)=Scalar(0);
|
||||||
#elif defined(EIGEN_INITIALIZE_MATRICES_BY_NAN)
|
#elif defined(EIGEN_INITIALIZE_MATRICES_BY_NAN)
|
||||||
# define EIGEN_INITIALIZE_COEFFS
|
# define EIGEN_INITIALIZE_COEFFS
|
||||||
# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=std::numeric_limits<Scalar>::quiet_NaN();
|
# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(Index i=0;i<base().size();++i) coeffRef(i)=std::numeric_limits<Scalar>::quiet_NaN();
|
||||||
#else
|
#else
|
||||||
# undef EIGEN_INITIALIZE_COEFFS
|
# undef EIGEN_INITIALIZE_COEFFS
|
||||||
# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
|
# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
|
||||||
@@ -118,16 +118,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
|||||||
using Base::IsVectorAtCompileTime;
|
using Base::IsVectorAtCompileTime;
|
||||||
using Base::Flags;
|
using Base::Flags;
|
||||||
|
|
||||||
template<typename PlainObjectType, int MapOptions, typename StrideType> friend class Eigen::Map;
|
|
||||||
friend class Eigen::Map<Derived, Unaligned>;
|
|
||||||
typedef Eigen::Map<Derived, Unaligned> MapType;
|
typedef Eigen::Map<Derived, Unaligned> MapType;
|
||||||
friend class Eigen::Map<const Derived, Unaligned>;
|
|
||||||
typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;
|
typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;
|
||||||
#if EIGEN_MAX_ALIGN_BYTES>0
|
|
||||||
// for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice.
|
|
||||||
friend class Eigen::Map<Derived, AlignedMax>;
|
|
||||||
friend class Eigen::Map<const Derived, AlignedMax>;
|
|
||||||
#endif
|
|
||||||
typedef Eigen::Map<Derived, AlignedMax> AlignedMapType;
|
typedef Eigen::Map<Derived, AlignedMax> AlignedMapType;
|
||||||
typedef const Eigen::Map<const Derived, AlignedMax> ConstAlignedMapType;
|
typedef const Eigen::Map<const Derived, AlignedMax> ConstAlignedMapType;
|
||||||
template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };
|
template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };
|
||||||
@@ -147,10 +139,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
const Base& base() const { return *static_cast<const Base*>(this); }
|
const Base& base() const { return *static_cast<const Base*>(this); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); }
|
Index rows() const EIGEN_NOEXCEPT { return m_storage.rows(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); }
|
Index cols() const EIGEN_NOEXCEPT { return m_storage.cols(); }
|
||||||
|
|
||||||
/** This is an overloaded version of DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index,Index) const
|
/** This is an overloaded version of DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index,Index) const
|
||||||
* provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
|
* provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
|
||||||
@@ -508,8 +500,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT
|
PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT
|
||||||
{
|
{
|
||||||
using std::swap;
|
_check_template_params();
|
||||||
swap(m_storage, other.m_storage);
|
m_storage = std::move(other.m_storage);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@@ -548,7 +540,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
|||||||
m_storage.data()[1] = a1;
|
m_storage.data()[1] = a1;
|
||||||
m_storage.data()[2] = a2;
|
m_storage.data()[2] = a2;
|
||||||
m_storage.data()[3] = a3;
|
m_storage.data()[3] = a3;
|
||||||
int i = 4;
|
Index i = 4;
|
||||||
auto x = {(m_storage.data()[i++] = args, 0)...};
|
auto x = {(m_storage.data()[i++] = args, 0)...};
|
||||||
static_cast<void>(x);
|
static_cast<void>(x);
|
||||||
}
|
}
|
||||||
@@ -717,18 +709,26 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
|||||||
using Base::setConstant;
|
using Base::setConstant;
|
||||||
EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val);
|
EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val);
|
||||||
EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& val);
|
EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& val);
|
||||||
|
EIGEN_DEVICE_FUNC Derived& setConstant(NoChange_t, Index cols, const Scalar& val);
|
||||||
|
EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, NoChange_t, const Scalar& val);
|
||||||
|
|
||||||
using Base::setZero;
|
using Base::setZero;
|
||||||
EIGEN_DEVICE_FUNC Derived& setZero(Index size);
|
EIGEN_DEVICE_FUNC Derived& setZero(Index size);
|
||||||
EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols);
|
EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols);
|
||||||
|
EIGEN_DEVICE_FUNC Derived& setZero(NoChange_t, Index cols);
|
||||||
|
EIGEN_DEVICE_FUNC Derived& setZero(Index rows, NoChange_t);
|
||||||
|
|
||||||
using Base::setOnes;
|
using Base::setOnes;
|
||||||
EIGEN_DEVICE_FUNC Derived& setOnes(Index size);
|
EIGEN_DEVICE_FUNC Derived& setOnes(Index size);
|
||||||
EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols);
|
EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols);
|
||||||
|
EIGEN_DEVICE_FUNC Derived& setOnes(NoChange_t, Index cols);
|
||||||
|
EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, NoChange_t);
|
||||||
|
|
||||||
using Base::setRandom;
|
using Base::setRandom;
|
||||||
Derived& setRandom(Index size);
|
Derived& setRandom(Index size);
|
||||||
Derived& setRandom(Index rows, Index cols);
|
Derived& setRandom(Index rows, Index cols);
|
||||||
|
Derived& setRandom(NoChange_t, Index cols);
|
||||||
|
Derived& setRandom(Index rows, NoChange_t);
|
||||||
|
|
||||||
#ifdef EIGEN_PLAINOBJECTBASE_PLUGIN
|
#ifdef EIGEN_PLAINOBJECTBASE_PLUGIN
|
||||||
#include EIGEN_PLAINOBJECTBASE_PLUGIN
|
#include EIGEN_PLAINOBJECTBASE_PLUGIN
|
||||||
@@ -967,8 +967,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
static EIGEN_STRONG_INLINE void _check_template_params()
|
static EIGEN_STRONG_INLINE void _check_template_params()
|
||||||
{
|
{
|
||||||
EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
|
EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (int(Options)&RowMajor)==RowMajor)
|
||||||
&& EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (Options&RowMajor)==0)
|
&& EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (int(Options)&RowMajor)==0)
|
||||||
&& ((RowsAtCompileTime == Dynamic) || (RowsAtCompileTime >= 0))
|
&& ((RowsAtCompileTime == Dynamic) || (RowsAtCompileTime >= 0))
|
||||||
&& ((ColsAtCompileTime == Dynamic) || (ColsAtCompileTime >= 0))
|
&& ((ColsAtCompileTime == Dynamic) || (ColsAtCompileTime >= 0))
|
||||||
&& ((MaxRowsAtCompileTime == Dynamic) || (MaxRowsAtCompileTime >= 0))
|
&& ((MaxRowsAtCompileTime == Dynamic) || (MaxRowsAtCompileTime >= 0))
|
||||||
@@ -980,6 +980,17 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
|||||||
}
|
}
|
||||||
|
|
||||||
enum { IsPlainObjectBase = 1 };
|
enum { IsPlainObjectBase = 1 };
|
||||||
|
#endif
|
||||||
|
public:
|
||||||
|
// These apparently need to be down here for nvcc+icc to prevent duplicate
|
||||||
|
// Map symbol.
|
||||||
|
template<typename PlainObjectType, int MapOptions, typename StrideType> friend class Eigen::Map;
|
||||||
|
friend class Eigen::Map<Derived, Unaligned>;
|
||||||
|
friend class Eigen::Map<const Derived, Unaligned>;
|
||||||
|
#if EIGEN_MAX_ALIGN_BYTES>0
|
||||||
|
// for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice.
|
||||||
|
friend class Eigen::Map<Derived, AlignedMax>;
|
||||||
|
friend class Eigen::Map<const Derived, AlignedMax>;
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -1008,7 +1019,7 @@ struct conservative_resize_like_impl
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
// The storage order does not allow us to use reallocation.
|
// The storage order does not allow us to use reallocation.
|
||||||
typename Derived::PlainObject tmp(rows,cols);
|
Derived tmp(rows,cols);
|
||||||
const Index common_rows = numext::mini(rows, _this.rows());
|
const Index common_rows = numext::mini(rows, _this.rows());
|
||||||
const Index common_cols = numext::mini(cols, _this.cols());
|
const Index common_cols = numext::mini(cols, _this.cols());
|
||||||
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
|
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
|
||||||
@@ -1043,7 +1054,7 @@ struct conservative_resize_like_impl
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
// The storage order does not allow us to use reallocation.
|
// The storage order does not allow us to use reallocation.
|
||||||
typename Derived::PlainObject tmp(other);
|
Derived tmp(other);
|
||||||
const Index common_rows = numext::mini(tmp.rows(), _this.rows());
|
const Index common_rows = numext::mini(tmp.rows(), _this.rows());
|
||||||
const Index common_cols = numext::mini(tmp.cols(), _this.cols());
|
const Index common_cols = numext::mini(tmp.cols(), _this.cols());
|
||||||
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
|
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
|
||||||
|
|||||||
@@ -98,10 +98,10 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option,
|
|||||||
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
|
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
Index rows() const { return m_lhs.rows(); }
|
Index rows() const EIGEN_NOEXCEPT { return m_lhs.rows(); }
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
Index cols() const { return m_rhs.cols(); }
|
Index cols() const EIGEN_NOEXCEPT { return m_rhs.cols(); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
const LhsNestedCleaned& lhs() const { return m_lhs; }
|
const LhsNestedCleaned& lhs() const { return m_lhs; }
|
||||||
|
|||||||
@@ -375,6 +375,11 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
|
|||||||
template<typename Dest>
|
template<typename Dest>
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
||||||
{
|
{
|
||||||
|
// Fallback to inner product if both the lhs and rhs is a runtime vector.
|
||||||
|
if (lhs.rows() == 1 && rhs.cols() == 1) {
|
||||||
|
dst.coeffRef(0,0) += alpha * lhs.row(0).conjugate().dot(rhs.col(0));
|
||||||
|
return;
|
||||||
|
}
|
||||||
LhsNested actual_lhs(lhs);
|
LhsNested actual_lhs(lhs);
|
||||||
RhsNested actual_rhs(rhs);
|
RhsNested actual_rhs(rhs);
|
||||||
internal::gemv_dense_selector<Side,
|
internal::gemv_dense_selector<Side,
|
||||||
@@ -436,8 +441,8 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
|
|||||||
};
|
};
|
||||||
// FIXME: in c++11 this should be auto, and extractScalarFactor should also return auto
|
// FIXME: in c++11 this should be auto, and extractScalarFactor should also return auto
|
||||||
// this is important for real*complex_mat
|
// this is important for real*complex_mat
|
||||||
Scalar actualAlpha = blas_traits<Lhs>::extractScalarFactor(lhs)
|
Scalar actualAlpha = combine_scalar_factors<Scalar>(lhs, rhs);
|
||||||
* blas_traits<Rhs>::extractScalarFactor(rhs);
|
|
||||||
eval_dynamic_impl(dst,
|
eval_dynamic_impl(dst,
|
||||||
blas_traits<Lhs>::extract(lhs).template conjugateIf<ConjLhs>(),
|
blas_traits<Lhs>::extract(lhs).template conjugateIf<ConjLhs>(),
|
||||||
blas_traits<Rhs>::extract(rhs).template conjugateIf<ConjRhs>(),
|
blas_traits<Rhs>::extract(rhs).template conjugateIf<ConjRhs>(),
|
||||||
@@ -544,7 +549,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|||||||
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
|
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
|
||||||
CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
|
CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
|
||||||
: InnerSize == Dynamic ? HugeCost
|
: InnerSize == Dynamic ? HugeCost
|
||||||
: InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
|
: InnerSize * (NumTraits<Scalar>::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost))
|
||||||
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
|
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
|
||||||
|
|
||||||
Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
|
Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
|
||||||
@@ -571,7 +576,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|||||||
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
|
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
|
||||||
: (bool(RhsRowMajor) && !CanVectorizeLhs),
|
: (bool(RhsRowMajor) && !CanVectorizeLhs),
|
||||||
|
|
||||||
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
|
Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit)
|
||||||
| (EvalToRowMajor ? RowMajorBit : 0)
|
| (EvalToRowMajor ? RowMajorBit : 0)
|
||||||
// TODO enable vectorization for mixed types
|
// TODO enable vectorization for mixed types
|
||||||
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
|
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
|
||||||
@@ -592,8 +597,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|||||||
CanVectorizeInner = SameType
|
CanVectorizeInner = SameType
|
||||||
&& LhsRowMajor
|
&& LhsRowMajor
|
||||||
&& (!RhsRowMajor)
|
&& (!RhsRowMajor)
|
||||||
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
|
&& (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit)
|
||||||
&& (InnerSize % packet_traits<Scalar>::size == 0)
|
&& (int(InnerSize) % packet_traits<Scalar>::size == 0)
|
||||||
};
|
};
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
|
||||||
@@ -668,7 +673,7 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProduc
|
|||||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
||||||
{
|
{
|
||||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
||||||
{
|
{
|
||||||
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
||||||
res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
|
res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
|
||||||
@@ -678,7 +683,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
|||||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
||||||
{
|
{
|
||||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
||||||
{
|
{
|
||||||
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
||||||
res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
|
res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
|
||||||
@@ -688,7 +693,7 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
|||||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
||||||
{
|
{
|
||||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||||
{
|
{
|
||||||
res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
|
res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
|
||||||
}
|
}
|
||||||
@@ -697,7 +702,7 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
|||||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
||||||
{
|
{
|
||||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||||
{
|
{
|
||||||
res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
|
res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
|
||||||
}
|
}
|
||||||
@@ -706,7 +711,7 @@ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
|||||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||||
{
|
{
|
||||||
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
||||||
{
|
{
|
||||||
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
||||||
}
|
}
|
||||||
@@ -715,7 +720,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
|||||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||||
{
|
{
|
||||||
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
||||||
{
|
{
|
||||||
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
||||||
}
|
}
|
||||||
@@ -724,7 +729,7 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
|||||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||||
{
|
{
|
||||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
||||||
{
|
{
|
||||||
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
||||||
for(Index i = 0; i < innerDim; ++i)
|
for(Index i = 0; i < innerDim; ++i)
|
||||||
@@ -735,7 +740,7 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
|||||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||||
{
|
{
|
||||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
||||||
{
|
{
|
||||||
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
||||||
for(Index i = 0; i < innerDim; ++i)
|
for(Index i = 0; i < innerDim; ++i)
|
||||||
@@ -826,7 +831,7 @@ struct diagonal_product_evaluator_base
|
|||||||
typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
|
typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
|
||||||
public:
|
public:
|
||||||
enum {
|
enum {
|
||||||
CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
|
CoeffReadCost = int(NumTraits<Scalar>::MulCost) + int(evaluator<MatrixType>::CoeffReadCost) + int(evaluator<DiagonalType>::CoeffReadCost),
|
||||||
|
|
||||||
MatrixFlags = evaluator<MatrixType>::Flags,
|
MatrixFlags = evaluator<MatrixType>::Flags,
|
||||||
DiagFlags = evaluator<DiagonalType>::Flags,
|
DiagFlags = evaluator<DiagonalType>::Flags,
|
||||||
@@ -854,7 +859,7 @@ public:
|
|||||||
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
|
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
|
||||||
};
|
};
|
||||||
|
|
||||||
diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
|
EIGEN_DEVICE_FUNC diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
|
||||||
: m_diagImpl(diag), m_matImpl(mat)
|
: m_diagImpl(diag), m_matImpl(mat)
|
||||||
{
|
{
|
||||||
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
|
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
|
||||||
@@ -999,7 +1004,7 @@ struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
|
|||||||
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
|
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
|
||||||
|
|
||||||
template<typename Dest, typename PermutationType>
|
template<typename Dest, typename PermutationType>
|
||||||
static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
|
||||||
{
|
{
|
||||||
MatrixType mat(xpr);
|
MatrixType mat(xpr);
|
||||||
const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
|
const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
|
||||||
@@ -1053,7 +1058,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|||||||
struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
|
struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
|
||||||
{
|
{
|
||||||
template<typename Dest>
|
template<typename Dest>
|
||||||
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
||||||
{
|
{
|
||||||
permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
|
permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
|
||||||
}
|
}
|
||||||
@@ -1063,7 +1068,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|||||||
struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
|
struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
|
||||||
{
|
{
|
||||||
template<typename Dest>
|
template<typename Dest>
|
||||||
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
||||||
{
|
{
|
||||||
permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
|
permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
|
||||||
}
|
}
|
||||||
@@ -1073,7 +1078,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|||||||
struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
|
struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
|
||||||
{
|
{
|
||||||
template<typename Dest>
|
template<typename Dest>
|
||||||
static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
|
||||||
{
|
{
|
||||||
permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
|
permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
|
||||||
}
|
}
|
||||||
@@ -1083,7 +1088,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|||||||
struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
|
struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
|
||||||
{
|
{
|
||||||
template<typename Dest>
|
template<typename Dest>
|
||||||
static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
|
||||||
{
|
{
|
||||||
permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
|
permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
|
||||||
}
|
}
|
||||||
@@ -1107,7 +1112,7 @@ struct transposition_matrix_product
|
|||||||
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
|
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
|
||||||
|
|
||||||
template<typename Dest, typename TranspositionType>
|
template<typename Dest, typename TranspositionType>
|
||||||
static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
|
||||||
{
|
{
|
||||||
MatrixType mat(xpr);
|
MatrixType mat(xpr);
|
||||||
typedef typename TranspositionType::StorageIndex StorageIndex;
|
typedef typename TranspositionType::StorageIndex StorageIndex;
|
||||||
@@ -1130,7 +1135,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|||||||
struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
|
struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
|
||||||
{
|
{
|
||||||
template<typename Dest>
|
template<typename Dest>
|
||||||
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
||||||
{
|
{
|
||||||
transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
|
transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
|
||||||
}
|
}
|
||||||
@@ -1140,7 +1145,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|||||||
struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
|
struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
|
||||||
{
|
{
|
||||||
template<typename Dest>
|
template<typename Dest>
|
||||||
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
||||||
{
|
{
|
||||||
transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
|
transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
|
||||||
}
|
}
|
||||||
@@ -1151,7 +1156,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|||||||
struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
|
struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
|
||||||
{
|
{
|
||||||
template<typename Dest>
|
template<typename Dest>
|
||||||
static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
|
||||||
{
|
{
|
||||||
transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
|
transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
|
||||||
}
|
}
|
||||||
@@ -1161,7 +1166,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|||||||
struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
|
struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
|
||||||
{
|
{
|
||||||
template<typename Dest>
|
template<typename Dest>
|
||||||
static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
|
||||||
{
|
{
|
||||||
transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
|
transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -177,6 +177,42 @@ PlainObjectBase<Derived>::setRandom(Index rows, Index cols)
|
|||||||
return setRandom();
|
return setRandom();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Resizes to the given size, changing only the number of columns, and sets all
|
||||||
|
* coefficients in this expression to random values. For the parameter of type
|
||||||
|
* NoChange_t, just pass the special value \c NoChange.
|
||||||
|
*
|
||||||
|
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||||
|
* and in the [-1:1] range for floating point scalar types.
|
||||||
|
*
|
||||||
|
* \not_reentrant
|
||||||
|
*
|
||||||
|
* \sa DenseBase::setRandom(), setRandom(Index), setRandom(Index, NoChange_t), class CwiseNullaryOp, DenseBase::Random()
|
||||||
|
*/
|
||||||
|
template<typename Derived>
|
||||||
|
EIGEN_STRONG_INLINE Derived&
|
||||||
|
PlainObjectBase<Derived>::setRandom(NoChange_t, Index cols)
|
||||||
|
{
|
||||||
|
return setRandom(rows(), cols);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Resizes to the given size, changing only the number of rows, and sets all
|
||||||
|
* coefficients in this expression to random values. For the parameter of type
|
||||||
|
* NoChange_t, just pass the special value \c NoChange.
|
||||||
|
*
|
||||||
|
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||||
|
* and in the [-1:1] range for floating point scalar types.
|
||||||
|
*
|
||||||
|
* \not_reentrant
|
||||||
|
*
|
||||||
|
* \sa DenseBase::setRandom(), setRandom(Index), setRandom(NoChange_t, Index), class CwiseNullaryOp, DenseBase::Random()
|
||||||
|
*/
|
||||||
|
template<typename Derived>
|
||||||
|
EIGEN_STRONG_INLINE Derived&
|
||||||
|
PlainObjectBase<Derived>::setRandom(Index rows, NoChange_t)
|
||||||
|
{
|
||||||
|
return setRandom(rows, cols());
|
||||||
|
}
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|
||||||
#endif // EIGEN_RANDOM_H
|
#endif // EIGEN_RANDOM_H
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ public:
|
|||||||
public:
|
public:
|
||||||
enum {
|
enum {
|
||||||
Cost = Evaluator::SizeAtCompileTime == Dynamic ? HugeCost
|
Cost = Evaluator::SizeAtCompileTime == Dynamic ? HugeCost
|
||||||
: Evaluator::SizeAtCompileTime * Evaluator::CoeffReadCost + (Evaluator::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
|
: int(Evaluator::SizeAtCompileTime) * int(Evaluator::CoeffReadCost) + (Evaluator::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
|
||||||
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
|
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -331,7 +331,7 @@ struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling>
|
|||||||
enum {
|
enum {
|
||||||
PacketSize = redux_traits<Func, Evaluator>::PacketSize,
|
PacketSize = redux_traits<Func, Evaluator>::PacketSize,
|
||||||
Size = Evaluator::SizeAtCompileTime,
|
Size = Evaluator::SizeAtCompileTime,
|
||||||
VectorizedSize = (Size / PacketSize) * PacketSize
|
VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize)
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename XprType>
|
template<typename XprType>
|
||||||
@@ -419,25 +419,33 @@ DenseBase<Derived>::redux(const Func& func) const
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** \returns the minimum of all coefficients of \c *this.
|
/** \returns the minimum of all coefficients of \c *this.
|
||||||
|
* In case \c *this contains NaN, NaNPropagation determines the behavior:
|
||||||
|
* NaNPropagation == PropagateFast : undefined
|
||||||
|
* NaNPropagation == PropagateNaN : result is NaN
|
||||||
|
* NaNPropagation == PropagateNumbers : result is minimum of elements that are not NaN
|
||||||
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
||||||
* \warning the result is undefined if \c *this contains NaN.
|
|
||||||
*/
|
*/
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
|
template<int NaNPropagation>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
||||||
DenseBase<Derived>::minCoeff() const
|
DenseBase<Derived>::minCoeff() const
|
||||||
{
|
{
|
||||||
return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
|
return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar, NaNPropagation>());
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \returns the maximum of all coefficients of \c *this.
|
/** \returns the maximum of all coefficients of \c *this.
|
||||||
|
* In case \c *this contains NaN, NaNPropagation determines the behavior:
|
||||||
|
* NaNPropagation == PropagateFast : undefined
|
||||||
|
* NaNPropagation == PropagateNaN : result is NaN
|
||||||
|
* NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN
|
||||||
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
||||||
* \warning the result is undefined if \c *this contains NaN.
|
|
||||||
*/
|
*/
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
|
template<int NaNPropagation>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
||||||
DenseBase<Derived>::maxCoeff() const
|
DenseBase<Derived>::maxCoeff() const
|
||||||
{
|
{
|
||||||
return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
|
return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar, NaNPropagation>());
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \returns the sum of all coefficients of \c *this
|
/** \returns the sum of all coefficients of \c *this
|
||||||
|
|||||||
@@ -67,12 +67,12 @@ public:
|
|||||||
typedef MapBase<Derived> Base;
|
typedef MapBase<Derived> Base;
|
||||||
EIGEN_DENSE_PUBLIC_INTERFACE(RefBase)
|
EIGEN_DENSE_PUBLIC_INTERFACE(RefBase)
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline Index innerStride() const
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const
|
||||||
{
|
{
|
||||||
return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
|
return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline Index outerStride() const
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const
|
||||||
{
|
{
|
||||||
return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
|
return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
|
||||||
: IsVectorAtCompileTime ? this->size()
|
: IsVectorAtCompileTime ? this->size()
|
||||||
@@ -93,29 +93,115 @@ protected:
|
|||||||
|
|
||||||
typedef Stride<StrideType::OuterStrideAtCompileTime,StrideType::InnerStrideAtCompileTime> StrideBase;
|
typedef Stride<StrideType::OuterStrideAtCompileTime,StrideType::InnerStrideAtCompileTime> StrideBase;
|
||||||
|
|
||||||
template<typename Expression>
|
// Resolves inner stride if default 0.
|
||||||
EIGEN_DEVICE_FUNC void construct(Expression& expr)
|
static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index resolveInnerStride(Index inner) {
|
||||||
{
|
return inner == 0 ? 1 : inner;
|
||||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(PlainObjectType,Expression);
|
}
|
||||||
|
|
||||||
|
// Resolves outer stride if default 0.
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index resolveOuterStride(Index inner, Index outer, Index rows, Index cols, bool isVectorAtCompileTime, bool isRowMajor) {
|
||||||
|
return outer == 0 ? isVectorAtCompileTime ? inner * rows * cols : isRowMajor ? inner * cols : inner * rows : outer;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true if construction is valid, false if there is a stride mismatch,
|
||||||
|
// and fails if there is a size mismatch.
|
||||||
|
template<typename Expression>
|
||||||
|
EIGEN_DEVICE_FUNC bool construct(Expression& expr)
|
||||||
|
{
|
||||||
|
// Check matrix sizes. If this is a compile-time vector, we do allow
|
||||||
|
// implicitly transposing.
|
||||||
|
EIGEN_STATIC_ASSERT(
|
||||||
|
EIGEN_PREDICATE_SAME_MATRIX_SIZE(PlainObjectType, Expression)
|
||||||
|
// If it is a vector, the transpose sizes might match.
|
||||||
|
|| ( PlainObjectType::IsVectorAtCompileTime
|
||||||
|
&& ((int(PlainObjectType::RowsAtCompileTime)==Eigen::Dynamic
|
||||||
|
|| int(Expression::ColsAtCompileTime)==Eigen::Dynamic
|
||||||
|
|| int(PlainObjectType::RowsAtCompileTime)==int(Expression::ColsAtCompileTime))
|
||||||
|
&& (int(PlainObjectType::ColsAtCompileTime)==Eigen::Dynamic
|
||||||
|
|| int(Expression::RowsAtCompileTime)==Eigen::Dynamic
|
||||||
|
|| int(PlainObjectType::ColsAtCompileTime)==int(Expression::RowsAtCompileTime)))),
|
||||||
|
YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES
|
||||||
|
)
|
||||||
|
|
||||||
|
// Determine runtime rows and columns.
|
||||||
|
Index rows = expr.rows();
|
||||||
|
Index cols = expr.cols();
|
||||||
if(PlainObjectType::RowsAtCompileTime==1)
|
if(PlainObjectType::RowsAtCompileTime==1)
|
||||||
{
|
{
|
||||||
eigen_assert(expr.rows()==1 || expr.cols()==1);
|
eigen_assert(expr.rows()==1 || expr.cols()==1);
|
||||||
::new (static_cast<Base*>(this)) Base(expr.data(), 1, expr.size());
|
rows = 1;
|
||||||
|
cols = expr.size();
|
||||||
}
|
}
|
||||||
else if(PlainObjectType::ColsAtCompileTime==1)
|
else if(PlainObjectType::ColsAtCompileTime==1)
|
||||||
{
|
{
|
||||||
eigen_assert(expr.rows()==1 || expr.cols()==1);
|
eigen_assert(expr.rows()==1 || expr.cols()==1);
|
||||||
::new (static_cast<Base*>(this)) Base(expr.data(), expr.size(), 1);
|
rows = expr.size();
|
||||||
|
cols = 1;
|
||||||
}
|
}
|
||||||
else
|
// Verify that the sizes are valid.
|
||||||
::new (static_cast<Base*>(this)) Base(expr.data(), expr.rows(), expr.cols());
|
eigen_assert(
|
||||||
|
(PlainObjectType::RowsAtCompileTime == Dynamic) || (PlainObjectType::RowsAtCompileTime == rows));
|
||||||
|
eigen_assert(
|
||||||
|
(PlainObjectType::ColsAtCompileTime == Dynamic) || (PlainObjectType::ColsAtCompileTime == cols));
|
||||||
|
|
||||||
if(Expression::IsVectorAtCompileTime && (!PlainObjectType::IsVectorAtCompileTime) && ((Expression::Flags&RowMajorBit)!=(PlainObjectType::Flags&RowMajorBit)))
|
|
||||||
::new (&m_stride) StrideBase(expr.innerStride(), StrideType::InnerStrideAtCompileTime==0?0:1);
|
// If this is a vector, we might be transposing, which means that stride should swap.
|
||||||
else
|
const bool transpose = PlainObjectType::IsVectorAtCompileTime && (rows != expr.rows());
|
||||||
::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(),
|
// If the storage format differs, we also need to swap the stride.
|
||||||
StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride());
|
const bool row_major = ((PlainObjectType::Flags)&RowMajorBit) != 0;
|
||||||
|
const bool expr_row_major = (Expression::Flags&RowMajorBit) != 0;
|
||||||
|
const bool storage_differs = (row_major != expr_row_major);
|
||||||
|
|
||||||
|
const bool swap_stride = (transpose != storage_differs);
|
||||||
|
|
||||||
|
// Determine expr's actual strides, resolving any defaults if zero.
|
||||||
|
const Index expr_inner_actual = resolveInnerStride(expr.innerStride());
|
||||||
|
const Index expr_outer_actual = resolveOuterStride(expr_inner_actual,
|
||||||
|
expr.outerStride(),
|
||||||
|
expr.rows(),
|
||||||
|
expr.cols(),
|
||||||
|
Expression::IsVectorAtCompileTime != 0,
|
||||||
|
expr_row_major);
|
||||||
|
|
||||||
|
// If this is a column-major row vector or row-major column vector, the inner-stride
|
||||||
|
// is arbitrary, so set it to either the compile-time inner stride or 1.
|
||||||
|
const bool row_vector = (rows == 1);
|
||||||
|
const bool col_vector = (cols == 1);
|
||||||
|
const Index inner_stride =
|
||||||
|
( (!row_major && row_vector) || (row_major && col_vector) ) ?
|
||||||
|
( StrideType::InnerStrideAtCompileTime > 0 ? Index(StrideType::InnerStrideAtCompileTime) : 1)
|
||||||
|
: swap_stride ? expr_outer_actual : expr_inner_actual;
|
||||||
|
|
||||||
|
// If this is a column-major column vector or row-major row vector, the outer-stride
|
||||||
|
// is arbitrary, so set it to either the compile-time outer stride or vector size.
|
||||||
|
const Index outer_stride =
|
||||||
|
( (!row_major && col_vector) || (row_major && row_vector) ) ?
|
||||||
|
( StrideType::OuterStrideAtCompileTime > 0 ? Index(StrideType::OuterStrideAtCompileTime) : rows * cols * inner_stride)
|
||||||
|
: swap_stride ? expr_inner_actual : expr_outer_actual;
|
||||||
|
|
||||||
|
// Check if given inner/outer strides are compatible with compile-time strides.
|
||||||
|
const bool inner_valid = (StrideType::InnerStrideAtCompileTime == Dynamic)
|
||||||
|
|| (resolveInnerStride(Index(StrideType::InnerStrideAtCompileTime)) == inner_stride);
|
||||||
|
if (!inner_valid) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const bool outer_valid = (StrideType::OuterStrideAtCompileTime == Dynamic)
|
||||||
|
|| (resolveOuterStride(
|
||||||
|
inner_stride,
|
||||||
|
Index(StrideType::OuterStrideAtCompileTime),
|
||||||
|
rows, cols, PlainObjectType::IsVectorAtCompileTime != 0,
|
||||||
|
row_major)
|
||||||
|
== outer_stride);
|
||||||
|
if (!outer_valid) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
::new (static_cast<Base*>(this)) Base(expr.data(), rows, cols);
|
||||||
|
::new (&m_stride) StrideBase(
|
||||||
|
(StrideType::OuterStrideAtCompileTime == 0) ? 0 : outer_stride,
|
||||||
|
(StrideType::InnerStrideAtCompileTime == 0) ? 0 : inner_stride );
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
StrideBase m_stride;
|
StrideBase m_stride;
|
||||||
@@ -212,7 +298,10 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
|
|||||||
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
|
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
|
||||||
{
|
{
|
||||||
EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
|
EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
|
||||||
Base::construct(expr.derived());
|
// Construction must pass since we will not create temporary storage in the non-const case.
|
||||||
|
const bool success = Base::construct(expr.derived());
|
||||||
|
EIGEN_UNUSED_VARIABLE(success)
|
||||||
|
eigen_assert(success);
|
||||||
}
|
}
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
|
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
|
||||||
@@ -223,10 +312,13 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
|
|||||||
inline Ref(DenseBase<Derived>& expr)
|
inline Ref(DenseBase<Derived>& expr)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
EIGEN_STATIC_ASSERT(bool(internal::is_lvalue<Derived>::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
|
EIGEN_STATIC_ASSERT((static_cast<bool>(internal::is_lvalue<Derived>::value)), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
|
||||||
EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
|
EIGEN_STATIC_ASSERT((static_cast<bool>(Traits::template match<Derived>::MatchAtCompileTime)), STORAGE_LAYOUT_DOES_NOT_MATCH);
|
||||||
EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
|
EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
|
||||||
Base::construct(expr.const_cast_derived());
|
// Construction must pass since we will not create temporary storage in the non-const case.
|
||||||
|
const bool success = Base::construct(expr.const_cast_derived());
|
||||||
|
EIGEN_UNUSED_VARIABLE(success)
|
||||||
|
eigen_assert(success);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Ref)
|
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Ref)
|
||||||
@@ -267,7 +359,10 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<
|
|||||||
template<typename Expression>
|
template<typename Expression>
|
||||||
EIGEN_DEVICE_FUNC void construct(const Expression& expr,internal::true_type)
|
EIGEN_DEVICE_FUNC void construct(const Expression& expr,internal::true_type)
|
||||||
{
|
{
|
||||||
Base::construct(expr);
|
// Check if we can use the underlying expr's storage directly, otherwise call the copy version.
|
||||||
|
if (!Base::construct(expr)) {
|
||||||
|
construct(expr, internal::false_type());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Expression>
|
template<typename Expression>
|
||||||
|
|||||||
@@ -88,9 +88,9 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
|
|||||||
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
|
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); }
|
inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }
|
inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
|
|||||||
@@ -12,7 +12,6 @@
|
|||||||
#define EIGEN_RESHAPED_H
|
#define EIGEN_RESHAPED_H
|
||||||
|
|
||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
namespace internal {
|
|
||||||
|
|
||||||
/** \class Reshaped
|
/** \class Reshaped
|
||||||
* \ingroup Core_Module
|
* \ingroup Core_Module
|
||||||
@@ -44,6 +43,8 @@ namespace internal {
|
|||||||
* \sa DenseBase::reshaped(NRowsType,NColsType)
|
* \sa DenseBase::reshaped(NRowsType,NColsType)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
template<typename XprType, int Rows, int Cols, int Order>
|
template<typename XprType, int Rows, int Cols, int Order>
|
||||||
struct traits<Reshaped<XprType, Rows, Cols, Order> > : traits<XprType>
|
struct traits<Reshaped<XprType, Rows, Cols, Order> > : traits<XprType>
|
||||||
{
|
{
|
||||||
@@ -239,17 +240,17 @@ class ReshapedImpl_dense<XprType, Rows, Cols, Order, true>
|
|||||||
XprType& nestedExpression() { return m_xpr; }
|
XprType& nestedExpression() { return m_xpr; }
|
||||||
|
|
||||||
/** \sa MapBase::innerStride() */
|
/** \sa MapBase::innerStride() */
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index innerStride() const
|
inline Index innerStride() const
|
||||||
{
|
{
|
||||||
return m_xpr.innerStride();
|
return m_xpr.innerStride();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \sa MapBase::outerStride() */
|
/** \sa MapBase::outerStride() */
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index outerStride() const
|
inline Index outerStride() const
|
||||||
{
|
{
|
||||||
return ((Flags&RowMajorBit)==RowMajorBit) ? this->cols() : this->rows();
|
return (((Flags&RowMajorBit)==RowMajorBit) ? this->cols() : this->rows()) * m_xpr.innerStride();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|||||||
@@ -60,8 +60,10 @@ template<typename Derived> class ReturnByValue
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
inline void evalTo(Dest& dst) const
|
inline void evalTo(Dest& dst) const
|
||||||
{ static_cast<const Derived*>(this)->evalTo(dst); }
|
{ static_cast<const Derived*>(this)->evalTo(dst); }
|
||||||
EIGEN_DEVICE_FUNC inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
EIGEN_DEVICE_FUNC inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
|
inline Index rows() const EIGEN_NOEXCEPT { return static_cast<const Derived*>(this)->rows(); }
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
|
inline Index cols() const EIGEN_NOEXCEPT { return static_cast<const Derived*>(this)->cols(); }
|
||||||
|
|
||||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||||
#define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT
|
#define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT
|
||||||
|
|||||||
@@ -89,8 +89,10 @@ template<typename MatrixType, int Direction> class Reverse
|
|||||||
|
|
||||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse)
|
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse)
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
|
inline Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); }
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
|
inline Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline Index innerStride() const
|
EIGEN_DEVICE_FUNC inline Index innerStride() const
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -67,8 +67,10 @@ class Select : public internal::dense_xpr_base< Select<ConditionMatrixType, Then
|
|||||||
eigen_assert(m_condition.cols() == m_then.cols() && m_condition.cols() == m_else.cols());
|
eigen_assert(m_condition.cols() == m_then.cols() && m_condition.cols() == m_else.cols());
|
||||||
}
|
}
|
||||||
|
|
||||||
inline EIGEN_DEVICE_FUNC Index rows() const { return m_condition.rows(); }
|
inline EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline EIGEN_DEVICE_FUNC Index cols() const { return m_condition.cols(); }
|
Index rows() const EIGEN_NOEXCEPT { return m_condition.rows(); }
|
||||||
|
inline EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
|
Index cols() const EIGEN_NOEXCEPT { return m_condition.cols(); }
|
||||||
|
|
||||||
inline EIGEN_DEVICE_FUNC
|
inline EIGEN_DEVICE_FUNC
|
||||||
const Scalar coeff(Index i, Index j) const
|
const Scalar coeff(Index i, Index j) const
|
||||||
@@ -120,7 +122,7 @@ class Select : public internal::dense_xpr_base< Select<ConditionMatrixType, Then
|
|||||||
*/
|
*/
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
template<typename ThenDerived,typename ElseDerived>
|
template<typename ThenDerived,typename ElseDerived>
|
||||||
inline const Select<Derived,ThenDerived,ElseDerived>
|
inline EIGEN_DEVICE_FUNC const Select<Derived,ThenDerived,ElseDerived>
|
||||||
DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
|
DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
|
||||||
const DenseBase<ElseDerived>& elseMatrix) const
|
const DenseBase<ElseDerived>& elseMatrix) const
|
||||||
{
|
{
|
||||||
@@ -134,7 +136,7 @@ DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
|
|||||||
*/
|
*/
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
template<typename ThenDerived>
|
template<typename ThenDerived>
|
||||||
inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
|
inline EIGEN_DEVICE_FUNC const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
|
||||||
DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
|
DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
|
||||||
const typename ThenDerived::Scalar& elseScalar) const
|
const typename ThenDerived::Scalar& elseScalar) const
|
||||||
{
|
{
|
||||||
@@ -149,7 +151,7 @@ DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
|
|||||||
*/
|
*/
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
template<typename ElseDerived>
|
template<typename ElseDerived>
|
||||||
inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
|
inline EIGEN_DEVICE_FUNC const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
|
||||||
DenseBase<Derived>::select(const typename ElseDerived::Scalar& thenScalar,
|
DenseBase<Derived>::select(const typename ElseDerived::Scalar& thenScalar,
|
||||||
const DenseBase<ElseDerived>& elseMatrix) const
|
const DenseBase<ElseDerived>& elseMatrix) const
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
|
|||||||
enum {
|
enum {
|
||||||
Mode = internal::traits<SelfAdjointView>::Mode,
|
Mode = internal::traits<SelfAdjointView>::Mode,
|
||||||
Flags = internal::traits<SelfAdjointView>::Flags,
|
Flags = internal::traits<SelfAdjointView>::Flags,
|
||||||
TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0)
|
TransposeMode = ((int(Mode) & int(Upper)) ? Lower : 0) | ((int(Mode) & int(Lower)) ? Upper : 0)
|
||||||
};
|
};
|
||||||
typedef typename MatrixType::PlainObject PlainObject;
|
typedef typename MatrixType::PlainObject PlainObject;
|
||||||
|
|
||||||
@@ -76,14 +76,14 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
|
|||||||
EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY);
|
EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index rows() const { return m_matrix.rows(); }
|
inline Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index cols() const { return m_matrix.cols(); }
|
inline Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index outerStride() const { return m_matrix.outerStride(); }
|
inline Index outerStride() const EIGEN_NOEXCEPT { return m_matrix.outerStride(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index innerStride() const { return m_matrix.innerStride(); }
|
inline Index innerStride() const EIGEN_NOEXCEPT { return m_matrix.innerStride(); }
|
||||||
|
|
||||||
/** \sa MatrixBase::coeff()
|
/** \sa MatrixBase::coeff()
|
||||||
* \warning the coordinates must fit into the referenced triangular part
|
* \warning the coordinates must fit into the referenced triangular part
|
||||||
|
|||||||
@@ -69,8 +69,8 @@ public:
|
|||||||
: m_dec(dec), m_rhs(rhs)
|
: m_dec(dec), m_rhs(rhs)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC Index rows() const { return m_dec.cols(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_dec.cols(); }
|
||||||
EIGEN_DEVICE_FUNC Index cols() const { return m_rhs.cols(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_rhs.cols(); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC const Decomposition& dec() const { return m_dec; }
|
EIGEN_DEVICE_FUNC const Decomposition& dec() const { return m_dec; }
|
||||||
EIGEN_DEVICE_FUNC const RhsType& rhs() const { return m_rhs; }
|
EIGEN_DEVICE_FUNC const RhsType& rhs() const { return m_rhs; }
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>
|
|||||||
typedef blas_traits<Lhs> LhsProductTraits;
|
typedef blas_traits<Lhs> LhsProductTraits;
|
||||||
typedef typename LhsProductTraits::ExtractType ActualLhsType;
|
typedef typename LhsProductTraits::ExtractType ActualLhsType;
|
||||||
typedef Map<Matrix<RhsScalar,Dynamic,1>, Aligned> MappedRhs;
|
typedef Map<Matrix<RhsScalar,Dynamic,1>, Aligned> MappedRhs;
|
||||||
static void run(const Lhs& lhs, Rhs& rhs)
|
static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)
|
||||||
{
|
{
|
||||||
ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
|
ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
|
||||||
|
|
||||||
@@ -85,7 +85,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
|
|||||||
typedef blas_traits<Lhs> LhsProductTraits;
|
typedef blas_traits<Lhs> LhsProductTraits;
|
||||||
typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
|
typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
|
||||||
|
|
||||||
static void run(const Lhs& lhs, Rhs& rhs)
|
static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)
|
||||||
{
|
{
|
||||||
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsProductTraits::extract(lhs);
|
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsProductTraits::extract(lhs);
|
||||||
|
|
||||||
@@ -118,7 +118,7 @@ struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,false> {
|
|||||||
DiagIndex = IsLower ? LoopIndex : Size - LoopIndex - 1,
|
DiagIndex = IsLower ? LoopIndex : Size - LoopIndex - 1,
|
||||||
StartIndex = IsLower ? 0 : DiagIndex+1
|
StartIndex = IsLower ? 0 : DiagIndex+1
|
||||||
};
|
};
|
||||||
static void run(const Lhs& lhs, Rhs& rhs)
|
static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)
|
||||||
{
|
{
|
||||||
if (LoopIndex>0)
|
if (LoopIndex>0)
|
||||||
rhs.coeffRef(DiagIndex) -= lhs.row(DiagIndex).template segment<LoopIndex>(StartIndex).transpose()
|
rhs.coeffRef(DiagIndex) -= lhs.row(DiagIndex).template segment<LoopIndex>(StartIndex).transpose()
|
||||||
@@ -133,18 +133,18 @@ struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,false> {
|
|||||||
|
|
||||||
template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size>
|
template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size>
|
||||||
struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,true> {
|
struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,true> {
|
||||||
static void run(const Lhs&, Rhs&) {}
|
static EIGEN_DEVICE_FUNC void run(const Lhs&, Rhs&) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Lhs, typename Rhs, int Mode>
|
template<typename Lhs, typename Rhs, int Mode>
|
||||||
struct triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,CompleteUnrolling,1> {
|
struct triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,CompleteUnrolling,1> {
|
||||||
static void run(const Lhs& lhs, Rhs& rhs)
|
static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)
|
||||||
{ triangular_solver_unroller<Lhs,Rhs,Mode,0,Rhs::SizeAtCompileTime>::run(lhs,rhs); }
|
{ triangular_solver_unroller<Lhs,Rhs,Mode,0,Rhs::SizeAtCompileTime>::run(lhs,rhs); }
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Lhs, typename Rhs, int Mode>
|
template<typename Lhs, typename Rhs, int Mode>
|
||||||
struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
|
struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
|
||||||
static void run(const Lhs& lhs, Rhs& rhs)
|
static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)
|
||||||
{
|
{
|
||||||
Transpose<const Lhs> trLhs(lhs);
|
Transpose<const Lhs> trLhs(lhs);
|
||||||
Transpose<Rhs> trRhs(rhs);
|
Transpose<Rhs> trRhs(rhs);
|
||||||
@@ -168,7 +168,7 @@ EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(c
|
|||||||
{
|
{
|
||||||
OtherDerived& other = _other.const_cast_derived();
|
OtherDerived& other = _other.const_cast_derived();
|
||||||
eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );
|
eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );
|
||||||
eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
|
eigen_assert((!(int(Mode) & int(ZeroDiag))) && bool(int(Mode) & (int(Upper) | int(Lower))));
|
||||||
// If solving for a 0x0 matrix, nothing to do, simply return.
|
// If solving for a 0x0 matrix, nothing to do, simply return.
|
||||||
if (derived().cols() == 0)
|
if (derived().cols() == 0)
|
||||||
return;
|
return;
|
||||||
@@ -213,8 +213,8 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv
|
|||||||
: m_triangularMatrix(tri), m_rhs(rhs)
|
: m_triangularMatrix(tri), m_rhs(rhs)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
inline Index rows() const { return m_rhs.rows(); }
|
inline EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_rhs.rows(); }
|
||||||
inline Index cols() const { return m_rhs.cols(); }
|
inline EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_rhs.cols(); }
|
||||||
|
|
||||||
template<typename Dest> inline void evalTo(Dest& dst) const
|
template<typename Dest> inline void evalTo(Dest& dst) const
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -110,7 +110,7 @@ class SolverBase : public EigenBase<Derived>
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** \internal the return type of transpose() */
|
/** \internal the return type of transpose() */
|
||||||
typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
|
typedef Transpose<const Derived> ConstTransposeReturnType;
|
||||||
/** \returns an expression of the transposed of the factored matrix.
|
/** \returns an expression of the transposed of the factored matrix.
|
||||||
*
|
*
|
||||||
* A typical usage is to solve for the transposed problem A^T x = b:
|
* A typical usage is to solve for the transposed problem A^T x = b:
|
||||||
@@ -118,15 +118,15 @@ class SolverBase : public EigenBase<Derived>
|
|||||||
*
|
*
|
||||||
* \sa adjoint(), solve()
|
* \sa adjoint(), solve()
|
||||||
*/
|
*/
|
||||||
inline ConstTransposeReturnType transpose() const
|
inline const ConstTransposeReturnType transpose() const
|
||||||
{
|
{
|
||||||
return ConstTransposeReturnType(derived());
|
return ConstTransposeReturnType(derived());
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \internal the return type of adjoint() */
|
/** \internal the return type of adjoint() */
|
||||||
typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
|
typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
|
||||||
CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
|
CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const ConstTransposeReturnType>,
|
||||||
ConstTransposeReturnType
|
const ConstTransposeReturnType
|
||||||
>::type AdjointReturnType;
|
>::type AdjointReturnType;
|
||||||
/** \returns an expression of the adjoint of the factored matrix
|
/** \returns an expression of the adjoint of the factored matrix
|
||||||
*
|
*
|
||||||
@@ -137,7 +137,7 @@ class SolverBase : public EigenBase<Derived>
|
|||||||
*
|
*
|
||||||
* \sa transpose(), solve()
|
* \sa transpose(), solve()
|
||||||
*/
|
*/
|
||||||
inline AdjointReturnType adjoint() const
|
inline const AdjointReturnType adjoint() const
|
||||||
{
|
{
|
||||||
return AdjointReturnType(derived().transpose());
|
return AdjointReturnType(derived().transpose());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -123,13 +123,7 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
|
|||||||
using std::pow;
|
using std::pow;
|
||||||
using std::sqrt;
|
using std::sqrt;
|
||||||
using std::abs;
|
using std::abs;
|
||||||
const Derived& vec(_vec.derived());
|
|
||||||
static bool initialized = false;
|
|
||||||
static RealScalar b1, b2, s1m, s2m, rbig, relerr;
|
|
||||||
if(!initialized)
|
|
||||||
{
|
|
||||||
int ibeta, it, iemin, iemax, iexp;
|
|
||||||
RealScalar eps;
|
|
||||||
// This program calculates the machine-dependent constants
|
// This program calculates the machine-dependent constants
|
||||||
// bl, b2, slm, s2m, relerr overfl
|
// bl, b2, slm, s2m, relerr overfl
|
||||||
// from the "basic" machine-dependent numbers
|
// from the "basic" machine-dependent numbers
|
||||||
@@ -138,26 +132,19 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
|
|||||||
// For portability, the PORT subprograms "ilmaeh" and "rlmach"
|
// For portability, the PORT subprograms "ilmaeh" and "rlmach"
|
||||||
// are used. For any specific computer, each of the assignment
|
// are used. For any specific computer, each of the assignment
|
||||||
// statements can be replaced
|
// statements can be replaced
|
||||||
ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
|
static const int ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
|
||||||
it = NumTraits<RealScalar>::digits(); // number of base-beta digits in mantissa
|
static const int it = NumTraits<RealScalar>::digits(); // number of base-beta digits in mantissa
|
||||||
iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
|
static const int iemin = NumTraits<RealScalar>::min_exponent(); // minimum exponent
|
||||||
iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
|
static const int iemax = NumTraits<RealScalar>::max_exponent(); // maximum exponent
|
||||||
rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number
|
static const RealScalar rbig = NumTraits<RealScalar>::highest(); // largest floating-point number
|
||||||
|
static const RealScalar b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(-((1-iemin)/2)))); // lower boundary of midrange
|
||||||
|
static const RealScalar b2 = RealScalar(pow(RealScalar(ibeta),RealScalar((iemax + 1 - it)/2))); // upper boundary of midrange
|
||||||
|
static const RealScalar s1m = RealScalar(pow(RealScalar(ibeta),RealScalar((2-iemin)/2))); // scaling factor for lower range
|
||||||
|
static const RealScalar s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(- ((iemax+it)/2)))); // scaling factor for upper range
|
||||||
|
static const RealScalar eps = RealScalar(pow(double(ibeta), 1-it));
|
||||||
|
static const RealScalar relerr = sqrt(eps); // tolerance for neglecting asml
|
||||||
|
|
||||||
iexp = -((1-iemin)/2);
|
const Derived& vec(_vec.derived());
|
||||||
b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
|
|
||||||
iexp = (iemax + 1 - it)/2;
|
|
||||||
b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
|
|
||||||
|
|
||||||
iexp = (2-iemin)/2;
|
|
||||||
s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
|
|
||||||
iexp = - ((iemax+it)/2);
|
|
||||||
s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
|
|
||||||
|
|
||||||
eps = RealScalar(pow(double(ibeta), 1-it));
|
|
||||||
relerr = sqrt(eps); // tolerance for neglecting asml
|
|
||||||
initialized = true;
|
|
||||||
}
|
|
||||||
Index n = vec.size();
|
Index n = vec.size();
|
||||||
RealScalar ab2 = b2 / RealScalar(n);
|
RealScalar ab2 = b2 / RealScalar(n);
|
||||||
RealScalar asml = RealScalar(0);
|
RealScalar asml = RealScalar(0);
|
||||||
@@ -166,9 +153,9 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
|
|||||||
|
|
||||||
for(Index j=0; j<vec.outerSize(); ++j)
|
for(Index j=0; j<vec.outerSize(); ++j)
|
||||||
{
|
{
|
||||||
for(typename Derived::InnerIterator it(vec, j); it; ++it)
|
for(typename Derived::InnerIterator iter(vec, j); iter; ++iter)
|
||||||
{
|
{
|
||||||
RealScalar ax = abs(it.value());
|
RealScalar ax = abs(iter.value());
|
||||||
if(ax > ab2) abig += numext::abs2(ax*s2m);
|
if(ax > ab2) abig += numext::abs2(ax*s2m);
|
||||||
else if(ax < b1) asml += numext::abs2(ax*s1m);
|
else if(ax < b1) asml += numext::abs2(ax*s1m);
|
||||||
else amed += numext::abs2(ax);
|
else amed += numext::abs2(ax);
|
||||||
|
|||||||
@@ -7,6 +7,9 @@
|
|||||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
|
#ifndef EIGEN_STLITERATORS_H
|
||||||
|
#define EIGEN_STLITERATORS_H
|
||||||
|
|
||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
@@ -30,10 +33,10 @@ public:
|
|||||||
typedef Index difference_type;
|
typedef Index difference_type;
|
||||||
typedef std::random_access_iterator_tag iterator_category;
|
typedef std::random_access_iterator_tag iterator_category;
|
||||||
|
|
||||||
indexed_based_stl_iterator_base() : mp_xpr(0), m_index(0) {}
|
indexed_based_stl_iterator_base() EIGEN_NO_THROW : mp_xpr(0), m_index(0) {}
|
||||||
indexed_based_stl_iterator_base(XprType& xpr, Index index) : mp_xpr(&xpr), m_index(index) {}
|
indexed_based_stl_iterator_base(XprType& xpr, Index index) EIGEN_NO_THROW : mp_xpr(&xpr), m_index(index) {}
|
||||||
|
|
||||||
indexed_based_stl_iterator_base(const non_const_iterator& other)
|
indexed_based_stl_iterator_base(const non_const_iterator& other) EIGEN_NO_THROW
|
||||||
: mp_xpr(other.mp_xpr), m_index(other.m_index)
|
: mp_xpr(other.mp_xpr), m_index(other.m_index)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
@@ -93,6 +96,85 @@ protected:
|
|||||||
Index m_index;
|
Index m_index;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<typename Derived>
|
||||||
|
class indexed_based_stl_reverse_iterator_base
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
typedef indexed_based_stl_iterator_traits<Derived> traits;
|
||||||
|
typedef typename traits::XprType XprType;
|
||||||
|
typedef indexed_based_stl_reverse_iterator_base<typename traits::non_const_iterator> non_const_iterator;
|
||||||
|
typedef indexed_based_stl_reverse_iterator_base<typename traits::const_iterator> const_iterator;
|
||||||
|
typedef typename internal::conditional<internal::is_const<XprType>::value,non_const_iterator,const_iterator>::type other_iterator;
|
||||||
|
// NOTE: in C++03 we cannot declare friend classes through typedefs because we need to write friend class:
|
||||||
|
friend class indexed_based_stl_reverse_iterator_base<typename traits::const_iterator>;
|
||||||
|
friend class indexed_based_stl_reverse_iterator_base<typename traits::non_const_iterator>;
|
||||||
|
public:
|
||||||
|
typedef Index difference_type;
|
||||||
|
typedef std::random_access_iterator_tag iterator_category;
|
||||||
|
|
||||||
|
indexed_based_stl_reverse_iterator_base() : mp_xpr(0), m_index(0) {}
|
||||||
|
indexed_based_stl_reverse_iterator_base(XprType& xpr, Index index) : mp_xpr(&xpr), m_index(index) {}
|
||||||
|
|
||||||
|
indexed_based_stl_reverse_iterator_base(const non_const_iterator& other)
|
||||||
|
: mp_xpr(other.mp_xpr), m_index(other.m_index)
|
||||||
|
{}
|
||||||
|
|
||||||
|
indexed_based_stl_reverse_iterator_base& operator=(const non_const_iterator& other)
|
||||||
|
{
|
||||||
|
mp_xpr = other.mp_xpr;
|
||||||
|
m_index = other.m_index;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
Derived& operator++() { --m_index; return derived(); }
|
||||||
|
Derived& operator--() { ++m_index; return derived(); }
|
||||||
|
|
||||||
|
Derived operator++(int) { Derived prev(derived()); operator++(); return prev;}
|
||||||
|
Derived operator--(int) { Derived prev(derived()); operator--(); return prev;}
|
||||||
|
|
||||||
|
friend Derived operator+(const indexed_based_stl_reverse_iterator_base& a, Index b) { Derived ret(a.derived()); ret += b; return ret; }
|
||||||
|
friend Derived operator-(const indexed_based_stl_reverse_iterator_base& a, Index b) { Derived ret(a.derived()); ret -= b; return ret; }
|
||||||
|
friend Derived operator+(Index a, const indexed_based_stl_reverse_iterator_base& b) { Derived ret(b.derived()); ret += a; return ret; }
|
||||||
|
friend Derived operator-(Index a, const indexed_based_stl_reverse_iterator_base& b) { Derived ret(b.derived()); ret -= a; return ret; }
|
||||||
|
|
||||||
|
Derived& operator+=(Index b) { m_index -= b; return derived(); }
|
||||||
|
Derived& operator-=(Index b) { m_index += b; return derived(); }
|
||||||
|
|
||||||
|
difference_type operator-(const indexed_based_stl_reverse_iterator_base& other) const
|
||||||
|
{
|
||||||
|
eigen_assert(mp_xpr == other.mp_xpr);
|
||||||
|
return other.m_index - m_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
difference_type operator-(const other_iterator& other) const
|
||||||
|
{
|
||||||
|
eigen_assert(mp_xpr == other.mp_xpr);
|
||||||
|
return other.m_index - m_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator==(const indexed_based_stl_reverse_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index == other.m_index; }
|
||||||
|
bool operator!=(const indexed_based_stl_reverse_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index != other.m_index; }
|
||||||
|
bool operator< (const indexed_based_stl_reverse_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index > other.m_index; }
|
||||||
|
bool operator<=(const indexed_based_stl_reverse_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index >= other.m_index; }
|
||||||
|
bool operator> (const indexed_based_stl_reverse_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index < other.m_index; }
|
||||||
|
bool operator>=(const indexed_based_stl_reverse_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index <= other.m_index; }
|
||||||
|
|
||||||
|
bool operator==(const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index == other.m_index; }
|
||||||
|
bool operator!=(const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index != other.m_index; }
|
||||||
|
bool operator< (const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index > other.m_index; }
|
||||||
|
bool operator<=(const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index >= other.m_index; }
|
||||||
|
bool operator> (const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index < other.m_index; }
|
||||||
|
bool operator>=(const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index <= other.m_index; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
|
||||||
|
Derived& derived() { return static_cast<Derived&>(*this); }
|
||||||
|
const Derived& derived() const { return static_cast<const Derived&>(*this); }
|
||||||
|
|
||||||
|
XprType *mp_xpr;
|
||||||
|
Index m_index;
|
||||||
|
};
|
||||||
|
|
||||||
template<typename XprType>
|
template<typename XprType>
|
||||||
class pointer_based_stl_iterator
|
class pointer_based_stl_iterator
|
||||||
{
|
{
|
||||||
@@ -111,17 +193,17 @@ public:
|
|||||||
typedef typename internal::conditional<bool(is_lvalue), value_type&, const value_type&>::type reference;
|
typedef typename internal::conditional<bool(is_lvalue), value_type&, const value_type&>::type reference;
|
||||||
|
|
||||||
|
|
||||||
pointer_based_stl_iterator() : m_ptr(0) {}
|
pointer_based_stl_iterator() EIGEN_NO_THROW : m_ptr(0) {}
|
||||||
pointer_based_stl_iterator(XprType& xpr, Index index) : m_incr(xpr.innerStride())
|
pointer_based_stl_iterator(XprType& xpr, Index index) EIGEN_NO_THROW : m_incr(xpr.innerStride())
|
||||||
{
|
{
|
||||||
m_ptr = xpr.data() + index * m_incr.value();
|
m_ptr = xpr.data() + index * m_incr.value();
|
||||||
}
|
}
|
||||||
|
|
||||||
pointer_based_stl_iterator(const non_const_iterator& other)
|
pointer_based_stl_iterator(const non_const_iterator& other) EIGEN_NO_THROW
|
||||||
: m_ptr(other.m_ptr), m_incr(other.m_incr)
|
: m_ptr(other.m_ptr), m_incr(other.m_incr)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
pointer_based_stl_iterator& operator=(const non_const_iterator& other)
|
pointer_based_stl_iterator& operator=(const non_const_iterator& other) EIGEN_NO_THROW
|
||||||
{
|
{
|
||||||
m_ptr = other.m_ptr;
|
m_ptr = other.m_ptr;
|
||||||
m_incr.setValue(other.m_incr);
|
m_incr.setValue(other.m_incr);
|
||||||
@@ -267,6 +349,54 @@ public:
|
|||||||
pointer operator->() const { return (*mp_xpr).template subVector<Direction>(m_index); }
|
pointer operator->() const { return (*mp_xpr).template subVector<Direction>(m_index); }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<typename _XprType, DirectionType Direction>
|
||||||
|
struct indexed_based_stl_iterator_traits<subvector_stl_reverse_iterator<_XprType,Direction> >
|
||||||
|
{
|
||||||
|
typedef _XprType XprType;
|
||||||
|
typedef subvector_stl_reverse_iterator<typename internal::remove_const<XprType>::type, Direction> non_const_iterator;
|
||||||
|
typedef subvector_stl_reverse_iterator<typename internal::add_const<XprType>::type, Direction> const_iterator;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename XprType, DirectionType Direction>
|
||||||
|
class subvector_stl_reverse_iterator : public indexed_based_stl_reverse_iterator_base<subvector_stl_reverse_iterator<XprType,Direction> >
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
|
||||||
|
enum { is_lvalue = internal::is_lvalue<XprType>::value };
|
||||||
|
|
||||||
|
typedef indexed_based_stl_reverse_iterator_base<subvector_stl_reverse_iterator> Base;
|
||||||
|
using Base::m_index;
|
||||||
|
using Base::mp_xpr;
|
||||||
|
|
||||||
|
typedef typename internal::conditional<Direction==Vertical,typename XprType::ColXpr,typename XprType::RowXpr>::type SubVectorType;
|
||||||
|
typedef typename internal::conditional<Direction==Vertical,typename XprType::ConstColXpr,typename XprType::ConstRowXpr>::type ConstSubVectorType;
|
||||||
|
|
||||||
|
|
||||||
|
public:
|
||||||
|
typedef typename internal::conditional<bool(is_lvalue), SubVectorType, ConstSubVectorType>::type reference;
|
||||||
|
typedef typename reference::PlainObject value_type;
|
||||||
|
|
||||||
|
private:
|
||||||
|
class subvector_stl_reverse_iterator_ptr
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
subvector_stl_reverse_iterator_ptr(const reference &subvector) : m_subvector(subvector) {}
|
||||||
|
reference* operator->() { return &m_subvector; }
|
||||||
|
private:
|
||||||
|
reference m_subvector;
|
||||||
|
};
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef subvector_stl_reverse_iterator_ptr pointer;
|
||||||
|
|
||||||
|
subvector_stl_reverse_iterator() : Base() {}
|
||||||
|
subvector_stl_reverse_iterator(XprType& xpr, Index index) : Base(xpr,index) {}
|
||||||
|
|
||||||
|
reference operator*() const { return (*mp_xpr).template subVector<Direction>(m_index); }
|
||||||
|
reference operator[](Index i) const { return (*mp_xpr).template subVector<Direction>(m_index+i); }
|
||||||
|
pointer operator->() const { return (*mp_xpr).template subVector<Direction>(m_index); }
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace internal
|
} // namespace internal
|
||||||
|
|
||||||
|
|
||||||
@@ -329,3 +459,5 @@ inline typename DenseBase<Derived>::const_iterator DenseBase<Derived>::cend() co
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Eigen
|
} // namespace Eigen
|
||||||
|
|
||||||
|
#endif // EIGEN_STLITERATORS_H
|
||||||
|
|||||||
@@ -38,6 +38,14 @@ namespace Eigen {
|
|||||||
* \include Map_general_stride.cpp
|
* \include Map_general_stride.cpp
|
||||||
* Output: \verbinclude Map_general_stride.out
|
* Output: \verbinclude Map_general_stride.out
|
||||||
*
|
*
|
||||||
|
* Both strides can be negative. However, a negative stride of -1 cannot be specified at compile time
|
||||||
|
* because of the ambiguity with Dynamic which is defined to -1 (historically, negative strides were
|
||||||
|
* not allowed).
|
||||||
|
*
|
||||||
|
* Note that for compile-time vectors (ColsAtCompileTime==1 or RowsAtCompile==1),
|
||||||
|
* the inner stride is the pointer increment between two consecutive elements,
|
||||||
|
* regardless of storage layout.
|
||||||
|
*
|
||||||
* \sa class InnerStride, class OuterStride, \ref TopicStorageOrders
|
* \sa class InnerStride, class OuterStride, \ref TopicStorageOrders
|
||||||
*/
|
*/
|
||||||
template<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime>
|
template<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime>
|
||||||
@@ -55,6 +63,8 @@ class Stride
|
|||||||
Stride()
|
Stride()
|
||||||
: m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
|
: m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
|
||||||
{
|
{
|
||||||
|
// FIXME: for Eigen 4 we should use DynamicIndex instead of Dynamic.
|
||||||
|
// FIXME: for Eigen 4 we should also unify this API with fix<>
|
||||||
eigen_assert(InnerStrideAtCompileTime != Dynamic && OuterStrideAtCompileTime != Dynamic);
|
eigen_assert(InnerStrideAtCompileTime != Dynamic && OuterStrideAtCompileTime != Dynamic);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -63,7 +73,6 @@ class Stride
|
|||||||
Stride(Index outerStride, Index innerStride)
|
Stride(Index outerStride, Index innerStride)
|
||||||
: m_outer(outerStride), m_inner(innerStride)
|
: m_outer(outerStride), m_inner(innerStride)
|
||||||
{
|
{
|
||||||
eigen_assert(innerStride>=0 && outerStride>=0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Copy constructor */
|
/** Copy constructor */
|
||||||
@@ -73,10 +82,10 @@ class Stride
|
|||||||
{}
|
{}
|
||||||
|
|
||||||
/** \returns the outer stride */
|
/** \returns the outer stride */
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index outer() const { return m_outer.value(); }
|
inline Index outer() const { return m_outer.value(); }
|
||||||
/** \returns the inner stride */
|
/** \returns the inner stride */
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index inner() const { return m_inner.value(); }
|
inline Index inner() const { return m_inner.value(); }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|||||||
@@ -65,10 +65,10 @@ template<typename MatrixType> class Transpose
|
|||||||
|
|
||||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
|
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
Index rows() const { return m_matrix.cols(); }
|
Index rows() const EIGEN_NOEXCEPT { return m_matrix.cols(); }
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||||
Index cols() const { return m_matrix.rows(); }
|
Index cols() const EIGEN_NOEXCEPT { return m_matrix.rows(); }
|
||||||
|
|
||||||
/** \returns the nested expression */
|
/** \returns the nested expression */
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
@@ -153,6 +153,8 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
|
|||||||
{
|
{
|
||||||
return derived().nestedExpression().coeffRef(index);
|
return derived().nestedExpression().coeffRef(index);
|
||||||
}
|
}
|
||||||
|
protected:
|
||||||
|
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(TransposeImpl)
|
||||||
};
|
};
|
||||||
|
|
||||||
/** \returns an expression of the transpose of *this.
|
/** \returns an expression of the transpose of *this.
|
||||||
@@ -176,7 +178,7 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
|
|||||||
* \sa transposeInPlace(), adjoint() */
|
* \sa transposeInPlace(), adjoint() */
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
Transpose<Derived>
|
typename DenseBase<Derived>::TransposeReturnType
|
||||||
DenseBase<Derived>::transpose()
|
DenseBase<Derived>::transpose()
|
||||||
{
|
{
|
||||||
return TransposeReturnType(derived());
|
return TransposeReturnType(derived());
|
||||||
@@ -189,7 +191,7 @@ DenseBase<Derived>::transpose()
|
|||||||
* \sa transposeInPlace(), adjoint() */
|
* \sa transposeInPlace(), adjoint() */
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
typename DenseBase<Derived>::ConstTransposeReturnType
|
const typename DenseBase<Derived>::ConstTransposeReturnType
|
||||||
DenseBase<Derived>::transpose() const
|
DenseBase<Derived>::transpose() const
|
||||||
{
|
{
|
||||||
return ConstTransposeReturnType(derived());
|
return ConstTransposeReturnType(derived());
|
||||||
@@ -241,7 +243,6 @@ struct inplace_transpose_selector<MatrixType,true,false> { // square matrix
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// TODO: vectorized path is currently limited to LargestPacketSize x LargestPacketSize cases only.
|
|
||||||
template<typename MatrixType>
|
template<typename MatrixType>
|
||||||
struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x PacketSize
|
struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x PacketSize
|
||||||
static void run(MatrixType& m) {
|
static void run(MatrixType& m) {
|
||||||
@@ -258,16 +259,66 @@ struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x Packet
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template <typename MatrixType, Index Alignment>
|
||||||
|
void BlockedInPlaceTranspose(MatrixType& m) {
|
||||||
|
typedef typename MatrixType::Scalar Scalar;
|
||||||
|
typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;
|
||||||
|
const Index PacketSize = internal::packet_traits<Scalar>::size;
|
||||||
|
eigen_assert(m.rows() == m.cols());
|
||||||
|
int row_start = 0;
|
||||||
|
for (; row_start + PacketSize <= m.rows(); row_start += PacketSize) {
|
||||||
|
for (int col_start = row_start; col_start + PacketSize <= m.cols(); col_start += PacketSize) {
|
||||||
|
PacketBlock<Packet> A;
|
||||||
|
if (row_start == col_start) {
|
||||||
|
for (Index i=0; i<PacketSize; ++i)
|
||||||
|
A.packet[i] = m.template packetByOuterInner<Alignment>(row_start + i,col_start);
|
||||||
|
internal::ptranspose(A);
|
||||||
|
for (Index i=0; i<PacketSize; ++i)
|
||||||
|
m.template writePacket<Alignment>(m.rowIndexByOuterInner(row_start + i, col_start), m.colIndexByOuterInner(row_start + i,col_start), A.packet[i]);
|
||||||
|
} else {
|
||||||
|
PacketBlock<Packet> B;
|
||||||
|
for (Index i=0; i<PacketSize; ++i) {
|
||||||
|
A.packet[i] = m.template packetByOuterInner<Alignment>(row_start + i,col_start);
|
||||||
|
B.packet[i] = m.template packetByOuterInner<Alignment>(col_start + i, row_start);
|
||||||
|
}
|
||||||
|
internal::ptranspose(A);
|
||||||
|
internal::ptranspose(B);
|
||||||
|
for (Index i=0; i<PacketSize; ++i) {
|
||||||
|
m.template writePacket<Alignment>(m.rowIndexByOuterInner(row_start + i, col_start), m.colIndexByOuterInner(row_start + i,col_start), B.packet[i]);
|
||||||
|
m.template writePacket<Alignment>(m.rowIndexByOuterInner(col_start + i, row_start), m.colIndexByOuterInner(col_start + i,row_start), A.packet[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (Index row = row_start; row < m.rows(); ++row) {
|
||||||
|
m.matrix().row(row).head(row).swap(
|
||||||
|
m.matrix().col(row).head(row).transpose());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template<typename MatrixType,bool MatchPacketSize>
|
template<typename MatrixType,bool MatchPacketSize>
|
||||||
struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non square matrix
|
struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non square or dynamic matrix
|
||||||
static void run(MatrixType& m) {
|
static void run(MatrixType& m) {
|
||||||
if (m.rows()==m.cols())
|
typedef typename MatrixType::Scalar Scalar;
|
||||||
m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose().template triangularView<StrictlyUpper>());
|
if (m.rows() == m.cols()) {
|
||||||
|
const Index PacketSize = internal::packet_traits<Scalar>::size;
|
||||||
|
if (!NumTraits<Scalar>::IsComplex && m.rows() >= PacketSize) {
|
||||||
|
if ((m.rows() % PacketSize) == 0)
|
||||||
|
BlockedInPlaceTranspose<MatrixType,internal::evaluator<MatrixType>::Alignment>(m);
|
||||||
else
|
else
|
||||||
|
BlockedInPlaceTranspose<MatrixType,Unaligned>(m);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose().template triangularView<StrictlyUpper>());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
m = m.transpose().eval();
|
m = m.transpose().eval();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
/** This is the "in place" version of transpose(): it replaces \c *this by its own transpose.
|
/** This is the "in place" version of transpose(): it replaces \c *this by its own transpose.
|
||||||
|
|||||||
@@ -23,7 +23,9 @@ class TranspositionsBase
|
|||||||
typedef typename IndicesType::Scalar StorageIndex;
|
typedef typename IndicesType::Scalar StorageIndex;
|
||||||
typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
|
typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
Derived& derived() { return *static_cast<Derived*>(this); }
|
Derived& derived() { return *static_cast<Derived*>(this); }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||||
|
|
||||||
/** Copies the \a other transpositions into \c *this */
|
/** Copies the \a other transpositions into \c *this */
|
||||||
@@ -35,13 +37,17 @@ class TranspositionsBase
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** \returns the number of transpositions */
|
/** \returns the number of transpositions */
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
Index size() const { return indices().size(); }
|
Index size() const { return indices().size(); }
|
||||||
/** \returns the number of rows of the equivalent permutation matrix */
|
/** \returns the number of rows of the equivalent permutation matrix */
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
Index rows() const { return indices().size(); }
|
Index rows() const { return indices().size(); }
|
||||||
/** \returns the number of columns of the equivalent permutation matrix */
|
/** \returns the number of columns of the equivalent permutation matrix */
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
Index cols() const { return indices().size(); }
|
Index cols() const { return indices().size(); }
|
||||||
|
|
||||||
/** Direct access to the underlying index vector */
|
/** Direct access to the underlying index vector */
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
inline const StorageIndex& coeff(Index i) const { return indices().coeff(i); }
|
inline const StorageIndex& coeff(Index i) const { return indices().coeff(i); }
|
||||||
/** Direct access to the underlying index vector */
|
/** Direct access to the underlying index vector */
|
||||||
inline StorageIndex& coeffRef(Index i) { return indices().coeffRef(i); }
|
inline StorageIndex& coeffRef(Index i) { return indices().coeffRef(i); }
|
||||||
@@ -55,8 +61,10 @@ class TranspositionsBase
|
|||||||
inline StorageIndex& operator[](Index i) { return indices()(i); }
|
inline StorageIndex& operator[](Index i) { return indices()(i); }
|
||||||
|
|
||||||
/** const version of indices(). */
|
/** const version of indices(). */
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
const IndicesType& indices() const { return derived().indices(); }
|
const IndicesType& indices() const { return derived().indices(); }
|
||||||
/** \returns a reference to the stored array representing the transpositions. */
|
/** \returns a reference to the stored array representing the transpositions. */
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
IndicesType& indices() { return derived().indices(); }
|
IndicesType& indices() { return derived().indices(); }
|
||||||
|
|
||||||
/** Resizes to given size. */
|
/** Resizes to given size. */
|
||||||
@@ -178,8 +186,10 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim
|
|||||||
{}
|
{}
|
||||||
|
|
||||||
/** const version of indices(). */
|
/** const version of indices(). */
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
const IndicesType& indices() const { return m_indices; }
|
const IndicesType& indices() const { return m_indices; }
|
||||||
/** \returns a reference to the stored array representing the transpositions. */
|
/** \returns a reference to the stored array representing the transpositions. */
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
IndicesType& indices() { return m_indices; }
|
IndicesType& indices() { return m_indices; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
@@ -237,9 +247,11 @@ class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,P
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/** const version of indices(). */
|
/** const version of indices(). */
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
const IndicesType& indices() const { return m_indices; }
|
const IndicesType& indices() const { return m_indices; }
|
||||||
|
|
||||||
/** \returns a reference to the stored array representing the transpositions. */
|
/** \returns a reference to the stored array representing the transpositions. */
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
IndicesType& indices() { return m_indices; }
|
IndicesType& indices() { return m_indices; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
@@ -279,9 +291,11 @@ class TranspositionsWrapper
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** const version of indices(). */
|
/** const version of indices(). */
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
const IndicesType& indices() const { return m_indices; }
|
const IndicesType& indices() const { return m_indices; }
|
||||||
|
|
||||||
/** \returns a reference to the stored array representing the transpositions. */
|
/** \returns a reference to the stored array representing the transpositions. */
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
IndicesType& indices() { return m_indices; }
|
IndicesType& indices() { return m_indices; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
@@ -335,9 +349,12 @@ class Transpose<TranspositionsBase<TranspositionsDerived> >
|
|||||||
|
|
||||||
explicit Transpose(const TranspositionType& t) : m_transpositions(t) {}
|
explicit Transpose(const TranspositionType& t) : m_transpositions(t) {}
|
||||||
|
|
||||||
Index size() const { return m_transpositions.size(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
Index rows() const { return m_transpositions.size(); }
|
Index size() const EIGEN_NOEXCEPT { return m_transpositions.size(); }
|
||||||
Index cols() const { return m_transpositions.size(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
|
Index rows() const EIGEN_NOEXCEPT { return m_transpositions.size(); }
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
|
Index cols() const EIGEN_NOEXCEPT { return m_transpositions.size(); }
|
||||||
|
|
||||||
/** \returns the \a matrix with the inverse transpositions applied to the columns.
|
/** \returns the \a matrix with the inverse transpositions applied to the columns.
|
||||||
*/
|
*/
|
||||||
@@ -357,6 +374,7 @@ class Transpose<TranspositionsBase<TranspositionsDerived> >
|
|||||||
return Product<Transpose, OtherDerived, AliasFreeProduct>(*this, matrix.derived());
|
return Product<Transpose, OtherDerived, AliasFreeProduct>(*this, matrix.derived());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
const TranspositionType& nestedExpression() const { return m_transpositions; }
|
const TranspositionType& nestedExpression() const { return m_transpositions; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|||||||
@@ -53,16 +53,16 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
|
|||||||
typedef Derived const& Nested;
|
typedef Derived const& Nested;
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
inline TriangularBase() { eigen_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); }
|
inline TriangularBase() { eigen_assert(!((int(Mode) & int(UnitDiag)) && (int(Mode) & int(ZeroDiag)))); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index rows() const { return derived().rows(); }
|
inline Index rows() const EIGEN_NOEXCEPT { return derived().rows(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index cols() const { return derived().cols(); }
|
inline Index cols() const EIGEN_NOEXCEPT { return derived().cols(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index outerStride() const { return derived().outerStride(); }
|
inline Index outerStride() const EIGEN_NOEXCEPT { return derived().outerStride(); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index innerStride() const { return derived().innerStride(); }
|
inline Index innerStride() const EIGEN_NOEXCEPT { return derived().innerStride(); }
|
||||||
|
|
||||||
// dummy resize function
|
// dummy resize function
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
@@ -100,12 +100,10 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
|
|||||||
return coeffRef(row,col);
|
return coeffRef(row,col);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
inline Derived& derived() { return *static_cast<Derived*>(this); }
|
inline Derived& derived() { return *static_cast<Derived*>(this); }
|
||||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
|
||||||
|
|
||||||
template<typename DenseDerived>
|
template<typename DenseDerived>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
@@ -219,16 +217,14 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
|||||||
explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix)
|
explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
using Base::operator=;
|
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TriangularView)
|
||||||
TriangularView& operator=(const TriangularView &other)
|
|
||||||
{ return Base::operator=(other); }
|
|
||||||
|
|
||||||
/** \copydoc EigenBase::rows() */
|
/** \copydoc EigenBase::rows() */
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index rows() const { return m_matrix.rows(); }
|
inline Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); }
|
||||||
/** \copydoc EigenBase::cols() */
|
/** \copydoc EigenBase::cols() */
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
inline Index cols() const { return m_matrix.cols(); }
|
inline Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); }
|
||||||
|
|
||||||
/** \returns a const reference to the nested expression */
|
/** \returns a const reference to the nested expression */
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
@@ -444,7 +440,6 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
TriangularViewType& operator=(const MatrixBase<OtherDerived>& other);
|
TriangularViewType& operator=(const MatrixBase<OtherDerived>& other);
|
||||||
|
|
||||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
TriangularViewType& operator=(const TriangularViewImpl& other)
|
TriangularViewType& operator=(const TriangularViewImpl& other)
|
||||||
{ return *this = other.derived().nestedExpression(); }
|
{ return *this = other.derived().nestedExpression(); }
|
||||||
@@ -458,7 +453,6 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
|
|||||||
/** \deprecated */
|
/** \deprecated */
|
||||||
EIGEN_DEPRECATED EIGEN_DEVICE_FUNC
|
EIGEN_DEPRECATED EIGEN_DEVICE_FUNC
|
||||||
void lazyAssign(const MatrixBase<OtherDerived>& other);
|
void lazyAssign(const MatrixBase<OtherDerived>& other);
|
||||||
#endif
|
|
||||||
|
|
||||||
/** Efficient triangular matrix times vector/matrix product */
|
/** Efficient triangular matrix times vector/matrix product */
|
||||||
template<typename OtherDerived>
|
template<typename OtherDerived>
|
||||||
@@ -526,11 +520,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
|
|||||||
/** Swaps the coefficients of the common triangular parts of two matrices */
|
/** Swaps the coefficients of the common triangular parts of two matrices */
|
||||||
template<typename OtherDerived>
|
template<typename OtherDerived>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
|
||||||
void swap(TriangularBase<OtherDerived> &other)
|
|
||||||
#else
|
|
||||||
void swap(TriangularBase<OtherDerived> const & other)
|
void swap(TriangularBase<OtherDerived> const & other)
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
EIGEN_STATIC_ASSERT_LVALUE(OtherDerived);
|
EIGEN_STATIC_ASSERT_LVALUE(OtherDerived);
|
||||||
call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
|
call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
|
||||||
@@ -555,8 +545,13 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename ProductType>
|
template <typename ProductType>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha,
|
||||||
EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha, bool beta);
|
bool beta);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
EIGEN_DEFAULT_COPY_CONSTRUCTOR(TriangularViewImpl)
|
||||||
|
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(TriangularViewImpl)
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/***************************************************************************
|
/***************************************************************************
|
||||||
@@ -817,7 +812,7 @@ void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src, con
|
|||||||
enum {
|
enum {
|
||||||
unroll = DstXprType::SizeAtCompileTime != Dynamic
|
unroll = DstXprType::SizeAtCompileTime != Dynamic
|
||||||
&& SrcEvaluatorType::CoeffReadCost < HugeCost
|
&& SrcEvaluatorType::CoeffReadCost < HugeCost
|
||||||
&& DstXprType::SizeAtCompileTime * (DstEvaluatorType::CoeffReadCost+SrcEvaluatorType::CoeffReadCost) / 2 <= EIGEN_UNROLLING_LIMIT
|
&& DstXprType::SizeAtCompileTime * (int(DstEvaluatorType::CoeffReadCost) + int(SrcEvaluatorType::CoeffReadCost)) / 2 <= EIGEN_UNROLLING_LIMIT
|
||||||
};
|
};
|
||||||
|
|
||||||
triangular_assignment_loop<Kernel, Mode, unroll ? int(DstXprType::SizeAtCompileTime) : Dynamic, SetOpposite>::run(kernel);
|
triangular_assignment_loop<Kernel, Mode, unroll ? int(DstXprType::SizeAtCompileTime) : Dynamic, SetOpposite>::run(kernel);
|
||||||
@@ -851,7 +846,7 @@ struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense>
|
|||||||
{
|
{
|
||||||
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
|
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
|
||||||
{
|
{
|
||||||
call_triangular_assignment_loop<SrcXprType::Mode, (SrcXprType::Mode&SelfAdjoint)==0>(dst, src, func);
|
call_triangular_assignment_loop<SrcXprType::Mode, (int(SrcXprType::Mode) & int(SelfAdjoint)) == 0>(dst, src, func);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -949,7 +944,7 @@ template<typename DenseDerived>
|
|||||||
EIGEN_DEVICE_FUNC void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
|
EIGEN_DEVICE_FUNC void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
|
||||||
{
|
{
|
||||||
other.derived().resize(this->rows(), this->cols());
|
other.derived().resize(this->rows(), this->cols());
|
||||||
internal::call_triangular_assignment_loop<Derived::Mode,(Derived::Mode&SelfAdjoint)==0 /* SetOpposite */>(other.derived(), derived().nestedExpression());
|
internal::call_triangular_assignment_loop<Derived::Mode, (int(Derived::Mode) & int(SelfAdjoint)) == 0 /* SetOpposite */>(other.derived(), derived().nestedExpression());
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
@@ -966,7 +961,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_
|
|||||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||||
dst.resize(dstRows, dstCols);
|
dst.resize(dstRows, dstCols);
|
||||||
|
|
||||||
dst._assignProduct(src, 1, 0);
|
dst._assignProduct(src, Scalar(1), false);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -977,7 +972,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_ass
|
|||||||
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
||||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,typename SrcXprType::Scalar> &)
|
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,typename SrcXprType::Scalar> &)
|
||||||
{
|
{
|
||||||
dst._assignProduct(src, 1, 1);
|
dst._assignProduct(src, Scalar(1), true);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -988,7 +983,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_ass
|
|||||||
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
||||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,typename SrcXprType::Scalar> &)
|
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,typename SrcXprType::Scalar> &)
|
||||||
{
|
{
|
||||||
dst._assignProduct(src, -1, 1);
|
dst._assignProduct(src, Scalar(-1), true);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -65,10 +65,10 @@ class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<Matri
|
|||||||
explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())
|
explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())
|
||||||
: m_matrix(mat), m_functor(func) {}
|
: m_matrix(mat), m_functor(func) {}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
Index rows() const { return (Direction==Vertical ? 1 : m_matrix.rows()); }
|
Index rows() const EIGEN_NOEXCEPT { return (Direction==Vertical ? 1 : m_matrix.rows()); }
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||||
Index cols() const { return (Direction==Horizontal ? 1 : m_matrix.cols()); }
|
Index cols() const EIGEN_NOEXCEPT { return (Direction==Horizontal ? 1 : m_matrix.cols()); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
typename MatrixType::Nested nestedExpression() const { return m_matrix; }
|
typename MatrixType::Nested nestedExpression() const { return m_matrix; }
|
||||||
@@ -281,6 +281,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
|||||||
#else
|
#else
|
||||||
typedef internal::subvector_stl_iterator<ExpressionType, DirectionType(Direction)> iterator;
|
typedef internal::subvector_stl_iterator<ExpressionType, DirectionType(Direction)> iterator;
|
||||||
typedef internal::subvector_stl_iterator<const ExpressionType, DirectionType(Direction)> const_iterator;
|
typedef internal::subvector_stl_iterator<const ExpressionType, DirectionType(Direction)> const_iterator;
|
||||||
|
typedef internal::subvector_stl_reverse_iterator<ExpressionType, DirectionType(Direction)> reverse_iterator;
|
||||||
|
typedef internal::subvector_stl_reverse_iterator<const ExpressionType, DirectionType(Direction)> const_reverse_iterator;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/** returns an iterator to the first row (rowwise) or column (colwise) of the nested expression.
|
/** returns an iterator to the first row (rowwise) or column (colwise) of the nested expression.
|
||||||
@@ -292,6 +294,15 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
|||||||
/** const version of begin() */
|
/** const version of begin() */
|
||||||
const_iterator cbegin() const { return const_iterator(m_matrix, 0); }
|
const_iterator cbegin() const { return const_iterator(m_matrix, 0); }
|
||||||
|
|
||||||
|
/** returns a reverse iterator to the last row (rowwise) or column (colwise) of the nested expression.
|
||||||
|
* \sa rend(), crbegin()
|
||||||
|
*/
|
||||||
|
reverse_iterator rbegin() { return reverse_iterator (m_matrix, m_matrix.template subVectors<DirectionType(Direction)>()-1); }
|
||||||
|
/** const version of rbegin() */
|
||||||
|
const_reverse_iterator rbegin() const { return const_reverse_iterator (m_matrix, m_matrix.template subVectors<DirectionType(Direction)>()-1); }
|
||||||
|
/** const version of rbegin() */
|
||||||
|
const_reverse_iterator crbegin() const { return const_reverse_iterator (m_matrix, m_matrix.template subVectors<DirectionType(Direction)>()-1); }
|
||||||
|
|
||||||
/** returns an iterator to the row (resp. column) following the last row (resp. column) of the nested expression
|
/** returns an iterator to the row (resp. column) following the last row (resp. column) of the nested expression
|
||||||
* \sa begin(), cend()
|
* \sa begin(), cend()
|
||||||
*/
|
*/
|
||||||
@@ -301,6 +312,15 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
|||||||
/** const version of end() */
|
/** const version of end() */
|
||||||
const_iterator cend() const { return const_iterator(m_matrix, m_matrix.template subVectors<DirectionType(Direction)>()); }
|
const_iterator cend() const { return const_iterator(m_matrix, m_matrix.template subVectors<DirectionType(Direction)>()); }
|
||||||
|
|
||||||
|
/** returns a reverse iterator to the row (resp. column) before the first row (resp. column) of the nested expression
|
||||||
|
* \sa rbegin(), crend()
|
||||||
|
*/
|
||||||
|
reverse_iterator rend() { return reverse_iterator (m_matrix, -1); }
|
||||||
|
/** const version of rend() */
|
||||||
|
const_reverse_iterator rend() const { return const_reverse_iterator (m_matrix, -1); }
|
||||||
|
/** const version of rend() */
|
||||||
|
const_reverse_iterator crend() const { return const_reverse_iterator (m_matrix, -1); }
|
||||||
|
|
||||||
/** \returns a row or column vector expression of \c *this reduxed by \a func
|
/** \returns a row or column vector expression of \c *this reduxed by \a func
|
||||||
*
|
*
|
||||||
* The template parameter \a BinaryOp is the type of the functor
|
* The template parameter \a BinaryOp is the type of the functor
|
||||||
@@ -719,6 +739,10 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
const HNormalizedReturnType hnormalized() const;
|
const HNormalizedReturnType hnormalized() const;
|
||||||
|
|
||||||
|
# ifdef EIGEN_VECTORWISEOP_PLUGIN
|
||||||
|
# include EIGEN_VECTORWISEOP_PLUGIN
|
||||||
|
# endif
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Index redux_length() const
|
Index redux_length() const
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -79,9 +79,9 @@ public:
|
|||||||
CoeffReadCost = internal::evaluator<XprType>::CoeffReadCost
|
CoeffReadCost = internal::evaluator<XprType>::CoeffReadCost
|
||||||
};
|
};
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_xpr.rows(); }
|
||||||
EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_xpr.cols(); }
|
||||||
EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_xpr.size(); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||||
{ return m_evaluator.coeff(row, col); }
|
{ return m_evaluator.coeff(row, col); }
|
||||||
@@ -124,7 +124,7 @@ void DenseBase<Derived>::visit(Visitor& visitor) const
|
|||||||
|
|
||||||
enum {
|
enum {
|
||||||
unroll = SizeAtCompileTime != Dynamic
|
unroll = SizeAtCompileTime != Dynamic
|
||||||
&& SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost <= EIGEN_UNROLLING_LIMIT
|
&& SizeAtCompileTime * int(ThisEvaluator::CoeffReadCost) + (SizeAtCompileTime-1) * int(internal::functor_traits<Visitor>::Cost) <= EIGEN_UNROLLING_LIMIT
|
||||||
};
|
};
|
||||||
return internal::visitor_impl<Visitor, ThisEvaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(thisEval, visitor);
|
return internal::visitor_impl<Visitor, ThisEvaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(thisEval, visitor);
|
||||||
}
|
}
|
||||||
@@ -157,7 +157,7 @@ struct coeff_visitor
|
|||||||
*
|
*
|
||||||
* \sa DenseBase::minCoeff(Index*, Index*)
|
* \sa DenseBase::minCoeff(Index*, Index*)
|
||||||
*/
|
*/
|
||||||
template <typename Derived>
|
template <typename Derived, int NaNPropagation>
|
||||||
struct min_coeff_visitor : coeff_visitor<Derived>
|
struct min_coeff_visitor : coeff_visitor<Derived>
|
||||||
{
|
{
|
||||||
typedef typename Derived::Scalar Scalar;
|
typedef typename Derived::Scalar Scalar;
|
||||||
@@ -173,8 +173,40 @@ struct min_coeff_visitor : coeff_visitor<Derived>
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Scalar>
|
template <typename Derived>
|
||||||
struct functor_traits<min_coeff_visitor<Scalar> > {
|
struct min_coeff_visitor<Derived, PropagateNumbers> : coeff_visitor<Derived>
|
||||||
|
{
|
||||||
|
typedef typename Derived::Scalar Scalar;
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
void operator() (const Scalar& value, Index i, Index j)
|
||||||
|
{
|
||||||
|
if((numext::isnan)(this->res) || (!(numext::isnan)(value) && value < this->res))
|
||||||
|
{
|
||||||
|
this->res = value;
|
||||||
|
this->row = i;
|
||||||
|
this->col = j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Derived>
|
||||||
|
struct min_coeff_visitor<Derived, PropagateNaN> : coeff_visitor<Derived>
|
||||||
|
{
|
||||||
|
typedef typename Derived::Scalar Scalar;
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
void operator() (const Scalar& value, Index i, Index j)
|
||||||
|
{
|
||||||
|
if((numext::isnan)(value) || value < this->res)
|
||||||
|
{
|
||||||
|
this->res = value;
|
||||||
|
this->row = i;
|
||||||
|
this->col = j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Scalar, int NaNPropagation>
|
||||||
|
struct functor_traits<min_coeff_visitor<Scalar, NaNPropagation> > {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<Scalar>::AddCost
|
Cost = NumTraits<Scalar>::AddCost
|
||||||
};
|
};
|
||||||
@@ -185,7 +217,7 @@ struct functor_traits<min_coeff_visitor<Scalar> > {
|
|||||||
*
|
*
|
||||||
* \sa DenseBase::maxCoeff(Index*, Index*)
|
* \sa DenseBase::maxCoeff(Index*, Index*)
|
||||||
*/
|
*/
|
||||||
template <typename Derived>
|
template <typename Derived, int NaNPropagation>
|
||||||
struct max_coeff_visitor : coeff_visitor<Derived>
|
struct max_coeff_visitor : coeff_visitor<Derived>
|
||||||
{
|
{
|
||||||
typedef typename Derived::Scalar Scalar;
|
typedef typename Derived::Scalar Scalar;
|
||||||
@@ -201,8 +233,40 @@ struct max_coeff_visitor : coeff_visitor<Derived>
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Scalar>
|
template <typename Derived>
|
||||||
struct functor_traits<max_coeff_visitor<Scalar> > {
|
struct max_coeff_visitor<Derived, PropagateNumbers> : coeff_visitor<Derived>
|
||||||
|
{
|
||||||
|
typedef typename Derived::Scalar Scalar;
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
void operator() (const Scalar& value, Index i, Index j)
|
||||||
|
{
|
||||||
|
if((numext::isnan)(this->res) || (!(numext::isnan)(value) && value > this->res))
|
||||||
|
{
|
||||||
|
this->res = value;
|
||||||
|
this->row = i;
|
||||||
|
this->col = j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Derived>
|
||||||
|
struct max_coeff_visitor<Derived, PropagateNaN> : coeff_visitor<Derived>
|
||||||
|
{
|
||||||
|
typedef typename Derived::Scalar Scalar;
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
void operator() (const Scalar& value, Index i, Index j)
|
||||||
|
{
|
||||||
|
if((numext::isnan)(value) || value > this->res)
|
||||||
|
{
|
||||||
|
this->res = value;
|
||||||
|
this->row = i;
|
||||||
|
this->col = j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Scalar, int NaNPropagation>
|
||||||
|
struct functor_traits<max_coeff_visitor<Scalar, NaNPropagation> > {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<Scalar>::AddCost
|
Cost = NumTraits<Scalar>::AddCost
|
||||||
};
|
};
|
||||||
@@ -213,21 +277,23 @@ struct functor_traits<max_coeff_visitor<Scalar> > {
|
|||||||
/** \fn DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
|
/** \fn DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
|
||||||
* \returns the minimum of all coefficients of *this and puts in *row and *col its location.
|
* \returns the minimum of all coefficients of *this and puts in *row and *col its location.
|
||||||
*
|
*
|
||||||
|
* In case \c *this contains NaN, NaNPropagation determines the behavior:
|
||||||
|
* NaNPropagation == PropagateFast : undefined
|
||||||
|
* NaNPropagation == PropagateNaN : result is NaN
|
||||||
|
* NaNPropagation == PropagateNumbers : result is minimum of elements that are not NaN
|
||||||
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
||||||
*
|
*
|
||||||
* \warning the result is undefined if \c *this contains NaN.
|
|
||||||
*
|
|
||||||
* \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff()
|
* \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff()
|
||||||
*/
|
*/
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
template<typename IndexType>
|
template<int NaNPropagation, typename IndexType>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
typename internal::traits<Derived>::Scalar
|
typename internal::traits<Derived>::Scalar
|
||||||
DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
|
DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
|
||||||
{
|
{
|
||||||
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
||||||
|
|
||||||
internal::min_coeff_visitor<Derived> minVisitor;
|
internal::min_coeff_visitor<Derived, NaNPropagation> minVisitor;
|
||||||
this->visit(minVisitor);
|
this->visit(minVisitor);
|
||||||
*rowId = minVisitor.row;
|
*rowId = minVisitor.row;
|
||||||
if (colId) *colId = minVisitor.col;
|
if (colId) *colId = minVisitor.col;
|
||||||
@@ -236,14 +302,16 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
|
|||||||
|
|
||||||
/** \returns the minimum of all coefficients of *this and puts in *index its location.
|
/** \returns the minimum of all coefficients of *this and puts in *index its location.
|
||||||
*
|
*
|
||||||
|
* In case \c *this contains NaN, NaNPropagation determines the behavior:
|
||||||
|
* NaNPropagation == PropagateFast : undefined
|
||||||
|
* NaNPropagation == PropagateNaN : result is NaN
|
||||||
|
* NaNPropagation == PropagateNumbers : result is minimum of elements that are not NaN
|
||||||
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
||||||
*
|
*
|
||||||
* \warning the result is undefined if \c *this contains NaN.
|
|
||||||
*
|
|
||||||
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::minCoeff()
|
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::minCoeff()
|
||||||
*/
|
*/
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
template<typename IndexType>
|
template<int NaNPropagation, typename IndexType>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
typename internal::traits<Derived>::Scalar
|
typename internal::traits<Derived>::Scalar
|
||||||
DenseBase<Derived>::minCoeff(IndexType* index) const
|
DenseBase<Derived>::minCoeff(IndexType* index) const
|
||||||
@@ -251,7 +319,7 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
|
|||||||
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
||||||
|
|
||||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||||
internal::min_coeff_visitor<Derived> minVisitor;
|
internal::min_coeff_visitor<Derived, NaNPropagation> minVisitor;
|
||||||
this->visit(minVisitor);
|
this->visit(minVisitor);
|
||||||
*index = IndexType((RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row);
|
*index = IndexType((RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row);
|
||||||
return minVisitor.res;
|
return minVisitor.res;
|
||||||
@@ -260,21 +328,23 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
|
|||||||
/** \fn DenseBase<Derived>::maxCoeff(IndexType* rowId, IndexType* colId) const
|
/** \fn DenseBase<Derived>::maxCoeff(IndexType* rowId, IndexType* colId) const
|
||||||
* \returns the maximum of all coefficients of *this and puts in *row and *col its location.
|
* \returns the maximum of all coefficients of *this and puts in *row and *col its location.
|
||||||
*
|
*
|
||||||
|
* In case \c *this contains NaN, NaNPropagation determines the behavior:
|
||||||
|
* NaNPropagation == PropagateFast : undefined
|
||||||
|
* NaNPropagation == PropagateNaN : result is NaN
|
||||||
|
* NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN
|
||||||
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
||||||
*
|
*
|
||||||
* \warning the result is undefined if \c *this contains NaN.
|
|
||||||
*
|
|
||||||
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff()
|
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff()
|
||||||
*/
|
*/
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
template<typename IndexType>
|
template<int NaNPropagation, typename IndexType>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
typename internal::traits<Derived>::Scalar
|
typename internal::traits<Derived>::Scalar
|
||||||
DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
|
DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
|
||||||
{
|
{
|
||||||
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
||||||
|
|
||||||
internal::max_coeff_visitor<Derived> maxVisitor;
|
internal::max_coeff_visitor<Derived, NaNPropagation> maxVisitor;
|
||||||
this->visit(maxVisitor);
|
this->visit(maxVisitor);
|
||||||
*rowPtr = maxVisitor.row;
|
*rowPtr = maxVisitor.row;
|
||||||
if (colPtr) *colPtr = maxVisitor.col;
|
if (colPtr) *colPtr = maxVisitor.col;
|
||||||
@@ -283,14 +353,16 @@ DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
|
|||||||
|
|
||||||
/** \returns the maximum of all coefficients of *this and puts in *index its location.
|
/** \returns the maximum of all coefficients of *this and puts in *index its location.
|
||||||
*
|
*
|
||||||
|
* In case \c *this contains NaN, NaNPropagation determines the behavior:
|
||||||
|
* NaNPropagation == PropagateFast : undefined
|
||||||
|
* NaNPropagation == PropagateNaN : result is NaN
|
||||||
|
* NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN
|
||||||
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
||||||
*
|
*
|
||||||
* \warning the result is undefined if \c *this contains NaN.
|
|
||||||
*
|
|
||||||
* \sa DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
|
* \sa DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
|
||||||
*/
|
*/
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
template<typename IndexType>
|
template<int NaNPropagation, typename IndexType>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
typename internal::traits<Derived>::Scalar
|
typename internal::traits<Derived>::Scalar
|
||||||
DenseBase<Derived>::maxCoeff(IndexType* index) const
|
DenseBase<Derived>::maxCoeff(IndexType* index) const
|
||||||
@@ -298,7 +370,7 @@ DenseBase<Derived>::maxCoeff(IndexType* index) const
|
|||||||
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
||||||
|
|
||||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||||
internal::max_coeff_visitor<Derived> maxVisitor;
|
internal::max_coeff_visitor<Derived, NaNPropagation> maxVisitor;
|
||||||
this->visit(maxVisitor);
|
this->visit(maxVisitor);
|
||||||
*index = (RowsAtCompileTime==1) ? maxVisitor.col : maxVisitor.row;
|
*index = (RowsAtCompileTime==1) ? maxVisitor.col : maxVisitor.row;
|
||||||
return maxVisitor.res;
|
return maxVisitor.res;
|
||||||
|
|||||||
@@ -38,6 +38,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
|||||||
HasMul = 1,
|
HasMul = 1,
|
||||||
HasDiv = 1,
|
HasDiv = 1,
|
||||||
HasNegate = 1,
|
HasNegate = 1,
|
||||||
|
HasSqrt = 1,
|
||||||
HasAbs = 0,
|
HasAbs = 0,
|
||||||
HasAbs2 = 0,
|
HasAbs2 = 0,
|
||||||
HasMin = 0,
|
HasMin = 0,
|
||||||
@@ -47,7 +48,18 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; };
|
template<> struct unpacket_traits<Packet4cf> {
|
||||||
|
typedef std::complex<float> type;
|
||||||
|
typedef Packet2cf half;
|
||||||
|
typedef Packet8f as_real;
|
||||||
|
enum {
|
||||||
|
size=4,
|
||||||
|
alignment=Aligned32,
|
||||||
|
vectorizable=true,
|
||||||
|
masked_load_available=false,
|
||||||
|
masked_store_available=false
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
|
||||||
@@ -76,7 +88,6 @@ EIGEN_STRONG_INLINE Packet4cf pcmp_eq(const Packet4cf& a, const Packet4cf& b) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cf ptrue<Packet4cf>(const Packet4cf& a) { return Packet4cf(ptrue(Packet8f(a.v))); }
|
template<> EIGEN_STRONG_INLINE Packet4cf ptrue<Packet4cf>(const Packet4cf& a) { return Packet4cf(ptrue(Packet8f(a.v))); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cf pnot<Packet4cf>(const Packet4cf& a) { return Packet4cf(pnot(Packet8f(a.v))); }
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cf pand <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_and_ps(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet4cf pand <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_and_ps(a.v,b.v)); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cf por <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_or_ps(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet4cf por <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_or_ps(a.v,b.v)); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cf pxor <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_xor_ps(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet4cf pxor <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_xor_ps(a.v,b.v)); }
|
||||||
@@ -88,7 +99,9 @@ template<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<fl
|
|||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)
|
template<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)
|
||||||
{
|
{
|
||||||
return Packet4cf(_mm256_castpd_ps(_mm256_broadcast_sd((const double*)(const void*)&from)));
|
const float re = std::real(from);
|
||||||
|
const float im = std::imag(from);
|
||||||
|
return Packet4cf(_mm256_set_ps(im, re, im, re, im, re, im, re));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)
|
template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)
|
||||||
@@ -150,79 +163,18 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(const Packe
|
|||||||
Packet2cf(_mm256_extractf128_ps(a.v,1))));
|
Packet2cf(_mm256_extractf128_ps(a.v,1))));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cf preduxp<Packet4cf>(const Packet4cf* vecs)
|
|
||||||
{
|
|
||||||
Packet8f t0 = _mm256_shuffle_ps(vecs[0].v, vecs[0].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
|
||||||
Packet8f t1 = _mm256_shuffle_ps(vecs[1].v, vecs[1].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
|
||||||
t0 = _mm256_hadd_ps(t0,t1);
|
|
||||||
Packet8f t2 = _mm256_shuffle_ps(vecs[2].v, vecs[2].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
|
||||||
Packet8f t3 = _mm256_shuffle_ps(vecs[3].v, vecs[3].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
|
||||||
t2 = _mm256_hadd_ps(t2,t3);
|
|
||||||
|
|
||||||
t1 = _mm256_permute2f128_ps(t0,t2, 0 + (2<<4));
|
|
||||||
t3 = _mm256_permute2f128_ps(t0,t2, 1 + (3<<4));
|
|
||||||
|
|
||||||
return Packet4cf(_mm256_add_ps(t1,t3));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a)
|
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a)
|
||||||
{
|
{
|
||||||
return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)),
|
return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)),
|
||||||
Packet2cf(_mm256_extractf128_ps(a.v, 1))));
|
Packet2cf(_mm256_extractf128_ps(a.v, 1))));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int Offset>
|
|
||||||
struct palign_impl<Offset,Packet4cf>
|
|
||||||
{
|
|
||||||
static EIGEN_STRONG_INLINE void run(Packet4cf& first, const Packet4cf& second)
|
|
||||||
{
|
|
||||||
if (Offset==0) return;
|
|
||||||
palign_impl<Offset*2,Packet8f>::run(first.v, second.v);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet4cf, Packet4cf, false,true>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
|
||||||
{
|
|
||||||
return internal::pmul(a, pconj(b));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet4cf, Packet4cf, true,false>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
|
||||||
{
|
|
||||||
return internal::pmul(pconj(a), b);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
|
||||||
{
|
|
||||||
return pconj(internal::pmul(a, b));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)
|
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
|
template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
|
||||||
{
|
{
|
||||||
Packet4cf num = pmul(a, pconj(b));
|
return pdiv_complex(a, b);
|
||||||
__m256 tmp = _mm256_mul_ps(b.v, b.v);
|
|
||||||
__m256 tmp2 = _mm256_shuffle_ps(tmp,tmp,0xB1);
|
|
||||||
__m256 denom = _mm256_add_ps(tmp, tmp2);
|
|
||||||
return Packet4cf(_mm256_div_ps(num.v, denom));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x)
|
template<> EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x)
|
||||||
@@ -254,6 +206,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
|||||||
HasMul = 1,
|
HasMul = 1,
|
||||||
HasDiv = 1,
|
HasDiv = 1,
|
||||||
HasNegate = 1,
|
HasNegate = 1,
|
||||||
|
HasSqrt = 1,
|
||||||
HasAbs = 0,
|
HasAbs = 0,
|
||||||
HasAbs2 = 0,
|
HasAbs2 = 0,
|
||||||
HasMin = 0,
|
HasMin = 0,
|
||||||
@@ -263,7 +216,18 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; };
|
template<> struct unpacket_traits<Packet2cd> {
|
||||||
|
typedef std::complex<double> type;
|
||||||
|
typedef Packet1cd half;
|
||||||
|
typedef Packet4d as_real;
|
||||||
|
enum {
|
||||||
|
size=2,
|
||||||
|
alignment=Aligned32,
|
||||||
|
vectorizable=true,
|
||||||
|
masked_load_available=false,
|
||||||
|
masked_store_available=false
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
|
||||||
@@ -291,7 +255,6 @@ EIGEN_STRONG_INLINE Packet2cd pcmp_eq(const Packet2cd& a, const Packet2cd& b) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cd ptrue<Packet2cd>(const Packet2cd& a) { return Packet2cd(ptrue(Packet4d(a.v))); }
|
template<> EIGEN_STRONG_INLINE Packet2cd ptrue<Packet2cd>(const Packet2cd& a) { return Packet2cd(ptrue(Packet4d(a.v))); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cd pnot<Packet2cd>(const Packet2cd& a) { return Packet2cd(pnot(Packet4d(a.v))); }
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cd pand <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_and_pd(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet2cd pand <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_and_pd(a.v,b.v)); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cd por <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_or_pd(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet2cd por <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_or_pd(a.v,b.v)); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cd pxor <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_xor_pd(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet2cd pxor <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_xor_pd(a.v,b.v)); }
|
||||||
@@ -347,71 +310,17 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(const Pack
|
|||||||
Packet1cd(_mm256_extractf128_pd(a.v,1))));
|
Packet1cd(_mm256_extractf128_pd(a.v,1))));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cd preduxp<Packet2cd>(const Packet2cd* vecs)
|
|
||||||
{
|
|
||||||
Packet4d t0 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 0 + (2<<4));
|
|
||||||
Packet4d t1 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 1 + (3<<4));
|
|
||||||
|
|
||||||
return Packet2cd(_mm256_add_pd(t0,t1));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a)
|
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a)
|
||||||
{
|
{
|
||||||
return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v,0)),
|
return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v,0)),
|
||||||
Packet1cd(_mm256_extractf128_pd(a.v,1))));
|
Packet1cd(_mm256_extractf128_pd(a.v,1))));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int Offset>
|
|
||||||
struct palign_impl<Offset,Packet2cd>
|
|
||||||
{
|
|
||||||
static EIGEN_STRONG_INLINE void run(Packet2cd& first, const Packet2cd& second)
|
|
||||||
{
|
|
||||||
if (Offset==0) return;
|
|
||||||
palign_impl<Offset*2,Packet4d>::run(first.v, second.v);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet2cd, Packet2cd, false,true>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
|
||||||
{
|
|
||||||
return internal::pmul(a, pconj(b));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet2cd, Packet2cd, true,false>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
|
||||||
{
|
|
||||||
return internal::pmul(pconj(a), b);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
|
||||||
{
|
|
||||||
return pconj(internal::pmul(a, b));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)
|
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
|
template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
|
||||||
{
|
{
|
||||||
Packet2cd num = pmul(a, pconj(b));
|
return pdiv_complex(a, b);
|
||||||
__m256d tmp = _mm256_mul_pd(b.v, b.v);
|
|
||||||
__m256d denom = _mm256_hadd_pd(tmp, tmp);
|
|
||||||
return Packet2cd(_mm256_div_pd(num.v, denom));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x)
|
template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x)
|
||||||
@@ -444,24 +353,12 @@ ptranspose(PacketBlock<Packet2cd,2>& kernel) {
|
|||||||
kernel.packet[0].v = tmp;
|
kernel.packet[0].v = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex<float> b)
|
template<> EIGEN_STRONG_INLINE Packet2cd psqrt<Packet2cd>(const Packet2cd& a) {
|
||||||
{
|
return psqrt_complex<Packet2cd>(a);
|
||||||
return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,1|2));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex<double> b)
|
template<> EIGEN_STRONG_INLINE Packet4cf psqrt<Packet4cf>(const Packet4cf& a) {
|
||||||
{
|
return psqrt_complex<Packet4cf>(a);
|
||||||
return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,1|2));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex<float> b)
|
|
||||||
{
|
|
||||||
return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,(1<<7)|(1<<6)));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex<double> b)
|
|
||||||
{
|
|
||||||
return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,(1<<3)|(1<<2)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|||||||
@@ -36,6 +36,24 @@ plog<Packet8f>(const Packet8f& _x) {
|
|||||||
return plog_float(_x);
|
return plog_float(_x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
|
||||||
|
plog<Packet4d>(const Packet4d& _x) {
|
||||||
|
return plog_double(_x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||||
|
plog2<Packet8f>(const Packet8f& _x) {
|
||||||
|
return plog2_float(_x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
|
||||||
|
plog2<Packet4d>(const Packet4d& _x) {
|
||||||
|
return plog2_double(_x);
|
||||||
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||||
Packet8f plog1p<Packet8f>(const Packet8f& _x) {
|
Packet8f plog1p<Packet8f>(const Packet8f& _x) {
|
||||||
return generic_plog1p(_x);
|
return generic_plog1p(_x);
|
||||||
@@ -58,15 +76,15 @@ pexp<Packet8f>(const Packet8f& _x) {
|
|||||||
// Hyperbolic Tangent function.
|
// Hyperbolic Tangent function.
|
||||||
template <>
|
template <>
|
||||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||||
ptanh<Packet8f>(const Packet8f& x) {
|
ptanh<Packet8f>(const Packet8f& _x) {
|
||||||
return internal::generic_fast_tanh_float(x);
|
return internal::generic_fast_tanh_float(_x);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Exponential function for doubles.
|
// Exponential function for doubles.
|
||||||
template <>
|
template <>
|
||||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
|
||||||
pexp<Packet4d>(const Packet4d& x) {
|
pexp<Packet4d>(const Packet4d& _x) {
|
||||||
return pexp_double(x);
|
return pexp_double(_x);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Functions for sqrt.
|
// Functions for sqrt.
|
||||||
@@ -79,33 +97,36 @@ pexp<Packet4d>(const Packet4d& x) {
|
|||||||
// For detail see here: http://www.beyond3d.com/content/articles/8/
|
// For detail see here: http://www.beyond3d.com/content/articles/8/
|
||||||
#if EIGEN_FAST_MATH
|
#if EIGEN_FAST_MATH
|
||||||
template <>
|
template <>
|
||||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||||
psqrt<Packet8f>(const Packet8f& _x) {
|
Packet8f psqrt<Packet8f>(const Packet8f& _x) {
|
||||||
Packet8f half = pmul(_x, pset1<Packet8f>(.5f));
|
Packet8f minus_half_x = pmul(_x, pset1<Packet8f>(-0.5f));
|
||||||
Packet8f denormal_mask = _mm256_and_ps(
|
Packet8f denormal_mask = pandnot(
|
||||||
_mm256_cmp_ps(_x, pset1<Packet8f>((std::numeric_limits<float>::min)()),
|
pcmp_lt(_x, pset1<Packet8f>((std::numeric_limits<float>::min)())),
|
||||||
_CMP_LT_OQ),
|
pcmp_lt(_x, pzero(_x)));
|
||||||
_mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_GE_OQ));
|
|
||||||
|
|
||||||
// Compute approximate reciprocal sqrt.
|
// Compute approximate reciprocal sqrt.
|
||||||
Packet8f x = _mm256_rsqrt_ps(_x);
|
Packet8f x = _mm256_rsqrt_ps(_x);
|
||||||
// Do a single step of Newton's iteration.
|
// Do a single step of Newton's iteration.
|
||||||
x = pmul(x, psub(pset1<Packet8f>(1.5f), pmul(half, pmul(x,x))));
|
x = pmul(x, pmadd(minus_half_x, pmul(x,x), pset1<Packet8f>(1.5f)));
|
||||||
// Flush results for denormals to zero.
|
// Flush results for denormals to zero.
|
||||||
return _mm256_andnot_ps(denormal_mask, pmul(_x,x));
|
return pandnot(pmul(_x,x), denormal_mask);
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
|
||||||
Packet8f psqrt<Packet8f>(const Packet8f& x) {
|
|
||||||
return _mm256_sqrt_ps(x);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
|
||||||
Packet4d psqrt<Packet4d>(const Packet4d& x) {
|
|
||||||
return _mm256_sqrt_pd(x);
|
|
||||||
}
|
|
||||||
#if EIGEN_FAST_MATH
|
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||||
|
Packet8f psqrt<Packet8f>(const Packet8f& _x) {
|
||||||
|
return _mm256_sqrt_ps(_x);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||||
|
Packet4d psqrt<Packet4d>(const Packet4d& _x) {
|
||||||
|
return _mm256_sqrt_pd(_x);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if EIGEN_FAST_MATH
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||||
Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
|
Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
|
||||||
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
|
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
|
||||||
@@ -140,18 +161,65 @@ Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
|
|||||||
|
|
||||||
#else
|
#else
|
||||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||||
Packet8f prsqrt<Packet8f>(const Packet8f& x) {
|
Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
|
||||||
_EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
|
_EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
|
||||||
return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(x));
|
return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(_x));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||||
Packet4d prsqrt<Packet4d>(const Packet4d& x) {
|
Packet4d prsqrt<Packet4d>(const Packet4d& _x) {
|
||||||
_EIGEN_DECLARE_CONST_Packet4d(one, 1.0);
|
_EIGEN_DECLARE_CONST_Packet4d(one, 1.0);
|
||||||
return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(x));
|
return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(_x));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
F16_PACKET_FUNCTION(Packet8f, Packet8h, psin)
|
||||||
|
F16_PACKET_FUNCTION(Packet8f, Packet8h, pcos)
|
||||||
|
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog)
|
||||||
|
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog2)
|
||||||
|
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog1p)
|
||||||
|
F16_PACKET_FUNCTION(Packet8f, Packet8h, pexpm1)
|
||||||
|
F16_PACKET_FUNCTION(Packet8f, Packet8h, pexp)
|
||||||
|
F16_PACKET_FUNCTION(Packet8f, Packet8h, ptanh)
|
||||||
|
F16_PACKET_FUNCTION(Packet8f, Packet8h, psqrt)
|
||||||
|
F16_PACKET_FUNCTION(Packet8f, Packet8h, prsqrt)
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h& a, Packet8h& exponent) {
|
||||||
|
Packet8f fexponent;
|
||||||
|
const Packet8h out = float2half(pfrexp<Packet8f>(half2float(a), fexponent));
|
||||||
|
exponent = float2half(fexponent);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_STRONG_INLINE Packet8h pldexp(const Packet8h& a, const Packet8h& exponent) {
|
||||||
|
return float2half(pldexp<Packet8f>(half2float(a), half2float(exponent)));
|
||||||
|
}
|
||||||
|
|
||||||
|
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psin)
|
||||||
|
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pcos)
|
||||||
|
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog)
|
||||||
|
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog2)
|
||||||
|
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog1p)
|
||||||
|
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexpm1)
|
||||||
|
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp)
|
||||||
|
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, ptanh)
|
||||||
|
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psqrt)
|
||||||
|
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, prsqrt)
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_STRONG_INLINE Packet8bf pfrexp(const Packet8bf& a, Packet8bf& exponent) {
|
||||||
|
Packet8f fexponent;
|
||||||
|
const Packet8bf out = F32ToBf16(pfrexp<Packet8f>(Bf16ToF32(a), fexponent));
|
||||||
|
exponent = F32ToBf16(fexponent);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_STRONG_INLINE Packet8bf pldexp(const Packet8bf& a, const Packet8bf& exponent) {
|
||||||
|
return F32ToBf16(pldexp<Packet8f>(Bf16ToF32(a), Bf16ToF32(exponent)));
|
||||||
|
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -35,6 +35,46 @@ struct type_casting_traits<int, float> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef EIGEN_VECTORIZE_AVX512
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct type_casting_traits<Eigen::half, float> {
|
||||||
|
enum {
|
||||||
|
VectorizedCast = 1,
|
||||||
|
SrcCoeffRatio = 1,
|
||||||
|
TgtCoeffRatio = 1
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct type_casting_traits<float, Eigen::half> {
|
||||||
|
enum {
|
||||||
|
VectorizedCast = 1,
|
||||||
|
SrcCoeffRatio = 1,
|
||||||
|
TgtCoeffRatio = 1
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct type_casting_traits<bfloat16, float> {
|
||||||
|
enum {
|
||||||
|
VectorizedCast = 1,
|
||||||
|
SrcCoeffRatio = 1,
|
||||||
|
TgtCoeffRatio = 1
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct type_casting_traits<float, bfloat16> {
|
||||||
|
enum {
|
||||||
|
VectorizedCast = 1,
|
||||||
|
SrcCoeffRatio = 1,
|
||||||
|
TgtCoeffRatio = 1
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // EIGEN_VECTORIZE_AVX512
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
|
template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
|
||||||
return _mm256_cvttps_epi32(a);
|
return _mm256_cvttps_epi32(a);
|
||||||
@@ -52,36 +92,22 @@ template<> EIGEN_STRONG_INLINE Packet8f preinterpret<Packet8f,Packet8i>(const Pa
|
|||||||
return _mm256_castsi256_ps(a);
|
return _mm256_castsi256_ps(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef EIGEN_VECTORIZE_AVX512
|
|
||||||
|
|
||||||
template <>
|
|
||||||
struct type_casting_traits<Eigen::half, float> {
|
|
||||||
enum {
|
|
||||||
VectorizedCast = 1,
|
|
||||||
SrcCoeffRatio = 1,
|
|
||||||
TgtCoeffRatio = 1
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) {
|
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) {
|
||||||
return half2float(a);
|
return half2float(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8bf, Packet8f>(const Packet8bf& a) {
|
||||||
struct type_casting_traits<float, Eigen::half> {
|
return Bf16ToF32(a);
|
||||||
enum {
|
}
|
||||||
VectorizedCast = 1,
|
|
||||||
SrcCoeffRatio = 1,
|
|
||||||
TgtCoeffRatio = 1
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif // EIGEN_VECTORIZE_AVX512
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f& a) {
|
template<> EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f& a) {
|
||||||
return float2half(a);
|
return float2half(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet8bf pcast<Packet8f, Packet8bf>(const Packet8f& a) {
|
||||||
|
return F32ToBf16(a);
|
||||||
|
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|||||||
@@ -37,17 +37,19 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
|||||||
HasMul = 1,
|
HasMul = 1,
|
||||||
HasDiv = 1,
|
HasDiv = 1,
|
||||||
HasNegate = 1,
|
HasNegate = 1,
|
||||||
|
HasSqrt = EIGEN_HAS_AVX512_MATH,
|
||||||
HasAbs = 0,
|
HasAbs = 0,
|
||||||
HasAbs2 = 0,
|
HasAbs2 = 0,
|
||||||
HasMin = 0,
|
HasMin = 0,
|
||||||
HasMax = 0,
|
HasMax = 0,
|
||||||
HasSetLinear = 0,
|
HasSetLinear = 0
|
||||||
HasReduxp = 0
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
template<> struct unpacket_traits<Packet8cf> {
|
template<> struct unpacket_traits<Packet8cf> {
|
||||||
typedef std::complex<float> type;
|
typedef std::complex<float> type;
|
||||||
|
typedef Packet4cf half;
|
||||||
|
typedef Packet16f as_real;
|
||||||
enum {
|
enum {
|
||||||
size = 8,
|
size = 8,
|
||||||
alignment=unpacket_traits<Packet16f>::alignment,
|
alignment=unpacket_traits<Packet16f>::alignment,
|
||||||
@@ -55,11 +57,9 @@ template<> struct unpacket_traits<Packet8cf> {
|
|||||||
masked_load_available=false,
|
masked_load_available=false,
|
||||||
masked_store_available=false
|
masked_store_available=false
|
||||||
};
|
};
|
||||||
typedef Packet4cf half;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8cf ptrue<Packet8cf>(const Packet8cf& a) { return Packet8cf(ptrue(Packet16f(a.v))); }
|
template<> EIGEN_STRONG_INLINE Packet8cf ptrue<Packet8cf>(const Packet8cf& a) { return Packet8cf(ptrue(Packet16f(a.v))); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet8cf pnot<Packet8cf>(const Packet8cf& a) { return Packet8cf(pnot(Packet16f(a.v))); }
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8cf padd<Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(_mm512_add_ps(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet8cf padd<Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(_mm512_add_ps(a.v,b.v)); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet8cf psub<Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(_mm512_sub_ps(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet8cf psub<Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(_mm512_sub_ps(a.v,b.v)); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet8cf pnegate(const Packet8cf& a)
|
template<> EIGEN_STRONG_INLINE Packet8cf pnegate(const Packet8cf& a)
|
||||||
@@ -97,7 +97,9 @@ template<> EIGEN_STRONG_INLINE Packet8cf ploadu<Packet8cf>(const std::complex<fl
|
|||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8cf pset1<Packet8cf>(const std::complex<float>& from)
|
template<> EIGEN_STRONG_INLINE Packet8cf pset1<Packet8cf>(const std::complex<float>& from)
|
||||||
{
|
{
|
||||||
return Packet8cf(_mm512_castpd_ps(pload1<Packet8d>((const double*)(const void*)&from)));
|
const float re = std::real(from);
|
||||||
|
const float im = std::imag(from);
|
||||||
|
return Packet8cf(_mm512_set_ps(im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8cf ploaddup<Packet8cf>(const std::complex<float>* from)
|
template<> EIGEN_STRONG_INLINE Packet8cf ploaddup<Packet8cf>(const std::complex<float>* from)
|
||||||
@@ -153,58 +155,11 @@ EIGEN_STRONG_INLINE Packet4cf predux_half_dowto4<Packet8cf>(const Packet8cf& a)
|
|||||||
return Packet4cf(res);
|
return Packet4cf(res);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int Offset>
|
|
||||||
struct palign_impl<Offset,Packet8cf>
|
|
||||||
{
|
|
||||||
static EIGEN_STRONG_INLINE void run(Packet8cf& first, const Packet8cf& second)
|
|
||||||
{
|
|
||||||
if (Offset==0) return;
|
|
||||||
palign_impl<Offset*2,Packet16f>::run(first.v, second.v);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet8cf, Packet8cf, false,true>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet8cf pmadd(const Packet8cf& x, const Packet8cf& y, const Packet8cf& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet8cf pmul(const Packet8cf& a, const Packet8cf& b) const
|
|
||||||
{
|
|
||||||
return internal::pmul(a, pconj(b));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet8cf, Packet8cf, true,false>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet8cf pmadd(const Packet8cf& x, const Packet8cf& y, const Packet8cf& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet8cf pmul(const Packet8cf& a, const Packet8cf& b) const
|
|
||||||
{
|
|
||||||
return internal::pmul(pconj(a), b);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet8cf, Packet8cf, true,true>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet8cf pmadd(const Packet8cf& x, const Packet8cf& y, const Packet8cf& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet8cf pmul(const Packet8cf& a, const Packet8cf& b) const
|
|
||||||
{
|
|
||||||
return pconj(internal::pmul(a, b));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet8cf,Packet16f)
|
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet8cf,Packet16f)
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8cf pdiv<Packet8cf>(const Packet8cf& a, const Packet8cf& b)
|
template<> EIGEN_STRONG_INLINE Packet8cf pdiv<Packet8cf>(const Packet8cf& a, const Packet8cf& b)
|
||||||
{
|
{
|
||||||
Packet8cf num = pmul(a, pconj(b));
|
return pdiv_complex(a, b);
|
||||||
__m512 tmp = _mm512_mul_ps(b.v, b.v);
|
|
||||||
__m512 tmp2 = _mm512_shuffle_ps(tmp,tmp,0xB1);
|
|
||||||
__m512 denom = _mm512_add_ps(tmp, tmp2);
|
|
||||||
return Packet8cf(_mm512_div_ps(num.v, denom));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8cf pcplxflip<Packet8cf>(const Packet8cf& x)
|
template<> EIGEN_STRONG_INLINE Packet8cf pcplxflip<Packet8cf>(const Packet8cf& x)
|
||||||
@@ -235,17 +190,19 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
|||||||
HasMul = 1,
|
HasMul = 1,
|
||||||
HasDiv = 1,
|
HasDiv = 1,
|
||||||
HasNegate = 1,
|
HasNegate = 1,
|
||||||
|
HasSqrt = EIGEN_HAS_AVX512_MATH,
|
||||||
HasAbs = 0,
|
HasAbs = 0,
|
||||||
HasAbs2 = 0,
|
HasAbs2 = 0,
|
||||||
HasMin = 0,
|
HasMin = 0,
|
||||||
HasMax = 0,
|
HasMax = 0,
|
||||||
HasSetLinear = 0,
|
HasSetLinear = 0
|
||||||
HasReduxp = 0
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
template<> struct unpacket_traits<Packet4cd> {
|
template<> struct unpacket_traits<Packet4cd> {
|
||||||
typedef std::complex<double> type;
|
typedef std::complex<double> type;
|
||||||
|
typedef Packet2cd half;
|
||||||
|
typedef Packet8d as_real;
|
||||||
enum {
|
enum {
|
||||||
size = 4,
|
size = 4,
|
||||||
alignment = unpacket_traits<Packet8d>::alignment,
|
alignment = unpacket_traits<Packet8d>::alignment,
|
||||||
@@ -253,7 +210,6 @@ template<> struct unpacket_traits<Packet4cd> {
|
|||||||
masked_load_available=false,
|
masked_load_available=false,
|
||||||
masked_store_available=false
|
masked_store_available=false
|
||||||
};
|
};
|
||||||
typedef Packet2cd half;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cd padd<Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(_mm512_add_pd(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet4cd padd<Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(_mm512_add_pd(a.v,b.v)); }
|
||||||
@@ -277,7 +233,6 @@ template<> EIGEN_STRONG_INLINE Packet4cd pmul<Packet4cd>(const Packet4cd& a, con
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cd ptrue<Packet4cd>(const Packet4cd& a) { return Packet4cd(ptrue(Packet8d(a.v))); }
|
template<> EIGEN_STRONG_INLINE Packet4cd ptrue<Packet4cd>(const Packet4cd& a) { return Packet4cd(ptrue(Packet8d(a.v))); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cd pnot<Packet4cd>(const Packet4cd& a) { return Packet4cd(pnot(Packet8d(a.v))); }
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cd pand <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(pand(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet4cd pand <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(pand(a.v,b.v)); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cd por <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(por(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet4cd por <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(por(a.v,b.v)); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cd pxor <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(pxor(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet4cd pxor <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(pxor(a.v,b.v)); }
|
||||||
@@ -296,11 +251,7 @@ template<> EIGEN_STRONG_INLINE Packet4cd ploadu<Packet4cd>(const std::complex<do
|
|||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cd pset1<Packet4cd>(const std::complex<double>& from)
|
template<> EIGEN_STRONG_INLINE Packet4cd pset1<Packet4cd>(const std::complex<double>& from)
|
||||||
{
|
{
|
||||||
#ifdef EIGEN_VECTORIZE_AVX512DQ
|
|
||||||
return Packet4cd(_mm512_broadcast_f64x2(pset1<Packet1cd>(from).v));
|
|
||||||
#else
|
|
||||||
return Packet4cd(_mm512_castps_pd(_mm512_broadcast_f32x4( _mm_castpd_ps(pset1<Packet1cd>(from).v))));
|
return Packet4cd(_mm512_castps_pd(_mm512_broadcast_f32x4( _mm_castpd_ps(pset1<Packet1cd>(from).v))));
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cd ploaddup<Packet4cd>(const std::complex<double>* from) {
|
template<> EIGEN_STRONG_INLINE Packet4cd ploaddup<Packet4cd>(const std::complex<double>* from) {
|
||||||
@@ -337,7 +288,7 @@ template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet4cd>(const Pack
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cd preverse(const Packet4cd& a) {
|
template<> EIGEN_STRONG_INLINE Packet4cd preverse(const Packet4cd& a) {
|
||||||
return Packet4cd(_mm512_shuffle_f64x2(a.v, a.v, EIGEN_SSE_SHUFFLE_MASK(3,2,1,0)));
|
return Packet4cd(_mm512_shuffle_f64x2(a.v, a.v, (shuffle_mask<3,2,1,0>::mask)));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet4cd>(const Packet4cd& a)
|
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet4cd>(const Packet4cd& a)
|
||||||
@@ -352,57 +303,11 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet4cd>(const
|
|||||||
Packet2cd(_mm512_extractf64x4_pd(a.v,1))));
|
Packet2cd(_mm512_extractf64x4_pd(a.v,1))));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int Offset>
|
|
||||||
struct palign_impl<Offset,Packet4cd>
|
|
||||||
{
|
|
||||||
static EIGEN_STRONG_INLINE void run(Packet4cd& first, const Packet4cd& second)
|
|
||||||
{
|
|
||||||
if (Offset==0) return;
|
|
||||||
palign_impl<Offset*2,Packet8d>::run(first.v, second.v);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet4cd, Packet4cd, false,true>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet4cd pmadd(const Packet4cd& x, const Packet4cd& y, const Packet4cd& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet4cd pmul(const Packet4cd& a, const Packet4cd& b) const
|
|
||||||
{
|
|
||||||
return internal::pmul(a, pconj(b));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet4cd, Packet4cd, true,false>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet4cd pmadd(const Packet4cd& x, const Packet4cd& y, const Packet4cd& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet4cd pmul(const Packet4cd& a, const Packet4cd& b) const
|
|
||||||
{
|
|
||||||
return internal::pmul(pconj(a), b);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet4cd, Packet4cd, true,true>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet4cd pmadd(const Packet4cd& x, const Packet4cd& y, const Packet4cd& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet4cd pmul(const Packet4cd& a, const Packet4cd& b) const
|
|
||||||
{
|
|
||||||
return pconj(internal::pmul(a, b));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cd,Packet8d)
|
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cd,Packet8d)
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cd pdiv<Packet4cd>(const Packet4cd& a, const Packet4cd& b)
|
template<> EIGEN_STRONG_INLINE Packet4cd pdiv<Packet4cd>(const Packet4cd& a, const Packet4cd& b)
|
||||||
{
|
{
|
||||||
Packet4cd num = pmul(a, pconj(b));
|
return pdiv_complex(a, b);
|
||||||
__m512d tmp = _mm512_mul_pd(b.v, b.v);
|
|
||||||
__m512d denom = padd(_mm512_permute_pd(tmp,0x55), tmp);
|
|
||||||
return Packet4cd(_mm512_div_pd(num.v, denom));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cd pcplxflip<Packet4cd>(const Packet4cd& x)
|
template<> EIGEN_STRONG_INLINE Packet4cd pcplxflip<Packet4cd>(const Packet4cd& x)
|
||||||
@@ -450,43 +355,30 @@ ptranspose(PacketBlock<Packet8cf,8>& kernel) {
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline void
|
EIGEN_DEVICE_FUNC inline void
|
||||||
ptranspose(PacketBlock<Packet4cd,4>& kernel) {
|
ptranspose(PacketBlock<Packet4cd,4>& kernel) {
|
||||||
__m512d T0 = _mm512_shuffle_f64x2(kernel.packet[0].v, kernel.packet[1].v, EIGEN_SSE_SHUFFLE_MASK(0,1,0,1)); // [a0 a1 b0 b1]
|
__m512d T0 = _mm512_shuffle_f64x2(kernel.packet[0].v, kernel.packet[1].v, (shuffle_mask<0,1,0,1>::mask)); // [a0 a1 b0 b1]
|
||||||
__m512d T1 = _mm512_shuffle_f64x2(kernel.packet[0].v, kernel.packet[1].v, EIGEN_SSE_SHUFFLE_MASK(2,3,2,3)); // [a2 a3 b2 b3]
|
__m512d T1 = _mm512_shuffle_f64x2(kernel.packet[0].v, kernel.packet[1].v, (shuffle_mask<2,3,2,3>::mask)); // [a2 a3 b2 b3]
|
||||||
__m512d T2 = _mm512_shuffle_f64x2(kernel.packet[2].v, kernel.packet[3].v, EIGEN_SSE_SHUFFLE_MASK(0,1,0,1)); // [c0 c1 d0 d1]
|
__m512d T2 = _mm512_shuffle_f64x2(kernel.packet[2].v, kernel.packet[3].v, (shuffle_mask<0,1,0,1>::mask)); // [c0 c1 d0 d1]
|
||||||
__m512d T3 = _mm512_shuffle_f64x2(kernel.packet[2].v, kernel.packet[3].v, EIGEN_SSE_SHUFFLE_MASK(2,3,2,3)); // [c2 c3 d2 d3]
|
__m512d T3 = _mm512_shuffle_f64x2(kernel.packet[2].v, kernel.packet[3].v, (shuffle_mask<2,3,2,3>::mask)); // [c2 c3 d2 d3]
|
||||||
|
|
||||||
kernel.packet[3] = Packet4cd(_mm512_shuffle_f64x2(T1, T3, EIGEN_SSE_SHUFFLE_MASK(1,3,1,3))); // [a3 b3 c3 d3]
|
kernel.packet[3] = Packet4cd(_mm512_shuffle_f64x2(T1, T3, (shuffle_mask<1,3,1,3>::mask))); // [a3 b3 c3 d3]
|
||||||
kernel.packet[2] = Packet4cd(_mm512_shuffle_f64x2(T1, T3, EIGEN_SSE_SHUFFLE_MASK(0,2,0,2))); // [a2 b2 c2 d2]
|
kernel.packet[2] = Packet4cd(_mm512_shuffle_f64x2(T1, T3, (shuffle_mask<0,2,0,2>::mask))); // [a2 b2 c2 d2]
|
||||||
kernel.packet[1] = Packet4cd(_mm512_shuffle_f64x2(T0, T2, EIGEN_SSE_SHUFFLE_MASK(1,3,1,3))); // [a1 b1 c1 d1]
|
kernel.packet[1] = Packet4cd(_mm512_shuffle_f64x2(T0, T2, (shuffle_mask<1,3,1,3>::mask))); // [a1 b1 c1 d1]
|
||||||
kernel.packet[0] = Packet4cd(_mm512_shuffle_f64x2(T0, T2, EIGEN_SSE_SHUFFLE_MASK(0,2,0,2))); // [a0 b0 c0 d0]
|
kernel.packet[0] = Packet4cd(_mm512_shuffle_f64x2(T0, T2, (shuffle_mask<0,2,0,2>::mask))); // [a0 b0 c0 d0]
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8cf pinsertfirst(const Packet8cf& a, std::complex<float> b)
|
#if EIGEN_HAS_AVX512_MATH
|
||||||
{
|
|
||||||
Packet2cf tmp = Packet2cf(_mm512_extractf32x4_ps(a.v,0));
|
template<> EIGEN_STRONG_INLINE Packet4cd psqrt<Packet4cd>(const Packet4cd& a) {
|
||||||
tmp = pinsertfirst(tmp, b);
|
return psqrt_complex<Packet4cd>(a);
|
||||||
return Packet8cf( _mm512_insertf32x4(a.v, tmp.v, 0) );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cd pinsertfirst(const Packet4cd& a, std::complex<double> b)
|
template<> EIGEN_STRONG_INLINE Packet8cf psqrt<Packet8cf>(const Packet8cf& a) {
|
||||||
{
|
return psqrt_complex<Packet8cf>(a);
|
||||||
return Packet4cd(_mm512_castsi512_pd( _mm512_inserti32x4(_mm512_castpd_si512(a.v), _mm_castpd_si128(pset1<Packet1cd>(b).v), 0) ));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8cf pinsertlast(const Packet8cf& a, std::complex<float> b)
|
#endif
|
||||||
{
|
|
||||||
Packet2cf tmp = Packet2cf(_mm512_extractf32x4_ps(a.v,3) );
|
|
||||||
tmp = pinsertlast(tmp, b);
|
|
||||||
return Packet8cf( _mm512_insertf32x4(a.v, tmp.v, 3) );
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cd pinsertlast(const Packet4cd& a, std::complex<double> b)
|
|
||||||
{
|
|
||||||
return Packet4cd(_mm512_castsi512_pd( _mm512_inserti32x4(_mm512_castpd_si512(a.v), _mm_castpd_si128(pset1<Packet1cd>(b).v), 3) ));
|
|
||||||
}
|
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|
||||||
#endif // EIGEN_COMPLEX_AVX512_H
|
#endif // EIGEN_COMPLEX_AVX512_H
|
||||||
|
|||||||
@@ -14,8 +14,7 @@ namespace Eigen {
|
|||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
// Disable the code for older versions of gcc that don't support many of the required avx512 instrinsics.
|
#if EIGEN_HAS_AVX512_MATH
|
||||||
#if EIGEN_GNUC_AT_LEAST(5, 3) || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC >= 1923
|
|
||||||
|
|
||||||
#define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
|
#define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
|
||||||
const Packet16f p16f_##NAME = pset1<Packet16f>(X)
|
const Packet16f p16f_##NAME = pset1<Packet16f>(X)
|
||||||
@@ -29,106 +28,41 @@ namespace internal {
|
|||||||
#define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
|
#define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
|
||||||
const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
|
const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
|
||||||
|
|
||||||
// Natural logarithm
|
#define _EIGEN_DECLARE_CONST_Packet16bf(NAME, X) \
|
||||||
// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
|
const Packet16bf p16bf_##NAME = pset1<Packet16bf>(X)
|
||||||
// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
|
|
||||||
// be easily approximated by a polynomial centered on m=1 for stability.
|
#define _EIGEN_DECLARE_CONST_Packet16bf_FROM_INT(NAME, X) \
|
||||||
#if defined(EIGEN_VECTORIZE_AVX512DQ)
|
const Packet16bf p16bf_##NAME = preinterpret<Packet16bf,Packet16i>(pset1<Packet16i>(X))
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||||
plog<Packet16f>(const Packet16f& _x) {
|
plog<Packet16f>(const Packet16f& _x) {
|
||||||
Packet16f x = _x;
|
return plog_float(_x);
|
||||||
_EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f(126f, 126.0f);
|
|
||||||
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inv_mant_mask, ~0x7f800000);
|
|
||||||
|
|
||||||
// The smallest non denormalized float number.
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(min_norm_pos, 0x00800000);
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(minus_inf, 0xff800000);
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(pos_inf, 0x7f800000);
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
|
|
||||||
|
|
||||||
// Polynomial coefficients.
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_SQRTHF, 0.707106781186547524f);
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p0, 7.0376836292E-2f);
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p1, -1.1514610310E-1f);
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p2, 1.1676998740E-1f);
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p3, -1.2420140846E-1f);
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p4, +1.4249322787E-1f);
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p5, -1.6668057665E-1f);
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p6, +2.0000714765E-1f);
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p7, -2.4999993993E-1f);
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p8, +3.3333331174E-1f);
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_q1, -2.12194440e-4f);
|
|
||||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_q2, 0.693359375f);
|
|
||||||
|
|
||||||
// invalid_mask is set to true when x is NaN
|
|
||||||
__mmask16 invalid_mask = _mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_NGE_UQ);
|
|
||||||
__mmask16 iszero_mask = _mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_EQ_OQ);
|
|
||||||
|
|
||||||
// Truncate input values to the minimum positive normal.
|
|
||||||
x = pmax(x, p16f_min_norm_pos);
|
|
||||||
|
|
||||||
// Extract the shifted exponents.
|
|
||||||
Packet16f emm0 = _mm512_cvtepi32_ps(_mm512_srli_epi32(preinterpret<Packet16i,Packet16f>(x), 23));
|
|
||||||
Packet16f e = _mm512_sub_ps(emm0, p16f_126f);
|
|
||||||
|
|
||||||
// Set the exponents to -1, i.e. x are in the range [0.5,1).
|
|
||||||
x = _mm512_and_ps(x, p16f_inv_mant_mask);
|
|
||||||
x = _mm512_or_ps(x, p16f_half);
|
|
||||||
|
|
||||||
// part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
|
|
||||||
// and shift by -1. The values are then centered around 0, which improves
|
|
||||||
// the stability of the polynomial evaluation.
|
|
||||||
// if( x < SQRTHF ) {
|
|
||||||
// e -= 1;
|
|
||||||
// x = x + x - 1.0;
|
|
||||||
// } else { x = x - 1.0; }
|
|
||||||
__mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ);
|
|
||||||
Packet16f tmp = _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), x);
|
|
||||||
x = psub(x, p16f_1);
|
|
||||||
e = psub(e, _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), p16f_1));
|
|
||||||
x = padd(x, tmp);
|
|
||||||
|
|
||||||
Packet16f x2 = pmul(x, x);
|
|
||||||
Packet16f x3 = pmul(x2, x);
|
|
||||||
|
|
||||||
// Evaluate the polynomial approximant of degree 8 in three parts, probably
|
|
||||||
// to improve instruction-level parallelism.
|
|
||||||
Packet16f y, y1, y2;
|
|
||||||
y = pmadd(p16f_cephes_log_p0, x, p16f_cephes_log_p1);
|
|
||||||
y1 = pmadd(p16f_cephes_log_p3, x, p16f_cephes_log_p4);
|
|
||||||
y2 = pmadd(p16f_cephes_log_p6, x, p16f_cephes_log_p7);
|
|
||||||
y = pmadd(y, x, p16f_cephes_log_p2);
|
|
||||||
y1 = pmadd(y1, x, p16f_cephes_log_p5);
|
|
||||||
y2 = pmadd(y2, x, p16f_cephes_log_p8);
|
|
||||||
y = pmadd(y, x3, y1);
|
|
||||||
y = pmadd(y, x3, y2);
|
|
||||||
y = pmul(y, x3);
|
|
||||||
|
|
||||||
// Add the logarithm of the exponent back to the result of the interpolation.
|
|
||||||
y1 = pmul(e, p16f_cephes_log_q1);
|
|
||||||
tmp = pmul(x2, p16f_half);
|
|
||||||
y = padd(y, y1);
|
|
||||||
x = psub(x, tmp);
|
|
||||||
y2 = pmul(e, p16f_cephes_log_q2);
|
|
||||||
x = padd(x, y);
|
|
||||||
x = padd(x, y2);
|
|
||||||
|
|
||||||
__mmask16 pos_inf_mask = _mm512_cmp_ps_mask(_x,p16f_pos_inf,_CMP_EQ_OQ);
|
|
||||||
// Filter out invalid inputs, i.e.:
|
|
||||||
// - negative arg will be NAN,
|
|
||||||
// - 0 will be -INF.
|
|
||||||
// - +INF will be +INF
|
|
||||||
return _mm512_mask_blend_ps(iszero_mask,
|
|
||||||
_mm512_mask_blend_ps(invalid_mask,
|
|
||||||
_mm512_mask_blend_ps(pos_inf_mask,x,p16f_pos_inf),
|
|
||||||
p16f_nan),
|
|
||||||
p16f_minus_inf);
|
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
template <>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
|
||||||
|
plog<Packet8d>(const Packet8d& _x) {
|
||||||
|
return plog_double(_x);
|
||||||
|
}
|
||||||
|
|
||||||
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog)
|
||||||
|
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog)
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||||
|
plog2<Packet16f>(const Packet16f& _x) {
|
||||||
|
return plog2_float(_x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
|
||||||
|
plog2<Packet8d>(const Packet8d& _x) {
|
||||||
|
return plog2_double(_x);
|
||||||
|
}
|
||||||
|
|
||||||
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog2)
|
||||||
|
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog2)
|
||||||
|
|
||||||
// Exponential function. Works by writing "x = m*log(2) + r" where
|
// Exponential function. Works by writing "x = m*log(2) + r" where
|
||||||
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
|
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
|
||||||
@@ -164,17 +98,17 @@ pexp<Packet16f>(const Packet16f& _x) {
|
|||||||
_EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
|
_EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
|
||||||
Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
|
Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
|
||||||
Packet16f r2 = pmul(r, r);
|
Packet16f r2 = pmul(r, r);
|
||||||
|
Packet16f r3 = pmul(r2, r);
|
||||||
|
|
||||||
// TODO(gonnet): Split into odd/even polynomials and try to exploit
|
// Evaluate the polynomial approximant,improved by instruction-level parallelism.
|
||||||
// instruction-level parallelism.
|
Packet16f y, y1, y2;
|
||||||
Packet16f y = p16f_cephes_exp_p0;
|
y = pmadd(p16f_cephes_exp_p0, r, p16f_cephes_exp_p1);
|
||||||
y = pmadd(y, r, p16f_cephes_exp_p1);
|
y1 = pmadd(p16f_cephes_exp_p3, r, p16f_cephes_exp_p4);
|
||||||
|
y2 = padd(r, p16f_1);
|
||||||
y = pmadd(y, r, p16f_cephes_exp_p2);
|
y = pmadd(y, r, p16f_cephes_exp_p2);
|
||||||
y = pmadd(y, r, p16f_cephes_exp_p3);
|
y1 = pmadd(y1, r, p16f_cephes_exp_p5);
|
||||||
y = pmadd(y, r, p16f_cephes_exp_p4);
|
y = pmadd(y, r3, y1);
|
||||||
y = pmadd(y, r, p16f_cephes_exp_p5);
|
y = pmadd(y, r2, y2);
|
||||||
y = pmadd(y, r2, r);
|
|
||||||
y = padd(y, p16f_1);
|
|
||||||
|
|
||||||
// Build emm0 = 2^m.
|
// Build emm0 = 2^m.
|
||||||
Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127));
|
Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127));
|
||||||
@@ -184,75 +118,40 @@ pexp<Packet16f>(const Packet16f& _x) {
|
|||||||
return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x);
|
return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*template <>
|
template <>
|
||||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
|
||||||
pexp<Packet8d>(const Packet8d& _x) {
|
pexp<Packet8d>(const Packet8d& _x) {
|
||||||
Packet8d x = _x;
|
return pexp_double(_x);
|
||||||
|
}
|
||||||
|
|
||||||
_EIGEN_DECLARE_CONST_Packet8d(1, 1.0);
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexp)
|
||||||
_EIGEN_DECLARE_CONST_Packet8d(2, 2.0);
|
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexp)
|
||||||
|
|
||||||
_EIGEN_DECLARE_CONST_Packet8d(exp_hi, 709.437);
|
template <>
|
||||||
_EIGEN_DECLARE_CONST_Packet8d(exp_lo, -709.436139303);
|
EIGEN_STRONG_INLINE Packet16h pfrexp(const Packet16h& a, Packet16h& exponent) {
|
||||||
|
Packet16f fexponent;
|
||||||
|
const Packet16h out = float2half(pfrexp<Packet16f>(half2float(a), fexponent));
|
||||||
|
exponent = float2half(fexponent);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_LOG2EF, 1.4426950408889634073599);
|
template <>
|
||||||
|
EIGEN_STRONG_INLINE Packet16h pldexp(const Packet16h& a, const Packet16h& exponent) {
|
||||||
|
return float2half(pldexp<Packet16f>(half2float(a), half2float(exponent)));
|
||||||
|
}
|
||||||
|
|
||||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p0, 1.26177193074810590878e-4);
|
template <>
|
||||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p1, 3.02994407707441961300e-2);
|
EIGEN_STRONG_INLINE Packet16bf pfrexp(const Packet16bf& a, Packet16bf& exponent) {
|
||||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p2, 9.99999999999999999910e-1);
|
Packet16f fexponent;
|
||||||
|
const Packet16bf out = F32ToBf16(pfrexp<Packet16f>(Bf16ToF32(a), fexponent));
|
||||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q0, 3.00198505138664455042e-6);
|
exponent = F32ToBf16(fexponent);
|
||||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q1, 2.52448340349684104192e-3);
|
return out;
|
||||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q2, 2.27265548208155028766e-1);
|
}
|
||||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q3, 2.00000000000000000009e0);
|
|
||||||
|
|
||||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_C1, 0.693145751953125);
|
|
||||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_C2, 1.42860682030941723212e-6);
|
|
||||||
|
|
||||||
// clamp x
|
|
||||||
x = pmax(pmin(x, p8d_exp_hi), p8d_exp_lo);
|
|
||||||
|
|
||||||
// Express exp(x) as exp(g + n*log(2)).
|
|
||||||
const Packet8d n =
|
|
||||||
_mm512_mul_round_pd(p8d_cephes_LOG2EF, x, _MM_FROUND_TO_NEAREST_INT);
|
|
||||||
|
|
||||||
// Get the remainder modulo log(2), i.e. the "g" described above. Subtract
|
|
||||||
// n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
|
|
||||||
// digits right.
|
|
||||||
const Packet8d nC1 = pmul(n, p8d_cephes_exp_C1);
|
|
||||||
const Packet8d nC2 = pmul(n, p8d_cephes_exp_C2);
|
|
||||||
x = psub(x, nC1);
|
|
||||||
x = psub(x, nC2);
|
|
||||||
|
|
||||||
const Packet8d x2 = pmul(x, x);
|
|
||||||
|
|
||||||
// Evaluate the numerator polynomial of the rational interpolant.
|
|
||||||
Packet8d px = p8d_cephes_exp_p0;
|
|
||||||
px = pmadd(px, x2, p8d_cephes_exp_p1);
|
|
||||||
px = pmadd(px, x2, p8d_cephes_exp_p2);
|
|
||||||
px = pmul(px, x);
|
|
||||||
|
|
||||||
// Evaluate the denominator polynomial of the rational interpolant.
|
|
||||||
Packet8d qx = p8d_cephes_exp_q0;
|
|
||||||
qx = pmadd(qx, x2, p8d_cephes_exp_q1);
|
|
||||||
qx = pmadd(qx, x2, p8d_cephes_exp_q2);
|
|
||||||
qx = pmadd(qx, x2, p8d_cephes_exp_q3);
|
|
||||||
|
|
||||||
// I don't really get this bit, copied from the SSE2 routines, so...
|
|
||||||
// TODO(gonnet): Figure out what is going on here, perhaps find a better
|
|
||||||
// rational interpolant?
|
|
||||||
x = _mm512_div_pd(px, psub(qx, px));
|
|
||||||
x = pmadd(p8d_2, x, p8d_1);
|
|
||||||
|
|
||||||
// Build e=2^n.
|
|
||||||
const Packet8d e = _mm512_castsi512_pd(_mm512_slli_epi64(
|
|
||||||
_mm512_add_epi64(_mm512_cvtpd_epi64(n), _mm512_set1_epi64(1023)), 52));
|
|
||||||
|
|
||||||
// Construct the result 2^n * exp(g) = e * x. The max is used to catch
|
|
||||||
// non-finite values in the input.
|
|
||||||
return pmax(pmul(x, e), _x);
|
|
||||||
}*/
|
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_STRONG_INLINE Packet16bf pldexp(const Packet16bf& a, const Packet16bf& exponent) {
|
||||||
|
return F32ToBf16(pldexp<Packet16f>(Bf16ToF32(a), Bf16ToF32(exponent)));
|
||||||
|
}
|
||||||
|
|
||||||
// Functions for sqrt.
|
// Functions for sqrt.
|
||||||
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
|
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
|
||||||
@@ -303,12 +202,16 @@ template <>
|
|||||||
EIGEN_STRONG_INLINE Packet16f psqrt<Packet16f>(const Packet16f& x) {
|
EIGEN_STRONG_INLINE Packet16f psqrt<Packet16f>(const Packet16f& x) {
|
||||||
return _mm512_sqrt_ps(x);
|
return _mm512_sqrt_ps(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
EIGEN_STRONG_INLINE Packet8d psqrt<Packet8d>(const Packet8d& x) {
|
EIGEN_STRONG_INLINE Packet8d psqrt<Packet8d>(const Packet8d& x) {
|
||||||
return _mm512_sqrt_pd(x);
|
return _mm512_sqrt_pd(x);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, psqrt)
|
||||||
|
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psqrt)
|
||||||
|
|
||||||
// prsqrt for float.
|
// prsqrt for float.
|
||||||
#if defined(EIGEN_VECTORIZE_AVX512ER)
|
#if defined(EIGEN_VECTORIZE_AVX512ER)
|
||||||
|
|
||||||
@@ -316,7 +219,6 @@ template <>
|
|||||||
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
|
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
|
||||||
return _mm512_rsqrt28_ps(x);
|
return _mm512_rsqrt28_ps(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif EIGEN_FAST_MATH
|
#elif EIGEN_FAST_MATH
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
@@ -348,7 +250,6 @@ prsqrt<Packet16f>(const Packet16f& _x) {
|
|||||||
// return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(0) = +inf.
|
// return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(0) = +inf.
|
||||||
return _mm512_mask_blend_ps(not_finite_pos_mask, y_newton, y_approx);
|
return _mm512_mask_blend_ps(not_finite_pos_mask, y_newton, y_approx);
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
@@ -356,9 +257,11 @@ EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
|
|||||||
_EIGEN_DECLARE_CONST_Packet16f(one, 1.0f);
|
_EIGEN_DECLARE_CONST_Packet16f(one, 1.0f);
|
||||||
return _mm512_div_ps(p16f_one, _mm512_sqrt_ps(x));
|
return _mm512_div_ps(p16f_one, _mm512_sqrt_ps(x));
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, prsqrt)
|
||||||
|
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, prsqrt)
|
||||||
|
|
||||||
// prsqrt for double.
|
// prsqrt for double.
|
||||||
#if EIGEN_FAST_MATH
|
#if EIGEN_FAST_MATH
|
||||||
template <>
|
template <>
|
||||||
@@ -406,19 +309,23 @@ EIGEN_STRONG_INLINE Packet8d prsqrt<Packet8d>(const Packet8d& x) {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_VECTORIZE_AVX512DQ)
|
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||||
Packet16f plog1p<Packet16f>(const Packet16f& _x) {
|
Packet16f plog1p<Packet16f>(const Packet16f& _x) {
|
||||||
return generic_plog1p(_x);
|
return generic_plog1p(_x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog1p)
|
||||||
|
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog1p)
|
||||||
|
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||||
Packet16f pexpm1<Packet16f>(const Packet16f& _x) {
|
Packet16f pexpm1<Packet16f>(const Packet16f& _x) {
|
||||||
return generic_expm1(_x);
|
return generic_expm1(_x);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexpm1)
|
||||||
|
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexpm1)
|
||||||
|
|
||||||
|
#endif // EIGEN_HAS_AVX512_MATH
|
||||||
|
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
@@ -439,6 +346,14 @@ ptanh<Packet16f>(const Packet16f& _x) {
|
|||||||
return internal::generic_fast_tanh_float(_x);
|
return internal::generic_fast_tanh_float(_x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, psin)
|
||||||
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, pcos)
|
||||||
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, ptanh)
|
||||||
|
|
||||||
|
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psin)
|
||||||
|
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pcos)
|
||||||
|
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, ptanh)
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -14,6 +14,22 @@ namespace Eigen {
|
|||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet16i pcast<Packet16f, Packet16i>(const Packet16f& a) {
|
||||||
|
return _mm512_cvttps_epi32(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16i, Packet16f>(const Packet16i& a) {
|
||||||
|
return _mm512_cvtepi32_ps(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet16i preinterpret<Packet16i, Packet16f>(const Packet16f& a) {
|
||||||
|
return _mm512_castps_si512(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet16f preinterpret<Packet16f, Packet16i>(const Packet16i& a) {
|
||||||
|
return _mm512_castsi512_ps(a);
|
||||||
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct type_casting_traits<half, float> {
|
struct type_casting_traits<half, float> {
|
||||||
enum {
|
enum {
|
||||||
@@ -40,6 +56,32 @@ template<> EIGEN_STRONG_INLINE Packet16h pcast<Packet16f, Packet16h>(const Packe
|
|||||||
return float2half(a);
|
return float2half(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct type_casting_traits<bfloat16, float> {
|
||||||
|
enum {
|
||||||
|
VectorizedCast = 1,
|
||||||
|
SrcCoeffRatio = 1,
|
||||||
|
TgtCoeffRatio = 1
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16bf, Packet16f>(const Packet16bf& a) {
|
||||||
|
return Bf16ToF32(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct type_casting_traits<float, bfloat16> {
|
||||||
|
enum {
|
||||||
|
VectorizedCast = 1,
|
||||||
|
SrcCoeffRatio = 1,
|
||||||
|
TgtCoeffRatio = 1
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet16bf pcast<Packet16f, Packet16bf>(const Packet16f& a) {
|
||||||
|
return F32ToBf16(a);
|
||||||
|
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|||||||
@@ -15,8 +15,10 @@ namespace Eigen {
|
|||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
inline Packet4ui p4ui_CONJ_XOR() {
|
||||||
#ifdef __VSX__
|
return vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
||||||
|
}
|
||||||
|
#ifdef EIGEN_VECTORIZE_VSX
|
||||||
#if defined(_BIG_ENDIAN)
|
#if defined(_BIG_ENDIAN)
|
||||||
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||||
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||||
@@ -29,8 +31,54 @@ static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (P
|
|||||||
//---------- float ----------
|
//---------- float ----------
|
||||||
struct Packet2cf
|
struct Packet2cf
|
||||||
{
|
{
|
||||||
EIGEN_STRONG_INLINE explicit Packet2cf() : v(p4f_ZERO) {}
|
EIGEN_STRONG_INLINE explicit Packet2cf() {}
|
||||||
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
||||||
|
|
||||||
|
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b)
|
||||||
|
{
|
||||||
|
Packet4f v1, v2;
|
||||||
|
|
||||||
|
// Permute and multiply the real parts of a and b
|
||||||
|
v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
|
||||||
|
// Get the imaginary parts of a
|
||||||
|
v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
|
||||||
|
// multiply a_re * b
|
||||||
|
v1 = vec_madd(v1, b.v, p4f_ZERO);
|
||||||
|
// multiply a_im * b and get the conjugate result
|
||||||
|
v2 = vec_madd(v2, b.v, p4f_ZERO);
|
||||||
|
v2 = reinterpret_cast<Packet4f>(pxor(v2, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR())));
|
||||||
|
// permute back to a proper order
|
||||||
|
v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
|
||||||
|
|
||||||
|
return Packet2cf(padd<Packet4f>(v1, v2));
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_STRONG_INLINE Packet2cf& operator*=(const Packet2cf& b) {
|
||||||
|
v = pmul(Packet2cf(*this), b).v;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
EIGEN_STRONG_INLINE Packet2cf operator*(const Packet2cf& b) const {
|
||||||
|
return Packet2cf(*this) *= b;
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_STRONG_INLINE Packet2cf& operator+=(const Packet2cf& b) {
|
||||||
|
v = padd(v, b.v);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
EIGEN_STRONG_INLINE Packet2cf operator+(const Packet2cf& b) const {
|
||||||
|
return Packet2cf(*this) += b;
|
||||||
|
}
|
||||||
|
EIGEN_STRONG_INLINE Packet2cf& operator-=(const Packet2cf& b) {
|
||||||
|
v = psub(v, b.v);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
EIGEN_STRONG_INLINE Packet2cf operator-(const Packet2cf& b) const {
|
||||||
|
return Packet2cf(*this) -= b;
|
||||||
|
}
|
||||||
|
EIGEN_STRONG_INLINE Packet2cf operator-(void) const {
|
||||||
|
return Packet2cf(-v);
|
||||||
|
}
|
||||||
|
|
||||||
Packet4f v;
|
Packet4f v;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -38,6 +86,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
|||||||
{
|
{
|
||||||
typedef Packet2cf type;
|
typedef Packet2cf type;
|
||||||
typedef Packet2cf half;
|
typedef Packet2cf half;
|
||||||
|
typedef Packet4f as_real;
|
||||||
enum {
|
enum {
|
||||||
Vectorizable = 1,
|
Vectorizable = 1,
|
||||||
AlignedOnScalar = 1,
|
AlignedOnScalar = 1,
|
||||||
@@ -53,14 +102,15 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
|||||||
HasAbs2 = 0,
|
HasAbs2 = 0,
|
||||||
HasMin = 0,
|
HasMin = 0,
|
||||||
HasMax = 0,
|
HasMax = 0,
|
||||||
#ifdef __VSX__
|
HasSqrt = 1,
|
||||||
|
#ifdef EIGEN_VECTORIZE_VSX
|
||||||
HasBlend = 1,
|
HasBlend = 1,
|
||||||
#endif
|
#endif
|
||||||
HasSetLinear = 0
|
HasSetLinear = 0
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; };
|
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; typedef Packet4f as_real; };
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||||
{
|
{
|
||||||
@@ -80,6 +130,25 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<
|
|||||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstore((float*)to, from.v); }
|
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstore((float*)to, from.v); }
|
||||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstoreu((float*)to, from.v); }
|
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstoreu((float*)to, from.v); }
|
||||||
|
|
||||||
|
EIGEN_STRONG_INLINE Packet2cf pload2(const std::complex<float>& from0, const std::complex<float>& from1)
|
||||||
|
{
|
||||||
|
Packet4f res0, res1;
|
||||||
|
#ifdef EIGEN_VECTORIZE_VSX
|
||||||
|
__asm__ ("lxsdx %x0,%y1" : "=wa" (res0) : "Z" (from0));
|
||||||
|
__asm__ ("lxsdx %x0,%y1" : "=wa" (res1) : "Z" (from1));
|
||||||
|
#ifdef _BIG_ENDIAN
|
||||||
|
__asm__ ("xxpermdi %x0, %x1, %x2, 0" : "=wa" (res0) : "wa" (res0), "wa" (res1));
|
||||||
|
#else
|
||||||
|
__asm__ ("xxpermdi %x0, %x2, %x1, 0" : "=wa" (res0) : "wa" (res0), "wa" (res1));
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
*reinterpret_cast<std::complex<float> *>(&res0) = from0;
|
||||||
|
*reinterpret_cast<std::complex<float> *>(&res1) = from1;
|
||||||
|
res0 = vec_perm(res0, res1, p16uc_TRANSPOSE64_HI);
|
||||||
|
#endif
|
||||||
|
return Packet2cf(res0);
|
||||||
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
|
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
|
||||||
{
|
{
|
||||||
EIGEN_ALIGN16 std::complex<float> af[2];
|
EIGEN_ALIGN16 std::complex<float> af[2];
|
||||||
@@ -98,26 +167,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
|
|||||||
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v + b.v); }
|
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v + b.v); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v - b.v); }
|
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v - b.v); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
|
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); }
|
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR()))); }
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
|
||||||
{
|
|
||||||
Packet4f v1, v2;
|
|
||||||
|
|
||||||
// Permute and multiply the real parts of a and b
|
|
||||||
v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
|
|
||||||
// Get the imaginary parts of a
|
|
||||||
v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
|
|
||||||
// multiply a_re * b
|
|
||||||
v1 = vec_madd(v1, b.v, p4f_ZERO);
|
|
||||||
// multiply a_im * b and get the conjugate result
|
|
||||||
v2 = vec_madd(v2, b.v, p4f_ZERO);
|
|
||||||
v2 = reinterpret_cast<Packet4f>(pxor(v2, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR)));
|
|
||||||
// permute back to a proper order
|
|
||||||
v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
|
|
||||||
|
|
||||||
return Packet2cf(padd<Packet4f>(v1, v2));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v, b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v, b.v)); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v, b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v, b.v)); }
|
||||||
@@ -149,22 +199,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packe
|
|||||||
return pfirst<Packet2cf>(Packet2cf(b));
|
return pfirst<Packet2cf>(Packet2cf(b));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
|
|
||||||
{
|
|
||||||
Packet4f b1, b2;
|
|
||||||
#ifdef _BIG_ENDIAN
|
|
||||||
b1 = vec_sld(vecs[0].v, vecs[1].v, 8);
|
|
||||||
b2 = vec_sld(vecs[1].v, vecs[0].v, 8);
|
|
||||||
#else
|
|
||||||
b1 = vec_sld(vecs[1].v, vecs[0].v, 8);
|
|
||||||
b2 = vec_sld(vecs[0].v, vecs[1].v, 8);
|
|
||||||
#endif
|
|
||||||
b2 = vec_sld(b2, b2, 8);
|
|
||||||
b2 = padd<Packet4f>(b1, b2);
|
|
||||||
|
|
||||||
return Packet2cf(b2);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
|
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
|
||||||
{
|
{
|
||||||
Packet4f b;
|
Packet4f b;
|
||||||
@@ -175,63 +209,11 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
|
|||||||
return pfirst<Packet2cf>(prod);
|
return pfirst<Packet2cf>(prod);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int Offset>
|
|
||||||
struct palign_impl<Offset,Packet2cf>
|
|
||||||
{
|
|
||||||
static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
|
|
||||||
{
|
|
||||||
if (Offset==1)
|
|
||||||
{
|
|
||||||
#ifdef _BIG_ENDIAN
|
|
||||||
first.v = vec_sld(first.v, second.v, 8);
|
|
||||||
#else
|
|
||||||
first.v = vec_sld(second.v, first.v, 8);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
|
||||||
{
|
|
||||||
return internal::pmul(a, pconj(b));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
|
||||||
{
|
|
||||||
return internal::pmul(pconj(a), b);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
|
||||||
{
|
|
||||||
return pconj(internal::pmul(a, b));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
|
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||||
{
|
{
|
||||||
// TODO optimize it for AltiVec
|
return pdiv_complex(a, b);
|
||||||
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a, b);
|
|
||||||
Packet4f s = pmul<Packet4f>(b.v, b.v);
|
|
||||||
return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
|
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
|
||||||
@@ -251,77 +233,27 @@ template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packe
|
|||||||
return Packet2cf(vec_and(eq, vec_perm(eq, eq, p16uc_COMPLEX32_REV)));
|
return Packet2cf(vec_and(eq, vec_perm(eq, eq, p16uc_COMPLEX32_REV)));
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __VSX__
|
#ifdef EIGEN_VECTORIZE_VSX
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
|
template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
|
||||||
Packet2cf result;
|
Packet2cf result;
|
||||||
result.v = reinterpret_cast<Packet4f>(pblend<Packet2d>(ifPacket, reinterpret_cast<Packet2d>(thenPacket.v), reinterpret_cast<Packet2d>(elsePacket.v)));
|
result.v = reinterpret_cast<Packet4f>(pblend<Packet2d>(ifPacket, reinterpret_cast<Packet2d>(thenPacket.v), reinterpret_cast<Packet2d>(elsePacket.v)));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a)
|
||||||
|
{
|
||||||
|
return psqrt_complex<Packet2cf>(a);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//---------- double ----------
|
//---------- double ----------
|
||||||
#ifdef __VSX__
|
#ifdef EIGEN_VECTORIZE_VSX
|
||||||
struct Packet1cd
|
struct Packet1cd
|
||||||
{
|
{
|
||||||
EIGEN_STRONG_INLINE Packet1cd() {}
|
EIGEN_STRONG_INLINE Packet1cd() {}
|
||||||
EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
|
EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
|
||||||
Packet2d v;
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b)
|
||||||
{
|
|
||||||
typedef Packet1cd type;
|
|
||||||
typedef Packet1cd half;
|
|
||||||
enum {
|
|
||||||
Vectorizable = 1,
|
|
||||||
AlignedOnScalar = 0,
|
|
||||||
size = 1,
|
|
||||||
HasHalfPacket = 0,
|
|
||||||
|
|
||||||
HasAdd = 1,
|
|
||||||
HasSub = 1,
|
|
||||||
HasMul = 1,
|
|
||||||
HasDiv = 1,
|
|
||||||
HasNegate = 1,
|
|
||||||
HasAbs = 0,
|
|
||||||
HasAbs2 = 0,
|
|
||||||
HasMin = 0,
|
|
||||||
HasMax = 0,
|
|
||||||
HasSetLinear = 0
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; };
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); }
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); }
|
|
||||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstore((double*)to, from.v); }
|
|
||||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstoreu((double*)to, from.v); }
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
|
|
||||||
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
|
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
|
|
||||||
{
|
|
||||||
EIGEN_ALIGN16 std::complex<double> af[2];
|
|
||||||
af[0] = from[0*stride];
|
|
||||||
af[1] = from[1*stride];
|
|
||||||
return pload<Packet1cd>(af);
|
|
||||||
}
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
|
|
||||||
{
|
|
||||||
EIGEN_ALIGN16 std::complex<double> af[2];
|
|
||||||
pstore<std::complex<double> >(af, from);
|
|
||||||
to[0*stride] = af[0];
|
|
||||||
to[1*stride] = af[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR2))); }
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
|
||||||
{
|
{
|
||||||
Packet2d a_re, a_im, v1, v2;
|
Packet2d a_re, a_im, v1, v2;
|
||||||
|
|
||||||
@@ -339,6 +271,84 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
|
|||||||
return Packet1cd(padd<Packet2d>(v1, v2));
|
return Packet1cd(padd<Packet2d>(v1, v2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EIGEN_STRONG_INLINE Packet1cd& operator*=(const Packet1cd& b) {
|
||||||
|
v = pmul(Packet1cd(*this), b).v;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
EIGEN_STRONG_INLINE Packet1cd operator*(const Packet1cd& b) const {
|
||||||
|
return Packet1cd(*this) *= b;
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_STRONG_INLINE Packet1cd& operator+=(const Packet1cd& b) {
|
||||||
|
v = padd(v, b.v);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
EIGEN_STRONG_INLINE Packet1cd operator+(const Packet1cd& b) const {
|
||||||
|
return Packet1cd(*this) += b;
|
||||||
|
}
|
||||||
|
EIGEN_STRONG_INLINE Packet1cd& operator-=(const Packet1cd& b) {
|
||||||
|
v = psub(v, b.v);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
EIGEN_STRONG_INLINE Packet1cd operator-(const Packet1cd& b) const {
|
||||||
|
return Packet1cd(*this) -= b;
|
||||||
|
}
|
||||||
|
EIGEN_STRONG_INLINE Packet1cd operator-(void) const {
|
||||||
|
return Packet1cd(-v);
|
||||||
|
}
|
||||||
|
|
||||||
|
Packet2d v;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||||
|
{
|
||||||
|
typedef Packet1cd type;
|
||||||
|
typedef Packet1cd half;
|
||||||
|
typedef Packet2d as_real;
|
||||||
|
enum {
|
||||||
|
Vectorizable = 1,
|
||||||
|
AlignedOnScalar = 0,
|
||||||
|
size = 1,
|
||||||
|
HasHalfPacket = 0,
|
||||||
|
|
||||||
|
HasAdd = 1,
|
||||||
|
HasSub = 1,
|
||||||
|
HasMul = 1,
|
||||||
|
HasDiv = 1,
|
||||||
|
HasNegate = 1,
|
||||||
|
HasAbs = 0,
|
||||||
|
HasAbs2 = 0,
|
||||||
|
HasMin = 0,
|
||||||
|
HasMax = 0,
|
||||||
|
HasSqrt = 1,
|
||||||
|
HasSetLinear = 0
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; typedef Packet2d as_real; };
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); }
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); }
|
||||||
|
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstore((double*)to, from.v); }
|
||||||
|
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstoreu((double*)to, from.v); }
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
|
||||||
|
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
|
||||||
|
|
||||||
|
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index)
|
||||||
|
{
|
||||||
|
return pload<Packet1cd>(from);
|
||||||
|
}
|
||||||
|
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index)
|
||||||
|
{
|
||||||
|
pstore<std::complex<double> >(to, from);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR2))); }
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(a.v,b.v)); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(por(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(por(a.v,b.v)); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pxor(a.v,b.v)); }
|
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pxor(a.v,b.v)); }
|
||||||
@@ -359,61 +369,14 @@ template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Pac
|
|||||||
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
|
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
|
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
|
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
|
||||||
|
|
||||||
template<int Offset>
|
|
||||||
struct palign_impl<Offset,Packet1cd>
|
|
||||||
{
|
|
||||||
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
|
|
||||||
{
|
|
||||||
// FIXME is it sure we never have to align a Packet1cd?
|
|
||||||
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
|
||||||
{
|
|
||||||
return internal::pmul(a, pconj(b));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
|
||||||
{
|
|
||||||
return internal::pmul(pconj(a), b);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
|
|
||||||
{
|
|
||||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
|
||||||
{ return padd(pmul(x,y),c); }
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
|
||||||
{
|
|
||||||
return pconj(internal::pmul(a, b));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
|
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||||
{
|
{
|
||||||
// TODO optimize it for AltiVec
|
return pdiv_complex(a, b);
|
||||||
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
|
|
||||||
Packet2d s = pmul<Packet2d>(b.v, b.v);
|
|
||||||
return Packet1cd(pdiv(res.v, padd<Packet2d>(s, vec_perm(s, s, p16uc_REVERSE64))));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
|
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
|
||||||
@@ -439,7 +402,12 @@ template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packe
|
|||||||
return Packet1cd(vec_and(eq, eq_swapped));
|
return Packet1cd(vec_and(eq, eq_swapped));
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // __VSX__
|
template<> EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a)
|
||||||
|
{
|
||||||
|
return psqrt_complex<Packet1cd>(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // EIGEN_VECTORIZE_VSX
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|||||||
@@ -40,16 +40,14 @@ Packet4f pcos<Packet4f>(const Packet4f& _x)
|
|||||||
return pcos_float(_x);
|
return pcos_float(_x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef EIGEN_VECTORIZE_VSX
|
||||||
#ifndef EIGEN_COMP_CLANG
|
#ifndef EIGEN_COMP_CLANG
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||||
Packet4f prsqrt<Packet4f>(const Packet4f& x)
|
Packet4f prsqrt<Packet4f>(const Packet4f& x)
|
||||||
{
|
{
|
||||||
return vec_rsqrt(x);
|
return vec_rsqrt(x);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __VSX__
|
|
||||||
#ifndef EIGEN_COMP_CLANG
|
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||||
Packet2d prsqrt<Packet2d>(const Packet2d& x)
|
Packet2d prsqrt<Packet2d>(const Packet2d& x)
|
||||||
{
|
{
|
||||||
@@ -57,7 +55,7 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
Packet4f psqrt<Packet4f>(const Packet4f& x)
|
Packet4f psqrt<Packet4f>(const Packet4f& x)
|
||||||
{
|
{
|
||||||
return vec_sqrt(x);
|
return vec_sqrt(x);
|
||||||
@@ -69,12 +67,43 @@ Packet2d psqrt<Packet2d>(const Packet2d& x)
|
|||||||
return vec_sqrt(x);
|
return vec_sqrt(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
#if !EIGEN_COMP_CLANG
|
||||||
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
|
Packet4f prsqrt<Packet4f>(const Packet4f& x)
|
||||||
|
{
|
||||||
|
return pset1<Packet4f>(1.0f) / psqrt<Packet4f>(x);
|
||||||
|
// vec_rsqrt returns different results from the generic version
|
||||||
|
// return vec_rsqrt(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
|
Packet2d prsqrt<Packet2d>(const Packet2d& x)
|
||||||
|
{
|
||||||
|
return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
|
||||||
|
// vec_rsqrt returns different results from the generic version
|
||||||
|
// return vec_rsqrt(x);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
Packet2d pexp<Packet2d>(const Packet2d& _x)
|
Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||||
{
|
{
|
||||||
return pexp_double(_x);
|
return pexp_double(_x);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet8bf psqrt<Packet8bf> (const Packet8bf& a){
|
||||||
|
BF16_TO_F32_UNARY_OP_WRAPPER(vec_sqrt, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet8bf prsqrt<Packet8bf> (const Packet8bf& a){
|
||||||
|
BF16_TO_F32_UNARY_OP_WRAPPER(prsqrt<Packet4f>, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet8bf pexp<Packet8bf> (const Packet8bf& a){
|
||||||
|
BF16_TO_F32_UNARY_OP_WRAPPER(pexp_float, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // EIGEN_VECTORIZE_VSX
|
||||||
|
|
||||||
// Hyperbolic Tangent function.
|
// Hyperbolic Tangent function.
|
||||||
template <>
|
template <>
|
||||||
|
|||||||
2776
Eigen/src/Core/arch/AltiVec/MatrixProduct.h
Normal file
2776
Eigen/src/Core/arch/AltiVec/MatrixProduct.h
Normal file
File diff suppressed because it is too large
Load Diff
159
Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h
Normal file
159
Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
//#define EIGEN_POWER_USE_PREFETCH // Use prefetching in gemm routines
|
||||||
|
#ifdef EIGEN_POWER_USE_PREFETCH
|
||||||
|
#define EIGEN_POWER_PREFETCH(p) prefetch(p)
|
||||||
|
#else
|
||||||
|
#define EIGEN_POWER_PREFETCH(p)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace Eigen {
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows, const Index accCols>
|
||||||
|
EIGEN_ALWAYS_INLINE void gemm_extra_row(
|
||||||
|
const DataMapper& res,
|
||||||
|
const Scalar* lhs_base,
|
||||||
|
const Scalar* rhs_base,
|
||||||
|
Index depth,
|
||||||
|
Index strideA,
|
||||||
|
Index offsetA,
|
||||||
|
Index row,
|
||||||
|
Index col,
|
||||||
|
Index rows,
|
||||||
|
Index cols,
|
||||||
|
Index remaining_rows,
|
||||||
|
const Packet& pAlpha,
|
||||||
|
const Packet& pMask);
|
||||||
|
|
||||||
|
template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
|
||||||
|
EIGEN_STRONG_INLINE void gemm_extra_cols(
|
||||||
|
const DataMapper& res,
|
||||||
|
const Scalar* blockA,
|
||||||
|
const Scalar* blockB,
|
||||||
|
Index depth,
|
||||||
|
Index strideA,
|
||||||
|
Index offsetA,
|
||||||
|
Index strideB,
|
||||||
|
Index offsetB,
|
||||||
|
Index col,
|
||||||
|
Index rows,
|
||||||
|
Index cols,
|
||||||
|
Index remaining_rows,
|
||||||
|
const Packet& pAlpha,
|
||||||
|
const Packet& pMask);
|
||||||
|
|
||||||
|
template<typename Packet>
|
||||||
|
EIGEN_ALWAYS_INLINE Packet bmask(const int remaining_rows);
|
||||||
|
|
||||||
|
template<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
|
||||||
|
EIGEN_ALWAYS_INLINE void gemm_complex_extra_row(
|
||||||
|
const DataMapper& res,
|
||||||
|
const Scalar* lhs_base,
|
||||||
|
const Scalar* rhs_base,
|
||||||
|
Index depth,
|
||||||
|
Index strideA,
|
||||||
|
Index offsetA,
|
||||||
|
Index strideB,
|
||||||
|
Index row,
|
||||||
|
Index col,
|
||||||
|
Index rows,
|
||||||
|
Index cols,
|
||||||
|
Index remaining_rows,
|
||||||
|
const Packet& pAlphaReal,
|
||||||
|
const Packet& pAlphaImag,
|
||||||
|
const Packet& pMask);
|
||||||
|
|
||||||
|
template<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
|
||||||
|
EIGEN_STRONG_INLINE void gemm_complex_extra_cols(
|
||||||
|
const DataMapper& res,
|
||||||
|
const Scalar* blockA,
|
||||||
|
const Scalar* blockB,
|
||||||
|
Index depth,
|
||||||
|
Index strideA,
|
||||||
|
Index offsetA,
|
||||||
|
Index strideB,
|
||||||
|
Index offsetB,
|
||||||
|
Index col,
|
||||||
|
Index rows,
|
||||||
|
Index cols,
|
||||||
|
Index remaining_rows,
|
||||||
|
const Packet& pAlphaReal,
|
||||||
|
const Packet& pAlphaImag,
|
||||||
|
const Packet& pMask);
|
||||||
|
|
||||||
|
template<typename Scalar, typename Packet>
|
||||||
|
EIGEN_ALWAYS_INLINE Packet ploadLhs(const Scalar* lhs);
|
||||||
|
|
||||||
|
template<typename DataMapper, typename Packet, typename Index, const Index accCols, int StorageOrder, bool Complex, int N>
|
||||||
|
EIGEN_ALWAYS_INLINE void bload(PacketBlock<Packet,N>& acc, const DataMapper& res, Index row, Index col);
|
||||||
|
|
||||||
|
template<typename Packet, int N>
|
||||||
|
EIGEN_ALWAYS_INLINE void bscale(PacketBlock<Packet,N>& acc, PacketBlock<Packet,N>& accZ, const Packet& pAlpha);
|
||||||
|
|
||||||
|
template<typename Packet, int N>
|
||||||
|
EIGEN_ALWAYS_INLINE void bscalec(PacketBlock<Packet,N>& aReal, PacketBlock<Packet,N>& aImag, const Packet& bReal, const Packet& bImag, PacketBlock<Packet,N>& cReal, PacketBlock<Packet,N>& cImag);
|
||||||
|
|
||||||
|
// Grab two decoupled real/imaginary PacketBlocks and return two coupled (real/imaginary pairs) PacketBlocks.
|
||||||
|
template<typename Packet, typename Packetc, int N>
|
||||||
|
EIGEN_ALWAYS_INLINE void bcouple_common(PacketBlock<Packet,N>& taccReal, PacketBlock<Packet,N>& taccImag, PacketBlock<Packetc, N>& acc1, PacketBlock<Packetc, N>& acc2)
|
||||||
|
{
|
||||||
|
acc1.packet[0].v = vec_mergeh(taccReal.packet[0], taccImag.packet[0]);
|
||||||
|
if (N > 1) {
|
||||||
|
acc1.packet[1].v = vec_mergeh(taccReal.packet[1], taccImag.packet[1]);
|
||||||
|
}
|
||||||
|
if (N > 2) {
|
||||||
|
acc1.packet[2].v = vec_mergeh(taccReal.packet[2], taccImag.packet[2]);
|
||||||
|
}
|
||||||
|
if (N > 3) {
|
||||||
|
acc1.packet[3].v = vec_mergeh(taccReal.packet[3], taccImag.packet[3]);
|
||||||
|
}
|
||||||
|
|
||||||
|
acc2.packet[0].v = vec_mergel(taccReal.packet[0], taccImag.packet[0]);
|
||||||
|
if (N > 1) {
|
||||||
|
acc2.packet[1].v = vec_mergel(taccReal.packet[1], taccImag.packet[1]);
|
||||||
|
}
|
||||||
|
if (N > 2) {
|
||||||
|
acc2.packet[2].v = vec_mergel(taccReal.packet[2], taccImag.packet[2]);
|
||||||
|
}
|
||||||
|
if (N > 3) {
|
||||||
|
acc2.packet[3].v = vec_mergel(taccReal.packet[3], taccImag.packet[3]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Packet, typename Packetc, int N>
|
||||||
|
EIGEN_ALWAYS_INLINE void bcouple(PacketBlock<Packet,N>& taccReal, PacketBlock<Packet,N>& taccImag, PacketBlock<Packetc,N*2>& tRes, PacketBlock<Packetc, N>& acc1, PacketBlock<Packetc, N>& acc2)
|
||||||
|
{
|
||||||
|
bcouple_common<Packet, Packetc, N>(taccReal, taccImag, acc1, acc2);
|
||||||
|
|
||||||
|
acc1.packet[0] = padd<Packetc>(tRes.packet[0], acc1.packet[0]);
|
||||||
|
if (N > 1) {
|
||||||
|
acc1.packet[1] = padd<Packetc>(tRes.packet[1], acc1.packet[1]);
|
||||||
|
}
|
||||||
|
if (N > 2) {
|
||||||
|
acc1.packet[2] = padd<Packetc>(tRes.packet[2], acc1.packet[2]);
|
||||||
|
}
|
||||||
|
if (N > 3) {
|
||||||
|
acc1.packet[3] = padd<Packetc>(tRes.packet[3], acc1.packet[3]);
|
||||||
|
}
|
||||||
|
|
||||||
|
acc2.packet[0] = padd<Packetc>(tRes.packet[0+N], acc2.packet[0]);
|
||||||
|
if (N > 1) {
|
||||||
|
acc2.packet[1] = padd<Packetc>(tRes.packet[1+N], acc2.packet[1]);
|
||||||
|
}
|
||||||
|
if (N > 2) {
|
||||||
|
acc2.packet[2] = padd<Packetc>(tRes.packet[2+N], acc2.packet[2]);
|
||||||
|
}
|
||||||
|
if (N > 3) {
|
||||||
|
acc2.packet[3] = padd<Packetc>(tRes.packet[3+N], acc2.packet[3]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is necessary because ploadRhs for double returns a pair of vectors when MMA is enabled.
// Generic right-hand-side load: forwards to ploadu<Packet>(rhs) and returns the resulting packet.
// Scalar only fixes the element type of the rhs pointer.
|
||||||
|
template<typename Scalar, typename Packet>
|
||||||
|
EIGEN_ALWAYS_INLINE Packet ploadRhs(const Scalar* rhs)
|
||||||
|
{
|
||||||
|
return ploadu<Packet>(rhs);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // end namespace internal
|
||||||
|
} // end namespace Eigen
|
||||||
627
Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h
Normal file
627
Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h
Normal file
@@ -0,0 +1,627 @@
|
|||||||
|
// This file is part of Eigen, a lightweight C++ template library
|
||||||
|
// for linear algebra.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2020 Everton Constantino (everton.constantino@ibm.com)
|
||||||
|
// Copyright (C) 2021 Chip Kerchner (chip.kerchner@ibm.com)
|
||||||
|
//
|
||||||
|
// This Source Code Form is subject to the terms of the Mozilla
|
||||||
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||||
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
|
#ifndef EIGEN_MATRIX_PRODUCT_MMA_ALTIVEC_H
|
||||||
|
#define EIGEN_MATRIX_PRODUCT_MMA_ALTIVEC_H
|
||||||
|
|
||||||
|
// If using dynamic dispatch, set the CPU target.
|
||||||
|
#if defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH)
|
||||||
|
#pragma GCC push_options
|
||||||
|
#pragma GCC target("cpu=power10,htm")
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __has_builtin
|
||||||
|
#if !__has_builtin(__builtin_vsx_assemble_pair)
|
||||||
|
#define __builtin_vsx_assemble_pair __builtin_mma_assemble_pair
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace Eigen {
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
// Resets the MMA accumulator pointed to by acc by calling __builtin_mma_xxsetaccz on it.
// The Scalar and Packet template parameters are not used in the body; callers in this file
// pass them explicitly (see MICRO_MMA_DST_PTR_ONE).
template<typename Scalar, typename Packet>
|
||||||
|
EIGEN_ALWAYS_INLINE void bsetzeroMMA(__vector_quad* acc)
|
||||||
|
{
|
||||||
|
__builtin_mma_xxsetaccz(acc);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Writes one MMA accumulator back into the result mapper.
//   i     : row passed to bload / storePacketBlock (column is always 0 here)
//   data  : result mapper that is read from and then written to
//   alpha : scaling packet handed to bscale
//   acc   : accumulator that gets disassembled into four packets
// Sequence: disassemble acc -> load the current result block -> bscale(tRes, result, alpha)
// -> store tRes back at the same position.
// NOTE(review): bscale is only declared in this part of the file; presumably it computes
// tRes += alpha * result -- confirm against its definition.
template<typename DataMapper, typename Index, typename Packet, const Index accCols>
|
||||||
|
EIGEN_ALWAYS_INLINE void storeAccumulator(Index i, const DataMapper& data, const Packet& alpha, __vector_quad* acc)
|
||||||
|
{
|
||||||
|
PacketBlock<Packet, 4> result;
|
||||||
|
// Unpack the accumulator into the four packets of 'result'.
__builtin_mma_disassemble_acc(&result.packet, acc);
|
||||||
|
|
||||||
|
PacketBlock<Packet, 4> tRes;
|
||||||
|
// Fetch the block currently stored in 'data' at (i, 0).
bload<DataMapper, Packet, Index, accCols, ColMajor, false, 4>(tRes, data, i, 0);
|
||||||
|
|
||||||
|
bscale<Packet, 4>(tRes, result, alpha);
|
||||||
|
|
||||||
|
// Write the combined block back to where it was loaded from.
data.template storePacketBlock<Packet, 4>(i, 0, tRes);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Complex counterpart of storeAccumulator: combines a real and an imaginary MMA accumulator
// with the block already stored in 'data' and writes the result back.
//   i                    : row passed to bload; the two stores go to rows i and i + accColsC
//   alphaReal, alphaImag : scaling packets handed to bscalec
//   accReal, accImag     : accumulators, each disassembled into four packets
// Sequence: disassemble both accumulators -> load tRes from data -> bscalec produces
// taccReal/taccImag -> bcouple interleaves real/imaginary parts and adds tRes
// (first half of tRes into acc1, second half into acc2) -> store acc1 and acc2.
// NOTE(review): bscalec is only declared in this part of the file; presumably it performs the
// complex multiplication by alpha -- confirm against its definition.
template<typename DataMapper, typename Index, typename Packet, typename Packetc, const Index accColsC>
|
||||||
|
EIGEN_ALWAYS_INLINE void storeComplexAccumulator(Index i, const DataMapper& data, const Packet& alphaReal, const Packet& alphaImag, __vector_quad* accReal, __vector_quad* accImag)
|
||||||
|
{
|
||||||
|
PacketBlock<Packet, 4> resultReal, resultImag;
|
||||||
|
__builtin_mma_disassemble_acc(&resultReal.packet, accReal);
|
||||||
|
__builtin_mma_disassemble_acc(&resultImag.packet, accImag);
|
||||||
|
|
||||||
|
// tRes holds eight complex packets: bcouple reads packets [0,4) for acc1 and [4,8) for acc2.
PacketBlock<Packetc, 8> tRes;
|
||||||
|
bload<DataMapper, Packetc, Index, accColsC, ColMajor, true, 4>(tRes, data, i, 0);
|
||||||
|
|
||||||
|
PacketBlock<Packet,4> taccReal, taccImag;
|
||||||
|
bscalec<Packet,4>(resultReal, resultImag, alphaReal, alphaImag, taccReal, taccImag);
|
||||||
|
|
||||||
|
PacketBlock<Packetc, 4> acc1, acc2;
|
||||||
|
bcouple<Packet, Packetc, 4>(taccReal, taccImag, tRes, acc1, acc2);
|
||||||
|
|
||||||
|
// acc1 goes to row i, acc2 to row i + accColsC (both at column 0).
data.template storePacketBlock<Packetc, 4>(i, 0, acc1);
|
||||||
|
data.template storePacketBlock<Packetc, 4>(i + accColsC, 0, acc2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Defaults to float32; since Eigen still supports C++03, we can't use default template arguments.
// Issues one single-precision MMA "ger" builtin on *acc: xvf32gernp when NegativeAccumulate is
// true, xvf32gerpp otherwise. Both operands are reinterpreted as __vector unsigned char.
// Note the argument order: 'a' has type RhsPacket and 'b' has type LhsPacket.
// NOTE(review): per the Power ISA mnemonics, "pp" accumulates the product and "np" accumulates
// its negation -- confirm against the ISA / compiler documentation.
|
||||||
|
template<typename LhsPacket, typename RhsPacket, bool NegativeAccumulate>
|
||||||
|
EIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad* acc, const RhsPacket& a, const LhsPacket& b)
|
||||||
|
{
|
||||||
|
if(NegativeAccumulate)
|
||||||
|
{
|
||||||
|
__builtin_mma_xvf32gernp(acc, (__vector unsigned char)a, (__vector unsigned char)b);
|
||||||
|
} else {
|
||||||
|
__builtin_mma_xvf32gerpp(acc, (__vector unsigned char)a, (__vector unsigned char)b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Double-precision form taking the first operand as a block of two Packet2d.
// The address of a.packet[0] is reinterpreted as a __vector_pair* and dereferenced, then
// xvf64gernp (NegativeAccumulate == true) or xvf64gerpp (false) is issued on *acc with
// 'b' reinterpreted as __vector unsigned char.
// NOTE(review): the cast assumes the two packets of 'a' are laid out so that they can be read
// as one __vector_pair -- confirm alignment/layout requirements.
template<typename LhsPacket, typename RhsPacket, bool NegativeAccumulate>
|
||||||
|
EIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad* acc, const PacketBlock<Packet2d,2>& a, const Packet2d& b)
|
||||||
|
{
|
||||||
|
__vector_pair* a0 = (__vector_pair *)(&a.packet[0]);
|
||||||
|
if(NegativeAccumulate)
|
||||||
|
{
|
||||||
|
__builtin_mma_xvf64gernp(acc, *a0, (__vector unsigned char)b);
|
||||||
|
} else {
|
||||||
|
__builtin_mma_xvf64gerpp(acc, *a0, (__vector unsigned char)b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Double-precision form taking the first operand directly as a __vector_pair.
// Issues xvf64gernp when NegativeAccumulate is true and xvf64gerpp otherwise, passing 'a'
// as a __vector_pair and 'b' reinterpreted as __vector unsigned char.
template<typename LhsPacket, typename RhsPacket, bool NegativeAccumulate>
|
||||||
|
EIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad* acc, const __vector_pair& a, const Packet2d& b)
|
||||||
|
{
|
||||||
|
if(NegativeAccumulate)
|
||||||
|
{
|
||||||
|
__builtin_mma_xvf64gernp(acc, (__vector_pair)a, (__vector unsigned char)b);
|
||||||
|
} else {
|
||||||
|
__builtin_mma_xvf64gerpp(acc, (__vector_pair)a, (__vector unsigned char)b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deliberately empty overload for a __vector_pair operand combined with a Packet4f.
// NOTE(review): the MICRO_MMA_ONE / MICRO_MMA_ONE_PEEL macros in this file select between
// RhsPacket and __vector_pair with a run-time-style 'if (sizeof(Scalar) == sizeof(float))',
// so both branches must compile for float; this stub presumably exists for that reason.
template<typename LhsPacket, typename RhsPacket, bool NegativeAccumulate>
|
||||||
|
EIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad*, const __vector_pair&, const Packet4f&)
|
||||||
|
{
|
||||||
|
// Just for compilation
|
||||||
|
}
|
||||||
|
|
||||||
|
// Complex update built from up to four pgerMMA calls on separate real/imaginary accumulators.
//   accReal, accImag : accumulators for the real and imaginary parts
//   lhsV, lhsVi      : real and imaginary packets of the left operand
//   rhsV, rhsVi      : real and imaginary packets of the right operand
// Calls issued (the bool is pgerMMA's NegativeAccumulate argument):
//   always                     : accReal <- (rhsV,  lhsV ), never negated
//   LhsIsReal                  : accImag <- (rhsVi, lhsV ), negated when ConjugateRhs
//   !LhsIsReal && !RhsIsReal   : accReal <- (rhsVi, lhsVi), negated when ConjugateLhs == ConjugateRhs
//                                accImag <- (rhsVi, lhsV ), negated when ConjugateRhs
//   !LhsIsReal                 : accImag <- (rhsV,  lhsVi), negated when ConjugateLhs
// When the right operand is real (and the left is not), rhsVi is unused and only marked as such.
// Scalar is not used in the body.
template<typename Scalar, typename Packet, typename RhsPacket, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
|
||||||
|
EIGEN_ALWAYS_INLINE void pgercMMA(__vector_quad* accReal, __vector_quad* accImag, const Packet& lhsV, const Packet& lhsVi, const RhsPacket& rhsV, const RhsPacket& rhsVi)
|
||||||
|
{
|
||||||
|
pgerMMA<Packet, RhsPacket, false>(accReal, rhsV, lhsV);
|
||||||
|
if(LhsIsReal) {
|
||||||
|
pgerMMA<Packet, RhsPacket, ConjugateRhs>(accImag, rhsVi, lhsV);
|
||||||
|
} else {
|
||||||
|
if(!RhsIsReal) {
|
||||||
|
pgerMMA<Packet, RhsPacket, ConjugateLhs == ConjugateRhs>(accReal, rhsVi, lhsVi);
|
||||||
|
pgerMMA<Packet, RhsPacket, ConjugateRhs>(accImag, rhsVi, lhsV);
|
||||||
|
} else {
|
||||||
|
EIGEN_UNUSED_VARIABLE(rhsVi);
|
||||||
|
}
|
||||||
|
pgerMMA<Packet, RhsPacket, ConjugateLhs>(accImag, rhsV, lhsVi);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is necessary because ploadRhs for double returns a pair of vectors when MMA is enabled.
// Generic case: assigns ploadRhs<Scalar, Packet>(rhs) to the output parameter rhsV.
// The result is returned through rhsV rather than by value; the specializations that follow
// fill a two-packet block or a __vector_pair instead.
|
||||||
|
template<typename Scalar, typename Packet>
|
||||||
|
EIGEN_ALWAYS_INLINE void ploadRhsMMA(const Scalar* rhs, Packet& rhsV)
|
||||||
|
{
|
||||||
|
rhsV = ploadRhs<Scalar, Packet>(rhs);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Specialization for double with a two-packet block as destination.
// Loads two consecutive Packet2d-sized chunks starting at rhs: the first into rhsV.packet[0],
// the next one (pointer advanced by one Packet2d) into rhsV.packet[1].
// The pointer casts only serve to step in units of Packet2d; the loads themselves go through
// ploadRhs<double, Packet2d>.
template<>
|
||||||
|
EIGEN_ALWAYS_INLINE void ploadRhsMMA<double, PacketBlock<Packet2d, 2> >(const double* rhs, PacketBlock<Packet2d, 2>& rhsV)
|
||||||
|
{
|
||||||
|
rhsV.packet[0] = ploadRhs<double, Packet2d>((const double *)((Packet2d *)rhs ));
|
||||||
|
rhsV.packet[1] = ploadRhs<double, Packet2d>((const double *)(((Packet2d *)rhs) + 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Specialization for double with a __vector_pair as destination.
// Two code paths, chosen at preprocessing time by EIGEN_COMP_LLVM:
//  - when set: two Packet2d loads are combined with __builtin_vsx_assemble_pair; note that the
//    chunk at offset +1 is passed first and the chunk at offset 0 second.
//  - otherwise: a single inline-asm "lxvp" loads the pair directly from *rhs.
// NOTE(review): the reversed argument order in the builtin path is presumably what makes both
// paths yield the same register layout -- confirm against the builtin's documentation.
template<>
|
||||||
|
EIGEN_ALWAYS_INLINE void ploadRhsMMA<double, __vector_pair>(const double* rhs, __vector_pair& rhsV)
|
||||||
|
{
|
||||||
|
#if EIGEN_COMP_LLVM
|
||||||
|
__builtin_vsx_assemble_pair(&rhsV,
|
||||||
|
(__vector unsigned char)(ploadRhs<double, Packet2d>((const double *)(((Packet2d *)rhs) + 1))),
|
||||||
|
(__vector unsigned char)(ploadRhs<double, Packet2d>((const double *)((Packet2d *)rhs ))));
|
||||||
|
#else
|
||||||
|
__asm__ ("lxvp %x0,%1" : "=wa" (rhsV) : "Y" (*rhs));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deliberately empty specialization for a float pointer with a __vector_pair destination;
// it performs no load and leaves its second argument untouched.
// NOTE(review): presumably needed because the MICRO_MMA_* macros instantiate the
// __vector_pair branch for every Scalar, including float -- confirm.
template<>
|
||||||
|
EIGEN_ALWAYS_INLINE void ploadRhsMMA(const float*, __vector_pair&)
|
||||||
|
{
|
||||||
|
// Just for compilation
|
||||||
|
}
|
||||||
|
|
||||||
|
// PEEL_MMA loop factor.
|
||||||
|
#define PEEL_MMA 7
|
||||||
|
|
||||||
|
#define MICRO_MMA_UNROLL(func) \
|
||||||
|
func(0) func(1) func(2) func(3) func(4) func(5) func(6) func(7)
|
||||||
|
|
||||||
|
#define MICRO_MMA_LOAD_ONE(iter) \
|
||||||
|
if (unroll_factor > iter) { \
|
||||||
|
lhsV##iter = ploadLhs<Scalar, Packet>(lhs_ptr##iter); \
|
||||||
|
lhs_ptr##iter += accCols; \
|
||||||
|
} else { \
|
||||||
|
EIGEN_UNUSED_VARIABLE(lhsV##iter); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MICRO_MMA_WORK_ONE(iter, type, peel) \
|
||||||
|
if (unroll_factor > iter) { \
|
||||||
|
pgerMMA<Packet, type, false>(&accZero##iter, rhsV##peel, lhsV##iter); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MICRO_MMA_TYPE_PEEL(func, func2, type, peel) \
|
||||||
|
if (PEEL_MMA > peel) { \
|
||||||
|
Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4, lhsV5, lhsV6, lhsV7; \
|
||||||
|
ploadRhsMMA<Scalar, type>(rhs_ptr + (accRows * peel), rhsV##peel); \
|
||||||
|
MICRO_MMA_UNROLL(func2); \
|
||||||
|
func(0,type,peel) func(1,type,peel) func(2,type,peel) func(3,type,peel) \
|
||||||
|
func(4,type,peel) func(5,type,peel) func(6,type,peel) func(7,type,peel) \
|
||||||
|
} else { \
|
||||||
|
EIGEN_UNUSED_VARIABLE(rhsV##peel); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MICRO_MMA_UNROLL_TYPE_PEEL(func, func2, type) \
|
||||||
|
type rhsV0, rhsV1, rhsV2, rhsV3, rhsV4, rhsV5, rhsV6, rhsV7; \
|
||||||
|
MICRO_MMA_TYPE_PEEL(func,func2,type,0); MICRO_MMA_TYPE_PEEL(func,func2,type,1); \
|
||||||
|
MICRO_MMA_TYPE_PEEL(func,func2,type,2); MICRO_MMA_TYPE_PEEL(func,func2,type,3); \
|
||||||
|
MICRO_MMA_TYPE_PEEL(func,func2,type,4); MICRO_MMA_TYPE_PEEL(func,func2,type,5); \
|
||||||
|
MICRO_MMA_TYPE_PEEL(func,func2,type,6); MICRO_MMA_TYPE_PEEL(func,func2,type,7);
|
||||||
|
|
||||||
|
#define MICRO_MMA_UNROLL_TYPE_ONE(func, func2, type) \
|
||||||
|
type rhsV0; \
|
||||||
|
MICRO_MMA_TYPE_PEEL(func,func2,type,0);
|
||||||
|
|
||||||
|
#define MICRO_MMA_ONE_PEEL \
|
||||||
|
if (sizeof(Scalar) == sizeof(float)) { \
|
||||||
|
MICRO_MMA_UNROLL_TYPE_PEEL(MICRO_MMA_WORK_ONE, MICRO_MMA_LOAD_ONE, RhsPacket); \
|
||||||
|
} else { \
|
||||||
|
MICRO_MMA_UNROLL_TYPE_PEEL(MICRO_MMA_WORK_ONE, MICRO_MMA_LOAD_ONE, __vector_pair); \
|
||||||
|
} \
|
||||||
|
rhs_ptr += (accRows * PEEL_MMA);
|
||||||
|
|
||||||
|
#define MICRO_MMA_ONE \
|
||||||
|
if (sizeof(Scalar) == sizeof(float)) { \
|
||||||
|
MICRO_MMA_UNROLL_TYPE_ONE(MICRO_MMA_WORK_ONE, MICRO_MMA_LOAD_ONE, RhsPacket); \
|
||||||
|
} else { \
|
||||||
|
MICRO_MMA_UNROLL_TYPE_ONE(MICRO_MMA_WORK_ONE, MICRO_MMA_LOAD_ONE, __vector_pair); \
|
||||||
|
} \
|
||||||
|
rhs_ptr += accRows;
|
||||||
|
|
||||||
|
#define MICRO_MMA_DST_PTR_ONE(iter) \
|
||||||
|
if (unroll_factor > iter) { \
|
||||||
|
bsetzeroMMA<Scalar, Packet>(&accZero##iter); \
|
||||||
|
} else { \
|
||||||
|
EIGEN_UNUSED_VARIABLE(accZero##iter); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MICRO_MMA_DST_PTR MICRO_MMA_UNROLL(MICRO_MMA_DST_PTR_ONE)
|
||||||
|
|
||||||
|
#define MICRO_MMA_SRC_PTR_ONE(iter) \
|
||||||
|
if (unroll_factor > iter) { \
|
||||||
|
lhs_ptr##iter = lhs_base + ( (row/accCols) + iter )*strideA*accCols; \
|
||||||
|
} else { \
|
||||||
|
EIGEN_UNUSED_VARIABLE(lhs_ptr##iter); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MICRO_MMA_SRC_PTR MICRO_MMA_UNROLL(MICRO_MMA_SRC_PTR_ONE)
|
||||||
|
|
||||||
|
#define MICRO_MMA_PREFETCH_ONE(iter) \
|
||||||
|
if (unroll_factor > iter) { \
|
||||||
|
EIGEN_POWER_PREFETCH(lhs_ptr##iter); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MICRO_MMA_PREFETCH MICRO_MMA_UNROLL(MICRO_MMA_PREFETCH_ONE)
|
||||||
|
|
||||||
|
#define MICRO_MMA_STORE_ONE(iter) \
|
||||||
|
if (unroll_factor > iter) { \
|
||||||
|
storeAccumulator<DataMapper, Index, Packet, accCols>(row + iter*accCols, res, pAlpha, &accZero##iter); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MICRO_MMA_STORE MICRO_MMA_UNROLL(MICRO_MMA_STORE_ONE)
|
||||||
|
|
||||||
|
template<int unroll_factor, typename Scalar, typename Packet, typename RhsPacket, typename DataMapper, typename Index, const Index accRows, const Index accCols>
|
||||||
|
EIGEN_ALWAYS_INLINE void gemm_unrolled_MMA_iteration(
|
||||||
|
const DataMapper& res,
|
||||||
|
const Scalar* lhs_base,
|
||||||
|
const Scalar* rhs_base,
|
||||||
|
Index depth,
|
||||||
|
Index strideA,
|
||||||
|
Index& row,
|
||||||
|
const Packet& pAlpha)
|
||||||
|
{
|
||||||
|
const Scalar* rhs_ptr = rhs_base;
|
||||||
|
const Scalar* lhs_ptr0 = NULL, * lhs_ptr1 = NULL, * lhs_ptr2 = NULL, * lhs_ptr3 = NULL, * lhs_ptr4 = NULL, * lhs_ptr5 = NULL, * lhs_ptr6 = NULL, * lhs_ptr7 = NULL;
|
||||||
|
__vector_quad accZero0, accZero1, accZero2, accZero3, accZero4, accZero5, accZero6, accZero7;
|
||||||
|
|
||||||
|
MICRO_MMA_SRC_PTR
|
||||||
|
MICRO_MMA_DST_PTR
|
||||||
|
|
||||||
|
Index k = 0;
|
||||||
|
for(; k + PEEL_MMA <= depth; k+= PEEL_MMA)
|
||||||
|
{
|
||||||
|
EIGEN_POWER_PREFETCH(rhs_ptr);
|
||||||
|
MICRO_MMA_PREFETCH
|
||||||
|
MICRO_MMA_ONE_PEEL
|
||||||
|
}
|
||||||
|
for(; k < depth; k++)
|
||||||
|
{
|
||||||
|
MICRO_MMA_ONE
|
||||||
|
}
|
||||||
|
MICRO_MMA_STORE
|
||||||
|
|
||||||
|
row += unroll_factor*accCols;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Scalar, typename Packet, typename RhsPacket, typename DataMapper, typename Index, const Index accRows, const Index accCols>
|
||||||
|
EIGEN_ALWAYS_INLINE void gemmMMA_cols(
|
||||||
|
const DataMapper& res,
|
||||||
|
const Scalar* blockA,
|
||||||
|
const Scalar* blockB,
|
||||||
|
Index depth,
|
||||||
|
Index strideA,
|
||||||
|
Index offsetA,
|
||||||
|
Index strideB,
|
||||||
|
Index offsetB,
|
||||||
|
Index col,
|
||||||
|
Index rows,
|
||||||
|
Index cols,
|
||||||
|
Index remaining_rows,
|
||||||
|
const Packet& pAlpha,
|
||||||
|
const Packet& pMask)
|
||||||
|
{
|
||||||
|
const DataMapper res3 = res.getSubMapper(0, col);
|
||||||
|
|
||||||
|
const Scalar* rhs_base = blockB + col*strideB + accRows*offsetB;
|
||||||
|
const Scalar* lhs_base = blockA + accCols*offsetA;
|
||||||
|
Index row = 0;
|
||||||
|
|
||||||
|
#define MAX_MMA_UNROLL 7
|
||||||
|
while(row + MAX_MMA_UNROLL*accCols <= rows) {
|
||||||
|
gemm_unrolled_MMA_iteration<MAX_MMA_UNROLL, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
|
||||||
|
}
|
||||||
|
switch( (rows-row)/accCols ) {
|
||||||
|
#if MAX_MMA_UNROLL > 7
|
||||||
|
case 7:
|
||||||
|
gemm_unrolled_MMA_iteration<7, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
#if MAX_MMA_UNROLL > 6
|
||||||
|
case 6:
|
||||||
|
gemm_unrolled_MMA_iteration<6, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
#if MAX_MMA_UNROLL > 5
|
||||||
|
case 5:
|
||||||
|
gemm_unrolled_MMA_iteration<5, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
#if MAX_MMA_UNROLL > 4
|
||||||
|
case 4:
|
||||||
|
gemm_unrolled_MMA_iteration<4, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
#if MAX_MMA_UNROLL > 3
|
||||||
|
case 3:
|
||||||
|
gemm_unrolled_MMA_iteration<3, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
#if MAX_MMA_UNROLL > 2
|
||||||
|
case 2:
|
||||||
|
gemm_unrolled_MMA_iteration<2, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
#if MAX_MMA_UNROLL > 1
|
||||||
|
case 1:
|
||||||
|
gemm_unrolled_MMA_iteration<1, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#undef MAX_MMA_UNROLL
|
||||||
|
|
||||||
|
if(remaining_rows > 0)
|
||||||
|
{
|
||||||
|
gemm_extra_row<Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, blockA, rhs_base, depth, strideA, offsetA, row, col, rows, cols, remaining_rows, pAlpha, pMask);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Entry point of the MMA GEMM kernel for real scalars.
//   res              : result mapper, forwarded to the per-column-block helpers
//   blockA, blockB   : operand buffers, forwarded unchanged
//   rows, depth, cols: problem dimensions
//   alpha            : scalar broadcast into pAlpha with pset1
//   strideA, strideB : a value of -1 is replaced by depth
//   offsetA, offsetB : forwarded unchanged
// Work split: full groups of accRows columns are handled by gemmMMA_cols; whatever columns
// remain (col .. cols) are passed to gemm_extra_cols, which is called unconditionally.
// NOTE(review): blockA/blockB are presumably the packed LHS/RHS panels produced by Eigen's
// packing routines -- not visible in this part of the file.
template<typename Scalar, typename Index, typename Packet, typename RhsPacket, typename DataMapper, const Index accRows, const Index accCols>
|
||||||
|
void gemmMMA(const DataMapper& res, const Scalar* blockA, const Scalar* blockB, Index rows, Index depth, Index cols, Scalar alpha, Index strideA, Index strideB, Index offsetA, Index offsetB)
|
||||||
|
{
|
||||||
|
// Rows left over after dividing into groups of accCols.
const Index remaining_rows = rows % accCols;
|
||||||
|
|
||||||
|
// -1 acts as a "not specified" sentinel for the strides.
if( strideA == -1 ) strideA = depth;
|
||||||
|
if( strideB == -1 ) strideB = depth;
|
||||||
|
|
||||||
|
const Packet pAlpha = pset1<Packet>(alpha);
|
||||||
|
// Mask packet derived from the leftover row count; consumed by the extra-row/extra-col helpers.
const Packet pMask = bmask<Packet>((const int)(remaining_rows));
|
||||||
|
|
||||||
|
Index col = 0;
|
||||||
|
for(; col + accRows <= cols; col += accRows)
|
||||||
|
{
|
||||||
|
gemmMMA_cols<Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlpha, pMask);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 'col' now marks the first column not covered by a full accRows group.
gemm_extra_cols<Scalar, Packet, DataMapper, Index, accCols>(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlpha, pMask);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define accColsC (accCols / 2)
|
||||||
|
#define advanceRows ((LhsIsReal) ? 1 : 2)
|
||||||
|
#define advanceCols ((RhsIsReal) ? 1 : 2)
|
||||||
|
|
||||||
|
// PEEL_COMPLEX_MMA loop factor.
|
||||||
|
#define PEEL_COMPLEX_MMA 3
|
||||||
|
|
||||||
|
#define MICRO_COMPLEX_MMA_UNROLL(func) \
|
||||||
|
func(0) func(1) func(2) func(3)
|
||||||
|
|
||||||
|
#define MICRO_COMPLEX_MMA_LOAD_ONE(iter) \
|
||||||
|
if (unroll_factor > iter) { \
|
||||||
|
lhsV##iter = ploadLhs<Scalar, Packet>(lhs_ptr_real##iter); \
|
||||||
|
if(!LhsIsReal) { \
|
||||||
|
lhsVi##iter = ploadLhs<Scalar, Packet>(lhs_ptr_real##iter + imag_delta); \
|
||||||
|
} else { \
|
||||||
|
EIGEN_UNUSED_VARIABLE(lhsVi##iter); \
|
||||||
|
} \
|
||||||
|
lhs_ptr_real##iter += accCols; \
|
||||||
|
} else { \
|
||||||
|
EIGEN_UNUSED_VARIABLE(lhsV##iter); \
|
||||||
|
EIGEN_UNUSED_VARIABLE(lhsVi##iter); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MICRO_COMPLEX_MMA_WORK_ONE(iter, type, peel) \
|
||||||
|
if (unroll_factor > iter) { \
|
||||||
|
pgercMMA<Scalar, Packet, type, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal##iter, &accImag##iter, lhsV##iter, lhsVi##iter, rhsV##peel, rhsVi##peel); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MICRO_COMPLEX_MMA_TYPE_PEEL(func, func2, type, peel) \
|
||||||
|
if (PEEL_COMPLEX_MMA > peel) { \
|
||||||
|
Packet lhsV0, lhsV1, lhsV2, lhsV3; \
|
||||||
|
Packet lhsVi0, lhsVi1, lhsVi2, lhsVi3; \
|
||||||
|
ploadRhsMMA<Scalar, type>(rhs_ptr_real + (accRows * peel), rhsV##peel); \
|
||||||
|
if(!RhsIsReal) { \
|
||||||
|
ploadRhsMMA<Scalar, type>(rhs_ptr_imag + (accRows * peel), rhsVi##peel); \
|
||||||
|
} else { \
|
||||||
|
EIGEN_UNUSED_VARIABLE(rhsVi##peel); \
|
||||||
|
} \
|
||||||
|
MICRO_COMPLEX_MMA_UNROLL(func2); \
|
||||||
|
func(0,type,peel) func(1,type,peel) func(2,type,peel) func(3,type,peel) \
|
||||||
|
} else { \
|
||||||
|
EIGEN_UNUSED_VARIABLE(rhsV##peel); \
|
||||||
|
EIGEN_UNUSED_VARIABLE(rhsVi##peel); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL(func, func2, type) \
|
||||||
|
type rhsV0, rhsV1, rhsV2, rhsV3; \
|
||||||
|
type rhsVi0, rhsVi1, rhsVi2, rhsVi3; \
|
||||||
|
MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,0); MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,1); \
|
||||||
|
MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,2); MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,3);
|
||||||
|
|
||||||
|
#define MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE(func, func2, type) \
|
||||||
|
type rhsV0, rhsVi0; \
|
||||||
|
MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,0);
|
||||||
|
|
||||||
|
#define MICRO_COMPLEX_MMA_ONE_PEEL \
|
||||||
|
if (sizeof(Scalar) == sizeof(float)) { \
|
||||||
|
MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_MMA_LOAD_ONE, RhsPacket); \
|
||||||
|
} else { \
|
||||||
|
MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_MMA_LOAD_ONE, __vector_pair); \
|
||||||
|
} \
|
||||||
|
rhs_ptr_real += (accRows * PEEL_COMPLEX_MMA); \
|
||||||
|
if(!RhsIsReal) rhs_ptr_imag += (accRows * PEEL_COMPLEX_MMA);
|
||||||
|
|
||||||
|
#define MICRO_COMPLEX_MMA_ONE \
|
||||||
|
if (sizeof(Scalar) == sizeof(float)) { \
|
||||||
|
MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_MMA_LOAD_ONE, RhsPacket); \
|
||||||
|
} else { \
|
||||||
|
MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_MMA_LOAD_ONE, __vector_pair); \
|
||||||
|
} \
|
||||||
|
rhs_ptr_real += accRows; \
|
||||||
|
if(!RhsIsReal) rhs_ptr_imag += accRows;
|
||||||
|
|
||||||
|
// Zeroes the real and imaginary accumulators of slot `iter` when that slot is
// active (iter < unroll_factor); otherwise only silences unused-variable
// warnings for them.
#define MICRO_COMPLEX_MMA_DST_PTR_ONE(iter) \
  if (iter >= unroll_factor) { \
    EIGEN_UNUSED_VARIABLE(accReal##iter); \
    EIGEN_UNUSED_VARIABLE(accImag##iter); \
  } else { \
    bsetzeroMMA<Scalar, Packet>(&accReal##iter); \
    bsetzeroMMA<Scalar, Packet>(&accImag##iter); \
  }

// Applies MICRO_COMPLEX_MMA_DST_PTR_ONE to every unroll slot.
#define MICRO_COMPLEX_MMA_DST_PTR MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_MMA_DST_PTR_ONE)
|
||||||
|
|
||||||
|
// Points lhs_ptr_real<iter> at the packed LHS data for slot `iter` (offset
// computed from row, advanceRows, accCols and strideA) when the slot is
// active; otherwise marks the pointer as unused.
#define MICRO_COMPLEX_MMA_SRC_PTR_ONE(iter) \
  if (iter >= unroll_factor) { \
    EIGEN_UNUSED_VARIABLE(lhs_ptr_real##iter); \
  } else { \
    lhs_ptr_real##iter = lhs_base + ( ((advanceRows*row)/accCols) + iter*advanceRows )*strideA*accCols; \
  }

// Applies MICRO_COMPLEX_MMA_SRC_PTR_ONE to every unroll slot.
#define MICRO_COMPLEX_MMA_SRC_PTR MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_MMA_SRC_PTR_ONE)
|
||||||
|
|
||||||
|
// Issues a prefetch on the LHS pointer of slot `iter` when that slot is active.
#define MICRO_COMPLEX_MMA_PREFETCH_ONE(iter) \
  if (iter < unroll_factor) { \
    EIGEN_POWER_PREFETCH(lhs_ptr_real##iter); \
  }

// Applies MICRO_COMPLEX_MMA_PREFETCH_ONE to every unroll slot.
#define MICRO_COMPLEX_MMA_PREFETCH MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_MMA_PREFETCH_ONE)
|
||||||
|
|
||||||
|
// Writes the accumulator pair of slot `iter` to the result mapper at row
// offset row + iter*accCols, passing the alpha packets to
// storeComplexAccumulator. Inactive slots are skipped.
#define MICRO_COMPLEX_MMA_STORE_ONE(iter) \
  if (iter < unroll_factor) { \
    storeComplexAccumulator<DataMapper, Index, Packet, Packetc, accColsC>(row + iter*accCols, res, pAlphaReal, pAlphaImag, &accReal##iter, &accImag##iter); \
  }

// Applies MICRO_COMPLEX_MMA_STORE_ONE to every unroll slot.
#define MICRO_COMPLEX_MMA_STORE MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_MMA_STORE_ONE)
|
||||||
|
|
||||||
|
template<int unroll_factor, typename Scalar, typename Packet, typename Packetc, typename RhsPacket, typename DataMapper, typename Index, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
|
||||||
|
EIGEN_ALWAYS_INLINE void gemm_complex_unrolled_MMA_iteration(
|
||||||
|
const DataMapper& res,
|
||||||
|
const Scalar* lhs_base,
|
||||||
|
const Scalar* rhs_base,
|
||||||
|
Index depth,
|
||||||
|
Index strideA,
|
||||||
|
Index strideB,
|
||||||
|
Index& row,
|
||||||
|
const Packet& pAlphaReal,
|
||||||
|
const Packet& pAlphaImag)
|
||||||
|
{
|
||||||
|
const Scalar* rhs_ptr_real = rhs_base;
|
||||||
|
const Scalar* rhs_ptr_imag = NULL;
|
||||||
|
const Index imag_delta = accCols*strideA;
|
||||||
|
if(!RhsIsReal) {
|
||||||
|
rhs_ptr_imag = rhs_base + accRows*strideB;
|
||||||
|
} else {
|
||||||
|
EIGEN_UNUSED_VARIABLE(rhs_ptr_imag);
|
||||||
|
}
|
||||||
|
const Scalar* lhs_ptr_real0 = NULL, * lhs_ptr_real1 = NULL;
|
||||||
|
const Scalar* lhs_ptr_real2 = NULL, * lhs_ptr_real3 = NULL;
|
||||||
|
__vector_quad accReal0, accImag0, accReal1, accImag1, accReal2, accImag2, accReal3, accImag3;
|
||||||
|
|
||||||
|
MICRO_COMPLEX_MMA_SRC_PTR
|
||||||
|
MICRO_COMPLEX_MMA_DST_PTR
|
||||||
|
|
||||||
|
Index k = 0;
|
||||||
|
for(; k + PEEL_COMPLEX_MMA <= depth; k+= PEEL_COMPLEX_MMA)
|
||||||
|
{
|
||||||
|
EIGEN_POWER_PREFETCH(rhs_ptr_real);
|
||||||
|
if(!RhsIsReal) {
|
||||||
|
EIGEN_POWER_PREFETCH(rhs_ptr_imag);
|
||||||
|
}
|
||||||
|
MICRO_COMPLEX_MMA_PREFETCH
|
||||||
|
MICRO_COMPLEX_MMA_ONE_PEEL
|
||||||
|
}
|
||||||
|
for(; k < depth; k++)
|
||||||
|
{
|
||||||
|
MICRO_COMPLEX_MMA_ONE
|
||||||
|
}
|
||||||
|
MICRO_COMPLEX_MMA_STORE
|
||||||
|
|
||||||
|
row += unroll_factor*accCols;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Scalar, typename Packet, typename Packetc, typename RhsPacket, typename DataMapper, typename Index, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
|
||||||
|
EIGEN_ALWAYS_INLINE void gemmMMA_complex_cols(
|
||||||
|
const DataMapper& res,
|
||||||
|
const Scalar* blockA,
|
||||||
|
const Scalar* blockB,
|
||||||
|
Index depth,
|
||||||
|
Index strideA,
|
||||||
|
Index offsetA,
|
||||||
|
Index strideB,
|
||||||
|
Index offsetB,
|
||||||
|
Index col,
|
||||||
|
Index rows,
|
||||||
|
Index cols,
|
||||||
|
Index remaining_rows,
|
||||||
|
const Packet& pAlphaReal,
|
||||||
|
const Packet& pAlphaImag,
|
||||||
|
const Packet& pMask)
|
||||||
|
{
|
||||||
|
const DataMapper res3 = res.getSubMapper(0, col);
|
||||||
|
|
||||||
|
const Scalar* rhs_base = blockB + advanceCols*col*strideB + accRows*offsetB;
|
||||||
|
const Scalar* lhs_base = blockA + accCols*offsetA;
|
||||||
|
Index row = 0;
|
||||||
|
|
||||||
|
#define MAX_COMPLEX_MMA_UNROLL 4
|
||||||
|
while(row + MAX_COMPLEX_MMA_UNROLL*accCols <= rows) {
|
||||||
|
gemm_complex_unrolled_MMA_iteration<MAX_COMPLEX_MMA_UNROLL, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, lhs_base, rhs_base, depth, strideA, strideB, row, pAlphaReal, pAlphaImag);
|
||||||
|
}
|
||||||
|
switch( (rows-row)/accCols ) {
|
||||||
|
#if MAX_COMPLEX_MMA_UNROLL > 4
|
||||||
|
case 4:
|
||||||
|
gemm_complex_unrolled_MMA_iteration<4, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, lhs_base, rhs_base, depth, strideA, strideB, row, pAlphaReal, pAlphaImag);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
#if MAX_COMPLEX_MMA_UNROLL > 3
|
||||||
|
case 3:
|
||||||
|
gemm_complex_unrolled_MMA_iteration<3, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, lhs_base, rhs_base, depth, strideA, strideB, row, pAlphaReal, pAlphaImag);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
#if MAX_COMPLEX_MMA_UNROLL > 2
|
||||||
|
case 2:
|
||||||
|
gemm_complex_unrolled_MMA_iteration<2, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, lhs_base, rhs_base, depth, strideA, strideB, row, pAlphaReal, pAlphaImag);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
#if MAX_COMPLEX_MMA_UNROLL > 1
|
||||||
|
case 1:
|
||||||
|
gemm_complex_unrolled_MMA_iteration<1, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, lhs_base, rhs_base, depth, strideA, strideB, row, pAlphaReal, pAlphaImag);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#undef MAX_COMPLEX_MMA_UNROLL
|
||||||
|
|
||||||
|
if(remaining_rows > 0)
|
||||||
|
{
|
||||||
|
gemm_complex_extra_row<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, blockA, rhs_base, depth, strideA, offsetA, strideB, row, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename LhsScalar, typename RhsScalar, typename Scalarc, typename Scalar, typename Index, typename Packet, typename Packetc, typename RhsPacket, typename DataMapper, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
|
||||||
|
void gemm_complexMMA(const DataMapper& res, const LhsScalar* blockAc, const RhsScalar* blockBc, Index rows, Index depth, Index cols, Scalarc alpha, Index strideA, Index strideB, Index offsetA, Index offsetB)
|
||||||
|
{
|
||||||
|
const Index remaining_rows = rows % accCols;
|
||||||
|
|
||||||
|
if( strideA == -1 ) strideA = depth;
|
||||||
|
if( strideB == -1 ) strideB = depth;
|
||||||
|
|
||||||
|
const Packet pAlphaReal = pset1<Packet>(alpha.real());
|
||||||
|
const Packet pAlphaImag = pset1<Packet>(alpha.imag());
|
||||||
|
const Packet pMask = bmask<Packet>((const int)(remaining_rows));
|
||||||
|
|
||||||
|
const Scalar* blockA = (Scalar *) blockAc;
|
||||||
|
const Scalar* blockB = (Scalar *) blockBc;
|
||||||
|
|
||||||
|
Index col = 0;
|
||||||
|
for(; col + accRows <= cols; col += accRows)
|
||||||
|
{
|
||||||
|
gemmMMA_complex_cols<Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask);
|
||||||
|
}
|
||||||
|
|
||||||
|
gemm_complex_extra_cols<Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask);
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef accColsC
|
||||||
|
#undef advanceRows
|
||||||
|
#undef advanceCols
|
||||||
|
|
||||||
|
} // end namespace internal
|
||||||
|
|
||||||
|
} // end namespace Eigen
|
||||||
|
|
||||||
|
#if defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH)
|
||||||
|
#pragma GCC pop_options
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // EIGEN_MATRIX_PRODUCT_MMA_ALTIVEC_H
|
||||||
|
|
||||||
2400
Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h
Normal file
2400
Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -2,6 +2,7 @@
|
|||||||
// for linear algebra.
|
// for linear algebra.
|
||||||
//
|
//
|
||||||
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||||
|
// Copyright (C) 2021 C. Antonio Sanchez <cantonios@google.com>
|
||||||
//
|
//
|
||||||
// This Source Code Form is subject to the terms of the Mozilla
|
// This Source Code Form is subject to the terms of the Mozilla
|
||||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||||
@@ -10,94 +11,259 @@
|
|||||||
#ifndef EIGEN_COMPLEX_CUDA_H
|
#ifndef EIGEN_COMPLEX_CUDA_H
|
||||||
#define EIGEN_COMPLEX_CUDA_H
|
#define EIGEN_COMPLEX_CUDA_H
|
||||||
|
|
||||||
// clang-format off
|
// Many std::complex methods such as operator+, operator-, operator* and
|
||||||
|
// operator/ are not constexpr. Due to this, GCC and older versions of clang do
|
||||||
|
// not treat them as device functions and thus Eigen functors making use of
|
||||||
|
// these operators fail to compile. Here, we manually specialize these
|
||||||
|
// operators and functors for complex types when building for CUDA to enable
|
||||||
|
// their use on-device.
|
||||||
|
//
|
||||||
|
// NOTES:
|
||||||
|
// - Compound assignment operators +=,-=,*=,/=(Scalar) will not work on device,
|
||||||
|
// since they are already specialized in the standard. Using them will result
|
||||||
|
// in silent kernel failures.
|
||||||
|
// - Compiling with MSVC and using +=,-=,*=,/=(std::complex<Scalar>) will lead
|
||||||
|
// to duplicate definition errors, since these are already specialized in
|
||||||
|
// Visual Studio's <complex> header (contrary to the standard). This is
|
||||||
|
// preferable to removing such definitions, which will lead to silent kernel
|
||||||
|
// failures.
|
||||||
|
// - Compiling with ICC requires defining _OVERRIDE_COMPLEX_SPECIALIZATION_ prior
|
||||||
|
// to the first inclusion of <complex>.
|
||||||
|
|
||||||
|
#if defined(EIGEN_CUDACC) && defined(EIGEN_GPU_COMPILE_PHASE)
|
||||||
|
|
||||||
|
// ICC already specializes std::complex<float> and std::complex<double>
|
||||||
|
// operators, preventing us from making them device functions here.
|
||||||
|
// This will lead to silent runtime errors if the operators are used on device.
|
||||||
|
//
|
||||||
|
// To allow std::complex operator use on device, define _OVERRIDE_COMPLEX_SPECIALIZATION_
|
||||||
|
// prior to first inclusion of <complex>. This prevents ICC from adding
|
||||||
|
// its own specializations, so our custom ones below can be used instead.
|
||||||
|
#if !(defined(EIGEN_COMP_ICC) && defined(_USE_COMPLEX_SPECIALIZATION_))
|
||||||
|
|
||||||
|
// Import Eigen's internal operator specializations.
|
||||||
|
// Pulls every operator overload from Eigen::complex_operator_detail into the
// enclosing namespace: binary/unary arithmetic, compound assignment, and
// (in)equality.
#define EIGEN_USING_STD_COMPLEX_OPERATORS \
  using Eigen::complex_operator_detail::operator+;  using Eigen::complex_operator_detail::operator-;  \
  using Eigen::complex_operator_detail::operator*;  using Eigen::complex_operator_detail::operator/;  \
  using Eigen::complex_operator_detail::operator+=; using Eigen::complex_operator_detail::operator-=; \
  using Eigen::complex_operator_detail::operator*=; using Eigen::complex_operator_detail::operator/=; \
  using Eigen::complex_operator_detail::operator==; using Eigen::complex_operator_detail::operator!=;
|
||||||
|
|
||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
|
|
||||||
namespace internal {
|
// Specialized std::complex overloads.
|
||||||
|
namespace complex_operator_detail {
|
||||||
|
|
||||||
#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
|
template<typename T>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
// Many std::complex methods such as operator+, operator-, operator* and
|
std::complex<T> complex_multiply(const std::complex<T>& a, const std::complex<T>& b) {
|
||||||
// operator/ are not constexpr. Due to this, clang does not treat them as device
|
|
||||||
// functions and thus Eigen functors making use of these operators fail to
|
|
||||||
// compile. Here, we manually specialize these functors for complex types when
|
|
||||||
// building for CUDA to avoid non-constexpr methods.
|
|
||||||
|
|
||||||
// Sum
|
|
||||||
template<typename T> struct scalar_sum_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
|
|
||||||
typedef typename std::complex<T> result_type;
|
|
||||||
|
|
||||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
|
||||||
return std::complex<T>(numext::real(a) + numext::real(b),
|
|
||||||
numext::imag(a) + numext::imag(b));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename T> struct scalar_sum_op<std::complex<T>, std::complex<T> > : scalar_sum_op<const std::complex<T>, const std::complex<T> > {};
|
|
||||||
|
|
||||||
|
|
||||||
// Difference
|
|
||||||
template<typename T> struct scalar_difference_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
|
|
||||||
typedef typename std::complex<T> result_type;
|
|
||||||
|
|
||||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
|
||||||
return std::complex<T>(numext::real(a) - numext::real(b),
|
|
||||||
numext::imag(a) - numext::imag(b));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename T> struct scalar_difference_op<std::complex<T>, std::complex<T> > : scalar_difference_op<const std::complex<T>, const std::complex<T> > {};
|
|
||||||
|
|
||||||
|
|
||||||
// Product
|
|
||||||
template<typename T> struct scalar_product_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
|
|
||||||
enum {
|
|
||||||
Vectorizable = packet_traits<std::complex<T> >::HasMul
|
|
||||||
};
|
|
||||||
typedef typename std::complex<T> result_type;
|
|
||||||
|
|
||||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
|
||||||
const T a_real = numext::real(a);
|
const T a_real = numext::real(a);
|
||||||
const T a_imag = numext::imag(a);
|
const T a_imag = numext::imag(a);
|
||||||
const T b_real = numext::real(b);
|
const T b_real = numext::real(b);
|
||||||
const T b_imag = numext::imag(b);
|
const T b_imag = numext::imag(b);
|
||||||
return std::complex<T>(a_real * b_real - a_imag * b_imag,
|
return std::complex<T>(
|
||||||
a_real * b_imag + a_imag * b_real);
|
a_real * b_real - a_imag * b_imag,
|
||||||
|
a_imag * b_real + a_real * b_imag);
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
|
||||||
template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T> > : scalar_product_op<const std::complex<T>, const std::complex<T> > {};
|
template<typename T>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
|
std::complex<T> complex_divide_fast(const std::complex<T>& a, const std::complex<T>& b) {
|
||||||
// Quotient
|
|
||||||
template<typename T> struct scalar_quotient_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
|
|
||||||
enum {
|
|
||||||
Vectorizable = packet_traits<std::complex<T> >::HasDiv
|
|
||||||
};
|
|
||||||
typedef typename std::complex<T> result_type;
|
|
||||||
|
|
||||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
|
||||||
const T a_real = numext::real(a);
|
const T a_real = numext::real(a);
|
||||||
const T a_imag = numext::imag(a);
|
const T a_imag = numext::imag(a);
|
||||||
const T b_real = numext::real(b);
|
const T b_real = numext::real(b);
|
||||||
const T b_imag = numext::imag(b);
|
const T b_imag = numext::imag(b);
|
||||||
const T norm = T(1) / (b_real * b_real + b_imag * b_imag);
|
const T norm = (b_real * b_real + b_imag * b_imag);
|
||||||
return std::complex<T>((a_real * b_real + a_imag * b_imag) * norm,
|
return std::complex<T>((a_real * b_real + a_imag * b_imag) / norm,
|
||||||
(a_imag * b_real - a_real * b_imag) * norm);
|
(a_imag * b_real - a_real * b_imag) / norm);
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
|
||||||
template<typename T> struct scalar_quotient_op<std::complex<T>, std::complex<T> > : scalar_quotient_op<const std::complex<T>, const std::complex<T> > {};
|
template<typename T>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
|
std::complex<T> complex_divide_stable(const std::complex<T>& a, const std::complex<T>& b) {
|
||||||
|
const T a_real = numext::real(a);
|
||||||
|
const T a_imag = numext::imag(a);
|
||||||
|
const T b_real = numext::real(b);
|
||||||
|
const T b_imag = numext::imag(b);
|
||||||
|
// Smith's complex division (https://arxiv.org/pdf/1210.4539.pdf),
|
||||||
|
// guards against over/under-flow.
|
||||||
|
const bool scale_imag = numext::abs(b_imag) <= numext::abs(b_real);
|
||||||
|
const T rscale = scale_imag ? T(1) : b_real / b_imag;
|
||||||
|
const T iscale = scale_imag ? b_imag / b_real : T(1);
|
||||||
|
const T denominator = b_real * rscale + b_imag * iscale;
|
||||||
|
return std::complex<T>((a_real * rscale + a_imag * iscale) / denominator,
|
||||||
|
(a_imag * rscale - a_real * iscale) / denominator);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
|
std::complex<T> complex_divide(const std::complex<T>& a, const std::complex<T>& b) {
|
||||||
|
#if EIGEN_FAST_MATH
|
||||||
|
return complex_divide_fast(a, b);
|
||||||
|
#else
|
||||||
|
return complex_divide_stable(a, b);
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
} // end namespace internal
|
// NOTE: We cannot specialize compound assignment operators with Scalar T,
|
||||||
|
// (i.e. operator@=(const T&), for @=+,-,*,/)
|
||||||
|
// since they are already specialized for float/double/long double within
|
||||||
|
// the standard <complex> header. We also do not specialize the stream
|
||||||
|
// operators.
|
||||||
|
// Emits, for the real type T, device-callable non-template overloads of the
// std::complex<T> operators: unary +/-, binary + - * / (complex/complex,
// complex/scalar, scalar/complex), compound assignment with a complex
// right-hand side, and == / !=.
// Only /* */ comments may be used inside the macro body because of the line
// continuations.
#define EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(T) \
  /* ---- unary ---- */ \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T> operator+(const std::complex<T>& a) { return a; } \
  \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T> operator-(const std::complex<T>& a) { \
    return std::complex<T>(-numext::real(a), -numext::imag(a)); \
  } \
  \
  /* ---- addition ---- */ \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T> operator+(const std::complex<T>& a, const std::complex<T>& b) { \
    return std::complex<T>(numext::real(a) + numext::real(b), \
                           numext::imag(a) + numext::imag(b)); \
  } \
  \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T> operator+(const std::complex<T>& a, const T& b) { \
    return std::complex<T>(numext::real(a) + b, numext::imag(a)); \
  } \
  \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T> operator+(const T& a, const std::complex<T>& b) { \
    return std::complex<T>(a + numext::real(b), numext::imag(b)); \
  } \
  \
  /* ---- subtraction ---- */ \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T> operator-(const std::complex<T>& a, const std::complex<T>& b) { \
    return std::complex<T>(numext::real(a) - numext::real(b), \
                           numext::imag(a) - numext::imag(b)); \
  } \
  \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T> operator-(const std::complex<T>& a, const T& b) { \
    return std::complex<T>(numext::real(a) - b, numext::imag(a)); \
  } \
  \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T> operator-(const T& a, const std::complex<T>& b) { \
    return std::complex<T>(a - numext::real(b), -numext::imag(b)); \
  } \
  \
  /* ---- multiplication ---- */ \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T> operator*(const std::complex<T>& a, const std::complex<T>& b) { \
    return complex_multiply(a, b); \
  } \
  \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T> operator*(const std::complex<T>& a, const T& b) { \
    return std::complex<T>(numext::real(a) * b, numext::imag(a) * b); \
  } \
  \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T> operator*(const T& a, const std::complex<T>& b) { \
    return std::complex<T>(a * numext::real(b), a * numext::imag(b)); \
  } \
  \
  /* ---- division ---- */ \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T> operator/(const std::complex<T>& a, const std::complex<T>& b) { \
    return complex_divide(a, b); \
  } \
  \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T> operator/(const std::complex<T>& a, const T& b) { \
    return std::complex<T>(numext::real(a) / b, numext::imag(a) / b); \
  } \
  \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T> operator/(const T& a, const std::complex<T>& b) { \
    return complex_divide(std::complex<T>(a, 0), b); \
  } \
  \
  /* ---- compound assignment (complex right-hand side only) ---- */ \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T>& operator+=(std::complex<T>& a, const std::complex<T>& b) { \
    numext::real_ref(a) += numext::real(b); \
    numext::imag_ref(a) += numext::imag(b); \
    return a; \
  } \
  \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T>& operator-=(std::complex<T>& a, const std::complex<T>& b) { \
    numext::real_ref(a) -= numext::real(b); \
    numext::imag_ref(a) -= numext::imag(b); \
    return a; \
  } \
  \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T>& operator*=(std::complex<T>& a, const std::complex<T>& b) { \
    return a = complex_multiply(a, b); \
  } \
  \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  std::complex<T>& operator/=(std::complex<T>& a, const std::complex<T>& b) { \
    return a = complex_divide(a, b); \
  } \
  \
  /* ---- equality ---- */ \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  bool operator==(const std::complex<T>& a, const std::complex<T>& b) { \
    return numext::real(a) == numext::real(b) && numext::imag(a) == numext::imag(b); \
  } \
  \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  bool operator==(const std::complex<T>& a, const T& b) { \
    return numext::real(a) == b && numext::imag(a) == 0; \
  } \
  \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  bool operator==(const T& a, const std::complex<T>& b) { \
    return a == numext::real(b) && 0 == numext::imag(b); \
  } \
  \
  /* ---- inequality: component-wise negation of the equality tests ---- */ \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  bool operator!=(const std::complex<T>& a, const std::complex<T>& b) { \
    return numext::real(a) != numext::real(b) || numext::imag(a) != numext::imag(b); \
  } \
  \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  bool operator!=(const std::complex<T>& a, const T& b) { \
    return numext::real(a) != b || numext::imag(a) != 0; \
  } \
  \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
  bool operator!=(const T& a, const std::complex<T>& b) { \
    return a != numext::real(b) || 0 != numext::imag(b); \
  }
|
||||||
|
|
||||||
} // end namespace Eigen
|
// Do not specialize for long double, since that reduces to double on device.
|
||||||
|
EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(float)
|
||||||
|
EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(double)
|
||||||
|
|
||||||
|
#undef EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace complex_operator_detail
|
||||||
|
|
||||||
|
EIGEN_USING_STD_COMPLEX_OPERATORS
|
||||||
|
|
||||||
|
namespace numext {
|
||||||
|
EIGEN_USING_STD_COMPLEX_OPERATORS
|
||||||
|
} // namespace numext
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
EIGEN_USING_STD_COMPLEX_OPERATORS
|
||||||
|
|
||||||
|
} // namespace internal
|
||||||
|
} // namespace Eigen
|
||||||
|
|
||||||
|
#endif // !(EIGEN_COMP_ICC && _USE_COMPLEX_SPECIALIZATION_)
|
||||||
|
|
||||||
|
#endif // EIGEN_CUDACC && EIGEN_GPU_COMPILE_PHASE
|
||||||
|
|
||||||
#endif // EIGEN_COMPLEX_CUDA_H
|
#endif // EIGEN_COMPLEX_CUDA_H
|
||||||
|
|||||||
688
Eigen/src/Core/arch/Default/BFloat16.h
Normal file
688
Eigen/src/Core/arch/Default/BFloat16.h
Normal file
@@ -0,0 +1,688 @@
|
|||||||
|
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
#ifndef EIGEN_BFLOAT16_H
|
||||||
|
#define EIGEN_BFLOAT16_H
|
||||||
|
|
||||||
|
// Defines the PACKET_BF16 specialization of the unary packet function METHOD
// by converting the argument with Bf16ToF32, applying the PACKET_F
// specialization of METHOD, and converting the result back with F32ToBf16.
#define BF16_PACKET_FUNCTION(PACKET_F, PACKET_BF16, METHOD) \
  template <> \
  EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED \
  PACKET_BF16 METHOD<PACKET_BF16>(const PACKET_BF16& bf16_arg) { \
    return F32ToBf16(METHOD<PACKET_F>(Bf16ToF32(bf16_arg))); \
  }
|
||||||
|
|
||||||
|
namespace Eigen {
|
||||||
|
|
||||||
|
struct bfloat16;
|
||||||
|
|
||||||
|
namespace bfloat16_impl {
|
||||||
|
|
||||||
|
// Make our own __bfloat16_raw definition.
|
||||||
|
struct __bfloat16_raw {
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() : value(0) {}
|
||||||
|
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw) : value(raw) {}
|
||||||
|
unsigned short value;
|
||||||
|
};
|
||||||
|
|
||||||
|
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value);
|
||||||
|
template <bool AssumeArgumentIsNormalOrInfinityOrZero>
|
||||||
|
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne(float ff);
|
||||||
|
// Forward declarations of template specializations, to avoid Visual C++ 2019 errors, saying:
|
||||||
|
// > error C2908: explicit specialization; 'float_to_bfloat16_rtne' has already been instantiated
|
||||||
|
template <>
|
||||||
|
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<false>(float ff);
|
||||||
|
template <>
|
||||||
|
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true>(float ff);
|
||||||
|
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h);
|
||||||
|
|
||||||
|
struct bfloat16_base : public __bfloat16_raw {
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base() {}
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base(const __bfloat16_raw& h) : __bfloat16_raw(h) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace bfloat16_impl
|
||||||
|
|
||||||
|
// Class definition.
|
||||||
|
struct bfloat16 : public bfloat16_impl::bfloat16_base {
|
||||||
|
|
||||||
|
typedef bfloat16_impl::__bfloat16_raw __bfloat16_raw;
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16() {}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const __bfloat16_raw& h) : bfloat16_impl::bfloat16_base(h) {}
|
||||||
|
|
||||||
|
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(bool b)
|
||||||
|
: bfloat16_impl::bfloat16_base(bfloat16_impl::raw_uint16_to_bfloat16(b ? 0x3f80 : 0)) {}
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(T val)
|
||||||
|
: bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<internal::is_integral<T>::value>(static_cast<float>(val))) {}
|
||||||
|
|
||||||
|
explicit EIGEN_DEVICE_FUNC bfloat16(float f)
|
||||||
|
: bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(f)) {}
|
||||||
|
|
||||||
|
// Following the convention of numpy, converting between complex and
|
||||||
|
// float will lead to loss of imag value.
|
||||||
|
template<typename RealScalar>
|
||||||
|
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const std::complex<RealScalar>& val)
|
||||||
|
: bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(static_cast<float>(val.real()))) {}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC operator float() const { // NOLINT: Allow implicit conversion to float, because it is lossless.
|
||||||
|
return bfloat16_impl::bfloat16_to_float(*this);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace Eigen
|
||||||
|
|
||||||
|
namespace std {
|
||||||
|
template<>
|
||||||
|
struct numeric_limits<Eigen::bfloat16> {
|
||||||
|
static const bool is_specialized = true;
|
||||||
|
static const bool is_signed = true;
|
||||||
|
static const bool is_integer = false;
|
||||||
|
static const bool is_exact = false;
|
||||||
|
static const bool has_infinity = true;
|
||||||
|
static const bool has_quiet_NaN = true;
|
||||||
|
static const bool has_signaling_NaN = true;
|
||||||
|
static const float_denorm_style has_denorm = std::denorm_absent;
|
||||||
|
static const bool has_denorm_loss = false;
|
||||||
|
static const std::float_round_style round_style = numeric_limits<float>::round_style;
|
||||||
|
static const bool is_iec559 = false;
|
||||||
|
static const bool is_bounded = true;
|
||||||
|
static const bool is_modulo = false;
|
||||||
|
static const int digits = 8;
|
||||||
|
static const int digits10 = 2;
|
||||||
|
static const int max_digits10 = 4;
|
||||||
|
static const int radix = 2;
|
||||||
|
static const int min_exponent = numeric_limits<float>::min_exponent;
|
||||||
|
static const int min_exponent10 = numeric_limits<float>::min_exponent10;
|
||||||
|
static const int max_exponent = numeric_limits<float>::max_exponent;
|
||||||
|
static const int max_exponent10 = numeric_limits<float>::max_exponent10;
|
||||||
|
static const bool traps = numeric_limits<float>::traps;
|
||||||
|
static const bool tinyness_before = numeric_limits<float>::tinyness_before;
|
||||||
|
|
||||||
|
static Eigen::bfloat16 (min)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0080); }
|
||||||
|
static Eigen::bfloat16 lowest() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0xff7f); }
|
||||||
|
static Eigen::bfloat16 (max)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f7f); }
|
||||||
|
static Eigen::bfloat16 epsilon() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3c00); }
|
||||||
|
static Eigen::bfloat16 round_error() { return Eigen::bfloat16(0x3f00); }
|
||||||
|
static Eigen::bfloat16 infinity() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f80); }
|
||||||
|
static Eigen::bfloat16 quiet_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0); }
|
||||||
|
static Eigen::bfloat16 signaling_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f81); }
|
||||||
|
static Eigen::bfloat16 denorm_min() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0001); }
|
||||||
|
};
|
||||||
|
|
||||||
|
// If std::numeric_limits<T> is specialized, should also specialize
|
||||||
|
// std::numeric_limits<const T>, std::numeric_limits<volatile T>, and
|
||||||
|
// std::numeric_limits<const volatile T>
|
||||||
|
// https://stackoverflow.com/a/16519653/
|
||||||
|
template<>
|
||||||
|
struct numeric_limits<const Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
|
||||||
|
template<>
|
||||||
|
struct numeric_limits<volatile Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
|
||||||
|
template<>
|
||||||
|
struct numeric_limits<const volatile Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
|
||||||
|
} // namespace std
|
||||||
|
|
||||||
|
namespace Eigen {
|
||||||
|
|
||||||
|
namespace bfloat16_impl {
|
||||||
|
|
||||||
|
// We need to distinguish ‘clang as the CUDA compiler’ from ‘clang as the host compiler,
|
||||||
|
// invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation
|
||||||
|
// of the functions, while the latter can only deal with one of them.
|
||||||
|
#if !defined(EIGEN_HAS_NATIVE_BF16) || (EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC) // Emulate support for bfloat16 floats
|
||||||
|
|
||||||
|
#if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)
|
||||||
|
// We need to provide emulated *host-side* BF16 operators for clang.
|
||||||
|
#pragma push_macro("EIGEN_DEVICE_FUNC")
|
||||||
|
#undef EIGEN_DEVICE_FUNC
|
||||||
|
#if defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_NATIVE_BF16)
|
||||||
|
#define EIGEN_DEVICE_FUNC __host__
|
||||||
|
#else // both host and device need emulated ops.
|
||||||
|
#define EIGEN_DEVICE_FUNC __host__ __device__
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Definitions for CPUs, mostly working through conversion
|
||||||
|
// to/from fp32.
|
||||||
|
|
||||||
|
// Sum of two bfloat16 values: both operands are widened to float, added, and
// the float result is converted back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const bfloat16& a, const bfloat16& b) {
  const float sum = static_cast<float>(a) + static_cast<float>(b);
  return bfloat16(sum);
}
|
||||||
|
// bfloat16 + int: the integer is converted to float and the addition is done
// in float before converting the result back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const bfloat16& a, const int& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return bfloat16(lhs + rhs);
}
|
||||||
|
// int + bfloat16: mirror image of the (bfloat16, int) overload; the addition
// is performed in float.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const int& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return bfloat16(lhs + rhs);
}
|
||||||
|
// Product of two bfloat16 values, computed in float and converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator * (const bfloat16& a, const bfloat16& b) {
  const float product = static_cast<float>(a) * static_cast<float>(b);
  return bfloat16(product);
}
|
||||||
|
// Difference of two bfloat16 values, computed in float and converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator - (const bfloat16& a, const bfloat16& b) {
  const float difference = static_cast<float>(a) - static_cast<float>(b);
  return bfloat16(difference);
}
|
||||||
|
// Quotient of two bfloat16 values, computed in float and converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, const bfloat16& b) {
  const float quotient = static_cast<float>(a) / static_cast<float>(b);
  return bfloat16(quotient);
}
|
||||||
|
// Unary minus: flips the top bit (0x8000) of the 16-bit representation and
// leaves every other bit untouched; no float round trip is involved.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator - (const bfloat16& a) {
  bfloat16 negated;
  negated.value = a.value ^ 0x8000;
  return negated;
}
|
||||||
|
// In-place addition: a is replaced by the float sum of a and b converted back
// to bfloat16. Returns a reference to a.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator += (bfloat16& a, const bfloat16& b) {
  const float sum = static_cast<float>(a) + static_cast<float>(b);
  a = bfloat16(sum);
  return a;
}
|
||||||
|
// In-place multiplication: a is replaced by the float product of a and b
// converted back to bfloat16. Returns a reference to a.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator *= (bfloat16& a, const bfloat16& b) {
  const float product = static_cast<float>(a) * static_cast<float>(b);
  a = bfloat16(product);
  return a;
}
|
||||||
|
// In-place subtraction: a is replaced by the float difference a - b converted
// back to bfloat16. Returns a reference to a.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator -= (bfloat16& a, const bfloat16& b) {
  const float difference = static_cast<float>(a) - static_cast<float>(b);
  a = bfloat16(difference);
  return a;
}
|
||||||
|
// In-place division: a is replaced by the float quotient a / b converted back
// to bfloat16. Returns a reference to a.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator /= (bfloat16& a, const bfloat16& b) {
  const float quotient = static_cast<float>(a) / static_cast<float>(b);
  a = bfloat16(quotient);
  return a;
}
|
||||||
|
// Prefix increment: adds one to a in place and returns the updated value
// (by value, as declared).
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator++(bfloat16& a) {
  const bfloat16 one(1);
  a += one;
  return a;
}
|
||||||
|
// Prefix decrement: subtracts one from a in place and returns the updated
// value (by value, as declared).
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator--(bfloat16& a) {
  const bfloat16 one(1);
  a -= one;
  return a;
}
|
||||||
|
// Postfix increment: increments a and returns the value it held beforehand.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator++(bfloat16& a, int) {
  bfloat16 previous(a);
  ++a;
  return previous;
}
|
||||||
|
// Postfix decrement: decrements a and returns the value it held beforehand.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator--(bfloat16& a, int) {
  bfloat16 previous(a);
  --a;
  return previous;
}
|
||||||
|
// Equality: both operands are widened to float and compared with
// numext::equal_strict.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return numext::equal_strict(lhs, rhs);
}
|
||||||
|
// Inequality: both operands are widened to float and compared with
// numext::not_equal_strict.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return numext::not_equal_strict(lhs, rhs);
}
|
||||||
|
// Less-than, evaluated on the float values of both operands.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs < rhs;
}
|
||||||
|
// Less-than-or-equal, evaluated on the float values of both operands.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs <= rhs;
}
|
||||||
|
// Greater-than, evaluated on the float values of both operands.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs > rhs;
}
|
||||||
|
// Greater-than-or-equal, evaluated on the float values of both operands.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs >= rhs;
}
|
||||||
|
|
||||||
|
#if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)
|
||||||
|
#pragma pop_macro("EIGEN_DEVICE_FUNC")
|
||||||
|
#endif
|
||||||
|
#endif // Emulate support for bfloat16 floats
|
||||||
|
|
||||||
|
// Division by an index. Do it in full float precision to avoid accuracy
|
||||||
|
// issues in converting the denominator to bfloat16.
|
||||||
|
// Division of a bfloat16 by an Index. The denominator is converted straight to
// float (not to bfloat16 first), and the division is carried out in float.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, Index b) {
  const float numerator = static_cast<float>(a);
  const float denominator = static_cast<float>(b);
  return bfloat16(numerator / denominator);
}
|
||||||
|
|
||||||
|
// Converts a float to the raw bfloat16 representation by truncation (no
// rounding): the upper 16 bits of the float's bit pattern are kept.
// NaN inputs are mapped to a fixed NaN pattern (0x7FC0, or 0xFFC0 when the
// sign bit is set) instead of being truncated.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw truncate_to_bfloat16(const float v) {
  __bfloat16_raw truncated;
  if (Eigen::numext::isnan EIGEN_NOT_A_MACRO(v)) {
    truncated.value = std::signbit(v) ? 0xFFC0 : 0x7FC0;
  } else {
    const numext::uint32_t bits = numext::bit_cast<numext::uint32_t>(v);
    truncated.value = static_cast<numext::uint16_t>(bits >> 16);
  }
  return truncated;
}
|
||||||
|
|
||||||
|
// Wraps a 16-bit pattern into a __bfloat16_raw without any numeric conversion.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
__bfloat16_raw raw_uint16_to_bfloat16(numext::uint16_t value) {
  return __bfloat16_raw(value);
}
|
||||||
|
|
||||||
|
// Returns the 16-bit pattern stored in a __bfloat16_raw, unchanged.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
numext::uint16_t raw_bfloat16_as_uint16(const __bfloat16_raw& bf) {
  return bf.value;
}
|
||||||
|
|
||||||
|
// float_to_bfloat16_rtne template specialization that does not make any
|
||||||
|
// assumption about the value of its function argument (ff).
|
||||||
|
template <>
|
||||||
|
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<false>(float ff) {
|
||||||
|
#if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16))
|
||||||
|
// Nothing to do here
|
||||||
|
#else
|
||||||
|
__bfloat16_raw output;
|
||||||
|
|
||||||
|
if (Eigen::numext::isnan EIGEN_NOT_A_MACRO(ff)) {
|
||||||
|
// If the value is a NaN, squash it to a qNaN with msb of fraction set,
|
||||||
|
// this makes sure after truncation we don't end up with an inf.
|
||||||
|
//
|
||||||
|
// qNaN magic: All exponent bits set + most significant bit of fraction
|
||||||
|
// set.
|
||||||
|
output.value = std::signbit(ff) ? 0xFFC0: 0x7FC0;
|
||||||
|
} else {
|
||||||
|
// Fast rounding algorithm that rounds a half value to nearest even. This
|
||||||
|
// reduces expected error when we convert a large number of floats. Here
|
||||||
|
// is how it works:
|
||||||
|
//
|
||||||
|
// Definitions:
|
||||||
|
// To convert a float 32 to bfloat16, a float 32 can be viewed as 32 bits
|
||||||
|
// with the following tags:
|
||||||
|
//
|
||||||
|
// Sign | Exp (8 bits) | Frac (23 bits)
|
||||||
|
// S EEEEEEEE FFFFFFLRTTTTTTTTTTTTTTT
|
||||||
|
//
|
||||||
|
// S: Sign bit.
|
||||||
|
// E: Exponent bits.
|
||||||
|
// F: First 6 bits of fraction.
|
||||||
|
// L: Least significant bit of resulting bfloat16 if we truncate away the
|
||||||
|
// rest of the float32. This is also the 7th bit of fraction
|
||||||
|
// R: Rounding bit, 8th bit of fraction.
|
||||||
|
// T: Sticky bits, rest of fraction, 15 bits.
|
||||||
|
//
|
||||||
|
// To round half to nearest even, there are 3 cases where we want to round
|
||||||
|
// down (simply truncate the result of the bits away, which consists of
|
||||||
|
// rounding bit and sticky bits) and two cases where we want to round up
|
||||||
|
// (truncate then add one to the result).
|
||||||
|
//
|
||||||
|
// The fast converting algorithm simply adds lsb (L) to 0x7fff (15 bits of
|
||||||
|
// 1s) as the rounding bias, adds the rounding bias to the input, then
|
||||||
|
// truncates the last 16 bits away.
|
||||||
|
//
|
||||||
|
// To understand how it works, we can analyze this algorithm case by case:
|
||||||
|
//
|
||||||
|
// 1. L = 0, R = 0:
|
||||||
|
// Expect: round down, this is less than half value.
|
||||||
|
//
|
||||||
|
// Algorithm:
|
||||||
|
// - Rounding bias: 0x7fff + 0 = 0x7fff
|
||||||
|
// - Adding rounding bias to input may create a carry into R, depending
// on whether any of the T bits is set to 1.
|
||||||
|
// - R may be set to 1 if there is a carry.
|
||||||
|
// - L remains 0.
|
||||||
|
// - Note that this case also handles Inf and -Inf, where all fraction
|
||||||
|
// bits, including L, R and Ts are all 0. The output remains Inf after
|
||||||
|
// this algorithm.
|
||||||
|
//
|
||||||
|
// 2. L = 1, R = 0:
|
||||||
|
// Expect: round down, this is less than half value.
|
||||||
|
//
|
||||||
|
// Algorithm:
|
||||||
|
// - Rounding bias: 0x7fff + 1 = 0x8000
|
||||||
|
// - Adding rounding bias to input doesn't change sticky bits but
|
||||||
|
// adds 1 to rounding bit.
|
||||||
|
// - L remains 1.
|
||||||
|
//
|
||||||
|
// 3. L = 0, R = 1, all of T are 0:
|
||||||
|
// Expect: round down, this is exactly at half, the result is already
|
||||||
|
// even (L=0).
|
||||||
|
//
|
||||||
|
// Algorithm:
|
||||||
|
// - Rounding bias: 0x7fff + 0 = 0x7fff
|
||||||
|
// - Adding rounding bias to input sets all sticky bits to 1, but
|
||||||
|
// doesn't create a carry.
|
||||||
|
// - R remains 1.
|
||||||
|
// - L remains 0.
|
||||||
|
//
|
||||||
|
// 4. L = 1, R = 1:
|
||||||
|
// Expect: round up, this is at or above half (R is set); when it is exactly
// half, L = 1 is odd, so the result must be rounded to the next even number.
|
||||||
|
//
|
||||||
|
// Algorithm:
|
||||||
|
// - Rounding bias: 0x7fff + 1 = 0x8000
|
||||||
|
// - Adding rounding bias to input doesn't change sticky bits, but
|
||||||
|
// creates a carry from rounding bit.
|
||||||
|
// - The carry sets L to 0, creates another carry bit and propagate
|
||||||
|
// forward to F bits.
|
||||||
|
// - If all the F bits are 1, a carry then propagates to the exponent
|
||||||
|
// bits, which then creates the minimum value with the next exponent
|
||||||
|
// value. Note that we won't have the case where exponents are all 1,
|
||||||
|
// since that's either a NaN (handled in the other if condition) or inf
|
||||||
|
// (handled in case 1).
|
||||||
|
//
|
||||||
|
// 5. L = 0, R = 1, any of T is 1:
|
||||||
|
// Expect: round up, this is greater than half.
|
||||||
|
//
|
||||||
|
// Algorithm:
|
||||||
|
// - Rounding bias: 0x7fff + 0 = 0x7fff
|
||||||
|
// - Adding rounding bias to input creates a carry from sticky bits,
|
||||||
|
// sets rounding bit to 0, then create another carry.
|
||||||
|
// - The second carry sets L to 1.
|
||||||
|
//
|
||||||
|
// Examples:
|
||||||
|
//
|
||||||
|
// Exact half value that is already even:
|
||||||
|
// Input:
|
||||||
|
// Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
|
||||||
|
// S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
|
||||||
|
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1000000000000000
|
||||||
|
//
|
||||||
|
// This falls into case 3. We truncate the rest of 16 bits and no
|
||||||
|
// carry is created into F and L:
|
||||||
|
//
|
||||||
|
// Output:
|
||||||
|
// Sign | Exp (8 bit) | Frac (first 7 bit)
|
||||||
|
// S E E E E E E E E F F F F F F L
|
||||||
|
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
|
||||||
|
//
|
||||||
|
// Exact half value, round to next even number:
|
||||||
|
// Input:
|
||||||
|
// Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
|
||||||
|
// S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
|
||||||
|
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1000000000000000
|
||||||
|
//
|
||||||
|
// This falls into case 4. We create a carry from R and T,
|
||||||
|
// which then propagates into L and F:
|
||||||
|
//
|
||||||
|
// Output:
|
||||||
|
// Sign | Exp (8 bit) | Frac (first 7 bit)
|
||||||
|
// S E E E E E E E E F F F F F F L
|
||||||
|
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Max denormal value round to min normal value:
|
||||||
|
// Input:
|
||||||
|
// Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
|
||||||
|
// S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
|
||||||
|
// 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1111111111111111
|
||||||
|
//
|
||||||
|
// This falls into case 4. We create a carry from R and T,
|
||||||
|
// propagate into L and F, which then propagates into exponent
|
||||||
|
// bits:
|
||||||
|
//
|
||||||
|
// Output:
|
||||||
|
// Sign | Exp (8 bit) | Frac (first 7 bit)
|
||||||
|
// S E E E E E E E E F F F F F F L
|
||||||
|
// 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
|
||||||
|
//
|
||||||
|
// Max normal value round to Inf:
|
||||||
|
// Input:
|
||||||
|
// Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
|
||||||
|
// S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
|
||||||
|
// 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1111111111111111
|
||||||
|
//
|
||||||
|
// This falls into case 4. We create a carry from R and T,
|
||||||
|
// propagate into L and F, which then propagates into exponent
|
||||||
|
// bits:
|
||||||
|
//
// Output:
// Sign | Exp (8 bit) | Frac (first 7 bit)
|
||||||
|
// S E E E E E E E E F F F F F F L
|
||||||
|
// 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0
|
||||||
|
|
||||||
|
// At this point, ff must be either a normal float, or +/-infinity.
|
||||||
|
output = float_to_bfloat16_rtne<true>(ff);
|
||||||
|
}
|
||||||
|
return output;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// float_to_bfloat16_rtne template specialization that assumes that its function
|
||||||
|
// argument (ff) is either a normal floating point number, or +/-infinity, or
|
||||||
|
// zero. Used to improve the runtime performance of conversion from an integer
|
||||||
|
// type to bfloat16.
|
||||||
|
template <>
|
||||||
|
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true>(float ff) {
|
||||||
|
#if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16))
|
||||||
|
// Nothing to do here
|
||||||
|
#else
|
||||||
|
numext::uint32_t input = numext::bit_cast<numext::uint32_t>(ff);
|
||||||
|
__bfloat16_raw output;
|
||||||
|
|
||||||
|
// Least significant bit of resulting bfloat.
|
||||||
|
numext::uint32_t lsb = (input >> 16) & 1;
|
||||||
|
numext::uint32_t rounding_bias = 0x7fff + lsb;
|
||||||
|
input += rounding_bias;
|
||||||
|
output.value = static_cast<numext::uint16_t>(input >> 16);
|
||||||
|
return output;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Widens a raw bfloat16 to float: the 16 stored bits become the upper half of
// the 32-bit float pattern and the lower half is zero.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h) {
  const numext::uint32_t widened = static_cast<numext::uint32_t>(h.value) << 16;
  return numext::bit_cast<float>(widened);
}
|
||||||
|
// --- standard functions ---
|
||||||
|
|
||||||
|
// True when the float value of a is an infinity.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const bfloat16& a) {
  EIGEN_USING_STD(isinf);
  const float as_float = static_cast<float>(a);
  return (isinf)(as_float);
}
|
||||||
|
// True when the float value of a is a NaN.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const bfloat16& a) {
  EIGEN_USING_STD(isnan);
  const float as_float = static_cast<float>(a);
  return (isnan)(as_float);
}
|
||||||
|
// True when a is neither an infinity nor a NaN.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const bfloat16& a) {
  if (isinf EIGEN_NOT_A_MACRO (a)) {
    return false;
  }
  return !(isnan EIGEN_NOT_A_MACRO (a));
}
|
||||||
|
|
||||||
|
// Absolute value: masks with 0x7FFF, i.e. clears the top bit of the 16-bit
// representation and keeps everything else.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 abs(const bfloat16& a) {
  bfloat16 magnitude;
  magnitude.value = a.value & 0x7FFF;
  return magnitude;
}
|
||||||
|
// exp(a), evaluated with ::expf on the float value and converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::expf(x));
}
|
||||||
|
// expm1(a), delegated to numext::expm1 on the float value and converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 expm1(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(numext::expm1(x));
}
|
||||||
|
// Natural logarithm, evaluated with ::logf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::logf(x));
}
|
||||||
|
// log1p(a), delegated to numext::log1p on the float value and converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log1p(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(numext::log1p(x));
}
|
||||||
|
// Base-10 logarithm, evaluated with ::log10f on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log10(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::log10f(x));
}
|
||||||
|
// Base-2 logarithm, computed as EIGEN_LOG2E * ln(a) in float.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log2(const bfloat16& a) {
  const float natural_log = ::logf(static_cast<float>(a));
  return bfloat16(static_cast<float>(EIGEN_LOG2E) * natural_log);
}
|
||||||
|
// Square root, evaluated with ::sqrtf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sqrt(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::sqrtf(x));
}
|
||||||
|
// a raised to the power b, evaluated with ::powf on the float values.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 pow(const bfloat16& a, const bfloat16& b) {
  const float base = static_cast<float>(a);
  const float exponent = static_cast<float>(b);
  return bfloat16(::powf(base, exponent));
}
|
||||||
|
// Sine, evaluated with ::sinf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sin(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::sinf(x));
}
|
||||||
|
// Cosine, evaluated with ::cosf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cos(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::cosf(x));
}
|
||||||
|
// Tangent, evaluated with ::tanf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tan(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::tanf(x));
}
|
||||||
|
// Arc sine, evaluated with ::asinf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asin(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::asinf(x));
}
|
||||||
|
// Arc cosine, evaluated with ::acosf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acos(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::acosf(x));
}
|
||||||
|
// Arc tangent, evaluated with ::atanf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::atanf(x));
}
|
||||||
|
// Hyperbolic sine, evaluated with ::sinhf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sinh(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::sinhf(x));
}
|
||||||
|
// Hyperbolic cosine, evaluated with ::coshf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cosh(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::coshf(x));
}
|
||||||
|
// Hyperbolic tangent, evaluated with ::tanhf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::tanhf(x));
}
|
||||||
|
#if EIGEN_HAS_CXX11_MATH
|
||||||
|
// Inverse hyperbolic sine, evaluated with ::asinhf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asinh(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::asinhf(x));
}
|
||||||
|
// Inverse hyperbolic cosine, evaluated with ::acoshf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acosh(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::acoshf(x));
}
|
||||||
|
// Inverse hyperbolic tangent, evaluated with ::atanhf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::atanhf(x));
}
|
||||||
|
#endif
|
||||||
|
// Floor, evaluated with ::floorf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::floorf(x));
}
|
||||||
|
// Ceiling, evaluated with ::ceilf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::ceilf(x));
}
|
||||||
|
// Rounds to an integral value with ::rintf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 rint(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::rintf(x));
}
|
||||||
|
// Rounds to the nearest integral value with ::roundf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 round(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::roundf(x));
}
|
||||||
|
// Floating-point remainder of a / b, evaluated with ::fmodf on the float values.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmod(const bfloat16& a, const bfloat16& b) {
  const float dividend = static_cast<float>(a);
  const float divisor = static_cast<float>(b);
  return bfloat16(::fmodf(dividend, divisor));
}
|
||||||
|
|
||||||
|
// Returns b when b compares strictly less than a (as floats), otherwise a.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (min)(const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  if (rhs < lhs) {
    return b;
  }
  return a;
}
|
||||||
|
// Returns b when a compares strictly less than b (as floats), otherwise a.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (max)(const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  if (lhs < rhs) {
    return b;
  }
  return a;
}
|
||||||
|
|
||||||
|
// Minimum of a and b as computed by ::fminf on their float values.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmin(const bfloat16& a, const bfloat16& b) {
  return bfloat16(::fminf(static_cast<float>(a), static_cast<float>(b)));
}
|
||||||
|
// Maximum of a and b as computed by ::fmaxf on their float values.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16& a, const bfloat16& b) {
  return bfloat16(::fmaxf(static_cast<float>(a), static_cast<float>(b)));
}
|
||||||
|
|
||||||
|
#ifndef EIGEN_NO_IO
|
||||||
|
// Streams a bfloat16 by writing its float value; returns the stream.
EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const bfloat16& v) {
  return os << static_cast<float>(v);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
} // namespace bfloat16_impl
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct random_default_impl<bfloat16, false, false>
|
||||||
|
{
|
||||||
|
static inline bfloat16 run(const bfloat16& x, const bfloat16& y)
|
||||||
|
{
|
||||||
|
return x + (y-x) * bfloat16(float(std::rand()) / float(RAND_MAX));
|
||||||
|
}
|
||||||
|
static inline bfloat16 run()
|
||||||
|
{
|
||||||
|
return run(bfloat16(-1.f), bfloat16(1.f));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Marks bfloat16 as an arithmetic type for Eigen's internal type traits.
template <>
struct is_arithmetic<bfloat16> {
  enum { value = true };
};
|
||||||
|
|
||||||
|
} // namespace internal
|
||||||
|
|
||||||
|
template<> struct NumTraits<Eigen::bfloat16>
|
||||||
|
: GenericNumTraits<Eigen::bfloat16>
|
||||||
|
{
|
||||||
|
enum {
|
||||||
|
IsSigned = true,
|
||||||
|
IsInteger = false,
|
||||||
|
IsComplex = false,
|
||||||
|
RequireInitialization = false
|
||||||
|
};
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 epsilon() {
|
||||||
|
return bfloat16_impl::raw_uint16_to_bfloat16(0x3c00);
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 dummy_precision() {
|
||||||
|
return bfloat16_impl::raw_uint16_to_bfloat16(0x3D4D); // bfloat16(5e-2f);
|
||||||
|
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 highest() {
|
||||||
|
return bfloat16_impl::raw_uint16_to_bfloat16(0x7F7F);
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 lowest() {
|
||||||
|
return bfloat16_impl::raw_uint16_to_bfloat16(0xFF7F);
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 infinity() {
|
||||||
|
return bfloat16_impl::raw_uint16_to_bfloat16(0x7f80);
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 quiet_NaN() {
|
||||||
|
return bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Eigen
|
||||||
|
|
||||||
|
namespace Eigen {
|
||||||
|
namespace numext {
|
||||||
|
|
||||||
|
template<>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
|
bool (isnan)(const Eigen::bfloat16& h) {
|
||||||
|
return (bfloat16_impl::isnan)(h);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
|
bool (isinf)(const Eigen::bfloat16& h) {
|
||||||
|
return (bfloat16_impl::isinf)(h);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
|
bool (isfinite)(const Eigen::bfloat16& h) {
|
||||||
|
return (bfloat16_impl::isfinite)(h);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bit_cast<Eigen::bfloat16, uint16_t>(const uint16_t& src) {
|
||||||
|
return Eigen::bfloat16(Eigen::bfloat16_impl::raw_uint16_to_bfloat16(src));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::bfloat16>(const Eigen::bfloat16& src) {
|
||||||
|
return Eigen::bfloat16_impl::raw_bfloat16_as_uint16(src);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace numext
|
||||||
|
} // namespace Eigen
|
||||||
|
|
||||||
|
#if EIGEN_HAS_STD_HASH
|
||||||
|
namespace std {
|
||||||
|
template <>
|
||||||
|
struct hash<Eigen::bfloat16> {
|
||||||
|
EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::bfloat16& a) const {
|
||||||
|
return static_cast<std::size_t>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(a));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace std
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#endif // EIGEN_BFLOAT16_H
|
||||||
@@ -12,18 +12,106 @@
|
|||||||
#define EIGEN_ARCH_CONJ_HELPER_H
|
#define EIGEN_ARCH_CONJ_HELPER_H
|
||||||
|
|
||||||
#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL) \
|
#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL) \
|
||||||
template<> struct conj_helper<PACKET_REAL, PACKET_CPLX, false,false> { \
|
template <> \
|
||||||
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, const PACKET_CPLX& y, const PACKET_CPLX& c) const \
|
struct conj_helper<PACKET_REAL, PACKET_CPLX, false, false> { \
|
||||||
{ return padd(c, pmul(x,y)); } \
|
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, \
|
||||||
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, const PACKET_CPLX& y) const \
|
const PACKET_CPLX& y, \
|
||||||
{ return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v)); } \
|
const PACKET_CPLX& c) const { \
|
||||||
|
return padd(c, this->pmul(x, y)); \
|
||||||
|
} \
|
||||||
|
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, \
|
||||||
|
const PACKET_CPLX& y) const { \
|
||||||
|
return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v)); \
|
||||||
|
} \
|
||||||
}; \
|
}; \
|
||||||
\
|
\
|
||||||
template<> struct conj_helper<PACKET_CPLX, PACKET_REAL, false,false> { \
|
template <> \
|
||||||
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, const PACKET_REAL& y, const PACKET_CPLX& c) const \
|
struct conj_helper<PACKET_CPLX, PACKET_REAL, false, false> { \
|
||||||
{ return padd(c, pmul(x,y)); } \
|
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, \
|
||||||
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, const PACKET_REAL& y) const \
|
const PACKET_REAL& y, \
|
||||||
{ return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y)); } \
|
const PACKET_CPLX& c) const { \
|
||||||
|
return padd(c, this->pmul(x, y)); \
|
||||||
|
} \
|
||||||
|
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, \
|
||||||
|
const PACKET_REAL& y) const { \
|
||||||
|
return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y)); \
|
||||||
|
} \
|
||||||
};
|
};
|
||||||
|
|
||||||
|
namespace Eigen {
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
template<bool Conjugate> struct conj_if;
|
||||||
|
|
||||||
|
template<> struct conj_if<true> {
|
||||||
|
template<typename T>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const { return numext::conj(x); }
|
||||||
|
template<typename T>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T pconj(const T& x) const { return internal::pconj(x); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct conj_if<false> {
|
||||||
|
template<typename T>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& operator()(const T& x) const { return x; }
|
||||||
|
template<typename T>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& pconj(const T& x) const { return x; }
|
||||||
|
};
|
||||||
|
|
||||||
|
// Generic Implementation, assume scalars since the packet-version is
|
||||||
|
// specialized below.
|
||||||
|
template<typename LhsType, typename RhsType, bool ConjLhs, bool ConjRhs>
|
||||||
|
struct conj_helper {
|
||||||
|
typedef typename ScalarBinaryOpTraits<LhsType, RhsType>::ReturnType ResultType;
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ResultType
|
||||||
|
pmadd(const LhsType& x, const RhsType& y, const ResultType& c) const
|
||||||
|
{ return this->pmul(x, y) + c; }
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ResultType
|
||||||
|
pmul(const LhsType& x, const RhsType& y) const
|
||||||
|
{ return conj_if<ConjLhs>()(x) * conj_if<ConjRhs>()(y); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename LhsScalar, typename RhsScalar>
|
||||||
|
struct conj_helper<LhsScalar, RhsScalar, true, true> {
|
||||||
|
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar>::ReturnType ResultType;
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ResultType
|
||||||
|
pmadd(const LhsScalar& x, const RhsScalar& y, const ResultType& c) const
|
||||||
|
{ return this->pmul(x, y) + c; }
|
||||||
|
|
||||||
|
// We save a conjuation by using the identity conj(a)*conj(b) = conj(a*b).
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ResultType
|
||||||
|
pmul(const LhsScalar& x, const RhsScalar& y) const
|
||||||
|
{ return numext::conj(x * y); }
|
||||||
|
};
|
||||||
|
|
||||||
|
// Implementation with equal type, use packet operations.
|
||||||
|
template<typename Packet, bool ConjLhs, bool ConjRhs>
|
||||||
|
struct conj_helper<Packet, Packet, ConjLhs, ConjRhs>
|
||||||
|
{
|
||||||
|
typedef Packet ResultType;
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pmadd(const Packet& x, const Packet& y, const Packet& c) const
|
||||||
|
{ return Eigen::internal::pmadd(conj_if<ConjLhs>().pconj(x), conj_if<ConjRhs>().pconj(y), c); }
|
||||||
|
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pmul(const Packet& x, const Packet& y) const
|
||||||
|
{ return Eigen::internal::pmul(conj_if<ConjLhs>().pconj(x), conj_if<ConjRhs>().pconj(y)); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Packet>
|
||||||
|
struct conj_helper<Packet, Packet, true, true>
|
||||||
|
{
|
||||||
|
typedef Packet ResultType;
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pmadd(const Packet& x, const Packet& y, const Packet& c) const
|
||||||
|
{ return Eigen::internal::pmadd(pconj(x), pconj(y), c); }
|
||||||
|
// We save a conjuation by using the identity conj(a)*conj(b) = conj(a*b).
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pmul(const Packet& x, const Packet& y) const
|
||||||
|
{ return pconj(Eigen::internal::pmul(x, y)); }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace internal
|
||||||
|
} // namespace Eigen
|
||||||
|
|
||||||
#endif // EIGEN_ARCH_CONJ_HELPER_H
|
#endif // EIGEN_ARCH_CONJ_HELPER_H
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user