mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Compare commits
438 Commits
3.0.3
...
3.1.0-alph
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fe0350cf1b | ||
|
|
99c694623a | ||
|
|
6ad48c5d92 | ||
|
|
4ed87c59c7 | ||
|
|
1763f86364 | ||
|
|
fe85b7ebc6 | ||
|
|
bc7b251cd9 | ||
|
|
a594d7ffd7 | ||
|
|
ad4aa7873f | ||
|
|
fd4aefadcd | ||
|
|
a64407f086 | ||
|
|
13abb37721 | ||
|
|
7002639844 | ||
|
|
13e46ad847 | ||
|
|
9a954d29ec | ||
|
|
634fedaf68 | ||
|
|
10cd52350f | ||
|
|
9c86ee2695 | ||
|
|
8d6e394b06 | ||
|
|
670e3af5a8 | ||
|
|
18e3ac0f0d | ||
|
|
87138075da | ||
|
|
fc2d85d139 | ||
|
|
27d222d23e | ||
|
|
ed244e9c1a | ||
|
|
0251bb6c1d | ||
|
|
65d5311c68 | ||
|
|
d9f5840f7a | ||
|
|
a108216af1 | ||
|
|
362fcabc44 | ||
|
|
5e4dfa4a09 | ||
|
|
606e204f6d | ||
|
|
c68616b3b5 | ||
|
|
87f2af5930 | ||
|
|
d615d39af0 | ||
|
|
0d03492e1e | ||
|
|
ee9f3e34b0 | ||
|
|
039408cd66 | ||
|
|
238999045c | ||
|
|
0e1e0a2a58 | ||
|
|
274f8a0947 | ||
|
|
589cc627f8 | ||
|
|
db8f528737 | ||
|
|
d6bf9f848a | ||
|
|
2d4fee0b40 | ||
|
|
e7ef367db1 | ||
|
|
bdee0c9baa | ||
|
|
15ea999f84 | ||
|
|
901bcdd2a8 | ||
|
|
96a18ef230 | ||
|
|
8171adb7ff | ||
|
|
67ae94f3a2 | ||
|
|
e3e39ea26d | ||
|
|
2c03e6fccc | ||
|
|
7f04845023 | ||
|
|
e4cea957df | ||
|
|
7e866c447f | ||
|
|
6f92b75874 | ||
|
|
50d756b9ea | ||
|
|
15d781b64c | ||
|
|
fcc966b40d | ||
|
|
33e52a3943 | ||
|
|
732a50d043 | ||
|
|
40c0f3af57 | ||
|
|
3db6455896 | ||
|
|
0308c11849 | ||
|
|
1e7712771e | ||
|
|
1aa6c7f122 | ||
|
|
d738bedc5b | ||
|
|
f60e6f5ee8 | ||
|
|
594fd2d11d | ||
|
|
9d7d634897 | ||
|
|
f35708d2e0 | ||
|
|
105e170d8b | ||
|
|
2600ba1731 | ||
|
|
c06ae325a4 | ||
|
|
36457178f9 | ||
|
|
d400a6245e | ||
|
|
38277e8a9b | ||
|
|
2d7c3eea53 | ||
|
|
37f304a2e6 | ||
|
|
fff25a4b46 | ||
|
|
57c6bfba08 | ||
|
|
081abb701d | ||
|
|
10447a7b57 | ||
|
|
43cdd242d0 | ||
|
|
015c331252 | ||
|
|
e270a5656a | ||
|
|
86bb20c431 | ||
|
|
e36a4c880a | ||
|
|
06450882ab | ||
|
|
dd232e30b0 | ||
|
|
a1fa05f14e | ||
|
|
a0da96e2f4 | ||
|
|
80f8ed9f9c | ||
|
|
c3ad1f9382 | ||
|
|
6ec0af6dc7 | ||
|
|
e017f798eb | ||
|
|
accae638b2 | ||
|
|
84cf1b5b1d | ||
|
|
9ca673daed | ||
|
|
dd504d6aae | ||
|
|
59576014a9 | ||
|
|
b60624dc2a | ||
|
|
82f9aa194d | ||
|
|
69966e90e1 | ||
|
|
5dc9650f11 | ||
|
|
a8a2bf3b5a | ||
|
|
9bd902ed9c | ||
|
|
9353bbac4a | ||
|
|
32917515df | ||
|
|
1cdbae62db | ||
|
|
91e392a042 | ||
|
|
a09cc5d4c0 | ||
|
|
c861e05181 | ||
|
|
9ae606866c | ||
|
|
950eeab4d7 | ||
|
|
c0e36516f3 | ||
|
|
3f56de2628 | ||
|
|
e759086dcd | ||
|
|
4ca89f32ed | ||
|
|
f10bae74e8 | ||
|
|
a0bcaa88af | ||
|
|
b85bcd91bf | ||
|
|
7aaae9d6df | ||
|
|
3a4c78b588 | ||
|
|
9fdb6a2ead | ||
|
|
b00a33bc70 | ||
|
|
49d652c600 | ||
|
|
6b8d6887ac | ||
|
|
00d4a360ba | ||
|
|
d1b54ecfa3 | ||
|
|
cda397b117 | ||
|
|
2d621d235d | ||
|
|
a2810aa32f | ||
|
|
8107b3da75 | ||
|
|
f56316f7ed | ||
|
|
70206ab1e1 | ||
|
|
57d1ccb2dc | ||
|
|
2d4fe54b73 | ||
|
|
01b4b6e456 | ||
|
|
be9b87377f | ||
|
|
63dcdb65fd | ||
|
|
ffe6d1f901 | ||
|
|
f278a3eaba | ||
|
|
bc6d78982f | ||
|
|
de22ad117c | ||
|
|
08c0edae86 | ||
|
|
db36e4204f | ||
|
|
8fbbbe7521 | ||
|
|
cb2f1944e2 | ||
|
|
53fa851724 | ||
|
|
dcb66d6b40 | ||
|
|
3e4a68cc60 | ||
|
|
c110abb7d2 | ||
|
|
9d82a7e204 | ||
|
|
3a82aa1133 | ||
|
|
fb3aa7220f | ||
|
|
45a6bb34c3 | ||
|
|
f422668d39 | ||
|
|
1b98b73472 | ||
|
|
aa3e420df5 | ||
|
|
ab3f138b23 | ||
|
|
478de03bd8 | ||
|
|
cdd3e85060 | ||
|
|
b4d1d4a2e0 | ||
|
|
c5ddaf0c87 | ||
|
|
1de769d122 | ||
|
|
05de3dddca | ||
|
|
94d87abbdb | ||
|
|
a594ac3966 | ||
|
|
57207239f3 | ||
|
|
fa7c08a831 | ||
|
|
0cf2a05f3e | ||
|
|
9df2f5c923 | ||
|
|
0609dbeec6 | ||
|
|
6a1caf0351 | ||
|
|
4477843bdd | ||
|
|
5e431779f3 | ||
|
|
7bf0e8cd82 | ||
|
|
bca18a13ea | ||
|
|
d7e70edfb3 | ||
|
|
e44c19d1cc | ||
|
|
1ddf88060b | ||
|
|
a997dacc67 | ||
|
|
39d4585bff | ||
|
|
5d43b4049d | ||
|
|
70df09b76d | ||
|
|
a2d414f568 | ||
|
|
de69129f56 | ||
|
|
16b638c159 | ||
|
|
dcbc985a28 | ||
|
|
739559b08a | ||
|
|
0c6055c285 | ||
|
|
c1170d2e93 | ||
|
|
3fce43a704 | ||
|
|
a5761d6dd7 | ||
|
|
15cb4f5b09 | ||
|
|
21d27c6f71 | ||
|
|
cd3c2451b6 | ||
|
|
3172749f32 | ||
|
|
4f237f035c | ||
|
|
5dc8458293 | ||
|
|
b94c00226f | ||
|
|
ae9c96a32d | ||
|
|
4e7f38ffc7 | ||
|
|
e97879857b | ||
|
|
1beb8a6564 | ||
|
|
2fc1b58cd2 | ||
|
|
e1dec359ba | ||
|
|
b96d0bd240 | ||
|
|
683ea3c93f | ||
|
|
ac3ad9c1e7 | ||
|
|
17c321617d | ||
|
|
961a825b97 | ||
|
|
9bba0e7ba1 | ||
|
|
b2988375e8 | ||
|
|
6799fabba9 | ||
|
|
d8ae978b65 | ||
|
|
823b2105b6 | ||
|
|
b0adbfbae7 | ||
|
|
c331c092d5 | ||
|
|
7301f4345c | ||
|
|
83563dee3c | ||
|
|
ebfed5a512 | ||
|
|
1d796acb05 | ||
|
|
5d1836b182 | ||
|
|
e0a6ce50dd | ||
|
|
2092b45d0d | ||
|
|
16b13596a6 | ||
|
|
edf4c4b217 | ||
|
|
9053729d68 | ||
|
|
f4122e9f94 | ||
|
|
6b006772f1 | ||
|
|
59b83c14fd | ||
|
|
3e7aaadb1d | ||
|
|
d52d8e4a53 | ||
|
|
7706bafcfd | ||
|
|
7898281b2b | ||
|
|
b38d3b360e | ||
|
|
f1d98aad1b | ||
|
|
063042bca3 | ||
|
|
477d3e5726 | ||
|
|
a2feb6f3c7 | ||
|
|
dd598ef8ce | ||
|
|
6b31aa4bd1 | ||
|
|
7ee084f82f | ||
|
|
c01ed935dd | ||
|
|
8ddd1e390b | ||
|
|
8414be739b | ||
|
|
b3f5fbbd9a | ||
|
|
b85c89c313 | ||
|
|
9bf4d709e4 | ||
|
|
9e667e28f5 | ||
|
|
6d7a32231d | ||
|
|
ea4a1960f0 | ||
|
|
79ad55a901 | ||
|
|
42e2578ef9 | ||
|
|
5734ee6df4 | ||
|
|
ca7d3dca79 | ||
|
|
f162f7c323 | ||
|
|
a660e6425c | ||
|
|
5748d3c96f | ||
|
|
b12522f696 | ||
|
|
3431c052c6 | ||
|
|
3a2cabc275 | ||
|
|
51f706b916 | ||
|
|
66fa6f39a2 | ||
|
|
80b1d1371d | ||
|
|
8fa7e92e77 | ||
|
|
97ac0fd192 | ||
|
|
e8313364c1 | ||
|
|
47a2bca89f | ||
|
|
26d7dad138 | ||
|
|
22bff949c8 | ||
|
|
d4bd8bddb5 | ||
|
|
705023fd85 | ||
|
|
3fb65734ab | ||
|
|
22cc2b727b | ||
|
|
38a4e3053d | ||
|
|
0d02182ae8 | ||
|
|
a8f66fec65 | ||
|
|
bdb545ce3b | ||
|
|
5fdebc2fa5 | ||
|
|
08074843ac | ||
|
|
c52268c649 | ||
|
|
2489c81562 | ||
|
|
c98cd5e564 | ||
|
|
0b308e79c4 | ||
|
|
16db255333 | ||
|
|
9b52fe0432 | ||
|
|
3ecf7e8f6e | ||
|
|
7aabce7c76 | ||
|
|
16a2d896bc | ||
|
|
4a95badf74 | ||
|
|
2f32e48517 | ||
|
|
a55c27a15f | ||
|
|
642d452921 | ||
|
|
e3491beb48 | ||
|
|
a871f3cdb8 | ||
|
|
aedccbf52f | ||
|
|
d2673d89bd | ||
|
|
842881cfb1 | ||
|
|
40287d2fd9 | ||
|
|
f82b3ea241 | ||
|
|
cf04a7c682 | ||
|
|
6d3dee1b66 | ||
|
|
8c8ab9ae10 | ||
|
|
a6d42e28fe | ||
|
|
86ca35ccff | ||
|
|
91fe1507d1 | ||
|
|
421ece38e1 | ||
|
|
7a61a564ef | ||
|
|
5bc4abc45e | ||
|
|
562d3ea91d | ||
|
|
35c1158ee3 | ||
|
|
b495203310 | ||
|
|
5830f90983 | ||
|
|
9d6fdbced7 | ||
|
|
5b71d44e18 | ||
|
|
9464745385 | ||
|
|
7b46d7ed0f | ||
|
|
d23845c4cc | ||
|
|
87ac09daa8 | ||
|
|
5541bcb769 | ||
|
|
117d17ee58 | ||
|
|
46bee5682f | ||
|
|
074b067624 | ||
|
|
7209d6a126 | ||
|
|
96464f8563 | ||
|
|
501bc602ec | ||
|
|
f2837aebc4 | ||
|
|
8170ef0b2d | ||
|
|
7f2a88c91f | ||
|
|
85c137ccd4 | ||
|
|
179d42bb2b | ||
|
|
d4fd298fbb | ||
|
|
9a06055870 | ||
|
|
a34a216e82 | ||
|
|
3de2f4b75a | ||
|
|
ae3b6cc324 | ||
|
|
e22a523021 | ||
|
|
0aa7425f15 | ||
|
|
0c463a21c4 | ||
|
|
d7e3c949be | ||
|
|
dac4bb640a | ||
|
|
837db08cbd | ||
|
|
6e1573f66a | ||
|
|
6b4e215710 | ||
|
|
0896c6d97d | ||
|
|
4e7e5d09e1 | ||
|
|
fb76452cbc | ||
|
|
97b6d26f5b | ||
|
|
883219041f | ||
|
|
a18a1be42d | ||
|
|
012419166e | ||
|
|
781e75cbd7 | ||
|
|
cc23b0a3d9 | ||
|
|
a96c849c20 | ||
|
|
1947da39ab | ||
|
|
10426b7647 | ||
|
|
0358a8247c | ||
|
|
9e0c8549ce | ||
|
|
06fb7cf470 | ||
|
|
3b60d2dbc4 | ||
|
|
2d11041e24 | ||
|
|
3457965bf5 | ||
|
|
f924722f3b | ||
|
|
bb2d70d211 | ||
|
|
6441e8727b | ||
|
|
392eb9fee8 | ||
|
|
f85db18c1c | ||
|
|
50c00d14c8 | ||
|
|
e87f653924 | ||
|
|
67d50f539b | ||
|
|
e48bc0dfe3 | ||
|
|
0b40b36d10 | ||
|
|
820545cddb | ||
|
|
c9b5531d6c | ||
|
|
e654405900 | ||
|
|
7e86324898 | ||
|
|
11164830f5 | ||
|
|
12a30a982f | ||
|
|
88b3116b99 | ||
|
|
0c146bee1b | ||
|
|
eae5a6bb09 | ||
|
|
11ea81858a | ||
|
|
cca7b146a2 | ||
|
|
a6b5314c20 | ||
|
|
ae06b8af5c | ||
|
|
afdd26f229 | ||
|
|
0d58c36ffd | ||
|
|
70d5837e00 | ||
|
|
77a1373c3a | ||
|
|
d90a8ee8bd | ||
|
|
b471161f28 | ||
|
|
969e92261d | ||
|
|
10dae8dd4d | ||
|
|
8175fe43e0 | ||
|
|
00991b5b64 | ||
|
|
4f1419e9c3 | ||
|
|
6feb1d3c0b | ||
|
|
568478ffe5 | ||
|
|
f4ac7d2b43 | ||
|
|
b175bc464f | ||
|
|
1b17a674dd | ||
|
|
5c204d1ff7 | ||
|
|
e6fa4a267a | ||
|
|
931814d7c0 | ||
|
|
c6ad2deead | ||
|
|
42bc1f77be | ||
|
|
abc8c0821c | ||
|
|
4ada45bc76 | ||
|
|
7d24cf283a | ||
|
|
7bb4f6ae2f | ||
|
|
3ef0da6efb | ||
|
|
816541d82c | ||
|
|
cfd5c2d74e | ||
|
|
611fc17894 | ||
|
|
ec32d2c807 | ||
|
|
b3e43246bc | ||
|
|
f9da1ccc3b | ||
|
|
e35b1ef3f3 | ||
|
|
fe595e91ae | ||
|
|
9cca79f5ca | ||
|
|
da3f3586e0 | ||
|
|
22c7609d72 | ||
|
|
5fda8cdfb3 | ||
|
|
eb9c6b6cfd | ||
|
|
bb8a25e94b | ||
|
|
535a61ede8 | ||
|
|
eba023d082 | ||
|
|
b8ecda5c66 | ||
|
|
bbb4b35dfc | ||
|
|
290205dfc0 | ||
|
|
5991d247f9 | ||
|
|
37c5341d64 | ||
|
|
50a3cd678a |
11
.hgeol
11
.hgeol
@@ -1,3 +1,8 @@
|
||||
[patterns]
|
||||
**.* = native
|
||||
eigen_autoexp_part.dat = CRLF
|
||||
[patterns]
|
||||
scripts/*.in = LF
|
||||
debug/msvc/*.dat = CRLF
|
||||
unsupported/test/mpreal/*.* = CRLF
|
||||
** = native
|
||||
|
||||
[repository]
|
||||
native = LF
|
||||
|
||||
@@ -64,6 +64,10 @@ set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||
|
||||
find_package(StandardMathLibrary)
|
||||
|
||||
|
||||
set(EIGEN_TEST_CUSTOM_LINKER_FLAGS "" CACHE STRING "Additional linker flags when linking unit tests.")
|
||||
set(EIGEN_TEST_CUSTOM_CXX_FLAGS "" CACHE STRING "Additional compiler flags when compiling unit tests.")
|
||||
|
||||
set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "")
|
||||
|
||||
if(NOT STANDARD_MATH_LIBRARY_FOUND)
|
||||
@@ -103,6 +107,8 @@ endif()
|
||||
|
||||
add_definitions("-DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS")
|
||||
|
||||
set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320")
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fexceptions -fno-check-new -fno-common -fstrict-aliasing")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "-g3")
|
||||
@@ -283,11 +289,17 @@ if(EIGEN_BUILD_PKGCONFIG)
|
||||
STRING(REPLACE ${path_separator} ";" pkg_config_libdir_search "$ENV{PKG_CONFIG_LIBDIR}")
|
||||
message(STATUS "searching for 'pkgconfig' directory in PKG_CONFIG_LIBDIR ( $ENV{PKG_CONFIG_LIBDIR} ), ${CMAKE_INSTALL_PREFIX}/share, and ${CMAKE_INSTALL_PREFIX}/lib")
|
||||
FIND_PATH(pkg_config_libdir pkgconfig ${pkg_config_libdir_search} ${CMAKE_INSTALL_PREFIX}/share ${CMAKE_INSTALL_PREFIX}/lib ${pkg_config_libdir_search})
|
||||
message(STATUS "found ${pkg_config_libdir}/pkgconfig" )
|
||||
if(pkg_config_libdir)
|
||||
SET(pkg_config_install_dir ${pkg_config_libdir})
|
||||
message(STATUS "found ${pkg_config_libdir}/pkgconfig" )
|
||||
else(pkg_config_libdir)
|
||||
SET(pkg_config_install_dir ${CMAKE_INSTALL_PREFIX}/share)
|
||||
message(STATUS "pkgconfig not found; installing in ${pkg_config_install_dir}" )
|
||||
endif(pkg_config_libdir)
|
||||
|
||||
configure_file(eigen3.pc.in eigen3.pc)
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc
|
||||
DESTINATION ${pkg_config_libdir}/pkgconfig
|
||||
DESTINATION ${pkg_config_install_dir}/pkgconfig
|
||||
)
|
||||
endif(EIGEN_BUILD_PKGCONFIG)
|
||||
|
||||
@@ -295,44 +307,9 @@ add_subdirectory(Eigen)
|
||||
|
||||
add_subdirectory(doc EXCLUDE_FROM_ALL)
|
||||
|
||||
add_custom_target(buildtests)
|
||||
add_custom_target(check COMMAND "ctest")
|
||||
add_dependencies(check buildtests)
|
||||
|
||||
# CMake/Ctest does not allow us to change the build command,
|
||||
# so we have to workaround by directly editing the generated DartConfiguration.tcl file
|
||||
# save CMAKE_MAKE_PROGRAM
|
||||
set(CMAKE_MAKE_PROGRAM_SAVE ${CMAKE_MAKE_PROGRAM})
|
||||
# and set a fake one
|
||||
set(CMAKE_MAKE_PROGRAM "@EIGEN_MAKECOMMAND_PLACEHOLDER@")
|
||||
|
||||
include(CTest)
|
||||
include(EigenConfigureTesting)
|
||||
# fixme, not sure this line is still needed:
|
||||
enable_testing() # must be called from the root CMakeLists, see man page
|
||||
include(EigenTesting)
|
||||
ei_init_testing()
|
||||
|
||||
# overwrite default DartConfiguration.tcl
|
||||
# The worarounds are different for each version of the MSVC IDE
|
||||
if(MSVC_IDE)
|
||||
if(MSVC_VERSION EQUAL 1600) # MSVC 2010
|
||||
set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} buildtests.vcxproj /p:Configuration=\${CTEST_CONFIGURATION_TYPE} \n # ")
|
||||
else() # MSVC 2008 (TODO check MSVC 2005)
|
||||
set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} /project buildtests")
|
||||
endif()
|
||||
else()
|
||||
# for make and nmake
|
||||
set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} buildtests")
|
||||
endif()
|
||||
|
||||
configure_file(${CMAKE_BINARY_DIR}/DartConfiguration.tcl ${CMAKE_BINARY_DIR}/DartConfiguration.tcl)
|
||||
# restore default CMAKE_MAKE_PROGRAM
|
||||
set(CMAKE_MAKE_PROGRAM ${CMAKE_MAKE_PROGRAM_SAVE})
|
||||
# un-set temporary variables so that it is like they never existed.
|
||||
# CMake 2.6.3 introduces the more logical unset() syntax for this.
|
||||
set(CMAKE_MAKE_PROGRAM_SAVE)
|
||||
set(EIGEN_MAKECOMMAND_PLACEHOLDER)
|
||||
|
||||
configure_file(${CMAKE_SOURCE_DIR}/CTestCustom.cmake.in ${CMAKE_BINARY_DIR}/CTestCustom.cmake)
|
||||
|
||||
|
||||
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
|
||||
@@ -341,15 +318,13 @@ else()
|
||||
add_subdirectory(test EXCLUDE_FROM_ALL)
|
||||
endif()
|
||||
|
||||
if(NOT MSVC)
|
||||
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
|
||||
add_subdirectory(blas)
|
||||
add_subdirectory(lapack)
|
||||
else()
|
||||
add_subdirectory(blas EXCLUDE_FROM_ALL)
|
||||
add_subdirectory(lapack EXCLUDE_FROM_ALL)
|
||||
endif()
|
||||
endif(NOT MSVC)
|
||||
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
|
||||
add_subdirectory(blas)
|
||||
add_subdirectory(lapack)
|
||||
else()
|
||||
add_subdirectory(blas EXCLUDE_FROM_ALL)
|
||||
add_subdirectory(lapack EXCLUDE_FROM_ALL)
|
||||
endif()
|
||||
|
||||
add_subdirectory(unsupported)
|
||||
|
||||
|
||||
26
COPYING.BSD
Normal file
26
COPYING.BSD
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
@@ -24,6 +24,9 @@ namespace Eigen {
|
||||
#include "src/misc/Solve.h"
|
||||
#include "src/Cholesky/LLT.h"
|
||||
#include "src/Cholesky/LDLT.h"
|
||||
#ifdef EIGEN_USE_LAPACKE
|
||||
#include "src/Cholesky/LLT_MKL.h"
|
||||
#endif
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
|
||||
34
Eigen/CholmodSupport
Normal file
34
Eigen/CholmodSupport
Normal file
@@ -0,0 +1,34 @@
|
||||
#ifndef EIGEN_CHOLMODSUPPORT_MODULE_H
|
||||
#define EIGEN_CHOLMODSUPPORT_MODULE_H
|
||||
|
||||
#include "SparseCore"
|
||||
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
extern "C" {
|
||||
#include <cholmod.h>
|
||||
}
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \ingroup Support_modules
|
||||
* \defgroup CholmodSupport_Module CholmodSupport module
|
||||
*
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/CholmodSupport>
|
||||
* \endcode
|
||||
*/
|
||||
|
||||
#include "src/misc/Solve.h"
|
||||
#include "src/misc/SparseSolve.h"
|
||||
|
||||
#include "src/CholmodSupport/CholmodSupport.h"
|
||||
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#endif // EIGEN_CHOLMODSUPPORT_MODULE_H
|
||||
|
||||
30
Eigen/Core
30
Eigen/Core
@@ -34,6 +34,10 @@
|
||||
// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
|
||||
#include "src/Core/util/Macros.h"
|
||||
|
||||
// this include file manages BLAS and MKL related macros
|
||||
// and inclusion of their respective header files
|
||||
#include "src/Core/util/MKL_support.h"
|
||||
|
||||
// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into
|
||||
// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks
|
||||
#if !EIGEN_ALIGN
|
||||
@@ -167,7 +171,7 @@
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_NO_EXCEPTIONS)
|
||||
#if defined(_CPPUNWIND) || defined(__EXCEPTIONS)
|
||||
#define EIGEN_EXCEPTIONS
|
||||
#endif
|
||||
|
||||
@@ -175,9 +179,6 @@
|
||||
#include <new>
|
||||
#endif
|
||||
|
||||
// defined in bits/termios.h
|
||||
#undef B0
|
||||
|
||||
/** \brief Namespace containing all symbols from the %Eigen library. */
|
||||
namespace Eigen {
|
||||
|
||||
@@ -247,6 +248,10 @@ using std::ptrdiff_t;
|
||||
* \endcode
|
||||
*/
|
||||
|
||||
/** \defgroup Support_modules Support modules [category]
|
||||
* Category of modules which add support for external libraries.
|
||||
*/
|
||||
|
||||
#include "src/Core/util/Constants.h"
|
||||
#include "src/Core/util/ForwardDeclarations.h"
|
||||
#include "src/Core/util/Meta.h"
|
||||
@@ -318,7 +323,7 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/CommaInitializer.h"
|
||||
#include "src/Core/Flagged.h"
|
||||
#include "src/Core/ProductBase.h"
|
||||
#include "src/Core/Product.h"
|
||||
#include "src/Core/GeneralProduct.h"
|
||||
#include "src/Core/TriangularMatrix.h"
|
||||
#include "src/Core/SelfAdjointView.h"
|
||||
#include "src/Core/SolveTriangular.h"
|
||||
@@ -347,6 +352,21 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/ArrayBase.h"
|
||||
#include "src/Core/ArrayWrapper.h"
|
||||
|
||||
#ifdef EIGEN_USE_BLAS
|
||||
#include "src/Core/products/GeneralMatrixMatrix_MKL.h"
|
||||
#include "src/Core/products/GeneralMatrixVector_MKL.h"
|
||||
#include "src/Core/products/GeneralMatrixMatrixTriangular_MKL.h"
|
||||
#include "src/Core/products/SelfadjointMatrixMatrix_MKL.h"
|
||||
#include "src/Core/products/SelfadjointMatrixVector_MKL.h"
|
||||
#include "src/Core/products/TriangularMatrixMatrix_MKL.h"
|
||||
#include "src/Core/products/TriangularMatrixVector_MKL.h"
|
||||
#include "src/Core/products/TriangularSolverMatrix_MKL.h"
|
||||
#endif // EIGEN_USE_BLAS
|
||||
|
||||
#ifdef EIGEN_USE_MKL_VML
|
||||
#include "src/Core/Assign_MKL.h"
|
||||
#endif
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#include "src/Core/GlobalFunctions.h"
|
||||
|
||||
@@ -33,7 +33,8 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \defgroup Eigen2Support_Module Eigen2 support module
|
||||
/** \ingroup Support_modules
|
||||
* \defgroup Eigen2Support_Module Eigen2 support module
|
||||
* This module provides a couple of deprecated functions improving the compatibility with Eigen2.
|
||||
*
|
||||
* To use it, define EIGEN2_SUPPORT before including any Eigen header
|
||||
@@ -63,6 +64,24 @@ namespace Eigen {
|
||||
// Eigen2 used to include iostream
|
||||
#include<iostream>
|
||||
|
||||
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
|
||||
using Eigen::Matrix##SizeSuffix##TypeSuffix; \
|
||||
using Eigen::Vector##SizeSuffix##TypeSuffix; \
|
||||
using Eigen::RowVector##SizeSuffix##TypeSuffix;
|
||||
|
||||
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
|
||||
|
||||
#define EIGEN_USING_MATRIX_TYPEDEFS \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd)
|
||||
|
||||
#define USING_PART_OF_NAMESPACE_EIGEN \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS \
|
||||
using Eigen::Matrix; \
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "Jacobi"
|
||||
#include "Householder"
|
||||
#include "LU"
|
||||
#include "Geometry"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
@@ -35,6 +36,11 @@ namespace Eigen {
|
||||
#include "src/Eigenvalues/ComplexSchur.h"
|
||||
#include "src/Eigenvalues/ComplexEigenSolver.h"
|
||||
#include "src/Eigenvalues/MatrixBaseEigenvalues.h"
|
||||
#ifdef EIGEN_USE_LAPACKE
|
||||
#include "src/Eigenvalues/RealSchur_MKL.h"
|
||||
#include "src/Eigenvalues/ComplexSchur_MKL.h"
|
||||
#include "src/Eigenvalues/SelfAdjointEigenSolver_MKL.h"
|
||||
#endif
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
|
||||
37
Eigen/IterativeLinearSolvers
Normal file
37
Eigen/IterativeLinearSolvers
Normal file
@@ -0,0 +1,37 @@
|
||||
#ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
|
||||
#define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
|
||||
|
||||
#include "SparseCore"
|
||||
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \ingroup Sparse_modules
|
||||
* \defgroup IterativeLinearSolvers_Module IterativeLinearSolvers module
|
||||
*
|
||||
* This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a squared matrix, usually very large and sparse.
|
||||
* Those solvers are accessible via the following classes:
|
||||
* - ConjugateGradient for selfadjoint (hermitian) matrices,
|
||||
* - BiCGSTAB for general square matrices.
|
||||
*
|
||||
* Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport.
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/IterativeLinearSolvers>
|
||||
* \endcode
|
||||
*/
|
||||
|
||||
#include "src/misc/Solve.h"
|
||||
#include "src/misc/SparseSolve.h"
|
||||
|
||||
#include "src/IterativeLinearSolvers/IterativeSolverBase.h"
|
||||
#include "src/IterativeLinearSolvers/BasicPreconditioners.h"
|
||||
#include "src/IterativeLinearSolvers/ConjugateGradient.h"
|
||||
#include "src/IterativeLinearSolvers/BiCGSTAB.h"
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#endif // EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
|
||||
3
Eigen/LU
3
Eigen/LU
@@ -23,6 +23,9 @@ namespace Eigen {
|
||||
#include "src/misc/Image.h"
|
||||
#include "src/LU/FullPivLU.h"
|
||||
#include "src/LU/PartialPivLU.h"
|
||||
#ifdef EIGEN_USE_LAPACKE
|
||||
#include "src/LU/PartialPivLU_MKL.h"
|
||||
#endif
|
||||
#include "src/LU/Determinant.h"
|
||||
#include "src/LU/Inverse.h"
|
||||
|
||||
|
||||
27
Eigen/OrderingMethods
Normal file
27
Eigen/OrderingMethods
Normal file
@@ -0,0 +1,27 @@
|
||||
#ifndef EIGEN_ORDERINGMETHODS_MODULE_H
|
||||
#define EIGEN_ORDERINGMETHODS_MODULE_H
|
||||
|
||||
#include "SparseCore"
|
||||
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \ingroup Sparse_modules
|
||||
* \defgroup OrderingMethods_Module OrderingMethods module
|
||||
*
|
||||
* This module is currently for internal use only.
|
||||
*
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/OrderingMethods>
|
||||
* \endcode
|
||||
*/
|
||||
|
||||
#include "src/OrderingMethods/Amd.h"
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#endif // EIGEN_ORDERINGMETHODS_MODULE_H
|
||||
30
Eigen/PARDISOSupport
Normal file
30
Eigen/PARDISOSupport
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef EIGEN_PARDISOSUPPORT_MODULE_H
|
||||
#define EIGEN_PARDISOSUPPORT_MODULE_H
|
||||
|
||||
#include "SparseCore"
|
||||
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
#include <mkl_pardiso.h>
|
||||
|
||||
#include <unsupported/Eigen/SparseExtra>
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \ingroup Support_modules
|
||||
* \defgroup PARDISOSupport_Module PARDISOSupport module
|
||||
*
|
||||
* This module brings support for the Intel(R) MKL PARDISO direct sparse solvers
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/PARDISOSupport>
|
||||
* \endcode
|
||||
*/
|
||||
|
||||
#include "src/PARDISOSupport/PARDISOSupport.h"
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#endif // EIGEN_PARDISOSUPPORT_MODULE_H
|
||||
4
Eigen/QR
4
Eigen/QR
@@ -28,6 +28,10 @@ namespace Eigen {
|
||||
#include "src/QR/HouseholderQR.h"
|
||||
#include "src/QR/FullPivHouseholderQR.h"
|
||||
#include "src/QR/ColPivHouseholderQR.h"
|
||||
#ifdef EIGEN_USE_LAPACKE
|
||||
#include "src/QR/HouseholderQR_MKL.h"
|
||||
#include "src/QR/ColPivHouseholderQR_MKL.h"
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
#include "src/Eigen2Support/QR.h"
|
||||
|
||||
@@ -13,9 +13,9 @@ namespace Eigen {
|
||||
*
|
||||
*
|
||||
*
|
||||
* This module provides SVD decomposition for (currently) real matrices.
|
||||
* This module provides SVD decomposition for matrices (both real and complex).
|
||||
* This decomposition is accessible via the following MatrixBase method:
|
||||
* - MatrixBase::svd()
|
||||
* - MatrixBase::jacobiSvd()
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/SVD>
|
||||
@@ -24,6 +24,9 @@ namespace Eigen {
|
||||
|
||||
#include "src/misc/Solve.h"
|
||||
#include "src/SVD/JacobiSVD.h"
|
||||
#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
|
||||
#include "src/SVD/JacobiSVD_MKL.h"
|
||||
#endif
|
||||
#include "src/SVD/UpperBidiagonalization.h"
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
|
||||
62
Eigen/Sparse
62
Eigen/Sparse
@@ -1,69 +1,27 @@
|
||||
#ifndef EIGEN_SPARSE_MODULE_H
|
||||
#define EIGEN_SPARSE_MODULE_H
|
||||
|
||||
#include "Core"
|
||||
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
#define EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET
|
||||
#error The sparse module API is not stable yet. To use it anyway, please define the EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET preprocessor token.
|
||||
#endif
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \defgroup Sparse_Module Sparse module
|
||||
/** \defgroup Sparse_modules Sparse modules
|
||||
*
|
||||
*
|
||||
*
|
||||
* See the \ref TutorialSparse "Sparse tutorial"
|
||||
* Meta-module including all related modules:
|
||||
* - SparseCore
|
||||
* - OrderingMethods
|
||||
* - SparseCholesky
|
||||
* - IterativeLinearSolvers
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/Sparse>
|
||||
* \endcode
|
||||
*/
|
||||
|
||||
/** The type used to identify a general sparse storage. */
|
||||
struct Sparse {};
|
||||
|
||||
#include "src/Sparse/SparseUtil.h"
|
||||
#include "src/Sparse/SparseMatrixBase.h"
|
||||
#include "src/Sparse/CompressedStorage.h"
|
||||
#include "src/Sparse/AmbiVector.h"
|
||||
#include "src/Sparse/SparseMatrix.h"
|
||||
#include "src/Sparse/DynamicSparseMatrix.h"
|
||||
#include "src/Sparse/MappedSparseMatrix.h"
|
||||
#include "src/Sparse/SparseVector.h"
|
||||
#include "src/Sparse/CoreIterators.h"
|
||||
#include "src/Sparse/SparseBlock.h"
|
||||
#include "src/Sparse/SparseTranspose.h"
|
||||
#include "src/Sparse/SparseCwiseUnaryOp.h"
|
||||
#include "src/Sparse/SparseCwiseBinaryOp.h"
|
||||
#include "src/Sparse/SparseDot.h"
|
||||
#include "src/Sparse/SparseAssign.h"
|
||||
#include "src/Sparse/SparseRedux.h"
|
||||
#include "src/Sparse/SparseFuzzy.h"
|
||||
#include "src/Sparse/SparseProduct.h"
|
||||
#include "src/Sparse/SparseSparseProduct.h"
|
||||
#include "src/Sparse/SparseDenseProduct.h"
|
||||
#include "src/Sparse/SparseDiagonalProduct.h"
|
||||
#include "src/Sparse/SparseTriangularView.h"
|
||||
#include "src/Sparse/SparseSelfAdjointView.h"
|
||||
#include "src/Sparse/TriangularSolver.h"
|
||||
#include "src/Sparse/SparseView.h"
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
#include "SparseCore"
|
||||
#include "OrderingMethods"
|
||||
#include "SparseCholesky"
|
||||
#include "IterativeLinearSolvers"
|
||||
|
||||
#endif // EIGEN_SPARSE_MODULE_H
|
||||
|
||||
|
||||
34
Eigen/SparseCholesky
Normal file
34
Eigen/SparseCholesky
Normal file
@@ -0,0 +1,34 @@
|
||||
#ifndef EIGEN_SPARSECHOLESKY_MODULE_H
|
||||
#define EIGEN_SPARSECHOLESKY_MODULE_H
|
||||
|
||||
#include "SparseCore"
|
||||
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \ingroup Sparse_modules
|
||||
* \defgroup SparseCholesky_Module SparseCholesky module
|
||||
*
|
||||
* This module currently provides two variants of the direct sparse Cholesky decomposition for selfadjoint (hermitian) matrices.
|
||||
* Those decompositions are accessible via the following classes:
|
||||
* - SimplicialLLt,
|
||||
* - SimplicialLDLt
|
||||
*
|
||||
* Such problems can also be solved using the ConjugateGradient solver from the IterativeLinearSolvers module.
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/SparseCholesky>
|
||||
* \endcode
|
||||
*/
|
||||
|
||||
#include "src/misc/Solve.h"
|
||||
#include "src/misc/SparseSolve.h"
|
||||
|
||||
#include "src/SparseCholesky/SimplicialCholesky.h"
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#endif // EIGEN_SPARSECHOLESKY_MODULE_H
|
||||
65
Eigen/SparseCore
Normal file
65
Eigen/SparseCore
Normal file
@@ -0,0 +1,65 @@
|
||||
#ifndef EIGEN_SPARSECORE_MODULE_H
|
||||
#define EIGEN_SPARSECORE_MODULE_H
|
||||
|
||||
#include "Core"
|
||||
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \ingroup Sparse_modules
|
||||
* \defgroup SparseCore_Module SparseCore module
|
||||
*
|
||||
* This module provides a sparse matrix representation, and basic associatd matrix manipulations
|
||||
* and operations.
|
||||
*
|
||||
* See the \ref TutorialSparse "Sparse tutorial"
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/SparseCore>
|
||||
* \endcode
|
||||
*
|
||||
* This module depends on: Core.
|
||||
*/
|
||||
|
||||
/** The type used to identify a general sparse storage. */
|
||||
struct Sparse {};
|
||||
|
||||
#include "src/SparseCore/SparseUtil.h"
|
||||
#include "src/SparseCore/SparseMatrixBase.h"
|
||||
#include "src/SparseCore/CompressedStorage.h"
|
||||
#include "src/SparseCore/AmbiVector.h"
|
||||
#include "src/SparseCore/SparseMatrix.h"
|
||||
#include "src/SparseCore/MappedSparseMatrix.h"
|
||||
#include "src/SparseCore/SparseVector.h"
|
||||
#include "src/SparseCore/CoreIterators.h"
|
||||
#include "src/SparseCore/SparseBlock.h"
|
||||
#include "src/SparseCore/SparseTranspose.h"
|
||||
#include "src/SparseCore/SparseCwiseUnaryOp.h"
|
||||
#include "src/SparseCore/SparseCwiseBinaryOp.h"
|
||||
#include "src/SparseCore/SparseDot.h"
|
||||
#include "src/SparseCore/SparseAssign.h"
|
||||
#include "src/SparseCore/SparseRedux.h"
|
||||
#include "src/SparseCore/SparseFuzzy.h"
|
||||
#include "src/SparseCore/ConservativeSparseSparseProduct.h"
|
||||
#include "src/SparseCore/SparseSparseProductWithPruning.h"
|
||||
#include "src/SparseCore/SparseProduct.h"
|
||||
#include "src/SparseCore/SparseDenseProduct.h"
|
||||
#include "src/SparseCore/SparseDiagonalProduct.h"
|
||||
#include "src/SparseCore/SparseTriangularView.h"
|
||||
#include "src/SparseCore/SparseSelfAdjointView.h"
|
||||
#include "src/SparseCore/TriangularSolver.h"
|
||||
#include "src/SparseCore/SparseView.h"
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#endif // EIGEN_SPARSECORE_MODULE_H
|
||||
|
||||
53
Eigen/SuperLUSupport
Normal file
53
Eigen/SuperLUSupport
Normal file
@@ -0,0 +1,53 @@
|
||||
#ifndef EIGEN_SUPERLUSUPPORT_MODULE_H
|
||||
#define EIGEN_SUPERLUSUPPORT_MODULE_H
|
||||
|
||||
#include "SparseCore"
|
||||
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
#ifdef EMPTY
|
||||
#define EIGEN_EMPTY_WAS_ALREADY_DEFINED
|
||||
#endif
|
||||
|
||||
typedef int int_t;
|
||||
#include <slu_Cnames.h>
|
||||
#include <supermatrix.h>
|
||||
#include <slu_util.h>
|
||||
|
||||
// slu_util.h defines a preprocessor token named EMPTY which is really polluting,
|
||||
// so we remove it in favor of a SUPERLU_EMPTY token.
|
||||
// If EMPTY was already, defined then we don't undef it.
|
||||
|
||||
#if defined(EIGEN_EMPTY_WAS_ALREADY_DEFINED)
|
||||
# undef EIGEN_EMPTY_WAS_ALREADY_DEFINED
|
||||
#elif defined(EMPTY)
|
||||
# undef EMPTY
|
||||
#endif
|
||||
|
||||
#define SUPERLU_EMPTY (-1)
|
||||
|
||||
namespace Eigen { struct SluMatrix; }
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \ingroup Support_modules
|
||||
* \defgroup SuperLUSupport_Module SuperLUSupport module
|
||||
*
|
||||
* \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting.
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/SuperLUSupport>
|
||||
* \endcode
|
||||
*/
|
||||
|
||||
#include "src/misc/Solve.h"
|
||||
#include "src/misc/SparseSolve.h"
|
||||
|
||||
#include "src/SuperLUSupport/SuperLUSupport.h"
|
||||
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#endif // EIGEN_SUPERLUSUPPORT_MODULE_H
|
||||
34
Eigen/UmfPackSupport
Normal file
34
Eigen/UmfPackSupport
Normal file
@@ -0,0 +1,34 @@
|
||||
#ifndef EIGEN_UMFPACKSUPPORT_MODULE_H
|
||||
#define EIGEN_UMFPACKSUPPORT_MODULE_H
|
||||
|
||||
#include "SparseCore"
|
||||
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
extern "C" {
|
||||
#include <umfpack.h>
|
||||
}
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \ingroup Support_modules
|
||||
* \defgroup UmfPackSupport_Module UmfPackSupport module
|
||||
*
|
||||
*
|
||||
*
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/UmfPackSupport>
|
||||
* \endcode
|
||||
*/
|
||||
|
||||
#include "src/misc/Solve.h"
|
||||
#include "src/misc/SparseSolve.h"
|
||||
|
||||
#include "src/UmfPackSupport/UmfPackSupport.h"
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
#endif // EIGEN_UMFPACKSUPPORT_MODULE_H
|
||||
@@ -1,9 +1,10 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2009 Keir Mierle <mierle@gmail.com>
|
||||
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2011 Timothy E. Holy <tim.holy@gmail.com >
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
@@ -31,13 +32,15 @@ namespace internal {
|
||||
template<typename MatrixType, int UpLo> struct LDLT_Traits;
|
||||
}
|
||||
|
||||
/** \ingroup cholesky_Module
|
||||
/** \ingroup Cholesky_Module
|
||||
*
|
||||
* \class LDLT
|
||||
*
|
||||
* \brief Robust Cholesky decomposition of a matrix with pivoting
|
||||
*
|
||||
* \param MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
|
||||
* \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
|
||||
* The other triangular part won't be read.
|
||||
*
|
||||
* Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite
|
||||
* matrix \f$ A \f$ such that \f$ A = P^TLDL^*P \f$, where P is a permutation matrix, L
|
||||
@@ -48,14 +51,10 @@ template<typename MatrixType, int UpLo> struct LDLT_Traits;
|
||||
* on D also stabilizes the computation.
|
||||
*
|
||||
* Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky
|
||||
* decomposition to determine whether a system of equations has a solution.
|
||||
* decomposition to determine whether a system of equations has a solution.
|
||||
*
|
||||
* \sa MatrixBase::ldlt(), class LLT
|
||||
*/
|
||||
/* THIS PART OF THE DOX IS CURRENTLY DISABLED BECAUSE INACCURATE BECAUSE OF BUG IN THE DECOMPOSITION CODE
|
||||
* Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
|
||||
* the strict lower part does not have to store correct values.
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo> class LDLT
|
||||
{
|
||||
public:
|
||||
@@ -98,6 +97,11 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
m_isInitialized(false)
|
||||
{}
|
||||
|
||||
/** \brief Constructor with decomposition
|
||||
*
|
||||
* This calculates the decomposition for the input \a matrix.
|
||||
* \sa LDLT(Index size)
|
||||
*/
|
||||
LDLT(const MatrixType& matrix)
|
||||
: m_matrix(matrix.rows(), matrix.cols()),
|
||||
m_transpositions(matrix.rows()),
|
||||
@@ -107,6 +111,14 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
compute(matrix);
|
||||
}
|
||||
|
||||
/** Clear any existing decomposition
|
||||
* \sa rankUpdate(w,sigma)
|
||||
*/
|
||||
void setZero()
|
||||
{
|
||||
m_isInitialized = false;
|
||||
}
|
||||
|
||||
/** \returns a view of the upper triangular matrix U */
|
||||
inline typename Traits::MatrixU matrixU() const
|
||||
{
|
||||
@@ -130,14 +142,14 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
}
|
||||
|
||||
/** \returns the coefficients of the diagonal matrix D */
|
||||
inline Diagonal<const MatrixType> vectorD(void) const
|
||||
inline Diagonal<const MatrixType> vectorD() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
return m_matrix.diagonal();
|
||||
}
|
||||
|
||||
/** \returns true if the matrix is positive (semidefinite) */
|
||||
inline bool isPositive(void) const
|
||||
inline bool isPositive() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
return m_sign == 1;
|
||||
@@ -196,6 +208,9 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
|
||||
LDLT& compute(const MatrixType& matrix);
|
||||
|
||||
template <typename Derived>
|
||||
LDLT& rankUpdate(const MatrixBase<Derived>& w,RealScalar alpha=1);
|
||||
|
||||
/** \returns the internal LDLT decomposition matrix
|
||||
*
|
||||
* TODO: document the storage layout
|
||||
@@ -211,6 +226,17 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
|
||||
/** \brief Reports whether previous computation was successful.
|
||||
*
|
||||
* \returns \c Success if computation was succesful,
|
||||
* \c NumericalIssue if the matrix.appears to be negative.
|
||||
*/
|
||||
ComputationInfo info() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
return Success;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
/** \internal
|
||||
@@ -249,7 +275,7 @@ template<> struct ldlt_inplace<Lower>
|
||||
return true;
|
||||
}
|
||||
|
||||
RealScalar cutoff = 0, biggest_in_corner;
|
||||
RealScalar cutoff(0), biggest_in_corner;
|
||||
|
||||
for (Index k = 0; k < size; ++k)
|
||||
{
|
||||
@@ -317,6 +343,61 @@ template<> struct ldlt_inplace<Lower>
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reference for the algorithm: Davis and Hager, "Multiple Rank
|
||||
// Modifications of a Sparse Cholesky Factorization" (Algorithm 1)
|
||||
// Trivial rearrangements of their computations (Timothy E. Holy)
|
||||
// allow their algorithm to work for rank-1 updates even if the
|
||||
// original matrix is not of full rank.
|
||||
// Here only rank-1 updates are implemented, to reduce the
|
||||
// requirement for intermediate storage and improve accuracy
|
||||
template<typename MatrixType, typename WDerived>
|
||||
static bool updateInPlace(MatrixType& mat, MatrixBase<WDerived>& w, typename MatrixType::RealScalar sigma=1)
|
||||
{
|
||||
using internal::isfinite;
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
typedef typename MatrixType::Index Index;
|
||||
|
||||
const Index size = mat.rows();
|
||||
eigen_assert(mat.cols() == size && w.size()==size);
|
||||
|
||||
RealScalar alpha = 1;
|
||||
|
||||
// Apply the update
|
||||
for (Index j = 0; j < size; j++)
|
||||
{
|
||||
// Check for termination due to an original decomposition of low-rank
|
||||
if (!isfinite(alpha))
|
||||
break;
|
||||
|
||||
// Update the diagonal terms
|
||||
RealScalar dj = real(mat.coeff(j,j));
|
||||
Scalar wj = w.coeff(j);
|
||||
RealScalar swj2 = sigma*abs2(wj);
|
||||
RealScalar gamma = dj*alpha + swj2;
|
||||
|
||||
mat.coeffRef(j,j) += swj2/alpha;
|
||||
alpha += swj2/dj;
|
||||
|
||||
|
||||
// Update the terms of L
|
||||
Index rs = size-j-1;
|
||||
w.tail(rs) -= wj * mat.col(j).tail(rs);
|
||||
if(gamma != 0)
|
||||
mat.col(j).tail(rs) += (sigma*conj(wj)/gamma)*w.tail(rs);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
|
||||
static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, typename MatrixType::RealScalar sigma=1)
|
||||
{
|
||||
// Apply the permutation to the input w
|
||||
tmp = transpositions * w;
|
||||
|
||||
return ldlt_inplace<Lower>::updateInPlace(mat,tmp,sigma);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct ldlt_inplace<Upper>
|
||||
@@ -327,22 +408,29 @@ template<> struct ldlt_inplace<Upper>
|
||||
Transpose<MatrixType> matt(mat);
|
||||
return ldlt_inplace<Lower>::unblocked(matt, transpositions, temp, sign);
|
||||
}
|
||||
|
||||
template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
|
||||
static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, typename MatrixType::RealScalar sigma=1)
|
||||
{
|
||||
Transpose<MatrixType> matt(mat);
|
||||
return ldlt_inplace<Lower>::update(matt, transpositions, tmp, w.conjugate(), sigma);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename MatrixType> struct LDLT_Traits<MatrixType,Lower>
|
||||
{
|
||||
typedef TriangularView<MatrixType, UnitLower> MatrixL;
|
||||
typedef TriangularView<typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;
|
||||
inline static MatrixL getL(const MatrixType& m) { return m; }
|
||||
inline static MatrixU getU(const MatrixType& m) { return m.adjoint(); }
|
||||
typedef TriangularView<const MatrixType, UnitLower> MatrixL;
|
||||
typedef TriangularView<const typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;
|
||||
static inline MatrixL getL(const MatrixType& m) { return m; }
|
||||
static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
|
||||
};
|
||||
|
||||
template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
|
||||
{
|
||||
typedef TriangularView<typename MatrixType::AdjointReturnType, UnitLower> MatrixL;
|
||||
typedef TriangularView<MatrixType, UnitUpper> MatrixU;
|
||||
inline static MatrixL getL(const MatrixType& m) { return m.adjoint(); }
|
||||
inline static MatrixU getU(const MatrixType& m) { return m; }
|
||||
typedef TriangularView<const typename MatrixType::AdjointReturnType, UnitLower> MatrixL;
|
||||
typedef TriangularView<const MatrixType, UnitUpper> MatrixU;
|
||||
static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); }
|
||||
static inline MatrixU getU(const MatrixType& m) { return m; }
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
@@ -367,6 +455,37 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** Update the LDLT decomposition: given A = L D L^T, efficiently compute the decomposition of A + sigma w w^T.
|
||||
* \param w a vector to be incorporated into the decomposition.
|
||||
* \param sigma a scalar, +1 for updates and -1 for "downdates," which correspond to removing previously-added column vectors. Optional; default value is +1.
|
||||
* \sa setZero()
|
||||
*/
|
||||
template<typename MatrixType, int _UpLo>
|
||||
template<typename Derived>
|
||||
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w,typename NumTraits<typename MatrixType::Scalar>::Real sigma)
|
||||
{
|
||||
const Index size = w.rows();
|
||||
if (m_isInitialized)
|
||||
{
|
||||
eigen_assert(m_matrix.rows()==size);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_matrix.resize(size,size);
|
||||
m_matrix.setZero();
|
||||
m_transpositions.resize(size);
|
||||
for (Index i = 0; i < size; i++)
|
||||
m_transpositions.coeffRef(i) = i;
|
||||
m_temporary.resize(size);
|
||||
m_sign = sigma;
|
||||
m_isInitialized = true;
|
||||
}
|
||||
|
||||
internal::ldlt_inplace<UpLo>::update(m_matrix, m_transpositions, m_temporary, w, sigma);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
template<typename _MatrixType, int _UpLo, typename Rhs>
|
||||
struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
|
||||
|
||||
@@ -29,13 +29,15 @@ namespace internal{
|
||||
template<typename MatrixType, int UpLo> struct LLT_Traits;
|
||||
}
|
||||
|
||||
/** \ingroup cholesky_Module
|
||||
/** \ingroup Cholesky_Module
|
||||
*
|
||||
* \class LLT
|
||||
*
|
||||
* \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features
|
||||
*
|
||||
* \param MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
|
||||
* \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
|
||||
* The other triangular part won't be read.
|
||||
*
|
||||
* This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
|
||||
* matrix A such that A = LL^* = U^*U, where L is lower triangular.
|
||||
@@ -49,6 +51,9 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
|
||||
* use LDLT instead for the semidefinite case. Also, do not use a Cholesky decomposition to determine whether a system of equations
|
||||
* has a solution.
|
||||
*
|
||||
* Example: \include LLT_example.cpp
|
||||
* Output: \verbinclude LLT_example.out
|
||||
*
|
||||
* \sa MatrixBase::llt(), class LDLT
|
||||
*/
|
||||
/* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
|
||||
@@ -178,6 +183,9 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
|
||||
template<typename VectorType>
|
||||
LLT& rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);
|
||||
|
||||
protected:
|
||||
/** \internal
|
||||
* Used to compute and store L
|
||||
@@ -190,16 +198,15 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<int UpLo> struct llt_inplace;
|
||||
template<typename Scalar, int UpLo> struct llt_inplace;
|
||||
|
||||
template<> struct llt_inplace<Lower>
|
||||
template<typename Scalar> struct llt_inplace<Scalar, Lower>
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
template<typename MatrixType>
|
||||
static typename MatrixType::Index unblocked(MatrixType& mat)
|
||||
{
|
||||
typedef typename MatrixType::Index Index;
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
|
||||
eigen_assert(mat.rows()==mat.cols());
|
||||
const Index size = mat.rows();
|
||||
@@ -254,55 +261,133 @@ template<> struct llt_inplace<Lower>
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
template<typename MatrixType, typename VectorType>
|
||||
static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
|
||||
{
|
||||
typedef typename MatrixType::Index Index;
|
||||
typedef typename MatrixType::ColXpr ColXpr;
|
||||
typedef typename internal::remove_all<ColXpr>::type ColXprCleaned;
|
||||
typedef typename ColXprCleaned::SegmentReturnType ColXprSegment;
|
||||
typedef Matrix<Scalar,Dynamic,1> TempVectorType;
|
||||
typedef typename TempVectorType::SegmentReturnType TempVecSegment;
|
||||
|
||||
int n = mat.cols();
|
||||
eigen_assert(mat.rows()==n && vec.size()==n);
|
||||
|
||||
TempVectorType temp;
|
||||
|
||||
if(sigma>0)
|
||||
{
|
||||
// This version is based on Givens rotations.
|
||||
// It is faster than the other one below, but only works for updates,
|
||||
// i.e., for sigma > 0
|
||||
temp = sqrt(sigma) * vec;
|
||||
|
||||
for(int i=0; i<n; ++i)
|
||||
{
|
||||
JacobiRotation<Scalar> g;
|
||||
g.makeGivens(mat(i,i), -temp(i), &mat(i,i));
|
||||
|
||||
int rs = n-i-1;
|
||||
if(rs>0)
|
||||
{
|
||||
ColXprSegment x(mat.col(i).tail(rs));
|
||||
TempVecSegment y(temp.tail(rs));
|
||||
apply_rotation_in_the_plane(x, y, g);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
temp = vec;
|
||||
RealScalar beta = 1;
|
||||
for(int j=0; j<n; ++j)
|
||||
{
|
||||
RealScalar Ljj = real(mat.coeff(j,j));
|
||||
RealScalar dj = abs2(Ljj);
|
||||
Scalar wj = temp.coeff(j);
|
||||
RealScalar swj2 = sigma*abs2(wj);
|
||||
RealScalar gamma = dj*beta + swj2;
|
||||
|
||||
RealScalar x = dj + swj2/beta;
|
||||
if (x<=RealScalar(0))
|
||||
return j;
|
||||
RealScalar nLjj = sqrt(x);
|
||||
mat.coeffRef(j,j) = nLjj;
|
||||
beta += swj2/dj;
|
||||
|
||||
// Update the terms of L
|
||||
Index rs = n-j-1;
|
||||
if(rs)
|
||||
{
|
||||
temp.tail(rs) -= (wj/Ljj) * mat.col(j).tail(rs);
|
||||
if(gamma != 0)
|
||||
mat.col(j).tail(rs) = (nLjj/Ljj) * mat.col(j).tail(rs) + (nLjj * sigma*conj(wj)/gamma)*temp.tail(rs);
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct llt_inplace<Upper>
|
||||
template<typename Scalar> struct llt_inplace<Scalar, Upper>
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
|
||||
template<typename MatrixType>
|
||||
static EIGEN_STRONG_INLINE typename MatrixType::Index unblocked(MatrixType& mat)
|
||||
{
|
||||
Transpose<MatrixType> matt(mat);
|
||||
return llt_inplace<Lower>::unblocked(matt);
|
||||
return llt_inplace<Scalar, Lower>::unblocked(matt);
|
||||
}
|
||||
template<typename MatrixType>
|
||||
static EIGEN_STRONG_INLINE typename MatrixType::Index blocked(MatrixType& mat)
|
||||
{
|
||||
Transpose<MatrixType> matt(mat);
|
||||
return llt_inplace<Lower>::blocked(matt);
|
||||
return llt_inplace<Scalar, Lower>::blocked(matt);
|
||||
}
|
||||
template<typename MatrixType, typename VectorType>
|
||||
static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
|
||||
{
|
||||
Transpose<MatrixType> matt(mat);
|
||||
return llt_inplace<Scalar, Lower>::rankUpdate(matt, vec.conjugate(), sigma);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename MatrixType> struct LLT_Traits<MatrixType,Lower>
|
||||
{
|
||||
typedef TriangularView<MatrixType, Lower> MatrixL;
|
||||
typedef TriangularView<typename MatrixType::AdjointReturnType, Upper> MatrixU;
|
||||
inline static MatrixL getL(const MatrixType& m) { return m; }
|
||||
inline static MatrixU getU(const MatrixType& m) { return m.adjoint(); }
|
||||
typedef TriangularView<const MatrixType, Lower> MatrixL;
|
||||
typedef TriangularView<const typename MatrixType::AdjointReturnType, Upper> MatrixU;
|
||||
static inline MatrixL getL(const MatrixType& m) { return m; }
|
||||
static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
|
||||
static bool inplace_decomposition(MatrixType& m)
|
||||
{ return llt_inplace<Lower>::blocked(m)==-1; }
|
||||
{ return llt_inplace<typename MatrixType::Scalar, Lower>::blocked(m)==-1; }
|
||||
};
|
||||
|
||||
template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
|
||||
{
|
||||
typedef TriangularView<typename MatrixType::AdjointReturnType, Lower> MatrixL;
|
||||
typedef TriangularView<MatrixType, Upper> MatrixU;
|
||||
inline static MatrixL getL(const MatrixType& m) { return m.adjoint(); }
|
||||
inline static MatrixU getU(const MatrixType& m) { return m; }
|
||||
typedef TriangularView<const typename MatrixType::AdjointReturnType, Lower> MatrixL;
|
||||
typedef TriangularView<const MatrixType, Upper> MatrixU;
|
||||
static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); }
|
||||
static inline MatrixU getU(const MatrixType& m) { return m; }
|
||||
static bool inplace_decomposition(MatrixType& m)
|
||||
{ return llt_inplace<Upper>::blocked(m)==-1; }
|
||||
{ return llt_inplace<typename MatrixType::Scalar, Upper>::blocked(m)==-1; }
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix
|
||||
*
|
||||
*
|
||||
* \returns a reference to *this
|
||||
*
|
||||
* Example: \include TutorialLinAlgComputeTwice.cpp
|
||||
* Output: \verbinclude TutorialLinAlgComputeTwice.out
|
||||
*/
|
||||
template<typename MatrixType, int _UpLo>
|
||||
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
|
||||
{
|
||||
assert(a.rows()==a.cols());
|
||||
eigen_assert(a.rows()==a.cols());
|
||||
const Index size = a.rows();
|
||||
m_matrix.resize(size, size);
|
||||
m_matrix = a;
|
||||
@@ -314,6 +399,26 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** Performs a rank one update (or dowdate) of the current decomposition.
|
||||
* If A = LL^* before the rank one update,
|
||||
* then after it we have LL^* = A + sigma * v v^* where \a v must be a vector
|
||||
* of same dimension.
|
||||
*/
|
||||
template<typename MatrixType, int _UpLo>
|
||||
template<typename VectorType>
|
||||
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::rankUpdate(const VectorType& v, const RealScalar& sigma)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType);
|
||||
eigen_assert(v.size()==m_matrix.cols());
|
||||
eigen_assert(m_isInitialized);
|
||||
if(internal::llt_inplace<typename MatrixType::Scalar, UpLo>::rankUpdate(m_matrix,v,sigma)>=0)
|
||||
m_info = NumericalIssue;
|
||||
else
|
||||
m_info = Success;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
template<typename _MatrixType, int UpLo, typename Rhs>
|
||||
struct solve_retval<LLT<_MatrixType, UpLo>, Rhs>
|
||||
@@ -384,3 +489,4 @@ SelfAdjointView<MatrixType, UpLo>::llt() const
|
||||
}
|
||||
|
||||
#endif // EIGEN_LLT_H
|
||||
|
||||
|
||||
123
Eigen/src/Cholesky/LLT_MKL.h
Normal file
123
Eigen/src/Cholesky/LLT_MKL.h
Normal file
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
********************************************************************************
|
||||
* Content : Eigen bindings to Intel(R) MKL
|
||||
* LLt decomposition based on LAPACKE_?potrf function.
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_LLT_MKL_H
|
||||
#define EIGEN_LLT_MKL_H
|
||||
|
||||
#include "Eigen/src/Core/util/MKL_support.h"
|
||||
#include <iostream>
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Scalar> struct mkl_llt;
|
||||
|
||||
#define EIGEN_MKL_LLT(EIGTYPE, MKLTYPE, MKLPREFIX) \
|
||||
template<> struct mkl_llt<EIGTYPE> \
|
||||
{ \
|
||||
template<typename MatrixType> \
|
||||
static inline typename MatrixType::Index potrf(MatrixType& m, char uplo) \
|
||||
{ \
|
||||
lapack_int matrix_order; \
|
||||
lapack_int size, lda, info, StorageOrder; \
|
||||
EIGTYPE* a; \
|
||||
eigen_assert(m.rows()==m.cols()); \
|
||||
/* Set up parameters for ?potrf */ \
|
||||
size = m.rows(); \
|
||||
StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \
|
||||
matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
|
||||
a = &(m.coeffRef(0,0)); \
|
||||
lda = m.outerStride(); \
|
||||
\
|
||||
info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \
|
||||
info = (info==0) ? Success : NumericalIssue; \
|
||||
return info; \
|
||||
} \
|
||||
}; \
|
||||
template<> struct llt_inplace<EIGTYPE, Lower> \
|
||||
{ \
|
||||
template<typename MatrixType> \
|
||||
static typename MatrixType::Index blocked(MatrixType& m) \
|
||||
{ \
|
||||
return mkl_llt<EIGTYPE>::potrf(m, 'L'); \
|
||||
} \
|
||||
template<typename MatrixType, typename VectorType> \
|
||||
static void rankUpdate(MatrixType& mat, const VectorType& vec) \
|
||||
{ \
|
||||
typedef typename MatrixType::ColXpr ColXpr; \
|
||||
typedef typename internal::remove_all<ColXpr>::type ColXprCleaned; \
|
||||
typedef typename ColXprCleaned::SegmentReturnType ColXprSegment; \
|
||||
typedef typename MatrixType::Scalar Scalar; \
|
||||
typedef Matrix<Scalar,Dynamic,1> TempVectorType; \
|
||||
typedef typename TempVectorType::SegmentReturnType TempVecSegment; \
|
||||
\
|
||||
int n = mat.cols(); \
|
||||
eigen_assert(mat.rows()==n && vec.size()==n); \
|
||||
TempVectorType temp(vec); \
|
||||
\
|
||||
for(int i=0; i<n; ++i) \
|
||||
{ \
|
||||
JacobiRotation<Scalar> g; \
|
||||
g.makeGivens(mat(i,i), -temp(i), &mat(i,i)); \
|
||||
\
|
||||
int rs = n-i-1; \
|
||||
if(rs>0) \
|
||||
{ \
|
||||
ColXprSegment x(mat.col(i).tail(rs)); \
|
||||
TempVecSegment y(temp.tail(rs)); \
|
||||
apply_rotation_in_the_plane(x, y, g); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}; \
|
||||
template<> struct llt_inplace<EIGTYPE, Upper> \
|
||||
{ \
|
||||
template<typename MatrixType> \
|
||||
static typename MatrixType::Index blocked(MatrixType& m) \
|
||||
{ \
|
||||
return mkl_llt<EIGTYPE>::potrf(m, 'U'); \
|
||||
} \
|
||||
template<typename MatrixType, typename VectorType> \
|
||||
static void rankUpdate(MatrixType& mat, const VectorType& vec) \
|
||||
{ \
|
||||
Transpose<MatrixType> matt(mat); \
|
||||
return llt_inplace<EIGTYPE, Lower>::rankUpdate(matt, vec.conjugate()); \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_MKL_LLT(double, double, d)
|
||||
EIGEN_MKL_LLT(float, float, s)
|
||||
EIGEN_MKL_LLT(dcomplex, MKL_Complex16, z)
|
||||
EIGEN_MKL_LLT(scomplex, MKL_Complex8, c)
|
||||
|
||||
}
|
||||
|
||||
#endif // EIGEN_LLT_MKL_H
|
||||
6
Eigen/src/CholmodSupport/CMakeLists.txt
Normal file
6
Eigen/src/CholmodSupport/CMakeLists.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
FILE(GLOB Eigen_CholmodSupport_SRCS "*.h")
|
||||
|
||||
INSTALL(FILES
|
||||
${Eigen_CholmodSupport_SRCS}
|
||||
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/CholmodSupport COMPONENT Devel
|
||||
)
|
||||
@@ -69,11 +69,20 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
|
||||
res.nzmax = mat.nonZeros();
|
||||
res.nrow = mat.rows();;
|
||||
res.ncol = mat.cols();
|
||||
res.p = mat._outerIndexPtr();
|
||||
res.i = mat._innerIndexPtr();
|
||||
res.x = mat._valuePtr();
|
||||
res.p = mat.outerIndexPtr();
|
||||
res.i = mat.innerIndexPtr();
|
||||
res.x = mat.valuePtr();
|
||||
res.sorted = 1;
|
||||
res.packed = 1;
|
||||
if(mat.isCompressed())
|
||||
{
|
||||
res.packed = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
res.packed = 0;
|
||||
res.nz = mat.innerNonZeroPtr();
|
||||
}
|
||||
|
||||
res.dtype = 0;
|
||||
res.stype = -1;
|
||||
|
||||
@@ -149,7 +158,9 @@ enum CholmodMode {
|
||||
CholmodAuto, CholmodSimplicialLLt, CholmodSupernodalLLt, CholmodLDLt
|
||||
};
|
||||
|
||||
/** \brief A Cholesky factorization and solver based on Cholmod
|
||||
/** \ingroup CholmodSupport_Module
|
||||
* \class CholmodDecomposition
|
||||
* \brief A Cholesky factorization and solver based on Cholmod
|
||||
*
|
||||
* This class allows to solve for A.X = B sparse linear problems via a LL^T or LDL^T Cholesky factorization
|
||||
* using the Cholmod library. The sparse matrix A must be selfajoint and positive definite. The vectors or matrices
|
||||
@@ -159,6 +170,9 @@ enum CholmodMode {
|
||||
* \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
|
||||
* or Upper. Default is Lower.
|
||||
*
|
||||
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; isCompressed() or unisCompressed().
|
||||
*
|
||||
* \sa \ref TutorialSparseDirectSolvers
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo = Lower>
|
||||
class CholmodDecomposition
|
||||
@@ -68,10 +68,8 @@ class Array
|
||||
friend struct internal::conservative_resize_like_impl;
|
||||
|
||||
using Base::m_storage;
|
||||
|
||||
public:
|
||||
enum { NeedsToAlign = (!(Options&DontAlign))
|
||||
&& SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
|
||||
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
|
||||
|
||||
using Base::base;
|
||||
using Base::coeff;
|
||||
|
||||
@@ -159,7 +159,7 @@ template<typename Derived> class ArrayBase
|
||||
/** \returns an \link MatrixBase Matrix \endlink expression of this array
|
||||
* \sa MatrixBase::array() */
|
||||
MatrixWrapper<Derived> matrix() { return derived(); }
|
||||
const MatrixWrapper<Derived> matrix() const { return derived(); }
|
||||
const MatrixWrapper<const Derived> matrix() const { return derived(); }
|
||||
|
||||
// template<typename Dest>
|
||||
// inline void evalTo(Dest& dst) const { dst = matrix(); }
|
||||
@@ -174,10 +174,10 @@ template<typename Derived> class ArrayBase
|
||||
protected:
|
||||
// mixing arrays and matrices is not legal
|
||||
template<typename OtherDerived> Derived& operator+=(const MatrixBase<OtherDerived>& )
|
||||
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
|
||||
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
|
||||
// mixing arrays and matrices is not legal
|
||||
template<typename OtherDerived> Derived& operator-=(const MatrixBase<OtherDerived>& )
|
||||
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
|
||||
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
|
||||
};
|
||||
|
||||
/** replaces \c *this by \c *this - \a other.
|
||||
|
||||
@@ -61,7 +61,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
|
||||
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
|
||||
|
||||
inline ArrayWrapper(const ExpressionType& matrix) : m_expression(matrix) {}
|
||||
inline ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
||||
|
||||
inline Index rows() const { return m_expression.rows(); }
|
||||
inline Index cols() const { return m_expression.cols(); }
|
||||
@@ -71,7 +71,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
||||
inline const Scalar* data() const { return m_expression.data(); }
|
||||
|
||||
inline const CoeffReturnType coeff(Index row, Index col) const
|
||||
inline CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return m_expression.coeff(row, col);
|
||||
}
|
||||
@@ -86,7 +86,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
return m_expression.const_cast_derived().coeffRef(row, col);
|
||||
}
|
||||
|
||||
inline const CoeffReturnType coeff(Index index) const
|
||||
inline CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_expression.coeff(index);
|
||||
}
|
||||
@@ -128,8 +128,14 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
template<typename Dest>
|
||||
inline void evalTo(Dest& dst) const { dst = m_expression; }
|
||||
|
||||
const typename internal::remove_all<NestedExpressionType>::type&
|
||||
nestedExpression() const
|
||||
{
|
||||
return m_expression;
|
||||
}
|
||||
|
||||
protected:
|
||||
const NestedExpressionType m_expression;
|
||||
NestedExpressionType m_expression;
|
||||
};
|
||||
|
||||
/** \class MatrixWrapper
|
||||
@@ -168,7 +174,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
|
||||
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
|
||||
|
||||
inline MatrixWrapper(const ExpressionType& matrix) : m_expression(matrix) {}
|
||||
inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
||||
|
||||
inline Index rows() const { return m_expression.rows(); }
|
||||
inline Index cols() const { return m_expression.cols(); }
|
||||
@@ -178,7 +184,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
||||
inline const Scalar* data() const { return m_expression.data(); }
|
||||
|
||||
inline const CoeffReturnType coeff(Index row, Index col) const
|
||||
inline CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return m_expression.coeff(row, col);
|
||||
}
|
||||
@@ -193,7 +199,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
return m_expression.derived().coeffRef(row, col);
|
||||
}
|
||||
|
||||
inline const CoeffReturnType coeff(Index index) const
|
||||
inline CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_expression.coeff(index);
|
||||
}
|
||||
@@ -232,8 +238,14 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
m_expression.const_cast_derived().template writePacket<LoadMode>(index, x);
|
||||
}
|
||||
|
||||
const typename internal::remove_all<NestedExpressionType>::type&
|
||||
nestedExpression() const
|
||||
{
|
||||
return m_expression;
|
||||
}
|
||||
|
||||
protected:
|
||||
const NestedExpressionType m_expression;
|
||||
NestedExpressionType m_expression;
|
||||
};
|
||||
|
||||
#endif // EIGEN_ARRAYWRAPPER_H
|
||||
|
||||
@@ -152,7 +152,7 @@ struct assign_DefaultTraversal_CompleteUnrolling
|
||||
inner = Index % Derived1::InnerSizeAtCompileTime
|
||||
};
|
||||
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
dst.copyCoeffByOuterInner(outer, inner, src);
|
||||
assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
|
||||
@@ -162,13 +162,13 @@ struct assign_DefaultTraversal_CompleteUnrolling
|
||||
template<typename Derived1, typename Derived2, int Stop>
|
||||
struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, int Index, int Stop>
|
||||
struct assign_DefaultTraversal_InnerUnrolling
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
|
||||
{
|
||||
dst.copyCoeffByOuterInner(outer, Index, src);
|
||||
assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
|
||||
@@ -178,7 +178,7 @@ struct assign_DefaultTraversal_InnerUnrolling
|
||||
template<typename Derived1, typename Derived2, int Stop>
|
||||
struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
|
||||
};
|
||||
|
||||
/***********************
|
||||
@@ -188,7 +188,7 @@ struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
template<typename Derived1, typename Derived2, int Index, int Stop>
|
||||
struct assign_LinearTraversal_CompleteUnrolling
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
dst.copyCoeff(Index, src);
|
||||
assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
|
||||
@@ -198,7 +198,7 @@ struct assign_LinearTraversal_CompleteUnrolling
|
||||
template<typename Derived1, typename Derived2, int Stop>
|
||||
struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
/**************************
|
||||
@@ -214,7 +214,7 @@ struct assign_innervec_CompleteUnrolling
|
||||
JointAlignment = assign_traits<Derived1,Derived2>::JointAlignment
|
||||
};
|
||||
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src);
|
||||
assign_innervec_CompleteUnrolling<Derived1, Derived2,
|
||||
@@ -225,13 +225,13 @@ struct assign_innervec_CompleteUnrolling
|
||||
template<typename Derived1, typename Derived2, int Stop>
|
||||
struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, int Index, int Stop>
|
||||
struct assign_innervec_InnerUnrolling
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
|
||||
{
|
||||
dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
|
||||
assign_innervec_InnerUnrolling<Derived1, Derived2,
|
||||
@@ -242,7 +242,7 @@ struct assign_innervec_InnerUnrolling
|
||||
template<typename Derived1, typename Derived2, int Stop>
|
||||
struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
@@ -251,24 +251,25 @@ struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
|
||||
template<typename Derived1, typename Derived2,
|
||||
int Traversal = assign_traits<Derived1, Derived2>::Traversal,
|
||||
int Unrolling = assign_traits<Derived1, Derived2>::Unrolling>
|
||||
int Unrolling = assign_traits<Derived1, Derived2>::Unrolling,
|
||||
int Version = Specialized>
|
||||
struct assign_impl;
|
||||
|
||||
/************************
|
||||
*** Default traversal ***
|
||||
************************/
|
||||
|
||||
template<typename Derived1, typename Derived2, int Unrolling>
|
||||
struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling>
|
||||
template<typename Derived1, typename Derived2, int Unrolling, int Version>
|
||||
struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version>
|
||||
{
|
||||
inline static void run(Derived1 &, const Derived2 &) { }
|
||||
static inline void run(Derived1 &, const Derived2 &) { }
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
|
||||
template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const Index innerSize = dst.innerSize();
|
||||
const Index outerSize = dst.outerSize();
|
||||
@@ -278,21 +279,21 @@ struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
|
||||
template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
|
||||
::run(dst, src);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
|
||||
template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const Index outerSize = dst.outerSize();
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
@@ -305,11 +306,11 @@ struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
|
||||
*** Linear traversal ***
|
||||
***********************/
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
|
||||
template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const Index size = dst.size();
|
||||
for(Index i = 0; i < size; ++i)
|
||||
@@ -317,10 +318,10 @@ struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
|
||||
template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
|
||||
::run(dst, src);
|
||||
@@ -331,11 +332,11 @@ struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
|
||||
*** Inner vectorization ***
|
||||
**************************/
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
|
||||
template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling, Version>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const Index innerSize = dst.innerSize();
|
||||
const Index outerSize = dst.outerSize();
|
||||
@@ -346,21 +347,21 @@ struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling>
|
||||
template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling, Version>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
|
||||
::run(dst, src);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling>
|
||||
template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling, Version>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const Index outerSize = dst.outerSize();
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
@@ -398,11 +399,11 @@ struct unaligned_assign_impl<false>
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
|
||||
template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, Version>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const Index size = dst.size();
|
||||
typedef packet_traits<typename Derived1::Scalar> PacketTraits;
|
||||
@@ -412,7 +413,7 @@ struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
|
||||
srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
|
||||
};
|
||||
const Index alignedStart = assign_traits<Derived1,Derived2>::DstIsAligned ? 0
|
||||
: first_aligned(&dst.coeffRef(0), size);
|
||||
: internal::first_aligned(&dst.coeffRef(0), size);
|
||||
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
|
||||
|
||||
unaligned_assign_impl<assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);
|
||||
@@ -426,11 +427,11 @@ struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
|
||||
template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling, Version>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
enum { size = Derived1::SizeAtCompileTime,
|
||||
packetSize = packet_traits<typename Derived1::Scalar>::size,
|
||||
@@ -445,11 +446,11 @@ struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnroll
|
||||
*** Slice vectorization ***
|
||||
***************************/
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
|
||||
template<typename Derived1, typename Derived2, int Version>
|
||||
struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Version>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
typedef packet_traits<typename Derived1::Scalar> PacketTraits;
|
||||
enum {
|
||||
@@ -463,7 +464,7 @@ struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
|
||||
const Index outerSize = dst.outerSize();
|
||||
const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
|
||||
Index alignedStart = ((!alignable) || assign_traits<Derived1,Derived2>::DstIsAligned) ? 0
|
||||
: first_aligned(&dst.coeffRef(0,0), innerSize);
|
||||
: internal::first_aligned(&dst.coeffRef(0,0), innerSize);
|
||||
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
{
|
||||
@@ -531,19 +532,19 @@ struct assign_selector;
|
||||
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct assign_selector<Derived,OtherDerived,false,false> {
|
||||
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
|
||||
};
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct assign_selector<Derived,OtherDerived,true,false> {
|
||||
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
|
||||
};
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct assign_selector<Derived,OtherDerived,false,true> {
|
||||
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
|
||||
};
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct assign_selector<Derived,OtherDerived,true,true> {
|
||||
EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
217
Eigen/src/Core/Assign_MKL.h
Normal file
217
Eigen/src/Core/Assign_MKL.h
Normal file
@@ -0,0 +1,217 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
********************************************************************************
|
||||
* Content : Eigen bindings to Intel(R) MKL
|
||||
* MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_ASSIGN_VML_H
|
||||
#define EIGEN_ASSIGN_VML_H
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Op> struct vml_call
|
||||
{ enum { IsSupported = 0 }; };
|
||||
|
||||
template<typename Dst, typename Src, typename UnaryOp>
|
||||
class vml_assign_traits
|
||||
{
|
||||
private:
|
||||
enum {
|
||||
DstHasDirectAccess = Dst::Flags & DirectAccessBit,
|
||||
SrcHasDirectAccess = Src::Flags & DirectAccessBit,
|
||||
|
||||
StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
|
||||
InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
|
||||
: int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
|
||||
: int(Dst::RowsAtCompileTime),
|
||||
InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
|
||||
: int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
|
||||
: int(Dst::MaxRowsAtCompileTime),
|
||||
MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
|
||||
|
||||
MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
|
||||
&& Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
|
||||
MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
|
||||
VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
|
||||
LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
|
||||
MayEnableVml = MightEnableVml && LargeEnough,
|
||||
MayLinearize = MayEnableVml && MightLinearize
|
||||
};
|
||||
public:
|
||||
enum {
|
||||
Traversal = MayLinearize ? LinearVectorizedTraversal
|
||||
: MayEnableVml ? InnerVectorizedTraversal
|
||||
: DefaultTraversal
|
||||
};
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
|
||||
int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
|
||||
struct vml_assign_impl
|
||||
: assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
|
||||
{
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
|
||||
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
|
||||
{
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
typedef typename Derived1::Index Index;
|
||||
static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
|
||||
{
|
||||
// in case we want to (or have to) skip VML at runtime we can call:
|
||||
// assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
|
||||
const Index innerSize = dst.innerSize();
|
||||
const Index outerSize = dst.outerSize();
|
||||
for(Index outer = 0; outer < outerSize; ++outer) {
|
||||
const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :
|
||||
&(src.nestedExpression().coeffRef(0, outer));
|
||||
Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
|
||||
vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
|
||||
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
|
||||
{
|
||||
static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
|
||||
{
|
||||
// in case we want to (or have to) skip VML at runtime we can call:
|
||||
// assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
|
||||
vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
|
||||
}
|
||||
};
|
||||
|
||||
// Macroses
|
||||
|
||||
#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \
|
||||
template<typename Derived1, typename Derived2, typename UnaryOp> \
|
||||
struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> { \
|
||||
static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \
|
||||
vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
|
||||
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)
|
||||
|
||||
|
||||
#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
|
||||
#define EIGEN_MKL_VML_MODE VML_HA
|
||||
#else
|
||||
#define EIGEN_MKL_VML_MODE VML_LA
|
||||
#endif
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
|
||||
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
|
||||
enum { IsSupported = 1 }; \
|
||||
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \
|
||||
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
|
||||
VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \
|
||||
} \
|
||||
};
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
|
||||
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
|
||||
enum { IsSupported = 1 }; \
|
||||
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \
|
||||
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
|
||||
MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
|
||||
VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \
|
||||
} \
|
||||
};
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
|
||||
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
|
||||
enum { IsSupported = 1 }; \
|
||||
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \
|
||||
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
|
||||
EIGENTYPE exponent = func.m_exponent; \
|
||||
MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
|
||||
VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \
|
||||
(VMLTYPE*)dst, &vmlMode); \
|
||||
} \
|
||||
};
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)
|
||||
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
|
||||
|
||||
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan)
|
||||
//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)
|
||||
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
|
||||
|
||||
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
|
||||
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
|
||||
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
|
||||
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
#endif // EIGEN_ASSIGN_VML_H
|
||||
@@ -94,7 +94,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess>
|
||||
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
|
||||
&& (InnerStrideAtCompileTime == 1)
|
||||
? PacketAccessBit : 0,
|
||||
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && ((OuterStrideAtCompileTime % packet_traits<Scalar>::size) == 0)) ? AlignedBit : 0,
|
||||
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0,
|
||||
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
|
||||
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
|
||||
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
|
||||
@@ -242,6 +242,21 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
|
||||
inline Index outerStride() const;
|
||||
#endif
|
||||
|
||||
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
|
||||
{
|
||||
return m_xpr;
|
||||
}
|
||||
|
||||
Index startRow() const
|
||||
{
|
||||
return m_startRow.value();
|
||||
}
|
||||
|
||||
Index startCol() const
|
||||
{
|
||||
return m_startCol.value();
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
const typename XprType::Nested m_xpr;
|
||||
@@ -304,6 +319,11 @@ class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
init();
|
||||
}
|
||||
|
||||
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
|
||||
{
|
||||
return m_xpr;
|
||||
}
|
||||
|
||||
/** \sa MapBase::innerStride() */
|
||||
inline Index innerStride() const
|
||||
{
|
||||
@@ -341,7 +361,7 @@ class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
: m_xpr.innerStride();
|
||||
}
|
||||
|
||||
const typename XprType::Nested m_xpr;
|
||||
typename XprType::Nested m_xpr;
|
||||
int m_outerStride;
|
||||
};
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ struct all_unroller
|
||||
row = (UnrollCount-1) % Derived::RowsAtCompileTime
|
||||
};
|
||||
|
||||
inline static bool run(const Derived &mat)
|
||||
static inline bool run(const Derived &mat)
|
||||
{
|
||||
return all_unroller<Derived, UnrollCount-1>::run(mat) && mat.coeff(row, col);
|
||||
}
|
||||
@@ -44,13 +44,13 @@ struct all_unroller
|
||||
template<typename Derived>
|
||||
struct all_unroller<Derived, 1>
|
||||
{
|
||||
inline static bool run(const Derived &mat) { return mat.coeff(0, 0); }
|
||||
static inline bool run(const Derived &mat) { return mat.coeff(0, 0); }
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct all_unroller<Derived, Dynamic>
|
||||
{
|
||||
inline static bool run(const Derived &) { return false; }
|
||||
static inline bool run(const Derived &) { return false; }
|
||||
};
|
||||
|
||||
template<typename Derived, int UnrollCount>
|
||||
@@ -61,7 +61,7 @@ struct any_unroller
|
||||
row = (UnrollCount-1) % Derived::RowsAtCompileTime
|
||||
};
|
||||
|
||||
inline static bool run(const Derived &mat)
|
||||
static inline bool run(const Derived &mat)
|
||||
{
|
||||
return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col);
|
||||
}
|
||||
@@ -70,13 +70,13 @@ struct any_unroller
|
||||
template<typename Derived>
|
||||
struct any_unroller<Derived, 1>
|
||||
{
|
||||
inline static bool run(const Derived &mat) { return mat.coeff(0, 0); }
|
||||
static inline bool run(const Derived &mat) { return mat.coeff(0, 0); }
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct any_unroller<Derived, Dynamic>
|
||||
{
|
||||
inline static bool run(const Derived &) { return false; }
|
||||
static inline bool run(const Derived &) { return false; }
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
@@ -167,8 +167,8 @@ class CwiseBinaryOp : internal::no_assignment_operator,
|
||||
const BinaryOp& functor() const { return m_functor; }
|
||||
|
||||
protected:
|
||||
const LhsNested m_lhs;
|
||||
const RhsNested m_rhs;
|
||||
LhsNested m_lhs;
|
||||
RhsNested m_rhs;
|
||||
const BinaryOp m_functor;
|
||||
};
|
||||
|
||||
|
||||
@@ -101,6 +101,9 @@ class CwiseNullaryOp : internal::no_assignment_operator,
|
||||
return m_functor.packetOp(index);
|
||||
}
|
||||
|
||||
/** \returns the functor representing the nullary operation */
|
||||
const NullaryOp& functor() const { return m_functor; }
|
||||
|
||||
protected:
|
||||
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
|
||||
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
|
||||
|
||||
@@ -95,7 +95,7 @@ class CwiseUnaryOp : internal::no_assignment_operator,
|
||||
nestedExpression() { return m_xpr.const_cast_derived(); }
|
||||
|
||||
protected:
|
||||
const typename XprType::Nested m_xpr;
|
||||
typename XprType::Nested m_xpr;
|
||||
const UnaryOp m_functor;
|
||||
};
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ class CwiseUnaryView : internal::no_assignment_operator,
|
||||
|
||||
protected:
|
||||
// FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC
|
||||
const typename internal::nested<MatrixType>::type m_matrix;
|
||||
typename internal::nested<MatrixType>::type m_matrix;
|
||||
ViewOp m_functor;
|
||||
};
|
||||
|
||||
|
||||
@@ -376,12 +376,13 @@ template<typename Derived> class DenseBase
|
||||
inline Derived& operator*=(const Scalar& other);
|
||||
inline Derived& operator/=(const Scalar& other);
|
||||
|
||||
typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
|
||||
/** \returns the matrix or vector obtained by evaluating this expression.
|
||||
*
|
||||
* Notice that in the case of a plain matrix or vector (not an expression) this function just returns
|
||||
* a const reference, in order to avoid a useless copy.
|
||||
*/
|
||||
EIGEN_STRONG_INLINE const typename internal::eval<Derived>::type eval() const
|
||||
EIGEN_STRONG_INLINE EvalReturnType eval() const
|
||||
{
|
||||
// Even though MSVC does not honor strong inlining when the return type
|
||||
// is a dynamic matrix, we desperately need strong inlining for fixed
|
||||
|
||||
@@ -710,16 +710,16 @@ namespace internal {
|
||||
template<typename Derived, bool JustReturnZero>
|
||||
struct first_aligned_impl
|
||||
{
|
||||
inline static typename Derived::Index run(const Derived&)
|
||||
static inline typename Derived::Index run(const Derived&)
|
||||
{ return 0; }
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct first_aligned_impl<Derived, false>
|
||||
{
|
||||
inline static typename Derived::Index run(const Derived& m)
|
||||
static inline typename Derived::Index run(const Derived& m)
|
||||
{
|
||||
return first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
|
||||
return internal::first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
|
||||
}
|
||||
};
|
||||
|
||||
@@ -729,7 +729,7 @@ struct first_aligned_impl<Derived, false>
|
||||
* documentation.
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline static typename Derived::Index first_aligned(const Derived& m)
|
||||
static inline typename Derived::Index first_aligned(const Derived& m)
|
||||
{
|
||||
return first_aligned_impl
|
||||
<Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
|
||||
|
||||
@@ -104,8 +104,8 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()) {}
|
||||
inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
|
||||
inline static DenseIndex rows(void) {return _Rows;}
|
||||
inline static DenseIndex cols(void) {return _Cols;}
|
||||
static inline DenseIndex rows(void) {return _Rows;}
|
||||
static inline DenseIndex cols(void) {return _Cols;}
|
||||
inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
inline const T *data() const { return m_data.array; }
|
||||
@@ -120,14 +120,24 @@ template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0
|
||||
inline DenseStorage(internal::constructor_without_unaligned_array_assert) {}
|
||||
inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
inline void swap(DenseStorage& ) {}
|
||||
inline static DenseIndex rows(void) {return _Rows;}
|
||||
inline static DenseIndex cols(void) {return _Cols;}
|
||||
static inline DenseIndex rows(void) {return _Rows;}
|
||||
static inline DenseIndex cols(void) {return _Cols;}
|
||||
inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
|
||||
inline const T *data() const { return 0; }
|
||||
inline T *data() { return 0; }
|
||||
};
|
||||
|
||||
// more specializations for null matrices; these are necessary to resolve ambiguities
|
||||
template<typename T, int _Options> class DenseStorage<T, 0, Dynamic, Dynamic, _Options>
|
||||
: public DenseStorage<T, 0, 0, 0, _Options> { };
|
||||
|
||||
template<typename T, int _Rows, int _Options> class DenseStorage<T, 0, _Rows, Dynamic, _Options>
|
||||
: public DenseStorage<T, 0, 0, 0, _Options> { };
|
||||
|
||||
template<typename T, int _Cols, int _Options> class DenseStorage<T, 0, Dynamic, _Cols, _Options>
|
||||
: public DenseStorage<T, 0, 0, 0, _Options> { };
|
||||
|
||||
// dynamic-size matrix with fixed-size storage
|
||||
template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
|
||||
{
|
||||
@@ -241,7 +251,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
|
||||
inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
|
||||
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
|
||||
inline static DenseIndex rows(void) {return _Rows;}
|
||||
static inline DenseIndex rows(void) {return _Rows;}
|
||||
inline DenseIndex cols(void) const {return m_cols;}
|
||||
inline void conservativeResize(DenseIndex size, DenseIndex, DenseIndex cols)
|
||||
{
|
||||
@@ -278,7 +288,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
|
||||
inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
|
||||
inline DenseIndex rows(void) const {return m_rows;}
|
||||
inline static DenseIndex cols(void) {return _Cols;}
|
||||
static inline DenseIndex cols(void) {return _Cols;}
|
||||
inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex)
|
||||
{
|
||||
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
@@ -101,6 +102,15 @@ template<typename MatrixType, int DiagIndex> class Diagonal
|
||||
return 0;
|
||||
}
|
||||
|
||||
typedef typename internal::conditional<
|
||||
internal::is_lvalue<MatrixType>::value,
|
||||
Scalar,
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
|
||||
inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
|
||||
|
||||
inline Scalar& coeffRef(Index row, Index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
@@ -133,8 +143,19 @@ template<typename MatrixType, int DiagIndex> class Diagonal
|
||||
return m_matrix.coeff(index+rowOffset(), index+colOffset());
|
||||
}
|
||||
|
||||
const typename internal::remove_all<typename MatrixType::Nested>::type&
|
||||
nestedExpression() const
|
||||
{
|
||||
return m_matrix;
|
||||
}
|
||||
|
||||
int index() const
|
||||
{
|
||||
return m_index.value();
|
||||
}
|
||||
|
||||
protected:
|
||||
const typename MatrixType::Nested m_matrix;
|
||||
typename MatrixType::Nested m_matrix;
|
||||
const internal::variable_if_dynamic<Index, DiagIndex> m_index;
|
||||
|
||||
private:
|
||||
|
||||
@@ -72,7 +72,7 @@ class DiagonalBase : public EigenBase<Derived>
|
||||
const DiagonalProduct<MatrixDerived, Derived, OnTheLeft>
|
||||
operator*(const MatrixBase<MatrixDerived> &matrix) const;
|
||||
|
||||
inline const DiagonalWrapper<CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
|
||||
inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
|
||||
inverse() const
|
||||
{
|
||||
return diagonal().cwiseInverse();
|
||||
@@ -251,13 +251,13 @@ class DiagonalWrapper
|
||||
#endif
|
||||
|
||||
/** Constructor from expression of diagonal coefficients to wrap. */
|
||||
inline DiagonalWrapper(const DiagonalVectorType& diagonal) : m_diagonal(diagonal) {}
|
||||
inline DiagonalWrapper(DiagonalVectorType& diagonal) : m_diagonal(diagonal) {}
|
||||
|
||||
/** \returns a const reference to the wrapped expression of diagonal coefficients. */
|
||||
const DiagonalVectorType& diagonal() const { return m_diagonal; }
|
||||
|
||||
protected:
|
||||
const typename DiagonalVectorType::Nested m_diagonal;
|
||||
typename DiagonalVectorType::Nested m_diagonal;
|
||||
};
|
||||
|
||||
/** \returns a pseudo-expression of a diagonal matrix with *this as vector of diagonal coefficients
|
||||
|
||||
@@ -107,8 +107,8 @@ class DiagonalProduct : internal::no_assignment_operator,
|
||||
m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(id));
|
||||
}
|
||||
|
||||
const typename MatrixType::Nested m_matrix;
|
||||
const typename DiagonalType::Nested m_diagonal;
|
||||
typename MatrixType::Nested m_matrix;
|
||||
typename DiagonalType::Nested m_diagonal;
|
||||
};
|
||||
|
||||
/** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal.
|
||||
|
||||
@@ -176,7 +176,7 @@ template<typename Derived, int p>
|
||||
struct lpNorm_selector
|
||||
{
|
||||
typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
|
||||
inline static RealScalar run(const MatrixBase<Derived>& m)
|
||||
static inline RealScalar run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
|
||||
}
|
||||
@@ -185,7 +185,7 @@ struct lpNorm_selector
|
||||
template<typename Derived>
|
||||
struct lpNorm_selector<Derived, 1>
|
||||
{
|
||||
inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
|
||||
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
return m.cwiseAbs().sum();
|
||||
}
|
||||
@@ -194,7 +194,7 @@ struct lpNorm_selector<Derived, 1>
|
||||
template<typename Derived>
|
||||
struct lpNorm_selector<Derived, 2>
|
||||
{
|
||||
inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
|
||||
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
return m.norm();
|
||||
}
|
||||
@@ -203,7 +203,7 @@ struct lpNorm_selector<Derived, 2>
|
||||
template<typename Derived>
|
||||
struct lpNorm_selector<Derived, Infinity>
|
||||
{
|
||||
inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
|
||||
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
return m.cwiseAbs().maxCoeff();
|
||||
}
|
||||
|
||||
@@ -220,6 +220,38 @@ struct functor_traits<scalar_quotient_op<Scalar> > {
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the and of two booleans
|
||||
*
|
||||
* \sa class CwiseBinaryOp, ArrayBase::operator&&
|
||||
*/
|
||||
struct scalar_boolean_and_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
|
||||
EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
|
||||
};
|
||||
template<> struct functor_traits<scalar_boolean_and_op> {
|
||||
enum {
|
||||
Cost = NumTraits<bool>::AddCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the or of two booleans
|
||||
*
|
||||
* \sa class CwiseBinaryOp, ArrayBase::operator||
|
||||
*/
|
||||
struct scalar_boolean_or_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
|
||||
EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
|
||||
};
|
||||
template<> struct functor_traits<scalar_boolean_or_op> {
|
||||
enum {
|
||||
Cost = NumTraits<bool>::AddCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
// unary functors:
|
||||
|
||||
/** \internal
|
||||
|
||||
@@ -35,8 +35,8 @@ struct isApprox_selector
|
||||
static bool run(const Derived& x, const OtherDerived& y, typename Derived::RealScalar prec)
|
||||
{
|
||||
using std::min;
|
||||
const typename internal::nested<Derived,2>::type nested(x);
|
||||
const typename internal::nested<OtherDerived,2>::type otherNested(y);
|
||||
typename internal::nested<Derived,2>::type nested(x);
|
||||
typename internal::nested<OtherDerived,2>::type otherNested(y);
|
||||
return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
|
||||
}
|
||||
};
|
||||
|
||||
624
Eigen/src/Core/GeneralProduct.h
Normal file
624
Eigen/src/Core/GeneralProduct.h
Normal file
@@ -0,0 +1,624 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// Alternatively, you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License as
|
||||
// published by the Free Software Foundation; either version 2 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef EIGEN_GENERAL_PRODUCT_H
|
||||
#define EIGEN_GENERAL_PRODUCT_H
|
||||
|
||||
/** \class GeneralProduct
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the product of two general matrices or vectors
|
||||
*
|
||||
* \param LhsNested the type used to store the left-hand side
|
||||
* \param RhsNested the type used to store the right-hand side
|
||||
* \param ProductMode the type of the product
|
||||
*
|
||||
* This class represents an expression of the product of two general matrices.
|
||||
* We call a general matrix, a dense matrix with full storage. For instance,
|
||||
* This excludes triangular, selfadjoint, and sparse matrices.
|
||||
* It is the return type of the operator* between general matrices. Its template
|
||||
* arguments are determined automatically by ProductReturnType. Therefore,
|
||||
* GeneralProduct should never be used direclty. To determine the result type of a
|
||||
* function which involves a matrix product, use ProductReturnType::Type.
|
||||
*
|
||||
* \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
|
||||
*/
|
||||
template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
|
||||
class GeneralProduct;
|
||||
|
||||
enum {
|
||||
Large = 2,
|
||||
Small = 3
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<int Rows, int Cols, int Depth> struct product_type_selector;
|
||||
|
||||
template<int Size, int MaxSize> struct product_size_category
|
||||
{
|
||||
enum { is_large = MaxSize == Dynamic ||
|
||||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
|
||||
value = is_large ? Large
|
||||
: Size == 1 ? 1
|
||||
: Small
|
||||
};
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs> struct product_type
|
||||
{
|
||||
typedef typename remove_all<Lhs>::type _Lhs;
|
||||
typedef typename remove_all<Rhs>::type _Rhs;
|
||||
enum {
|
||||
MaxRows = _Lhs::MaxRowsAtCompileTime,
|
||||
Rows = _Lhs::RowsAtCompileTime,
|
||||
MaxCols = _Rhs::MaxColsAtCompileTime,
|
||||
Cols = _Rhs::ColsAtCompileTime,
|
||||
MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
|
||||
_Rhs::MaxRowsAtCompileTime),
|
||||
Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
|
||||
_Rhs::RowsAtCompileTime),
|
||||
LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
};
|
||||
|
||||
// the splitting into different lines of code here, introducing the _select enums and the typedef below,
|
||||
// is to work around an internal compiler error with gcc 4.1 and 4.2.
|
||||
private:
|
||||
enum {
|
||||
rows_select = product_size_category<Rows,MaxRows>::value,
|
||||
cols_select = product_size_category<Cols,MaxCols>::value,
|
||||
depth_select = product_size_category<Depth,MaxDepth>::value
|
||||
};
|
||||
typedef product_type_selector<rows_select, cols_select, depth_select> selector;
|
||||
|
||||
public:
|
||||
enum {
|
||||
value = selector::ret
|
||||
};
|
||||
#ifdef EIGEN_DEBUG_PRODUCT
|
||||
static void debug()
|
||||
{
|
||||
EIGEN_DEBUG_VAR(Rows);
|
||||
EIGEN_DEBUG_VAR(Cols);
|
||||
EIGEN_DEBUG_VAR(Depth);
|
||||
EIGEN_DEBUG_VAR(rows_select);
|
||||
EIGEN_DEBUG_VAR(cols_select);
|
||||
EIGEN_DEBUG_VAR(depth_select);
|
||||
EIGEN_DEBUG_VAR(value);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
/* The following allows to select the kind of product at compile time
|
||||
* based on the three dimensions of the product.
|
||||
* This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
|
||||
// FIXME I'm not sure the current mapping is the ideal one.
|
||||
template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
|
||||
template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
|
||||
template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
|
||||
template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
|
||||
template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; };
|
||||
template<> struct product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \class ProductReturnType
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Helper class to get the correct and optimized returned type of operator*
|
||||
*
|
||||
* \param Lhs the type of the left-hand side
|
||||
* \param Rhs the type of the right-hand side
|
||||
* \param ProductMode the type of the product (determined automatically by internal::product_mode)
|
||||
*
|
||||
* This class defines the typename Type representing the optimized product expression
|
||||
* between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
|
||||
* is the recommended way to define the result type of a function returning an expression
|
||||
* which involve a matrix product. The class Product should never be
|
||||
* used directly.
|
||||
*
|
||||
* \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
|
||||
*/
|
||||
template<typename Lhs, typename Rhs, int ProductType>
|
||||
struct ProductReturnType
|
||||
{
|
||||
// TODO use the nested type to reduce instanciations ????
|
||||
// typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
|
||||
// typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
|
||||
|
||||
typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
|
||||
{
|
||||
typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
|
||||
typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
|
||||
typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
|
||||
{
|
||||
typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
|
||||
typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
|
||||
typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
|
||||
};
|
||||
|
||||
// this is a workaround for sun CC
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
|
||||
{};
|
||||
|
||||
/***********************************************************************
|
||||
* Implementation of Inner Vector Vector Product
|
||||
***********************************************************************/
|
||||
|
||||
// FIXME : maybe the "inner product" could return a Scalar
|
||||
// instead of a 1x1 matrix ??
|
||||
// Pro: more natural for the user
|
||||
// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
|
||||
// product ends up to a row-vector times col-vector product... To tackle this use
|
||||
// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
|
||||
: traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
|
||||
{};
|
||||
|
||||
}
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
class GeneralProduct<Lhs, Rhs, InnerProduct>
|
||||
: internal::no_assignment_operator,
|
||||
public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
|
||||
{
|
||||
typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
|
||||
public:
|
||||
GeneralProduct(const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
|
||||
Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
|
||||
}
|
||||
|
||||
/** Convertion to scalar */
|
||||
operator const typename Base::Scalar() const {
|
||||
return Base::coeff(0,0);
|
||||
}
|
||||
};
|
||||
|
||||
/***********************************************************************
|
||||
* Implementation of Outer Vector Vector Product
|
||||
***********************************************************************/
|
||||
|
||||
namespace internal {
|
||||
template<int StorageOrder> struct outer_product_selector;
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
|
||||
: traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
|
||||
{};
|
||||
|
||||
}
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
class GeneralProduct<Lhs, Rhs, OuterProduct>
|
||||
: public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
|
||||
{
|
||||
public:
|
||||
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
|
||||
|
||||
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
}
|
||||
|
||||
template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
|
||||
{
|
||||
internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
|
||||
}
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<> struct outer_product_selector<ColMajor> {
|
||||
template<typename ProductType, typename Dest>
|
||||
static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
|
||||
typedef typename Dest::Index Index;
|
||||
// FIXME make sure lhs is sequentially stored
|
||||
// FIXME not very good if rhs is real and lhs complex while alpha is real too
|
||||
const Index cols = dest.cols();
|
||||
for (Index j=0; j<cols; ++j)
|
||||
dest.col(j) += (alpha * prod.rhs().coeff(j)) * prod.lhs();
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct outer_product_selector<RowMajor> {
|
||||
template<typename ProductType, typename Dest>
|
||||
static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
|
||||
typedef typename Dest::Index Index;
|
||||
// FIXME make sure rhs is sequentially stored
|
||||
// FIXME not very good if lhs is real and rhs complex while alpha is real too
|
||||
const Index rows = dest.rows();
|
||||
for (Index i=0; i<rows; ++i)
|
||||
dest.row(i) += (alpha * prod.lhs().coeff(i)) * prod.rhs();
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/***********************************************************************
|
||||
* Implementation of General Matrix Vector Product
|
||||
***********************************************************************/
|
||||
|
||||
/* According to the shape/flags of the matrix we have to distinghish 3 different cases:
|
||||
* 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
|
||||
* 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
|
||||
* 3 - all other cases are handled using a simple loop along the outer-storage direction.
|
||||
* Therefore we need a lower level meta selector.
|
||||
* Furthermore, if the matrix is the rhs, then the product has to be transposed.
|
||||
*/
|
||||
namespace internal {
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
|
||||
: traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
|
||||
{};
|
||||
|
||||
template<int Side, int StorageOrder, bool BlasCompatible>
|
||||
struct gemv_selector;
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
class GeneralProduct<Lhs, Rhs, GemvProduct>
|
||||
: public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
|
||||
{
|
||||
public:
|
||||
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
|
||||
|
||||
typedef typename Lhs::Scalar LhsScalar;
|
||||
typedef typename Rhs::Scalar RhsScalar;
|
||||
|
||||
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
|
||||
{
|
||||
// EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
|
||||
// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
}
|
||||
|
||||
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
|
||||
typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
|
||||
|
||||
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
|
||||
{
|
||||
eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
|
||||
internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
|
||||
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
|
||||
}
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
// The vector is on the left => transposition
|
||||
template<int StorageOrder, bool BlasCompatible>
|
||||
struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
|
||||
{
|
||||
template<typename ProductType, typename Dest>
|
||||
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
|
||||
{
|
||||
Transpose<Dest> destT(dest);
|
||||
enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
|
||||
gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
|
||||
::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
|
||||
(prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
|
||||
|
||||
template<typename Scalar,int Size,int MaxSize>
|
||||
struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
|
||||
};
|
||||
|
||||
template<typename Scalar,int Size>
|
||||
struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Scalar* data() { return 0; }
|
||||
};
|
||||
|
||||
template<typename Scalar,int Size,int MaxSize>
|
||||
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
|
||||
{
|
||||
#if EIGEN_ALIGN_STATICALLY
|
||||
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
|
||||
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
|
||||
#else
|
||||
// Some architectures cannot align on the stack,
|
||||
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
|
||||
enum {
|
||||
ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
|
||||
PacketSize = internal::packet_traits<Scalar>::size
|
||||
};
|
||||
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
|
||||
EIGEN_STRONG_INLINE Scalar* data() {
|
||||
return ForceAlignment
|
||||
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(15))) + 16)
|
||||
: m_data.array;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
template<> struct gemv_selector<OnTheRight,ColMajor,true>
|
||||
{
|
||||
template<typename ProductType, typename Dest>
|
||||
static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
|
||||
{
|
||||
typedef typename ProductType::Index Index;
|
||||
typedef typename ProductType::LhsScalar LhsScalar;
|
||||
typedef typename ProductType::RhsScalar RhsScalar;
|
||||
typedef typename ProductType::Scalar ResScalar;
|
||||
typedef typename ProductType::RealScalar RealScalar;
|
||||
typedef typename ProductType::ActualLhsType ActualLhsType;
|
||||
typedef typename ProductType::ActualRhsType ActualRhsType;
|
||||
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
|
||||
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
|
||||
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
|
||||
|
||||
ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
|
||||
ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
|
||||
|
||||
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
|
||||
* RhsBlasTraits::extractScalarFactor(prod.rhs());
|
||||
|
||||
enum {
|
||||
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
|
||||
// on, the other hand it is good for the cache to pack the vector anyways...
|
||||
EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
|
||||
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
|
||||
MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
|
||||
};
|
||||
|
||||
gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
|
||||
|
||||
bool alphaIsCompatible = (!ComplexByReal) || (imag(actualAlpha)==RealScalar(0));
|
||||
bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
||||
|
||||
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
|
||||
evalToDest ? dest.data() : static_dest.data());
|
||||
|
||||
if(!evalToDest)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
int size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
if(!alphaIsCompatible)
|
||||
{
|
||||
MappedDest(actualDestPtr, dest.size()).setZero();
|
||||
compatibleAlpha = RhsScalar(1);
|
||||
}
|
||||
else
|
||||
MappedDest(actualDestPtr, dest.size()) = dest;
|
||||
}
|
||||
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
actualLhs.data(), actualLhs.outerStride(),
|
||||
actualRhs.data(), actualRhs.innerStride(),
|
||||
actualDestPtr, 1,
|
||||
compatibleAlpha);
|
||||
|
||||
if (!evalToDest)
|
||||
{
|
||||
if(!alphaIsCompatible)
|
||||
dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
|
||||
else
|
||||
dest = MappedDest(actualDestPtr, dest.size());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct gemv_selector<OnTheRight,RowMajor,true>
|
||||
{
|
||||
template<typename ProductType, typename Dest>
|
||||
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
|
||||
{
|
||||
typedef typename ProductType::LhsScalar LhsScalar;
|
||||
typedef typename ProductType::RhsScalar RhsScalar;
|
||||
typedef typename ProductType::Scalar ResScalar;
|
||||
typedef typename ProductType::Index Index;
|
||||
typedef typename ProductType::ActualLhsType ActualLhsType;
|
||||
typedef typename ProductType::ActualRhsType ActualRhsType;
|
||||
typedef typename ProductType::_ActualRhsType _ActualRhsType;
|
||||
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
|
||||
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
|
||||
|
||||
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
|
||||
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
|
||||
|
||||
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
|
||||
* RhsBlasTraits::extractScalarFactor(prod.rhs());
|
||||
|
||||
enum {
|
||||
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
|
||||
// on, the other hand it is good for the cache to pack the vector anyways...
|
||||
DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
|
||||
};
|
||||
|
||||
gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
|
||||
DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
|
||||
|
||||
if(!DirectlyUseRhs)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
int size = actualRhs.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
|
||||
}
|
||||
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
actualLhs.data(), actualLhs.outerStride(),
|
||||
actualRhsPtr, 1,
|
||||
dest.data(), dest.innerStride(),
|
||||
actualAlpha);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct gemv_selector<OnTheRight,ColMajor,false>
|
||||
{
|
||||
template<typename ProductType, typename Dest>
|
||||
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
|
||||
{
|
||||
typedef typename Dest::Index Index;
|
||||
// TODO makes sure dest is sequentially stored in memory, otherwise use a temp
|
||||
const Index size = prod.rhs().rows();
|
||||
for(Index k=0; k<size; ++k)
|
||||
dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct gemv_selector<OnTheRight,RowMajor,false>
|
||||
{
|
||||
template<typename ProductType, typename Dest>
|
||||
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
|
||||
{
|
||||
typedef typename Dest::Index Index;
|
||||
// TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
|
||||
const Index rows = prod.rows();
|
||||
for(Index i=0; i<rows; ++i)
|
||||
dest.coeffRef(i) += alpha * (prod.lhs().row(i).cwiseProduct(prod.rhs().transpose())).sum();
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of matrix base methods
|
||||
***************************************************************************/
|
||||
|
||||
/** \returns the matrix product of \c *this and \a other.
|
||||
*
|
||||
* \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*().
|
||||
*
|
||||
* \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline const typename ProductReturnType<Derived, OtherDerived>::Type
|
||||
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
|
||||
{
|
||||
// A note regarding the function declaration: In MSVC, this function will sometimes
|
||||
// not be inlined since DenseStorage is an unwindable object for dynamic
|
||||
// matrices and product types are holding a member to store the result.
|
||||
// Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
|
||||
enum {
|
||||
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|
||||
|| OtherDerived::RowsAtCompileTime==Dynamic
|
||||
|| int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
|
||||
AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
|
||||
SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
|
||||
};
|
||||
// note to the lost user:
|
||||
// * for a dot product use: v1.dot(v2)
|
||||
// * for a coeff-wise product use: v1.cwiseProduct(v2)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
|
||||
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
|
||||
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
|
||||
#ifdef EIGEN_DEBUG_PRODUCT
|
||||
internal::product_type<Derived,OtherDerived>::debug();
|
||||
#endif
|
||||
return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
|
||||
*
|
||||
* The returned product will behave like any other expressions: the coefficients of the product will be
|
||||
* computed once at a time as requested. This might be useful in some extremely rare cases when only
|
||||
* a small and no coherent fraction of the result's coefficients have to be computed.
|
||||
*
|
||||
* \warning This version of the matrix product can be much much slower. So use it only if you know
|
||||
* what you are doing and that you measured a true speed improvement.
|
||||
*
|
||||
* \sa operator*(const MatrixBase&)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
const typename LazyProductReturnType<Derived,OtherDerived>::Type
|
||||
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
|
||||
{
|
||||
enum {
|
||||
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|
||||
|| OtherDerived::RowsAtCompileTime==Dynamic
|
||||
|| int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
|
||||
AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
|
||||
SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
|
||||
};
|
||||
// note to the lost user:
|
||||
// * for a dot product use: v1.dot(v2)
|
||||
// * for a coeff-wise product use: v1.cwiseProduct(v2)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
|
||||
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
|
||||
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
|
||||
|
||||
return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
|
||||
}
|
||||
|
||||
#endif // EIGEN_PRODUCT_H
|
||||
@@ -312,7 +312,7 @@ template<int Offset,typename PacketType>
|
||||
struct palign_impl
|
||||
{
|
||||
// by default data are aligned, so there is nothing to be done :)
|
||||
inline static void run(PacketType&, const PacketType&) {}
|
||||
static inline void run(PacketType&, const PacketType&) {}
|
||||
};
|
||||
|
||||
/** \internal update \a first using the concatenation of the \a Offset last elements
|
||||
|
||||
@@ -171,7 +171,7 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
|
||||
return s;
|
||||
}
|
||||
|
||||
const typename Derived::Nested m = _m;
|
||||
typename Derived::Nested m = _m;
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename Derived::Index Index;
|
||||
|
||||
|
||||
@@ -170,8 +170,8 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
|
||||
internal::inner_stride_at_compile_time<Derived>::ret==1),
|
||||
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
|
||||
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % (sizeof(Scalar)*internal::packet_traits<Scalar>::size)) == 0)
|
||||
&& "data is not aligned");
|
||||
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0)
|
||||
&& "data is not aligned");
|
||||
}
|
||||
|
||||
PointerType m_data;
|
||||
|
||||
@@ -309,8 +309,7 @@ struct abs2_impl<std::complex<RealScalar> >
|
||||
{
|
||||
static inline RealScalar run(const std::complex<RealScalar>& x)
|
||||
{
|
||||
using std::norm;
|
||||
return norm(x);
|
||||
return real(x)*real(x) + imag(x)*imag(x);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -553,7 +552,7 @@ struct pow_default_impl<Scalar, true>
|
||||
{
|
||||
static inline Scalar run(Scalar x, Scalar y)
|
||||
{
|
||||
Scalar res = 1;
|
||||
Scalar res(1);
|
||||
eigen_assert(!NumTraits<Scalar>::IsSigned || y >= 0);
|
||||
if(y & 1) res *= x;
|
||||
y >>= 1;
|
||||
@@ -838,6 +837,17 @@ template<> struct scalar_fuzzy_impl<bool>
|
||||
|
||||
};
|
||||
|
||||
/****************************************************************************
|
||||
* Special functions *
|
||||
****************************************************************************/
|
||||
|
||||
// std::isfinite is non standard, so let's define our own version,
|
||||
// even though it is not very efficient.
|
||||
template<typename T> bool isfinite(const T& x)
|
||||
{
|
||||
return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
#endif // EIGEN_MATHFUNCTIONS_H
|
||||
|
||||
@@ -153,10 +153,6 @@ class Matrix
|
||||
|
||||
typedef typename Base::PlainObject PlainObject;
|
||||
|
||||
enum { NeedsToAlign = (!(Options&DontAlign))
|
||||
&& SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
|
||||
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
|
||||
|
||||
using Base::base;
|
||||
using Base::coeffRef;
|
||||
|
||||
@@ -415,25 +411,6 @@ EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
|
||||
|
||||
#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES
|
||||
#undef EIGEN_MAKE_TYPEDEFS
|
||||
|
||||
#undef EIGEN_MAKE_TYPEDEFS_LARGE
|
||||
|
||||
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
|
||||
using Eigen::Matrix##SizeSuffix##TypeSuffix; \
|
||||
using Eigen::Vector##SizeSuffix##TypeSuffix; \
|
||||
using Eigen::RowVector##SizeSuffix##TypeSuffix;
|
||||
|
||||
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
|
||||
|
||||
#define EIGEN_USING_MATRIX_TYPEDEFS \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \
|
||||
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd)
|
||||
#undef EIGEN_MAKE_FIXED_TYPEDEFS
|
||||
|
||||
#endif // EIGEN_MATRIX_H
|
||||
|
||||
@@ -250,8 +250,7 @@ template<typename Derived> class MatrixBase
|
||||
|
||||
// huuuge hack. make Eigen2's matrix.part<Diagonal>() work in eigen3. Problem: Diagonal is now a class template instead
|
||||
// of an integer constant. Solution: overload the part() method template wrt template parameters list.
|
||||
// Note: replacing next line by "template<template<typename T, int n> class U>" produces a mysterious error C2082 in MSVC.
|
||||
template<template<typename, int> class U>
|
||||
template<template<typename T, int n> class U>
|
||||
const DiagonalWrapper<ConstDiagonalReturnType> part() const
|
||||
{ return diagonal().asDiagonal(); }
|
||||
#endif // EIGEN2_SUPPORT
|
||||
@@ -331,7 +330,7 @@ template<typename Derived> class MatrixBase
|
||||
/** \returns an \link ArrayBase Array \endlink expression of this matrix
|
||||
* \sa ArrayBase::matrix() */
|
||||
ArrayWrapper<Derived> array() { return derived(); }
|
||||
const ArrayWrapper<Derived> array() const { return derived(); }
|
||||
const ArrayWrapper<const Derived> array() const { return derived(); }
|
||||
|
||||
/////////// LU module ///////////
|
||||
|
||||
@@ -466,6 +465,8 @@ template<typename Derived> class MatrixBase
|
||||
const MatrixFunctionReturnValue<Derived> sinh() const;
|
||||
const MatrixFunctionReturnValue<Derived> cos() const;
|
||||
const MatrixFunctionReturnValue<Derived> sin() const;
|
||||
const MatrixSquareRootReturnValue<Derived> sqrt() const;
|
||||
const MatrixLogarithmReturnValue<Derived> log() const;
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
@@ -512,10 +513,10 @@ template<typename Derived> class MatrixBase
|
||||
protected:
|
||||
// mixing arrays and matrices is not legal
|
||||
template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
|
||||
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
|
||||
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
|
||||
// mixing arrays and matrices is not legal
|
||||
template<typename OtherDerived> Derived& operator-=(const ArrayBase<OtherDerived>& )
|
||||
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
|
||||
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
|
||||
};
|
||||
|
||||
#endif // EIGEN_MATRIXBASE_H
|
||||
|
||||
@@ -81,14 +81,14 @@ template<typename T> struct GenericNumTraits
|
||||
>::type NonInteger;
|
||||
typedef T Nested;
|
||||
|
||||
inline static Real epsilon() { return std::numeric_limits<T>::epsilon(); }
|
||||
inline static Real dummy_precision()
|
||||
static inline Real epsilon() { return std::numeric_limits<T>::epsilon(); }
|
||||
static inline Real dummy_precision()
|
||||
{
|
||||
// make sure to override this for floating-point types
|
||||
return Real(0);
|
||||
}
|
||||
inline static T highest() { return (std::numeric_limits<T>::max)(); }
|
||||
inline static T lowest() { return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)()); }
|
||||
static inline T highest() { return (std::numeric_limits<T>::max)(); }
|
||||
static inline T lowest() { return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)()); }
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
enum {
|
||||
@@ -104,12 +104,12 @@ template<typename T> struct NumTraits : GenericNumTraits<T>
|
||||
template<> struct NumTraits<float>
|
||||
: GenericNumTraits<float>
|
||||
{
|
||||
inline static float dummy_precision() { return 1e-5f; }
|
||||
static inline float dummy_precision() { return 1e-5f; }
|
||||
};
|
||||
|
||||
template<> struct NumTraits<double> : GenericNumTraits<double>
|
||||
{
|
||||
inline static double dummy_precision() { return 1e-12; }
|
||||
static inline double dummy_precision() { return 1e-12; }
|
||||
};
|
||||
|
||||
template<> struct NumTraits<long double>
|
||||
@@ -130,8 +130,8 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
|
||||
MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
|
||||
};
|
||||
|
||||
inline static Real epsilon() { return NumTraits<Real>::epsilon(); }
|
||||
inline static Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
|
||||
static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
|
||||
static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
|
||||
};
|
||||
|
||||
template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
|
||||
|
||||
@@ -511,7 +511,7 @@ class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesTyp
|
||||
|
||||
protected:
|
||||
|
||||
const typename IndicesType::Nested m_indices;
|
||||
typename IndicesType::Nested m_indices;
|
||||
};
|
||||
|
||||
/** \returns the matrix with the permutation applied to the columns.
|
||||
@@ -608,7 +608,7 @@ struct permut_matrix_product_retval
|
||||
|
||||
protected:
|
||||
const PermutationType& m_permutation;
|
||||
const typename MatrixType::Nested m_matrix;
|
||||
typename MatrixType::Nested m_matrix;
|
||||
};
|
||||
|
||||
/* Template partial specialization for transposed/inverse permutations */
|
||||
|
||||
@@ -34,13 +34,26 @@
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_ALWAYS_INLINE void check_rows_cols_for_overflow(Index rows, Index cols)
|
||||
{
|
||||
// http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
|
||||
// we assume Index is signed
|
||||
Index max_index = (size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed
|
||||
bool error = (rows < 0 || cols < 0) ? true
|
||||
: (rows == 0 || cols == 0) ? false
|
||||
: (rows > max_index / cols);
|
||||
if (error)
|
||||
throw_std_bad_alloc();
|
||||
}
|
||||
|
||||
template <typename Derived, typename OtherDerived = Derived, bool IsVector = static_cast<bool>(Derived::IsVectorAtCompileTime)> struct conservative_resize_like_impl;
|
||||
|
||||
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct matrix_swap_impl;
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/**
|
||||
/** \class PlainObjectBase
|
||||
* \brief %Dense storage base class for matrices and arrays.
|
||||
*
|
||||
* This class can be extended with the help of the plugin mechanism described on the page
|
||||
@@ -48,8 +61,29 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m
|
||||
*
|
||||
* \sa \ref TopicClassHierarchy
|
||||
*/
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
namespace internal {
|
||||
|
||||
// this is a warkaround to doxygen not being able to understand the inheritence logic
|
||||
// when it is hidden by the dense_xpr_base helper struct.
|
||||
template<typename Derived> struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase<Derived> {};
|
||||
/** This class is just a workaround for Doxygen and it does not not actually exist. */
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct dense_xpr_base_dispatcher_for_doxygen<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
: public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
|
||||
/** This class is just a workaround for Doxygen and it does not not actually exist. */
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct dense_xpr_base_dispatcher_for_doxygen<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
: public ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
template<typename Derived>
|
||||
class PlainObjectBase : public internal::dense_xpr_base_dispatcher_for_doxygen<Derived>
|
||||
#else
|
||||
template<typename Derived>
|
||||
class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
#endif
|
||||
{
|
||||
public:
|
||||
enum { Options = internal::traits<Derived>::Options };
|
||||
@@ -84,14 +118,12 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
|
||||
template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, Aligned, StrideType> type; };
|
||||
template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, Aligned, StrideType> type; };
|
||||
|
||||
|
||||
protected:
|
||||
DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
|
||||
|
||||
public:
|
||||
enum { NeedsToAlign = (!(Options&DontAlign))
|
||||
&& SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
|
||||
enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::Flags & AlignedBit) != 0 };
|
||||
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
|
||||
|
||||
Base& base() { return *static_cast<Base*>(this); }
|
||||
@@ -200,11 +232,13 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
|
||||
{
|
||||
#ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
|
||||
internal::check_rows_cols_for_overflow(rows, cols);
|
||||
Index size = rows*cols;
|
||||
bool size_changed = size != this->size();
|
||||
m_storage.resize(size, rows, cols);
|
||||
if(size_changed) EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
|
||||
#else
|
||||
internal::check_rows_cols_for_overflow(rows, cols);
|
||||
m_storage.resize(rows*cols, rows, cols);
|
||||
#endif
|
||||
}
|
||||
@@ -273,6 +307,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
|
||||
{
|
||||
const OtherDerived& other = _other.derived();
|
||||
internal::check_rows_cols_for_overflow(other.rows(), other.cols());
|
||||
const Index othersize = other.rows()*other.cols();
|
||||
if(RowsAtCompileTime == 1)
|
||||
{
|
||||
@@ -417,6 +452,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
: m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
|
||||
{
|
||||
_check_template_params();
|
||||
internal::check_rows_cols_for_overflow(other.derived().rows(), other.derived().cols());
|
||||
Base::operator=(other.derived());
|
||||
}
|
||||
|
||||
@@ -428,68 +464,68 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* \see class Map
|
||||
*/
|
||||
//@{
|
||||
inline static ConstMapType Map(const Scalar* data)
|
||||
static inline ConstMapType Map(const Scalar* data)
|
||||
{ return ConstMapType(data); }
|
||||
inline static MapType Map(Scalar* data)
|
||||
static inline MapType Map(Scalar* data)
|
||||
{ return MapType(data); }
|
||||
inline static ConstMapType Map(const Scalar* data, Index size)
|
||||
static inline ConstMapType Map(const Scalar* data, Index size)
|
||||
{ return ConstMapType(data, size); }
|
||||
inline static MapType Map(Scalar* data, Index size)
|
||||
static inline MapType Map(Scalar* data, Index size)
|
||||
{ return MapType(data, size); }
|
||||
inline static ConstMapType Map(const Scalar* data, Index rows, Index cols)
|
||||
static inline ConstMapType Map(const Scalar* data, Index rows, Index cols)
|
||||
{ return ConstMapType(data, rows, cols); }
|
||||
inline static MapType Map(Scalar* data, Index rows, Index cols)
|
||||
static inline MapType Map(Scalar* data, Index rows, Index cols)
|
||||
{ return MapType(data, rows, cols); }
|
||||
|
||||
inline static ConstAlignedMapType MapAligned(const Scalar* data)
|
||||
static inline ConstAlignedMapType MapAligned(const Scalar* data)
|
||||
{ return ConstAlignedMapType(data); }
|
||||
inline static AlignedMapType MapAligned(Scalar* data)
|
||||
static inline AlignedMapType MapAligned(Scalar* data)
|
||||
{ return AlignedMapType(data); }
|
||||
inline static ConstAlignedMapType MapAligned(const Scalar* data, Index size)
|
||||
static inline ConstAlignedMapType MapAligned(const Scalar* data, Index size)
|
||||
{ return ConstAlignedMapType(data, size); }
|
||||
inline static AlignedMapType MapAligned(Scalar* data, Index size)
|
||||
static inline AlignedMapType MapAligned(Scalar* data, Index size)
|
||||
{ return AlignedMapType(data, size); }
|
||||
inline static ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
|
||||
static inline ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
|
||||
{ return ConstAlignedMapType(data, rows, cols); }
|
||||
inline static AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)
|
||||
static inline AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)
|
||||
{ return AlignedMapType(data, rows, cols); }
|
||||
|
||||
template<int Outer, int Inner>
|
||||
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)
|
||||
static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)
|
||||
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, stride); }
|
||||
template<int Outer, int Inner>
|
||||
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)
|
||||
static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)
|
||||
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, stride); }
|
||||
template<int Outer, int Inner>
|
||||
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
|
||||
static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
|
||||
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, size, stride); }
|
||||
template<int Outer, int Inner>
|
||||
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
|
||||
static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
|
||||
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
|
||||
template<int Outer, int Inner>
|
||||
inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
|
||||
static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
|
||||
{ return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
|
||||
template<int Outer, int Inner>
|
||||
inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
|
||||
static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
|
||||
{ return typename StridedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
|
||||
|
||||
template<int Outer, int Inner>
|
||||
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const Stride<Outer, Inner>& stride)
|
||||
static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const Stride<Outer, Inner>& stride)
|
||||
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
|
||||
template<int Outer, int Inner>
|
||||
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)
|
||||
static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)
|
||||
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
|
||||
template<int Outer, int Inner>
|
||||
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
|
||||
static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
|
||||
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
|
||||
template<int Outer, int Inner>
|
||||
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
|
||||
static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
|
||||
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
|
||||
template<int Outer, int Inner>
|
||||
inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
|
||||
static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
|
||||
{ return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
|
||||
template<int Outer, int Inner>
|
||||
inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
|
||||
static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
|
||||
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
|
||||
//@}
|
||||
|
||||
@@ -579,8 +615,12 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
template<typename T0, typename T1>
|
||||
EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
|
||||
bool(NumTraits<T1>::IsInteger),
|
||||
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
|
||||
eigen_assert(rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
|
||||
&& cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
|
||||
internal::check_rows_cols_for_overflow(rows, cols);
|
||||
m_storage.resize(rows*cols,rows,cols);
|
||||
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
|
||||
}
|
||||
@@ -607,7 +647,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
|
||||
public:
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
EIGEN_STRONG_INLINE static void _check_template_params()
|
||||
static EIGEN_STRONG_INLINE void _check_template_params()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
|
||||
&& EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (Options&RowMajor)==0)
|
||||
@@ -638,6 +678,7 @@ struct internal::conservative_resize_like_impl
|
||||
if ( ( Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows
|
||||
(!Derived::IsRowMajor && _this.rows() == rows) ) // column-major and we change only the number of columns
|
||||
{
|
||||
internal::check_rows_cols_for_overflow(rows, cols);
|
||||
_this.derived().m_storage.conservativeResize(rows*cols,rows,cols);
|
||||
}
|
||||
else
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
@@ -26,600 +25,89 @@
|
||||
#ifndef EIGEN_PRODUCT_H
|
||||
#define EIGEN_PRODUCT_H
|
||||
|
||||
/** \class GeneralProduct
|
||||
template<typename Lhs, typename Rhs> class Product;
|
||||
template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl;
|
||||
|
||||
/** \class Product
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the product of two general matrices or vectors
|
||||
* \brief Expression of the product of two arbitrary matrices or vectors
|
||||
*
|
||||
* \param LhsNested the type used to store the left-hand side
|
||||
* \param RhsNested the type used to store the right-hand side
|
||||
* \param ProductMode the type of the product
|
||||
* \param Lhs the type of the left-hand side expression
|
||||
* \param Rhs the type of the right-hand side expression
|
||||
*
|
||||
* This class represents an expression of the product of two general matrices.
|
||||
* We call a general matrix, a dense matrix with full storage. For instance,
|
||||
* This excludes triangular, selfadjoint, and sparse matrices.
|
||||
* It is the return type of the operator* between general matrices. Its template
|
||||
* arguments are determined automatically by ProductReturnType. Therefore,
|
||||
* GeneralProduct should never be used direclty. To determine the result type of a
|
||||
* function which involves a matrix product, use ProductReturnType::Type.
|
||||
* This class represents an expression of the product of two arbitrary matrices.
|
||||
*
|
||||
* \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
|
||||
*/
|
||||
template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
|
||||
class GeneralProduct;
|
||||
|
||||
enum {
|
||||
Large = 2,
|
||||
Small = 3
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<int Rows, int Cols, int Depth> struct product_type_selector;
|
||||
|
||||
template<int Size, int MaxSize> struct product_size_category
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct traits<Product<Lhs, Rhs> >
|
||||
{
|
||||
enum { is_large = MaxSize == Dynamic ||
|
||||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
|
||||
value = is_large ? Large
|
||||
: Size == 1 ? 1
|
||||
: Small
|
||||
typedef MatrixXpr XprKind;
|
||||
typedef typename remove_all<Lhs>::type LhsCleaned;
|
||||
typedef typename remove_all<Rhs>::type RhsCleaned;
|
||||
typedef typename scalar_product_traits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;
|
||||
typedef typename promote_storage_type<typename traits<LhsCleaned>::StorageKind,
|
||||
typename traits<RhsCleaned>::StorageKind>::ret StorageKind;
|
||||
typedef typename promote_index_type<typename traits<LhsCleaned>::Index,
|
||||
typename traits<RhsCleaned>::Index>::type Index;
|
||||
enum {
|
||||
RowsAtCompileTime = LhsCleaned::RowsAtCompileTime,
|
||||
ColsAtCompileTime = RhsCleaned::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = LhsCleaned::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = RhsCleaned::MaxColsAtCompileTime,
|
||||
Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0), // TODO should be no storage order
|
||||
CoeffReadCost = 0 // TODO CoeffReadCost should not be part of the expression traits
|
||||
};
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs> struct product_type
|
||||
{
|
||||
typedef typename remove_all<Lhs>::type _Lhs;
|
||||
typedef typename remove_all<Rhs>::type _Rhs;
|
||||
enum {
|
||||
MaxRows = _Lhs::MaxRowsAtCompileTime,
|
||||
Rows = _Lhs::RowsAtCompileTime,
|
||||
MaxCols = _Rhs::MaxColsAtCompileTime,
|
||||
Cols = _Rhs::ColsAtCompileTime,
|
||||
MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
|
||||
_Rhs::MaxRowsAtCompileTime),
|
||||
Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
|
||||
_Rhs::RowsAtCompileTime),
|
||||
LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
};
|
||||
|
||||
// the splitting into different lines of code here, introducing the _select enums and the typedef below,
|
||||
// is to work around an internal compiler error with gcc 4.1 and 4.2.
|
||||
private:
|
||||
enum {
|
||||
rows_select = product_size_category<Rows,MaxRows>::value,
|
||||
cols_select = product_size_category<Cols,MaxCols>::value,
|
||||
depth_select = product_size_category<Depth,MaxDepth>::value
|
||||
};
|
||||
typedef product_type_selector<rows_select, cols_select, depth_select> selector;
|
||||
|
||||
public:
|
||||
enum {
|
||||
value = selector::ret
|
||||
};
|
||||
#ifdef EIGEN_DEBUG_PRODUCT
|
||||
static void debug()
|
||||
{
|
||||
EIGEN_DEBUG_VAR(Rows);
|
||||
EIGEN_DEBUG_VAR(Cols);
|
||||
EIGEN_DEBUG_VAR(Depth);
|
||||
EIGEN_DEBUG_VAR(rows_select);
|
||||
EIGEN_DEBUG_VAR(cols_select);
|
||||
EIGEN_DEBUG_VAR(depth_select);
|
||||
EIGEN_DEBUG_VAR(value);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
/* The following allows to select the kind of product at compile time
|
||||
* based on the three dimensions of the product.
|
||||
* This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
|
||||
// FIXME I'm not sure the current mapping is the ideal one.
|
||||
template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
|
||||
template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
|
||||
template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
|
||||
template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
|
||||
template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; };
|
||||
template<> struct product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \class ProductReturnType
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Helper class to get the correct and optimized returned type of operator*
|
||||
*
|
||||
* \param Lhs the type of the left-hand side
|
||||
* \param Rhs the type of the right-hand side
|
||||
* \param ProductMode the type of the product (determined automatically by internal::product_mode)
|
||||
*
|
||||
* This class defines the typename Type representing the optimized product expression
|
||||
* between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
|
||||
* is the recommended way to define the result type of a function returning an expression
|
||||
* which involve a matrix product. The class Product should never be
|
||||
* used directly.
|
||||
*
|
||||
* \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
|
||||
*/
|
||||
template<typename Lhs, typename Rhs, int ProductType>
|
||||
struct ProductReturnType
|
||||
{
|
||||
// TODO use the nested type to reduce instanciations ????
|
||||
// typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
|
||||
// typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
|
||||
|
||||
typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
|
||||
{
|
||||
typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
|
||||
typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
|
||||
typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
|
||||
{
|
||||
typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
|
||||
typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
|
||||
typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
|
||||
};
|
||||
|
||||
// this is a workaround for sun CC
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
|
||||
{};
|
||||
|
||||
/***********************************************************************
|
||||
* Implementation of Inner Vector Vector Product
|
||||
***********************************************************************/
|
||||
|
||||
// FIXME : maybe the "inner product" could return a Scalar
|
||||
// instead of a 1x1 matrix ??
|
||||
// Pro: more natural for the user
|
||||
// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
|
||||
// product ends up to a row-vector times col-vector product... To tackle this use
|
||||
// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
|
||||
: traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
|
||||
{};
|
||||
|
||||
}
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
class GeneralProduct<Lhs, Rhs, InnerProduct>
|
||||
: internal::no_assignment_operator,
|
||||
public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
|
||||
{
|
||||
typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
|
||||
public:
|
||||
GeneralProduct(const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
|
||||
Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
|
||||
}
|
||||
|
||||
/** Convertion to scalar */
|
||||
operator const typename Base::Scalar() const {
|
||||
return Base::coeff(0,0);
|
||||
}
|
||||
};
|
||||
|
||||
/***********************************************************************
|
||||
* Implementation of Outer Vector Vector Product
|
||||
***********************************************************************/
|
||||
|
||||
namespace internal {
|
||||
template<int StorageOrder> struct outer_product_selector;
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
|
||||
: traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
|
||||
{};
|
||||
|
||||
}
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
class GeneralProduct<Lhs, Rhs, OuterProduct>
|
||||
: public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
|
||||
class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
|
||||
typename internal::traits<Rhs>::StorageKind>::ret>
|
||||
{
|
||||
public:
|
||||
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
|
||||
|
||||
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
}
|
||||
|
||||
template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
|
||||
{
|
||||
internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
|
||||
}
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<> struct outer_product_selector<ColMajor> {
|
||||
template<typename ProductType, typename Dest>
|
||||
static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
|
||||
typedef typename Dest::Index Index;
|
||||
// FIXME make sure lhs is sequentially stored
|
||||
// FIXME not very good if rhs is real and lhs complex while alpha is real too
|
||||
const Index cols = dest.cols();
|
||||
for (Index j=0; j<cols; ++j)
|
||||
dest.col(j) += (alpha * prod.rhs().coeff(j)) * prod.lhs();
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct outer_product_selector<RowMajor> {
|
||||
template<typename ProductType, typename Dest>
|
||||
static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
|
||||
typedef typename Dest::Index Index;
|
||||
// FIXME make sure rhs is sequentially stored
|
||||
// FIXME not very good if lhs is real and rhs complex while alpha is real too
|
||||
const Index rows = dest.rows();
|
||||
for (Index i=0; i<rows; ++i)
|
||||
dest.row(i) += (alpha * prod.lhs().coeff(i)) * prod.rhs();
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/***********************************************************************
|
||||
* Implementation of General Matrix Vector Product
|
||||
***********************************************************************/
|
||||
|
||||
/* According to the shape/flags of the matrix we have to distinghish 3 different cases:
|
||||
* 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
|
||||
* 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
|
||||
* 3 - all other cases are handled using a simple loop along the outer-storage direction.
|
||||
* Therefore we need a lower level meta selector.
|
||||
* Furthermore, if the matrix is the rhs, then the product has to be transposed.
|
||||
*/
|
||||
namespace internal {
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
|
||||
: traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
|
||||
{};
|
||||
|
||||
template<int Side, int StorageOrder, bool BlasCompatible>
|
||||
struct gemv_selector;
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
class GeneralProduct<Lhs, Rhs, GemvProduct>
|
||||
: public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
|
||||
{
|
||||
public:
|
||||
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
|
||||
|
||||
typedef typename Lhs::Scalar LhsScalar;
|
||||
typedef typename Rhs::Scalar RhsScalar;
|
||||
|
||||
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
|
||||
{
|
||||
// EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
|
||||
// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
}
|
||||
|
||||
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
|
||||
typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
|
||||
|
||||
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
|
||||
{
|
||||
eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
|
||||
internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
|
||||
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
|
||||
}
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
// The vector is on the left => transposition
|
||||
template<int StorageOrder, bool BlasCompatible>
|
||||
struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
|
||||
{
|
||||
template<typename ProductType, typename Dest>
|
||||
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
|
||||
{
|
||||
Transpose<Dest> destT(dest);
|
||||
enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
|
||||
gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
|
||||
::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
|
||||
(prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
|
||||
|
||||
template<typename Scalar,int Size,int MaxSize>
|
||||
struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
|
||||
};
|
||||
|
||||
template<typename Scalar,int Size>
|
||||
struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Scalar* data() { return 0; }
|
||||
};
|
||||
|
||||
template<typename Scalar,int Size,int MaxSize>
|
||||
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
|
||||
{
|
||||
#if EIGEN_ALIGN_STATICALLY
|
||||
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
|
||||
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
|
||||
#else
|
||||
// Some architectures cannot align on the stack,
|
||||
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
|
||||
enum {
|
||||
ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
|
||||
PacketSize = internal::packet_traits<Scalar>::size
|
||||
};
|
||||
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
|
||||
EIGEN_STRONG_INLINE Scalar* data() {
|
||||
return ForceAlignment
|
||||
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(15))) + 16)
|
||||
: m_data.array;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
template<> struct gemv_selector<OnTheRight,ColMajor,true>
|
||||
{
|
||||
template<typename ProductType, typename Dest>
|
||||
static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
|
||||
{
|
||||
typedef typename ProductType::Index Index;
|
||||
typedef typename ProductType::LhsScalar LhsScalar;
|
||||
typedef typename ProductType::RhsScalar RhsScalar;
|
||||
typedef typename ProductType::Scalar ResScalar;
|
||||
typedef typename ProductType::RealScalar RealScalar;
|
||||
typedef typename ProductType::ActualLhsType ActualLhsType;
|
||||
typedef typename ProductType::ActualRhsType ActualRhsType;
|
||||
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
|
||||
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
|
||||
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
|
||||
|
||||
const ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
|
||||
const ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
|
||||
|
||||
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
|
||||
* RhsBlasTraits::extractScalarFactor(prod.rhs());
|
||||
|
||||
enum {
|
||||
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
|
||||
// on, the other hand it is good for the cache to pack the vector anyways...
|
||||
EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
|
||||
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
|
||||
MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
|
||||
};
|
||||
|
||||
gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
|
||||
|
||||
// this is written like this (i.e., with a ?:) to workaround an ICE with ICC 12
|
||||
bool alphaIsCompatible = (!ComplexByReal) ? true : (imag(actualAlpha)==RealScalar(0));
|
||||
bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
||||
|
||||
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
|
||||
typedef typename ProductImpl<
|
||||
Lhs, Rhs,
|
||||
typename internal::promote_storage_type<typename Lhs::StorageKind,
|
||||
typename Rhs::StorageKind>::ret>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
|
||||
evalToDest ? dest.data() : static_dest.data());
|
||||
|
||||
if(!evalToDest)
|
||||
typedef typename Lhs::Nested LhsNested;
|
||||
typedef typename Rhs::Nested RhsNested;
|
||||
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
|
||||
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
|
||||
|
||||
Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
int size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
if(!alphaIsCompatible)
|
||||
{
|
||||
MappedDest(actualDestPtr, dest.size()).setZero();
|
||||
compatibleAlpha = RhsScalar(1);
|
||||
}
|
||||
else
|
||||
MappedDest(actualDestPtr, dest.size()) = dest;
|
||||
eigen_assert(lhs.cols() == rhs.rows()
|
||||
&& "invalid matrix product"
|
||||
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
|
||||
}
|
||||
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
&actualLhs.coeffRef(0,0), actualLhs.outerStride(),
|
||||
actualRhs.data(), actualRhs.innerStride(),
|
||||
actualDestPtr, 1,
|
||||
compatibleAlpha);
|
||||
inline Index rows() const { return m_lhs.rows(); }
|
||||
inline Index cols() const { return m_rhs.cols(); }
|
||||
|
||||
if (!evalToDest)
|
||||
{
|
||||
if(!alphaIsCompatible)
|
||||
dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
|
||||
else
|
||||
dest = MappedDest(actualDestPtr, dest.size());
|
||||
}
|
||||
}
|
||||
const LhsNestedCleaned& lhs() const { return m_lhs; }
|
||||
const RhsNestedCleaned& rhs() const { return m_rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
const LhsNested m_lhs;
|
||||
const RhsNested m_rhs;
|
||||
};
|
||||
|
||||
template<> struct gemv_selector<OnTheRight,RowMajor,true>
|
||||
template<typename Lhs, typename Rhs>
|
||||
class ProductImpl<Lhs,Rhs,Dense> : public internal::dense_xpr_base<Product<Lhs,Rhs> >::type
|
||||
{
|
||||
template<typename ProductType, typename Dest>
|
||||
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
|
||||
{
|
||||
typedef typename ProductType::LhsScalar LhsScalar;
|
||||
typedef typename ProductType::RhsScalar RhsScalar;
|
||||
typedef typename ProductType::Scalar ResScalar;
|
||||
typedef typename ProductType::Index Index;
|
||||
typedef typename ProductType::ActualLhsType ActualLhsType;
|
||||
typedef typename ProductType::ActualRhsType ActualRhsType;
|
||||
typedef typename ProductType::_ActualRhsType _ActualRhsType;
|
||||
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
|
||||
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
|
||||
typedef Product<Lhs, Rhs> Derived;
|
||||
public:
|
||||
|
||||
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
|
||||
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
|
||||
|
||||
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
|
||||
* RhsBlasTraits::extractScalarFactor(prod.rhs());
|
||||
|
||||
enum {
|
||||
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
|
||||
// on, the other hand it is good for the cache to pack the vector anyways...
|
||||
DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
|
||||
};
|
||||
|
||||
gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
|
||||
DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
|
||||
|
||||
if(!DirectlyUseRhs)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
int size = actualRhs.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
|
||||
}
|
||||
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
&actualLhs.coeffRef(0,0), actualLhs.outerStride(),
|
||||
actualRhsPtr, 1,
|
||||
&dest.coeffRef(0,0), dest.innerStride(),
|
||||
actualAlpha);
|
||||
}
|
||||
typedef typename internal::dense_xpr_base<Product<Lhs, Rhs> >::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
|
||||
};
|
||||
|
||||
template<> struct gemv_selector<OnTheRight,ColMajor,false>
|
||||
{
|
||||
template<typename ProductType, typename Dest>
|
||||
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
|
||||
{
|
||||
typedef typename Dest::Index Index;
|
||||
// TODO makes sure dest is sequentially stored in memory, otherwise use a temp
|
||||
const Index size = prod.rhs().rows();
|
||||
for(Index k=0; k<size; ++k)
|
||||
dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct gemv_selector<OnTheRight,RowMajor,false>
|
||||
{
|
||||
template<typename ProductType, typename Dest>
|
||||
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
|
||||
{
|
||||
typedef typename Dest::Index Index;
|
||||
// TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
|
||||
const Index rows = prod.rows();
|
||||
for(Index i=0; i<rows; ++i)
|
||||
dest.coeffRef(i) += alpha * (prod.lhs().row(i).cwiseProduct(prod.rhs().transpose())).sum();
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of matrix base methods
|
||||
***************************************************************************/
|
||||
|
||||
/** \returns the matrix product of \c *this and \a other.
|
||||
*
|
||||
* \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*().
|
||||
*
|
||||
* \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline const typename ProductReturnType<Derived,OtherDerived>::Type
|
||||
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
|
||||
{
|
||||
// A note regarding the function declaration: In MSVC, this function will sometimes
|
||||
// not be inlined since DenseStorage is an unwindable object for dynamic
|
||||
// matrices and product types are holding a member to store the result.
|
||||
// Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
|
||||
enum {
|
||||
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|
||||
|| OtherDerived::RowsAtCompileTime==Dynamic
|
||||
|| int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
|
||||
AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
|
||||
SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
|
||||
};
|
||||
// note to the lost user:
|
||||
// * for a dot product use: v1.dot(v2)
|
||||
// * for a coeff-wise product use: v1.cwiseProduct(v2)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
|
||||
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
|
||||
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
|
||||
#ifdef EIGEN_DEBUG_PRODUCT
|
||||
internal::product_type<Derived,OtherDerived>::debug();
|
||||
#endif
|
||||
return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
|
||||
*
|
||||
* The returned product will behave like any other expressions: the coefficients of the product will be
|
||||
* computed once at a time as requested. This might be useful in some extremely rare cases when only
|
||||
* a small and no coherent fraction of the result's coefficients have to be computed.
|
||||
*
|
||||
* \warning This version of the matrix product can be much much slower. So use it only if you know
|
||||
* what you are doing and that you measured a true speed improvement.
|
||||
*
|
||||
* \sa operator*(const MatrixBase&)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
const typename LazyProductReturnType<Derived,OtherDerived>::Type
|
||||
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
|
||||
{
|
||||
enum {
|
||||
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|
||||
|| OtherDerived::RowsAtCompileTime==Dynamic
|
||||
|| int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
|
||||
AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
|
||||
SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
|
||||
};
|
||||
// note to the lost user:
|
||||
// * for a dot product use: v1.dot(v2)
|
||||
// * for a coeff-wise product use: v1.cwiseProduct(v2)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
|
||||
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
|
||||
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
|
||||
|
||||
return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
|
||||
}
|
||||
|
||||
#endif // EIGEN_PRODUCT_H
|
||||
|
||||
@@ -115,10 +115,10 @@ class ProductBase : public MatrixBase<Derived>
|
||||
inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,Scalar(1)); }
|
||||
|
||||
template<typename Dest>
|
||||
inline void addTo(Dest& dst) const { scaleAndAddTo(dst,1); }
|
||||
inline void addTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(1)); }
|
||||
|
||||
template<typename Dest>
|
||||
inline void subTo(Dest& dst) const { scaleAndAddTo(dst,-1); }
|
||||
inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); }
|
||||
|
||||
template<typename Dest>
|
||||
inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { derived().scaleAndAddTo(dst,alpha); }
|
||||
@@ -179,8 +179,8 @@ class ProductBase : public MatrixBase<Derived>
|
||||
|
||||
protected:
|
||||
|
||||
const LhsNested m_lhs;
|
||||
const RhsNested m_rhs;
|
||||
LhsNested m_lhs;
|
||||
RhsNested m_rhs;
|
||||
|
||||
mutable PlainObject m_result;
|
||||
};
|
||||
|
||||
@@ -95,7 +95,7 @@ struct redux_novec_unroller
|
||||
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
|
||||
EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func& func)
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
|
||||
{
|
||||
return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
|
||||
redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
|
||||
@@ -112,7 +112,7 @@ struct redux_novec_unroller<Func, Derived, Start, 1>
|
||||
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
|
||||
EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func&)
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
|
||||
{
|
||||
return mat.coeffByOuterInner(outer, inner);
|
||||
}
|
||||
@@ -125,7 +125,7 @@ template<typename Func, typename Derived, int Start>
|
||||
struct redux_novec_unroller<Func, Derived, Start, 0>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
EIGEN_STRONG_INLINE static Scalar run(const Derived&, const Func&) { return Scalar(); }
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
|
||||
};
|
||||
|
||||
/*** vectorization ***/
|
||||
@@ -141,7 +141,7 @@ struct redux_vec_unroller
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename packet_traits<Scalar>::type PacketScalar;
|
||||
|
||||
EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func& func)
|
||||
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
|
||||
{
|
||||
return func.packetOp(
|
||||
redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
|
||||
@@ -162,7 +162,7 @@ struct redux_vec_unroller<Func, Derived, Start, 1>
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename packet_traits<Scalar>::type PacketScalar;
|
||||
|
||||
EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func&)
|
||||
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
|
||||
{
|
||||
return mat.template packetByOuterInner<alignment>(outer, inner);
|
||||
}
|
||||
@@ -214,20 +214,33 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
|
||||
const Index size = mat.size();
|
||||
eigen_assert(size && "you are using an empty matrix");
|
||||
const Index packetSize = packet_traits<Scalar>::size;
|
||||
const Index alignedStart = first_aligned(mat);
|
||||
const Index alignedStart = internal::first_aligned(mat);
|
||||
enum {
|
||||
alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
|
||||
? Aligned : Unaligned
|
||||
};
|
||||
const Index alignedSize = ((size-alignedStart)/packetSize)*packetSize;
|
||||
const Index alignedEnd = alignedStart + alignedSize;
|
||||
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
|
||||
const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
|
||||
const Index alignedEnd2 = alignedStart + alignedSize2;
|
||||
const Index alignedEnd = alignedStart + alignedSize;
|
||||
Scalar res;
|
||||
if(alignedSize)
|
||||
{
|
||||
PacketScalar packet_res = mat.template packet<alignment>(alignedStart);
|
||||
for(Index index = alignedStart + packetSize; index < alignedEnd; index += packetSize)
|
||||
packet_res = func.packetOp(packet_res, mat.template packet<alignment>(index));
|
||||
res = func.predux(packet_res);
|
||||
PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
|
||||
if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
|
||||
{
|
||||
PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
|
||||
for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
|
||||
{
|
||||
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
|
||||
packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
|
||||
}
|
||||
|
||||
packet_res0 = func.packetOp(packet_res0,packet_res1);
|
||||
if(alignedEnd>alignedEnd2)
|
||||
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
|
||||
}
|
||||
res = func.predux(packet_res0);
|
||||
|
||||
for(Index index = 0; index < alignedStart; ++index)
|
||||
res = func(res,mat.coeff(index));
|
||||
@@ -296,7 +309,7 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
|
||||
Size = Derived::SizeAtCompileTime,
|
||||
VectorizedSize = (Size / PacketSize) * PacketSize
|
||||
};
|
||||
EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func)
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
|
||||
{
|
||||
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
|
||||
Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
|
||||
|
||||
@@ -122,9 +122,13 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
|
||||
return m_matrix.template packet<LoadMode>(actual_row, actual_col);
|
||||
}
|
||||
|
||||
const typename internal::remove_all<typename MatrixType::Nested>::type& nestedExpression() const
|
||||
{
|
||||
return m_matrix;
|
||||
}
|
||||
|
||||
protected:
|
||||
const typename MatrixType::Nested m_matrix;
|
||||
typename MatrixType::Nested m_matrix;
|
||||
const internal::variable_if_dynamic<Index, RowFactor> m_rowFactor;
|
||||
const internal::variable_if_dynamic<Index, ColFactor> m_colFactor;
|
||||
};
|
||||
|
||||
@@ -183,8 +183,14 @@ template<typename MatrixType, int Direction> class Reverse
|
||||
m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x));
|
||||
}
|
||||
|
||||
const typename internal::remove_all<typename MatrixType::Nested>::type&
|
||||
nestedExpression() const
|
||||
{
|
||||
return m_matrix;
|
||||
}
|
||||
|
||||
protected:
|
||||
const typename MatrixType::Nested m_matrix;
|
||||
typename MatrixType::Nested m_matrix;
|
||||
};
|
||||
|
||||
/** \returns an expression of the reverse of *this.
|
||||
|
||||
@@ -101,10 +101,25 @@ class Select : internal::no_assignment_operator,
|
||||
return m_else.coeff(i);
|
||||
}
|
||||
|
||||
const ConditionMatrixType& conditionMatrix() const
|
||||
{
|
||||
return m_condition;
|
||||
}
|
||||
|
||||
const ThenMatrixType& thenMatrix() const
|
||||
{
|
||||
return m_then;
|
||||
}
|
||||
|
||||
const ElseMatrixType& elseMatrix() const
|
||||
{
|
||||
return m_else;
|
||||
}
|
||||
|
||||
protected:
|
||||
const typename ConditionMatrixType::Nested m_condition;
|
||||
const typename ThenMatrixType::Nested m_then;
|
||||
const typename ElseMatrixType::Nested m_else;
|
||||
typename ConditionMatrixType::Nested m_condition;
|
||||
typename ThenMatrixType::Nested m_then;
|
||||
typename ElseMatrixType::Nested m_else;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -82,7 +82,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
};
|
||||
typedef typename MatrixType::PlainObject PlainObject;
|
||||
|
||||
inline SelfAdjointView(const MatrixType& matrix) : m_matrix(matrix)
|
||||
inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
|
||||
{}
|
||||
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
@@ -199,7 +199,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
#endif
|
||||
|
||||
protected:
|
||||
const MatrixTypeNested m_matrix;
|
||||
MatrixTypeNested m_matrix;
|
||||
};
|
||||
|
||||
|
||||
@@ -222,7 +222,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
|
||||
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
|
||||
};
|
||||
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);
|
||||
|
||||
@@ -236,7 +236,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
|
||||
template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
|
||||
{
|
||||
inline static void run(Derived1 &, const Derived2 &) {}
|
||||
static inline void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
|
||||
@@ -247,7 +247,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
|
||||
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
|
||||
};
|
||||
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);
|
||||
|
||||
@@ -261,14 +261,14 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
|
||||
template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
|
||||
{
|
||||
inline static void run(Derived1 &, const Derived2 &) {}
|
||||
static inline void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
{
|
||||
@@ -285,7 +285,7 @@ struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dyn
|
||||
template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
|
||||
{
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
for(Index i = 0; i < dst.rows(); ++i)
|
||||
|
||||
@@ -163,6 +163,16 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
|
||||
return Base::operator=(rhs);
|
||||
}
|
||||
|
||||
Lhs& expression() const
|
||||
{
|
||||
return m_matrix;
|
||||
}
|
||||
|
||||
const BinaryOp& functor() const
|
||||
{
|
||||
return m_functor;
|
||||
}
|
||||
|
||||
protected:
|
||||
Lhs& m_matrix;
|
||||
const BinaryOp& m_functor;
|
||||
|
||||
@@ -100,7 +100,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
|
||||
typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
|
||||
static void run(const Lhs& lhs, Rhs& rhs)
|
||||
{
|
||||
const ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
|
||||
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsProductTraits::extract(lhs);
|
||||
triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
|
||||
(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor>
|
||||
::run(lhs.rows(), Side==OnTheLeft? rhs.cols() : rhs.rows(), &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride());
|
||||
@@ -177,10 +177,8 @@ template<int Side, typename OtherDerived>
|
||||
void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
|
||||
{
|
||||
OtherDerived& other = _other.const_cast_derived();
|
||||
eigen_assert(cols() == rows());
|
||||
eigen_assert( (Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols()) );
|
||||
eigen_assert(!(Mode & ZeroDiag));
|
||||
eigen_assert((Mode & (Upper|Lower)) != 0);
|
||||
eigen_assert( cols() == rows() && ((Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols())) );
|
||||
eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
|
||||
|
||||
enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit && OtherDerived::IsVectorAtCompileTime };
|
||||
typedef typename internal::conditional<copy,
|
||||
@@ -255,7 +253,7 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv
|
||||
|
||||
protected:
|
||||
const TriangularType& m_triangularMatrix;
|
||||
const typename Rhs::Nested m_rhs;
|
||||
typename Rhs::Nested m_rhs;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
@@ -58,9 +58,9 @@ MatrixBase<Derived>::stableNorm() const
|
||||
{
|
||||
using std::min;
|
||||
const Index blockSize = 4096;
|
||||
RealScalar scale = 0;
|
||||
RealScalar invScale = 1;
|
||||
RealScalar ssq = 0; // sum of square
|
||||
RealScalar scale(0);
|
||||
RealScalar invScale(1);
|
||||
RealScalar ssq(0); // sum of square
|
||||
enum {
|
||||
Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0
|
||||
};
|
||||
|
||||
@@ -52,6 +52,15 @@ template<typename ExpressionType> class SwapWrapper
|
||||
inline Index cols() const { return m_expression.cols(); }
|
||||
inline Index outerStride() const { return m_expression.outerStride(); }
|
||||
inline Index innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
typedef typename internal::conditional<
|
||||
internal::is_lvalue<ExpressionType>::value,
|
||||
Scalar,
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
||||
inline const Scalar* data() const { return m_expression.data(); }
|
||||
|
||||
inline Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
@@ -119,6 +128,8 @@ template<typename ExpressionType> class SwapWrapper
|
||||
_other.template writePacket<LoadMode>(index, tmp);
|
||||
}
|
||||
|
||||
ExpressionType& expression() const { return m_expression; }
|
||||
|
||||
protected:
|
||||
ExpressionType& m_expression;
|
||||
};
|
||||
|
||||
@@ -91,7 +91,7 @@ template<typename MatrixType> class Transpose
|
||||
nestedExpression() { return m_matrix.const_cast_derived(); }
|
||||
|
||||
protected:
|
||||
const typename MatrixType::Nested m_matrix;
|
||||
typename MatrixType::Nested m_matrix;
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
@@ -152,12 +152,12 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
|
||||
return derived().nestedExpression().coeffRef(index);
|
||||
}
|
||||
|
||||
inline const CoeffReturnType coeff(Index row, Index col) const
|
||||
inline CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return derived().nestedExpression().coeff(col, row);
|
||||
}
|
||||
|
||||
inline const CoeffReturnType coeff(Index index) const
|
||||
inline CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return derived().nestedExpression().coeff(index);
|
||||
}
|
||||
|
||||
@@ -404,7 +404,7 @@ struct transposition_matrix_product_retval
|
||||
|
||||
protected:
|
||||
const TranspositionType& m_transpositions;
|
||||
const typename MatrixType::Nested m_matrix;
|
||||
typename MatrixType::Nested m_matrix;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
@@ -273,11 +273,8 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const
|
||||
{ return m_matrix.conjugate(); }
|
||||
|
||||
/** \sa MatrixBase::adjoint() */
|
||||
inline TriangularView<typename MatrixType::AdjointReturnType,TransposeMode> adjoint()
|
||||
{ return m_matrix.adjoint(); }
|
||||
/** \sa MatrixBase::adjoint() const */
|
||||
inline const TriangularView<typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
|
||||
inline const TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
|
||||
{ return m_matrix.adjoint(); }
|
||||
|
||||
/** \sa MatrixBase::transpose() */
|
||||
@@ -288,11 +285,13 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
}
|
||||
/** \sa MatrixBase::transpose() const */
|
||||
inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const
|
||||
{ return m_matrix.transpose(); }
|
||||
{
|
||||
return m_matrix.transpose();
|
||||
}
|
||||
|
||||
/** Efficient triangular matrix times vector/matrix product */
|
||||
template<typename OtherDerived>
|
||||
TriangularProduct<Mode,true,MatrixType,false,OtherDerived,OtherDerived::IsVectorAtCompileTime>
|
||||
TriangularProduct<Mode,true,MatrixType,false,OtherDerived, OtherDerived::IsVectorAtCompileTime>
|
||||
operator*(const MatrixBase<OtherDerived>& rhs) const
|
||||
{
|
||||
return TriangularProduct
|
||||
@@ -375,7 +374,8 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
template<typename OtherDerived>
|
||||
void swap(MatrixBase<OtherDerived> const & other)
|
||||
{
|
||||
TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived());
|
||||
SwapWrapper<MatrixType> swaper(const_cast<MatrixType&>(m_matrix));
|
||||
TriangularView<SwapWrapper<MatrixType>,Mode>(swaper).lazyAssign(other.derived());
|
||||
}
|
||||
|
||||
Scalar determinant() const
|
||||
@@ -433,7 +433,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha);
|
||||
|
||||
const MatrixTypeNested m_matrix;
|
||||
MatrixTypeNested m_matrix;
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
@@ -452,7 +452,7 @@ struct triangular_assignment_selector
|
||||
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src);
|
||||
|
||||
@@ -480,7 +480,7 @@ struct triangular_assignment_selector
|
||||
template<typename Derived1, typename Derived2, unsigned int Mode, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite>
|
||||
{
|
||||
inline static void run(Derived1 &, const Derived2 &) {}
|
||||
static inline void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
@@ -488,7 +488,7 @@ struct triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearO
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
{
|
||||
@@ -506,7 +506,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
{
|
||||
@@ -524,7 +524,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic, ClearOpposite>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
{
|
||||
@@ -542,7 +542,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
{
|
||||
@@ -560,7 +560,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
{
|
||||
@@ -580,7 +580,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
|
||||
struct triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite>
|
||||
{
|
||||
typedef typename Derived1::Index Index;
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
static inline void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(Index j = 0; j < dst.cols(); ++j)
|
||||
{
|
||||
|
||||
@@ -110,7 +110,7 @@ class PartialReduxExpr : internal::no_assignment_operator,
|
||||
}
|
||||
|
||||
protected:
|
||||
const MatrixTypeNested m_matrix;
|
||||
MatrixTypeNested m_matrix;
|
||||
const MemberOp m_functor;
|
||||
};
|
||||
|
||||
@@ -237,7 +237,10 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
typename ExtendedType<OtherDerived>::Type
|
||||
extendedTo(const DenseBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxColsAtCompileTime==1),
|
||||
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxRowsAtCompileTime==1),
|
||||
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
|
||||
return typename ExtendedType<OtherDerived>::Type
|
||||
(other.derived(),
|
||||
Direction==Vertical ? 1 : m_matrix.rows(),
|
||||
@@ -418,10 +421,9 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
ExpressionType& operator=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
//eigen_assert((m_matrix.isNull()) == (other.isNull())); FIXME
|
||||
for(Index j=0; j<subVectors(); ++j)
|
||||
subVector(j) = other;
|
||||
return const_cast<ExpressionType&>(m_matrix);
|
||||
return const_cast<ExpressionType&>(m_matrix = extendedTo(other.derived()));
|
||||
}
|
||||
|
||||
/** Adds the vector \a other to each subvector of \c *this */
|
||||
@@ -429,9 +431,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
ExpressionType& operator+=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
for(Index j=0; j<subVectors(); ++j)
|
||||
subVector(j) += other.derived();
|
||||
return const_cast<ExpressionType&>(m_matrix);
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
return const_cast<ExpressionType&>(m_matrix += extendedTo(other.derived()));
|
||||
}
|
||||
|
||||
/** Substracts the vector \a other to each subvector of \c *this */
|
||||
@@ -439,8 +440,29 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
ExpressionType& operator-=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
for(Index j=0; j<subVectors(); ++j)
|
||||
subVector(j) -= other.derived();
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
return const_cast<ExpressionType&>(m_matrix -= extendedTo(other.derived()));
|
||||
}
|
||||
|
||||
/** Multiples each subvector of \c *this by the vector \a other */
|
||||
template<typename OtherDerived>
|
||||
ExpressionType& operator*=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
m_matrix *= extendedTo(other.derived());
|
||||
return const_cast<ExpressionType&>(m_matrix);
|
||||
}
|
||||
|
||||
/** Divides each subvector of \c *this by the vector \a other */
|
||||
template<typename OtherDerived>
|
||||
ExpressionType& operator/=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
m_matrix /= extendedTo(other.derived());
|
||||
return const_cast<ExpressionType&>(m_matrix);
|
||||
}
|
||||
|
||||
@@ -451,7 +473,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
const typename ExtendedType<OtherDerived>::Type>
|
||||
operator+(const DenseBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
return m_matrix + extendedTo(other.derived());
|
||||
}
|
||||
|
||||
@@ -462,10 +485,39 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
const typename ExtendedType<OtherDerived>::Type>
|
||||
operator-(const DenseBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
return m_matrix - extendedTo(other.derived());
|
||||
}
|
||||
|
||||
/** Returns the expression where each subvector is the product of the vector \a other
|
||||
* by the corresponding subvector of \c *this */
|
||||
template<typename OtherDerived> EIGEN_STRONG_INLINE
|
||||
CwiseBinaryOp<internal::scalar_product_op<Scalar>,
|
||||
const ExpressionTypeNestedCleaned,
|
||||
const typename ExtendedType<OtherDerived>::Type>
|
||||
operator*(const DenseBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
return m_matrix * extendedTo(other.derived());
|
||||
}
|
||||
|
||||
/** Returns the expression where each subvector is the quotient of the corresponding
|
||||
* subvector of \c *this by the vector \a other */
|
||||
template<typename OtherDerived>
|
||||
CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
|
||||
const ExpressionTypeNestedCleaned,
|
||||
const typename ExtendedType<OtherDerived>::Type>
|
||||
operator/(const DenseBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
return m_matrix / extendedTo(other.derived());
|
||||
}
|
||||
|
||||
/////////// Geometry module ///////////
|
||||
|
||||
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
|
||||
@@ -509,7 +561,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
* Example: \include MatrixBase_colwise.cpp
|
||||
* Output: \verbinclude MatrixBase_colwise.out
|
||||
*
|
||||
* \sa rowwise(), class VectorwiseOp
|
||||
* \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const typename DenseBase<Derived>::ConstColwiseReturnType
|
||||
@@ -520,7 +572,7 @@ DenseBase<Derived>::colwise() const
|
||||
|
||||
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
|
||||
*
|
||||
* \sa rowwise(), class VectorwiseOp
|
||||
* \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename DenseBase<Derived>::ColwiseReturnType
|
||||
@@ -534,7 +586,7 @@ DenseBase<Derived>::colwise()
|
||||
* Example: \include MatrixBase_rowwise.cpp
|
||||
* Output: \verbinclude MatrixBase_rowwise.out
|
||||
*
|
||||
* \sa colwise(), class VectorwiseOp
|
||||
* \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const typename DenseBase<Derived>::ConstRowwiseReturnType
|
||||
@@ -545,7 +597,7 @@ DenseBase<Derived>::rowwise() const
|
||||
|
||||
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
|
||||
*
|
||||
* \sa colwise(), class VectorwiseOp
|
||||
* \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename DenseBase<Derived>::RowwiseReturnType
|
||||
|
||||
@@ -35,7 +35,7 @@ struct visitor_impl
|
||||
row = (UnrollCount-1) % Derived::RowsAtCompileTime
|
||||
};
|
||||
|
||||
inline static void run(const Derived &mat, Visitor& visitor)
|
||||
static inline void run(const Derived &mat, Visitor& visitor)
|
||||
{
|
||||
visitor_impl<Visitor, Derived, UnrollCount-1>::run(mat, visitor);
|
||||
visitor(mat.coeff(row, col), row, col);
|
||||
@@ -45,7 +45,7 @@ struct visitor_impl
|
||||
template<typename Visitor, typename Derived>
|
||||
struct visitor_impl<Visitor, Derived, 1>
|
||||
{
|
||||
inline static void run(const Derived &mat, Visitor& visitor)
|
||||
static inline void run(const Derived &mat, Visitor& visitor)
|
||||
{
|
||||
return visitor.init(mat.coeff(0, 0), 0, 0);
|
||||
}
|
||||
@@ -55,7 +55,7 @@ template<typename Visitor, typename Derived>
|
||||
struct visitor_impl<Visitor, Derived, Dynamic>
|
||||
{
|
||||
typedef typename Derived::Index Index;
|
||||
inline static void run(const Derived& mat, Visitor& visitor)
|
||||
static inline void run(const Derived& mat, Visitor& visitor)
|
||||
{
|
||||
visitor.init(mat.coeff(0,0), 0, 0);
|
||||
for(Index i = 1; i < mat.rows(); ++i)
|
||||
|
||||
@@ -168,7 +168,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2cf>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
|
||||
static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
|
||||
@@ -487,7 +487,7 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4f>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
|
||||
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
|
||||
{
|
||||
if (Offset!=0)
|
||||
first = vec_sld(first, second, Offset*4);
|
||||
@@ -497,7 +497,7 @@ struct palign_impl<Offset,Packet4f>
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4i>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
|
||||
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
|
||||
{
|
||||
if (Offset!=0)
|
||||
first = vec_sld(first, second, Offset*4);
|
||||
|
||||
@@ -27,8 +27,8 @@
|
||||
|
||||
namespace internal {
|
||||
|
||||
static uint32x4_t p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
||||
static uint32x2_t p2ui_CONJ_XOR = { 0x00000000, 0x80000000 };
|
||||
static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000);
|
||||
static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000);
|
||||
|
||||
//---------- float ----------
|
||||
struct Packet2cf
|
||||
|
||||
@@ -52,6 +52,16 @@ typedef uint32x4_t Packet4ui;
|
||||
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
|
||||
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
|
||||
|
||||
#if defined(__llvm__) && !defined(__clang__)
|
||||
//Special treatment for Apple's llvm-gcc, its NEON packet types are unions
|
||||
#define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}}
|
||||
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}}
|
||||
#else
|
||||
//Default initializer for packets
|
||||
#define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y}
|
||||
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
|
||||
#endif
|
||||
|
||||
#ifndef __pld
|
||||
#define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" );
|
||||
#endif
|
||||
@@ -84,7 +94,7 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
#if EIGEN_GNUC_AT_MOST(4,4)
|
||||
#if EIGEN_GNUC_AT_MOST(4,4) && !defined(__llvm__)
|
||||
// workaround gcc 4.2, 4.3 and 4.4 compilatin issue
|
||||
EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
|
||||
EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
|
||||
@@ -100,12 +110,12 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a)
|
||||
{
|
||||
Packet4f countdown = { 0, 1, 2, 3 };
|
||||
Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
|
||||
return vaddq_f32(pset1<Packet4f>(a), countdown);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a)
|
||||
{
|
||||
Packet4i countdown = { 0, 1, 2, 3 };
|
||||
Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
|
||||
return vaddq_s32(pset1<Packet4i>(a), countdown);
|
||||
}
|
||||
|
||||
@@ -395,25 +405,29 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
|
||||
return s[0];
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4f>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
|
||||
{
|
||||
if (Offset!=0)
|
||||
first = vextq_f32(first, second, Offset);
|
||||
}
|
||||
};
|
||||
// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
|
||||
// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
|
||||
#define PALIGN_NEON(Offset,Type,Command) \
|
||||
template<>\
|
||||
struct palign_impl<Offset,Type>\
|
||||
{\
|
||||
EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
|
||||
{\
|
||||
if (Offset!=0)\
|
||||
first = Command(first, second, Offset);\
|
||||
}\
|
||||
};\
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4i>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
|
||||
{
|
||||
if (Offset!=0)
|
||||
first = vextq_s32(first, second, Offset);
|
||||
}
|
||||
};
|
||||
PALIGN_NEON(0,Packet4f,vextq_f32)
|
||||
PALIGN_NEON(1,Packet4f,vextq_f32)
|
||||
PALIGN_NEON(2,Packet4f,vextq_f32)
|
||||
PALIGN_NEON(3,Packet4f,vextq_f32)
|
||||
PALIGN_NEON(0,Packet4i,vextq_s32)
|
||||
PALIGN_NEON(1,Packet4i,vextq_s32)
|
||||
PALIGN_NEON(2,Packet4i,vextq_s32)
|
||||
PALIGN_NEON(3,Packet4i,vextq_s32)
|
||||
|
||||
#undef PALIGN_NEON
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
@@ -102,7 +102,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo
|
||||
Packet2cf res;
|
||||
#if EIGEN_GNUC_AT_MOST(4,2)
|
||||
// workaround annoying "may be used uninitialized in this function" warning with gcc 4.2
|
||||
res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)&from);
|
||||
res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
|
||||
#else
|
||||
res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
|
||||
#endif
|
||||
@@ -151,7 +151,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2cf>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
|
||||
static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
@@ -350,7 +350,7 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet1cd>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
|
||||
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
|
||||
{
|
||||
// FIXME is it sure we never have to align a Packet1cd?
|
||||
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
|
||||
|
||||
@@ -110,9 +110,18 @@ template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER==1500)
|
||||
// Workaround MSVC 9 internal compiler error.
|
||||
// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode
|
||||
// TODO: let's check whether there does not exist a better fix, like adding a pset0() function. (it crashed on pset1(0)).
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps(from,from,from,from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
|
||||
#else
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
|
||||
@@ -282,7 +291,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
|
||||
{
|
||||
return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd((const double*)from)), 0, 0, 1, 1);
|
||||
return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
|
||||
{ return pset1<Packet2d>(from[0]); }
|
||||
@@ -302,8 +311,8 @@ template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d&
|
||||
_mm_storel_pd((to), from);
|
||||
_mm_storeh_pd((to+1), from);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, _mm_castps_pd(from)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, _mm_castsi128_pd(from)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castps_pd(from)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castsi128_pd(from)); }
|
||||
|
||||
// some compilers might be tempted to perform multiple moves instead of using a vector path.
|
||||
template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
|
||||
@@ -541,7 +550,7 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4f>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
|
||||
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
|
||||
{
|
||||
if (Offset!=0)
|
||||
first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
|
||||
@@ -551,7 +560,7 @@ struct palign_impl<Offset,Packet4f>
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4i>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
|
||||
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
|
||||
{
|
||||
if (Offset!=0)
|
||||
first = _mm_alignr_epi8(second,first, Offset*4);
|
||||
@@ -561,7 +570,7 @@ struct palign_impl<Offset,Packet4i>
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2d>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
|
||||
static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
|
||||
@@ -572,7 +581,7 @@ struct palign_impl<Offset,Packet2d>
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4f>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
|
||||
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
@@ -595,7 +604,7 @@ struct palign_impl<Offset,Packet4f>
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4i>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
|
||||
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
@@ -618,7 +627,7 @@ struct palign_impl<Offset,Packet4i>
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2d>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
|
||||
static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
|
||||
@@ -224,8 +224,8 @@ class CoeffBasedProduct
|
||||
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
|
||||
|
||||
protected:
|
||||
const LhsNested m_lhs;
|
||||
const RhsNested m_rhs;
|
||||
typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
|
||||
typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
|
||||
|
||||
mutable PlainObject m_result;
|
||||
};
|
||||
@@ -252,7 +252,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
||||
{
|
||||
product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
|
||||
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
|
||||
@@ -263,7 +263,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
||||
{
|
||||
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
|
||||
}
|
||||
@@ -273,7 +273,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
|
||||
{
|
||||
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
|
||||
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
|
||||
@@ -291,7 +291,7 @@ struct product_coeff_vectorized_unroller
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
|
||||
{
|
||||
product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
|
||||
pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
|
||||
@@ -302,7 +302,7 @@ template<typename Lhs, typename Rhs, typename Packet>
|
||||
struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
|
||||
{
|
||||
pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
|
||||
}
|
||||
@@ -314,7 +314,7 @@ struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, Re
|
||||
typedef typename Lhs::PacketScalar Packet;
|
||||
typedef typename Lhs::Index Index;
|
||||
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
||||
{
|
||||
Packet pres;
|
||||
product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
|
||||
@@ -327,7 +327,7 @@ template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int R
|
||||
struct product_coeff_vectorized_dyn_selector
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
|
||||
}
|
||||
@@ -339,7 +339,7 @@ template<typename Lhs, typename Rhs, int RhsCols>
|
||||
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
|
||||
}
|
||||
@@ -349,7 +349,7 @@ template<typename Lhs, typename Rhs, int LhsRows>
|
||||
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
|
||||
}
|
||||
@@ -359,7 +359,7 @@ template<typename Lhs, typename Rhs>
|
||||
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.transpose().cwiseProduct(rhs).sum();
|
||||
}
|
||||
@@ -369,7 +369,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
{
|
||||
product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
|
||||
}
|
||||
@@ -383,7 +383,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
|
||||
struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
||||
{
|
||||
product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
|
||||
@@ -394,7 +394,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
|
||||
struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
||||
{
|
||||
product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
|
||||
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
|
||||
@@ -405,7 +405,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
||||
{
|
||||
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
|
||||
}
|
||||
@@ -415,7 +415,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
||||
{
|
||||
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
|
||||
}
|
||||
@@ -425,7 +425,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
|
||||
{
|
||||
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
|
||||
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
|
||||
@@ -438,7 +438,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
|
||||
{
|
||||
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
|
||||
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
|
||||
|
||||
@@ -30,19 +30,18 @@ namespace internal {
|
||||
template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
|
||||
class gebp_traits;
|
||||
|
||||
|
||||
/** \internal \returns b if a<=0, and returns a otherwise. */
|
||||
inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)
|
||||
{
|
||||
return a<=0 ? b : a;
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0)
|
||||
{
|
||||
static std::ptrdiff_t m_l1CacheSize = 0;
|
||||
static std::ptrdiff_t m_l2CacheSize = 0;
|
||||
if(m_l1CacheSize==0)
|
||||
{
|
||||
m_l1CacheSize = queryL1CacheSize();
|
||||
m_l2CacheSize = queryTopLevelCacheSize();
|
||||
|
||||
if(m_l1CacheSize<=0) m_l1CacheSize = 8 * 1024;
|
||||
if(m_l2CacheSize<=0) m_l2CacheSize = 1 * 1024 * 1024;
|
||||
}
|
||||
static std::ptrdiff_t m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024);
|
||||
static std::ptrdiff_t m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024);
|
||||
|
||||
if(action==SetAction)
|
||||
{
|
||||
@@ -118,14 +117,14 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
|
||||
// FIXME (a bit overkill maybe ?)
|
||||
|
||||
template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
|
||||
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
|
||||
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
|
||||
{
|
||||
c = cj.pmadd(a,b,c);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
|
||||
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, T& a, T& b, T& c, T& t)
|
||||
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
|
||||
{
|
||||
t = b; t = cj.pmul(a,t); c = padd(c,t);
|
||||
}
|
||||
@@ -536,7 +535,7 @@ struct gebp_kernel
|
||||
ResPacketSize = Traits::ResPacketSize
|
||||
};
|
||||
|
||||
EIGEN_FLATTEN_ATTRIB
|
||||
EIGEN_DONT_INLINE EIGEN_FLATTEN_ATTRIB
|
||||
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
|
||||
Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB = 0)
|
||||
{
|
||||
@@ -598,64 +597,64 @@ struct gebp_kernel
|
||||
if(nr==2)
|
||||
{
|
||||
LhsPacket A0, A1;
|
||||
RhsPacket B0;
|
||||
RhsPacket B_0;
|
||||
RhsPacket T0;
|
||||
|
||||
EIGEN_ASM_COMMENT("mybegin2");
|
||||
traits.loadLhs(&blA[0*LhsProgress], A0);
|
||||
traits.loadLhs(&blA[1*LhsProgress], A1);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B0);
|
||||
traits.madd(A0,B0,C0,T0);
|
||||
traits.madd(A1,B0,C4,B0);
|
||||
traits.loadRhs(&blB[1*RhsProgress], B0);
|
||||
traits.madd(A0,B0,C1,T0);
|
||||
traits.madd(A1,B0,C5,B0);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B_0);
|
||||
traits.madd(A0,B_0,C0,T0);
|
||||
traits.madd(A1,B_0,C4,B_0);
|
||||
traits.loadRhs(&blB[1*RhsProgress], B_0);
|
||||
traits.madd(A0,B_0,C1,T0);
|
||||
traits.madd(A1,B_0,C5,B_0);
|
||||
|
||||
traits.loadLhs(&blA[2*LhsProgress], A0);
|
||||
traits.loadLhs(&blA[3*LhsProgress], A1);
|
||||
traits.loadRhs(&blB[2*RhsProgress], B0);
|
||||
traits.madd(A0,B0,C0,T0);
|
||||
traits.madd(A1,B0,C4,B0);
|
||||
traits.loadRhs(&blB[3*RhsProgress], B0);
|
||||
traits.madd(A0,B0,C1,T0);
|
||||
traits.madd(A1,B0,C5,B0);
|
||||
traits.loadRhs(&blB[2*RhsProgress], B_0);
|
||||
traits.madd(A0,B_0,C0,T0);
|
||||
traits.madd(A1,B_0,C4,B_0);
|
||||
traits.loadRhs(&blB[3*RhsProgress], B_0);
|
||||
traits.madd(A0,B_0,C1,T0);
|
||||
traits.madd(A1,B_0,C5,B_0);
|
||||
|
||||
traits.loadLhs(&blA[4*LhsProgress], A0);
|
||||
traits.loadLhs(&blA[5*LhsProgress], A1);
|
||||
traits.loadRhs(&blB[4*RhsProgress], B0);
|
||||
traits.madd(A0,B0,C0,T0);
|
||||
traits.madd(A1,B0,C4,B0);
|
||||
traits.loadRhs(&blB[5*RhsProgress], B0);
|
||||
traits.madd(A0,B0,C1,T0);
|
||||
traits.madd(A1,B0,C5,B0);
|
||||
traits.loadRhs(&blB[4*RhsProgress], B_0);
|
||||
traits.madd(A0,B_0,C0,T0);
|
||||
traits.madd(A1,B_0,C4,B_0);
|
||||
traits.loadRhs(&blB[5*RhsProgress], B_0);
|
||||
traits.madd(A0,B_0,C1,T0);
|
||||
traits.madd(A1,B_0,C5,B_0);
|
||||
|
||||
traits.loadLhs(&blA[6*LhsProgress], A0);
|
||||
traits.loadLhs(&blA[7*LhsProgress], A1);
|
||||
traits.loadRhs(&blB[6*RhsProgress], B0);
|
||||
traits.madd(A0,B0,C0,T0);
|
||||
traits.madd(A1,B0,C4,B0);
|
||||
traits.loadRhs(&blB[7*RhsProgress], B0);
|
||||
traits.madd(A0,B0,C1,T0);
|
||||
traits.madd(A1,B0,C5,B0);
|
||||
traits.loadRhs(&blB[6*RhsProgress], B_0);
|
||||
traits.madd(A0,B_0,C0,T0);
|
||||
traits.madd(A1,B_0,C4,B_0);
|
||||
traits.loadRhs(&blB[7*RhsProgress], B_0);
|
||||
traits.madd(A0,B_0,C1,T0);
|
||||
traits.madd(A1,B_0,C5,B_0);
|
||||
EIGEN_ASM_COMMENT("myend");
|
||||
}
|
||||
else
|
||||
{
|
||||
EIGEN_ASM_COMMENT("mybegin4");
|
||||
LhsPacket A0, A1;
|
||||
RhsPacket B0, B1, B2, B3;
|
||||
RhsPacket B_0, B1, B2, B3;
|
||||
RhsPacket T0;
|
||||
|
||||
traits.loadLhs(&blA[0*LhsProgress], A0);
|
||||
traits.loadLhs(&blA[1*LhsProgress], A1);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B0);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B_0);
|
||||
traits.loadRhs(&blB[1*RhsProgress], B1);
|
||||
|
||||
traits.madd(A0,B0,C0,T0);
|
||||
traits.madd(A0,B_0,C0,T0);
|
||||
traits.loadRhs(&blB[2*RhsProgress], B2);
|
||||
traits.madd(A1,B0,C4,B0);
|
||||
traits.madd(A1,B_0,C4,B_0);
|
||||
traits.loadRhs(&blB[3*RhsProgress], B3);
|
||||
traits.loadRhs(&blB[4*RhsProgress], B0);
|
||||
traits.loadRhs(&blB[4*RhsProgress], B_0);
|
||||
traits.madd(A0,B1,C1,T0);
|
||||
traits.madd(A1,B1,C5,B1);
|
||||
traits.loadRhs(&blB[5*RhsProgress], B1);
|
||||
@@ -667,9 +666,9 @@ EIGEN_ASM_COMMENT("mybegin4");
|
||||
traits.madd(A1,B3,C7,B3);
|
||||
traits.loadLhs(&blA[3*LhsProgress], A1);
|
||||
traits.loadRhs(&blB[7*RhsProgress], B3);
|
||||
traits.madd(A0,B0,C0,T0);
|
||||
traits.madd(A1,B0,C4,B0);
|
||||
traits.loadRhs(&blB[8*RhsProgress], B0);
|
||||
traits.madd(A0,B_0,C0,T0);
|
||||
traits.madd(A1,B_0,C4,B_0);
|
||||
traits.loadRhs(&blB[8*RhsProgress], B_0);
|
||||
traits.madd(A0,B1,C1,T0);
|
||||
traits.madd(A1,B1,C5,B1);
|
||||
traits.loadRhs(&blB[9*RhsProgress], B1);
|
||||
@@ -682,9 +681,9 @@ EIGEN_ASM_COMMENT("mybegin4");
|
||||
traits.loadLhs(&blA[5*LhsProgress], A1);
|
||||
traits.loadRhs(&blB[11*RhsProgress], B3);
|
||||
|
||||
traits.madd(A0,B0,C0,T0);
|
||||
traits.madd(A1,B0,C4,B0);
|
||||
traits.loadRhs(&blB[12*RhsProgress], B0);
|
||||
traits.madd(A0,B_0,C0,T0);
|
||||
traits.madd(A1,B_0,C4,B_0);
|
||||
traits.loadRhs(&blB[12*RhsProgress], B_0);
|
||||
traits.madd(A0,B1,C1,T0);
|
||||
traits.madd(A1,B1,C5,B1);
|
||||
traits.loadRhs(&blB[13*RhsProgress], B1);
|
||||
@@ -696,8 +695,8 @@ EIGEN_ASM_COMMENT("mybegin4");
|
||||
traits.madd(A1,B3,C7,B3);
|
||||
traits.loadLhs(&blA[7*LhsProgress], A1);
|
||||
traits.loadRhs(&blB[15*RhsProgress], B3);
|
||||
traits.madd(A0,B0,C0,T0);
|
||||
traits.madd(A1,B0,C4,B0);
|
||||
traits.madd(A0,B_0,C0,T0);
|
||||
traits.madd(A1,B_0,C4,B_0);
|
||||
traits.madd(A0,B1,C1,T0);
|
||||
traits.madd(A1,B1,C5,B1);
|
||||
traits.madd(A0,B2,C2,T0);
|
||||
@@ -715,32 +714,32 @@ EIGEN_ASM_COMMENT("mybegin4");
|
||||
if(nr==2)
|
||||
{
|
||||
LhsPacket A0, A1;
|
||||
RhsPacket B0;
|
||||
RhsPacket B_0;
|
||||
RhsPacket T0;
|
||||
|
||||
traits.loadLhs(&blA[0*LhsProgress], A0);
|
||||
traits.loadLhs(&blA[1*LhsProgress], A1);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B0);
|
||||
traits.madd(A0,B0,C0,T0);
|
||||
traits.madd(A1,B0,C4,B0);
|
||||
traits.loadRhs(&blB[1*RhsProgress], B0);
|
||||
traits.madd(A0,B0,C1,T0);
|
||||
traits.madd(A1,B0,C5,B0);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B_0);
|
||||
traits.madd(A0,B_0,C0,T0);
|
||||
traits.madd(A1,B_0,C4,B_0);
|
||||
traits.loadRhs(&blB[1*RhsProgress], B_0);
|
||||
traits.madd(A0,B_0,C1,T0);
|
||||
traits.madd(A1,B_0,C5,B_0);
|
||||
}
|
||||
else
|
||||
{
|
||||
LhsPacket A0, A1;
|
||||
RhsPacket B0, B1, B2, B3;
|
||||
RhsPacket B_0, B1, B2, B3;
|
||||
RhsPacket T0;
|
||||
|
||||
traits.loadLhs(&blA[0*LhsProgress], A0);
|
||||
traits.loadLhs(&blA[1*LhsProgress], A1);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B0);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B_0);
|
||||
traits.loadRhs(&blB[1*RhsProgress], B1);
|
||||
|
||||
traits.madd(A0,B0,C0,T0);
|
||||
traits.madd(A0,B_0,C0,T0);
|
||||
traits.loadRhs(&blB[2*RhsProgress], B2);
|
||||
traits.madd(A1,B0,C4,B0);
|
||||
traits.madd(A1,B_0,C4,B_0);
|
||||
traits.loadRhs(&blB[3*RhsProgress], B3);
|
||||
traits.madd(A0,B1,C1,T0);
|
||||
traits.madd(A1,B1,C5,B1);
|
||||
@@ -827,42 +826,42 @@ EIGEN_ASM_COMMENT("mybegin4");
|
||||
if(nr==2)
|
||||
{
|
||||
LhsPacket A0;
|
||||
RhsPacket B0, B1;
|
||||
RhsPacket B_0, B1;
|
||||
|
||||
traits.loadLhs(&blA[0*LhsProgress], A0);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B0);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B_0);
|
||||
traits.loadRhs(&blB[1*RhsProgress], B1);
|
||||
traits.madd(A0,B0,C0,B0);
|
||||
traits.loadRhs(&blB[2*RhsProgress], B0);
|
||||
traits.madd(A0,B_0,C0,B_0);
|
||||
traits.loadRhs(&blB[2*RhsProgress], B_0);
|
||||
traits.madd(A0,B1,C1,B1);
|
||||
traits.loadLhs(&blA[1*LhsProgress], A0);
|
||||
traits.loadRhs(&blB[3*RhsProgress], B1);
|
||||
traits.madd(A0,B0,C0,B0);
|
||||
traits.loadRhs(&blB[4*RhsProgress], B0);
|
||||
traits.madd(A0,B_0,C0,B_0);
|
||||
traits.loadRhs(&blB[4*RhsProgress], B_0);
|
||||
traits.madd(A0,B1,C1,B1);
|
||||
traits.loadLhs(&blA[2*LhsProgress], A0);
|
||||
traits.loadRhs(&blB[5*RhsProgress], B1);
|
||||
traits.madd(A0,B0,C0,B0);
|
||||
traits.loadRhs(&blB[6*RhsProgress], B0);
|
||||
traits.madd(A0,B_0,C0,B_0);
|
||||
traits.loadRhs(&blB[6*RhsProgress], B_0);
|
||||
traits.madd(A0,B1,C1,B1);
|
||||
traits.loadLhs(&blA[3*LhsProgress], A0);
|
||||
traits.loadRhs(&blB[7*RhsProgress], B1);
|
||||
traits.madd(A0,B0,C0,B0);
|
||||
traits.madd(A0,B_0,C0,B_0);
|
||||
traits.madd(A0,B1,C1,B1);
|
||||
}
|
||||
else
|
||||
{
|
||||
LhsPacket A0;
|
||||
RhsPacket B0, B1, B2, B3;
|
||||
RhsPacket B_0, B1, B2, B3;
|
||||
|
||||
traits.loadLhs(&blA[0*LhsProgress], A0);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B0);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B_0);
|
||||
traits.loadRhs(&blB[1*RhsProgress], B1);
|
||||
|
||||
traits.madd(A0,B0,C0,B0);
|
||||
traits.madd(A0,B_0,C0,B_0);
|
||||
traits.loadRhs(&blB[2*RhsProgress], B2);
|
||||
traits.loadRhs(&blB[3*RhsProgress], B3);
|
||||
traits.loadRhs(&blB[4*RhsProgress], B0);
|
||||
traits.loadRhs(&blB[4*RhsProgress], B_0);
|
||||
traits.madd(A0,B1,C1,B1);
|
||||
traits.loadRhs(&blB[5*RhsProgress], B1);
|
||||
traits.madd(A0,B2,C2,B2);
|
||||
@@ -870,8 +869,8 @@ EIGEN_ASM_COMMENT("mybegin4");
|
||||
traits.madd(A0,B3,C3,B3);
|
||||
traits.loadLhs(&blA[1*LhsProgress], A0);
|
||||
traits.loadRhs(&blB[7*RhsProgress], B3);
|
||||
traits.madd(A0,B0,C0,B0);
|
||||
traits.loadRhs(&blB[8*RhsProgress], B0);
|
||||
traits.madd(A0,B_0,C0,B_0);
|
||||
traits.loadRhs(&blB[8*RhsProgress], B_0);
|
||||
traits.madd(A0,B1,C1,B1);
|
||||
traits.loadRhs(&blB[9*RhsProgress], B1);
|
||||
traits.madd(A0,B2,C2,B2);
|
||||
@@ -880,8 +879,8 @@ EIGEN_ASM_COMMENT("mybegin4");
|
||||
traits.loadLhs(&blA[2*LhsProgress], A0);
|
||||
traits.loadRhs(&blB[11*RhsProgress], B3);
|
||||
|
||||
traits.madd(A0,B0,C0,B0);
|
||||
traits.loadRhs(&blB[12*RhsProgress], B0);
|
||||
traits.madd(A0,B_0,C0,B_0);
|
||||
traits.loadRhs(&blB[12*RhsProgress], B_0);
|
||||
traits.madd(A0,B1,C1,B1);
|
||||
traits.loadRhs(&blB[13*RhsProgress], B1);
|
||||
traits.madd(A0,B2,C2,B2);
|
||||
@@ -890,7 +889,7 @@ EIGEN_ASM_COMMENT("mybegin4");
|
||||
|
||||
traits.loadLhs(&blA[3*LhsProgress], A0);
|
||||
traits.loadRhs(&blB[15*RhsProgress], B3);
|
||||
traits.madd(A0,B0,C0,B0);
|
||||
traits.madd(A0,B_0,C0,B_0);
|
||||
traits.madd(A0,B1,C1,B1);
|
||||
traits.madd(A0,B2,C2,B2);
|
||||
traits.madd(A0,B3,C3,B3);
|
||||
@@ -905,26 +904,26 @@ EIGEN_ASM_COMMENT("mybegin4");
|
||||
if(nr==2)
|
||||
{
|
||||
LhsPacket A0;
|
||||
RhsPacket B0, B1;
|
||||
RhsPacket B_0, B1;
|
||||
|
||||
traits.loadLhs(&blA[0*LhsProgress], A0);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B0);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B_0);
|
||||
traits.loadRhs(&blB[1*RhsProgress], B1);
|
||||
traits.madd(A0,B0,C0,B0);
|
||||
traits.madd(A0,B_0,C0,B_0);
|
||||
traits.madd(A0,B1,C1,B1);
|
||||
}
|
||||
else
|
||||
{
|
||||
LhsPacket A0;
|
||||
RhsPacket B0, B1, B2, B3;
|
||||
RhsPacket B_0, B1, B2, B3;
|
||||
|
||||
traits.loadLhs(&blA[0*LhsProgress], A0);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B0);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B_0);
|
||||
traits.loadRhs(&blB[1*RhsProgress], B1);
|
||||
traits.loadRhs(&blB[2*RhsProgress], B2);
|
||||
traits.loadRhs(&blB[3*RhsProgress], B3);
|
||||
|
||||
traits.madd(A0,B0,C0,B0);
|
||||
traits.madd(A0,B_0,C0,B_0);
|
||||
traits.madd(A0,B1,C1,B1);
|
||||
traits.madd(A0,B2,C2,B2);
|
||||
traits.madd(A0,B3,C3,B3);
|
||||
@@ -971,26 +970,26 @@ EIGEN_ASM_COMMENT("mybegin4");
|
||||
if(nr==2)
|
||||
{
|
||||
LhsScalar A0;
|
||||
RhsScalar B0, B1;
|
||||
RhsScalar B_0, B1;
|
||||
|
||||
A0 = blA[k];
|
||||
B0 = blB[0];
|
||||
B_0 = blB[0];
|
||||
B1 = blB[1];
|
||||
MADD(cj,A0,B0,C0,B0);
|
||||
MADD(cj,A0,B_0,C0,B_0);
|
||||
MADD(cj,A0,B1,C1,B1);
|
||||
}
|
||||
else
|
||||
{
|
||||
LhsScalar A0;
|
||||
RhsScalar B0, B1, B2, B3;
|
||||
RhsScalar B_0, B1, B2, B3;
|
||||
|
||||
A0 = blA[k];
|
||||
B0 = blB[0];
|
||||
B_0 = blB[0];
|
||||
B1 = blB[1];
|
||||
B2 = blB[2];
|
||||
B3 = blB[3];
|
||||
|
||||
MADD(cj,A0,B0,C0,B0);
|
||||
MADD(cj,A0,B_0,C0,B_0);
|
||||
MADD(cj,A0,B1,C1,B1);
|
||||
MADD(cj,A0,B2,C2,B2);
|
||||
MADD(cj,A0,B3,C3,B3);
|
||||
@@ -1027,14 +1026,14 @@ EIGEN_ASM_COMMENT("mybegin4");
|
||||
for(Index k=0; k<depth; k++)
|
||||
{
|
||||
LhsPacket A0, A1;
|
||||
RhsPacket B0;
|
||||
RhsPacket B_0;
|
||||
RhsPacket T0;
|
||||
|
||||
traits.loadLhs(&blA[0*LhsProgress], A0);
|
||||
traits.loadLhs(&blA[1*LhsProgress], A1);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B0);
|
||||
traits.madd(A0,B0,C0,T0);
|
||||
traits.madd(A1,B0,C4,B0);
|
||||
traits.loadRhs(&blB[0*RhsProgress], B_0);
|
||||
traits.madd(A0,B_0,C0,T0);
|
||||
traits.madd(A1,B_0,C4,B_0);
|
||||
|
||||
blB += RhsProgress;
|
||||
blA += 2*LhsProgress;
|
||||
@@ -1066,10 +1065,10 @@ EIGEN_ASM_COMMENT("mybegin4");
|
||||
for(Index k=0; k<depth; k++)
|
||||
{
|
||||
LhsPacket A0;
|
||||
RhsPacket B0;
|
||||
RhsPacket B_0;
|
||||
traits.loadLhs(blA, A0);
|
||||
traits.loadRhs(blB, B0);
|
||||
traits.madd(A0, B0, C0, B0);
|
||||
traits.loadRhs(blB, B_0);
|
||||
traits.madd(A0, B_0, C0, B_0);
|
||||
blB += RhsProgress;
|
||||
blA += LhsProgress;
|
||||
}
|
||||
@@ -1091,8 +1090,8 @@ EIGEN_ASM_COMMENT("mybegin4");
|
||||
for(Index k=0; k<depth; k++)
|
||||
{
|
||||
LhsScalar A0 = blA[k];
|
||||
RhsScalar B0 = blB[k];
|
||||
MADD(cj, A0, B0, C0, B0);
|
||||
RhsScalar B_0 = blB[k];
|
||||
MADD(cj, A0, B_0, C0, B_0);
|
||||
}
|
||||
res[(j2+0)*resStride + i] += alpha*C0;
|
||||
}
|
||||
@@ -1103,7 +1102,7 @@ EIGEN_ASM_COMMENT("mybegin4");
|
||||
#undef CJMADD
|
||||
|
||||
// pack a block of the lhs
|
||||
// The travesal is as follow (mr==4):
|
||||
// The traversal is as follow (mr==4):
|
||||
// 0 4 8 12 ...
|
||||
// 1 5 9 13 ...
|
||||
// 2 6 10 14 ...
|
||||
@@ -1119,11 +1118,15 @@ EIGEN_ASM_COMMENT("mybegin4");
|
||||
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
|
||||
struct gemm_pack_lhs
|
||||
{
|
||||
void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
|
||||
EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
|
||||
Index stride=0, Index offset=0)
|
||||
{
|
||||
// enum { PacketSize = packet_traits<Scalar>::size };
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
enum { PacketSize = packet_traits<Scalar>::size };
|
||||
|
||||
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
|
||||
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
|
||||
eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
|
||||
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
|
||||
const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
|
||||
Index count = 0;
|
||||
@@ -1131,9 +1134,44 @@ struct gemm_pack_lhs
|
||||
for(Index i=0; i<peeled_mc; i+=Pack1)
|
||||
{
|
||||
if(PanelMode) count += Pack1 * offset;
|
||||
for(Index k=0; k<depth; k++)
|
||||
for(Index w=0; w<Pack1; w++)
|
||||
blockA[count++] = cj(lhs(i+w, k));
|
||||
|
||||
if(StorageOrder==ColMajor)
|
||||
{
|
||||
for(Index k=0; k<depth; k++)
|
||||
{
|
||||
Packet A, B, C, D;
|
||||
if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
|
||||
if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
|
||||
if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
|
||||
if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
|
||||
if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
|
||||
if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
|
||||
if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
|
||||
if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(Index k=0; k<depth; k++)
|
||||
{
|
||||
// TODO add a vectorized transpose here
|
||||
Index w=0;
|
||||
for(; w<Pack1-3; w+=4)
|
||||
{
|
||||
Scalar a(cj(lhs(i+w+0, k))),
|
||||
b(cj(lhs(i+w+1, k))),
|
||||
c(cj(lhs(i+w+2, k))),
|
||||
d(cj(lhs(i+w+3, k)));
|
||||
blockA[count++] = a;
|
||||
blockA[count++] = b;
|
||||
blockA[count++] = c;
|
||||
blockA[count++] = d;
|
||||
}
|
||||
if(Pack1%4)
|
||||
for(;w<Pack1;++w)
|
||||
blockA[count++] = cj(lhs(i+w, k));
|
||||
}
|
||||
}
|
||||
if(PanelMode) count += Pack1 * (stride-offset-depth);
|
||||
}
|
||||
if(rows-peeled_mc>=Pack2)
|
||||
@@ -1167,9 +1205,10 @@ struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
|
||||
{
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
enum { PacketSize = packet_traits<Scalar>::size };
|
||||
void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
|
||||
EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
|
||||
Index stride=0, Index offset=0)
|
||||
{
|
||||
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
|
||||
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
|
||||
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
|
||||
Index packet_cols = (cols/nr) * nr;
|
||||
@@ -1214,9 +1253,10 @@ template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode
|
||||
struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
|
||||
{
|
||||
enum { PacketSize = packet_traits<Scalar>::size };
|
||||
void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
|
||||
EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
|
||||
Index stride=0, Index offset=0)
|
||||
{
|
||||
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
|
||||
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
|
||||
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
|
||||
Index packet_cols = (cols/nr) * nr;
|
||||
|
||||
@@ -412,8 +412,8 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
|
||||
{
|
||||
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
|
||||
|
||||
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
|
||||
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
|
||||
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
|
||||
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
|
||||
|
||||
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
|
||||
* RhsBlasTraits::extractScalarFactor(m_rhs);
|
||||
|
||||
@@ -42,14 +42,14 @@ struct tribb_kernel;
|
||||
template <typename Index,
|
||||
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
|
||||
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
|
||||
int ResStorageOrder, int UpLo>
|
||||
int ResStorageOrder, int UpLo, int Version = Specialized>
|
||||
struct general_matrix_matrix_triangular_product;
|
||||
|
||||
// as usual if the result is row major => we transpose the product
|
||||
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
|
||||
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo>
|
||||
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo>
|
||||
{
|
||||
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version>
|
||||
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version>
|
||||
{
|
||||
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
|
||||
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
|
||||
@@ -63,8 +63,8 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
|
||||
};
|
||||
|
||||
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
|
||||
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo>
|
||||
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo>
|
||||
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version>
|
||||
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version>
|
||||
{
|
||||
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
|
||||
@@ -201,13 +201,13 @@ TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(
|
||||
typedef internal::blas_traits<Lhs> LhsBlasTraits;
|
||||
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
|
||||
typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
|
||||
const ActualLhs actualLhs = LhsBlasTraits::extract(prod.lhs());
|
||||
typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
|
||||
|
||||
typedef typename internal::remove_all<typename ProductDerived::RhsNested>::type Rhs;
|
||||
typedef internal::blas_traits<Rhs> RhsBlasTraits;
|
||||
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
|
||||
typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
|
||||
const ActualRhs actualRhs = RhsBlasTraits::extract(prod.rhs());
|
||||
typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
|
||||
|
||||
typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
|
||||
|
||||
|
||||
142
Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h
Normal file
142
Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h
Normal file
@@ -0,0 +1,142 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
********************************************************************************
|
||||
* Content : Eigen bindings to Intel(R) MKL
|
||||
* Level 3 BLAS SYRK/HERK implementation.
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
|
||||
#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
|
||||
|
||||
namespace internal {
|
||||
|
||||
template <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA, int ResStorageOrder, int UpLo>
|
||||
struct general_matrix_matrix_rankupdate :
|
||||
general_matrix_matrix_triangular_product<
|
||||
Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {};
|
||||
|
||||
|
||||
// try to go to BLAS specialization
|
||||
#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \
|
||||
template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \
|
||||
int RhsStorageOrder, bool ConjugateRhs, int UpLo> \
|
||||
struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \
|
||||
Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \
|
||||
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
|
||||
const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \
|
||||
{ \
|
||||
if (lhs==rhs) { \
|
||||
general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
|
||||
::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
|
||||
} else { \
|
||||
general_matrix_matrix_triangular_product<Index, \
|
||||
Scalar, LhsStorageOrder, ConjugateLhs, \
|
||||
Scalar, RhsStorageOrder, ConjugateRhs, \
|
||||
ColMajor, UpLo, BuiltIn> \
|
||||
::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
|
||||
} \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_MKL_RANKUPDATE_SPECIALIZE(double)
|
||||
//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex)
|
||||
EIGEN_MKL_RANKUPDATE_SPECIALIZE(float)
|
||||
//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex)
|
||||
|
||||
// SYRK for float/double
|
||||
#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \
|
||||
template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
|
||||
struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
|
||||
enum { \
|
||||
IsLower = (UpLo&Lower) == Lower, \
|
||||
LowUp = IsLower ? Lower : Upper, \
|
||||
conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \
|
||||
}; \
|
||||
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
|
||||
const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
|
||||
{ \
|
||||
/* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \
|
||||
\
|
||||
MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
|
||||
char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \
|
||||
MKLTYPE alpha_, beta_; \
|
||||
\
|
||||
/* Set alpha_ & beta_ */ \
|
||||
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
|
||||
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
|
||||
MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \
|
||||
} \
|
||||
};
|
||||
|
||||
// HERK for complex data
|
||||
#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \
|
||||
template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
|
||||
struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
|
||||
enum { \
|
||||
IsLower = (UpLo&Lower) == Lower, \
|
||||
LowUp = IsLower ? Lower : Upper, \
|
||||
conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \
|
||||
}; \
|
||||
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
|
||||
const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
|
||||
{ \
|
||||
typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \
|
||||
\
|
||||
MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
|
||||
char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \
|
||||
RTYPE alpha_, beta_; \
|
||||
const EIGTYPE* a_ptr; \
|
||||
\
|
||||
/* Set alpha_ & beta_ */ \
|
||||
/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); */\
|
||||
/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1));*/ \
|
||||
alpha_ = alpha.real(); \
|
||||
beta_ = 1.0; \
|
||||
/* Copy with conjugation in some cases*/ \
|
||||
MatrixType a; \
|
||||
if (conjA) { \
|
||||
Map<const MatrixType, 0, OuterStride<> > mapA(lhs,n,k,OuterStride<>(lhsStride)); \
|
||||
a = mapA.conjugate(); \
|
||||
lda = a.outerStride(); \
|
||||
a_ptr = a.data(); \
|
||||
} else a_ptr=lhs; \
|
||||
MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \
|
||||
} \
|
||||
};
|
||||
|
||||
|
||||
EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk)
|
||||
EIGEN_MKL_RANKUPDATE_R(float, float, ssyrk)
|
||||
|
||||
//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk)
|
||||
//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8, double, cherk)
|
||||
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
|
||||
114
Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h
Normal file
114
Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h
Normal file
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
********************************************************************************
|
||||
* Content : Eigen bindings to Intel(R) MKL
|
||||
* General matrix-matrix product functionality based on ?GEMM.
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
|
||||
#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
|
||||
|
||||
namespace internal {
|
||||
|
||||
/**********************************************************************
|
||||
* This file implements general matrix-matrix multiplication using BLAS
|
||||
* gemm function via partial specialization of
|
||||
* general_matrix_matrix_product::run(..) method for float, double,
|
||||
* std::complex<float> and std::complex<double> types
|
||||
**********************************************************************/
|
||||
|
||||
// gemm specialization
|
||||
|
||||
#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \
|
||||
template< \
|
||||
typename Index, \
|
||||
int LhsStorageOrder, bool ConjugateLhs, \
|
||||
int RhsStorageOrder, bool ConjugateRhs> \
|
||||
struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \
|
||||
{ \
|
||||
static void run(Index rows, Index cols, Index depth, \
|
||||
const EIGTYPE* _lhs, Index lhsStride, \
|
||||
const EIGTYPE* _rhs, Index rhsStride, \
|
||||
EIGTYPE* res, Index resStride, \
|
||||
EIGTYPE alpha, \
|
||||
level3_blocking<EIGTYPE, EIGTYPE>& blocking, \
|
||||
GemmParallelInfo<Index>* info = 0) \
|
||||
{ \
|
||||
using std::conj; \
|
||||
\
|
||||
char transa, transb; \
|
||||
MKL_INT m, n, k, lda, ldb, ldc; \
|
||||
const EIGTYPE *a, *b; \
|
||||
MKLTYPE alpha_, beta_; \
|
||||
MatrixX##EIGPREFIX a_tmp, b_tmp; \
|
||||
EIGTYPE myone(1);\
|
||||
\
|
||||
/* Set transpose options */ \
|
||||
transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
|
||||
transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
|
||||
\
|
||||
/* Set m, n, k */ \
|
||||
m = (MKL_INT)rows; \
|
||||
n = (MKL_INT)cols; \
|
||||
k = (MKL_INT)depth; \
|
||||
\
|
||||
/* Set alpha_ & beta_ */ \
|
||||
assign_scalar_eig2mkl(alpha_, alpha); \
|
||||
assign_scalar_eig2mkl(beta_, myone); \
|
||||
\
|
||||
/* Set lda, ldb, ldc */ \
|
||||
lda = (MKL_INT)lhsStride; \
|
||||
ldb = (MKL_INT)rhsStride; \
|
||||
ldc = (MKL_INT)resStride; \
|
||||
\
|
||||
/* Set a, b, c */ \
|
||||
if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \
|
||||
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \
|
||||
a_tmp = lhs.conjugate(); \
|
||||
a = a_tmp.data(); \
|
||||
lda = a_tmp.outerStride(); \
|
||||
} else a = _lhs; \
|
||||
\
|
||||
if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \
|
||||
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \
|
||||
b_tmp = rhs.conjugate(); \
|
||||
b = b_tmp.data(); \
|
||||
ldb = b_tmp.outerStride(); \
|
||||
} else b = _rhs; \
|
||||
\
|
||||
MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
|
||||
}};
|
||||
|
||||
GEMM_SPECIALIZATION(double, d, double, d)
|
||||
GEMM_SPECIALIZATION(float, f, float, s)
|
||||
GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z)
|
||||
GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, c)
|
||||
|
||||
} //end of namespase
|
||||
|
||||
#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
|
||||
@@ -40,8 +40,8 @@ namespace internal {
|
||||
* |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp
|
||||
* |cplx |real |real | optimal case, vectorization possible via real-cplx mul
|
||||
*/
|
||||
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
|
||||
struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
|
||||
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
|
||||
struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
|
||||
{
|
||||
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
|
||||
@@ -99,7 +99,7 @@ EIGEN_DONT_INLINE static void run(
|
||||
|
||||
// How many coeffs of the result do we have to skip to be aligned.
|
||||
// Here we assume data are at least aligned on the base scalar type.
|
||||
Index alignedStart = first_aligned(res,size);
|
||||
Index alignedStart = internal::first_aligned(res,size);
|
||||
Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
|
||||
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
|
||||
|
||||
@@ -109,7 +109,7 @@ EIGEN_DONT_INLINE static void run(
|
||||
: FirstAligned;
|
||||
|
||||
// we cannot assume the first element is aligned because of sub-matrices
|
||||
const Index lhsAlignmentOffset = first_aligned(lhs,size);
|
||||
const Index lhsAlignmentOffset = internal::first_aligned(lhs,size);
|
||||
|
||||
// find how many columns do we have to skip to be aligned with the result (if possible)
|
||||
Index skipColumns = 0;
|
||||
@@ -296,8 +296,8 @@ EIGEN_DONT_INLINE static void run(
|
||||
* - alpha is always a complex (or converted to a complex)
|
||||
* - no vectorization
|
||||
*/
|
||||
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
|
||||
struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
|
||||
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
|
||||
struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
|
||||
{
|
||||
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
|
||||
@@ -351,7 +351,7 @@ EIGEN_DONT_INLINE static void run(
|
||||
// How many coeffs of the result do we have to skip to be aligned.
|
||||
// Here we assume data are at least aligned on the base scalar type
|
||||
// if that's not the case then vectorization is discarded, see below.
|
||||
Index alignedStart = first_aligned(rhs, depth);
|
||||
Index alignedStart = internal::first_aligned(rhs, depth);
|
||||
Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
|
||||
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
|
||||
|
||||
@@ -361,7 +361,7 @@ EIGEN_DONT_INLINE static void run(
|
||||
: FirstAligned;
|
||||
|
||||
// we cannot assume the first element is aligned because of sub-matrices
|
||||
const Index lhsAlignmentOffset = first_aligned(lhs,depth);
|
||||
const Index lhsAlignmentOffset = internal::first_aligned(lhs,depth);
|
||||
|
||||
// find how many rows do we have to skip to be aligned with rhs (if possible)
|
||||
Index skipRows = 0;
|
||||
|
||||
127
Eigen/src/Core/products/GeneralMatrixVector_MKL.h
Normal file
127
Eigen/src/Core/products/GeneralMatrixVector_MKL.h
Normal file
@@ -0,0 +1,127 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
********************************************************************************
|
||||
* Content : Eigen bindings to Intel(R) MKL
|
||||
* General matrix-vector product functionality based on ?GEMV.
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
|
||||
#define EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
|
||||
|
||||
namespace internal {
|
||||
|
||||
/**********************************************************************
|
||||
* This file implements general matrix-vector multiplication using BLAS
|
||||
* gemv function via partial specialization of
|
||||
* general_matrix_vector_product::run(..) method for float, double,
|
||||
* std::complex<float> and std::complex<double> types
|
||||
**********************************************************************/
|
||||
|
||||
// gemv specialization
|
||||
|
||||
template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
|
||||
struct general_matrix_vector_product_gemv :
|
||||
general_matrix_vector_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,ConjugateRhs,BuiltIn> {};
|
||||
|
||||
#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
|
||||
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
|
||||
struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
|
||||
static EIGEN_DONT_INLINE void run( \
|
||||
Index rows, Index cols, \
|
||||
const Scalar* lhs, Index lhsStride, \
|
||||
const Scalar* rhs, Index rhsIncr, \
|
||||
Scalar* res, Index resIncr, Scalar alpha) \
|
||||
{ \
|
||||
if (ConjugateLhs) { \
|
||||
general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,BuiltIn>::run( \
|
||||
rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
|
||||
} else { \
|
||||
general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
|
||||
rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
|
||||
} \
|
||||
} \
|
||||
}; \
|
||||
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
|
||||
struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
|
||||
static EIGEN_DONT_INLINE void run( \
|
||||
Index rows, Index cols, \
|
||||
const Scalar* lhs, Index lhsStride, \
|
||||
const Scalar* rhs, Index rhsIncr, \
|
||||
Scalar* res, Index resIncr, Scalar alpha) \
|
||||
{ \
|
||||
general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
|
||||
rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
|
||||
} \
|
||||
}; \
|
||||
|
||||
EIGEN_MKL_GEMV_SPECIALIZE(double)
|
||||
EIGEN_MKL_GEMV_SPECIALIZE(float)
|
||||
EIGEN_MKL_GEMV_SPECIALIZE(dcomplex)
|
||||
EIGEN_MKL_GEMV_SPECIALIZE(scomplex)
|
||||
|
||||
#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \
|
||||
template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
|
||||
struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
|
||||
{ \
|
||||
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\
|
||||
\
|
||||
static EIGEN_DONT_INLINE void run( \
|
||||
Index rows, Index cols, \
|
||||
const EIGTYPE* lhs, Index lhsStride, \
|
||||
const EIGTYPE* rhs, Index rhsIncr, \
|
||||
EIGTYPE* res, Index resIncr, EIGTYPE alpha) \
|
||||
{ \
|
||||
MKL_INT m=rows, n=cols, lda=lhsStride, incx=rhsIncr, incy=resIncr; \
|
||||
MKLTYPE alpha_, beta_; \
|
||||
const EIGTYPE *x_ptr, myone(1); \
|
||||
char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \
|
||||
if (LhsStorageOrder==RowMajor) { \
|
||||
m=cols; \
|
||||
n=rows; \
|
||||
}\
|
||||
assign_scalar_eig2mkl(alpha_, alpha); \
|
||||
assign_scalar_eig2mkl(beta_, myone); \
|
||||
GEMVVector x_tmp; \
|
||||
if (ConjugateRhs) { \
|
||||
Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \
|
||||
x_tmp=map_x.conjugate(); \
|
||||
x_ptr=x_tmp.data(); \
|
||||
incx=1; \
|
||||
} else x_ptr=rhs; \
|
||||
MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
|
||||
}\
|
||||
};
|
||||
|
||||
EIGEN_MKL_GEMV_SPECIALIZATION(double, double, d)
|
||||
EIGEN_MKL_GEMV_SPECIALIZATION(float, float, s)
|
||||
EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z)
|
||||
EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, c)
|
||||
|
||||
} //end of namespase
|
||||
|
||||
#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
|
||||
@@ -85,7 +85,9 @@ template<typename Index> struct GemmParallelInfo
|
||||
template<bool Condition, typename Functor, typename Index>
|
||||
void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
|
||||
{
|
||||
#ifndef EIGEN_HAS_OPENMP
|
||||
// TODO when EIGEN_USE_BLAS is defined,
|
||||
// we should still enable OMP for other scalar types
|
||||
#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
|
||||
// FIXME the transpose variable is only needed to properly split
|
||||
// the matrix product when multithreading is enabled. This is a temporary
|
||||
// fix to support row-major destination matrices. This whole
|
||||
|
||||
@@ -400,8 +400,8 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
|
||||
{
|
||||
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
|
||||
|
||||
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
|
||||
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
|
||||
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
|
||||
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
|
||||
|
||||
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
|
||||
* RhsBlasTraits::extractScalarFactor(m_rhs);
|
||||
|
||||
291
Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
Normal file
291
Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
Normal file
@@ -0,0 +1,291 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
********************************************************************************
|
||||
* Content : Eigen bindings to Intel(R) MKL
|
||||
* Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
|
||||
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
|
||||
|
||||
namespace internal {
|
||||
|
||||
|
||||
/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
|
||||
|
||||
#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
|
||||
template <typename Index, \
|
||||
int LhsStorageOrder, bool ConjugateLhs, \
|
||||
int RhsStorageOrder, bool ConjugateRhs> \
|
||||
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
|
||||
{\
|
||||
\
|
||||
static EIGEN_DONT_INLINE void run( \
|
||||
Index rows, Index cols, \
|
||||
const EIGTYPE* _lhs, Index lhsStride, \
|
||||
const EIGTYPE* _rhs, Index rhsStride, \
|
||||
EIGTYPE* res, Index resStride, \
|
||||
EIGTYPE alpha) \
|
||||
{ \
|
||||
char side='L', uplo='L'; \
|
||||
MKL_INT m, n, lda, ldb, ldc; \
|
||||
const EIGTYPE *a, *b; \
|
||||
MKLTYPE alpha_, beta_; \
|
||||
MatrixX##EIGPREFIX b_tmp; \
|
||||
EIGTYPE myone(1);\
|
||||
\
|
||||
/* Set transpose options */ \
|
||||
/* Set m, n, k */ \
|
||||
m = (MKL_INT)rows; \
|
||||
n = (MKL_INT)cols; \
|
||||
\
|
||||
/* Set alpha_ & beta_ */ \
|
||||
assign_scalar_eig2mkl(alpha_, alpha); \
|
||||
assign_scalar_eig2mkl(beta_, myone); \
|
||||
\
|
||||
/* Set lda, ldb, ldc */ \
|
||||
lda = (MKL_INT)lhsStride; \
|
||||
ldb = (MKL_INT)rhsStride; \
|
||||
ldc = (MKL_INT)resStride; \
|
||||
\
|
||||
/* Set a, b, c */ \
|
||||
if (LhsStorageOrder==RowMajor) uplo='U'; \
|
||||
a = _lhs; \
|
||||
\
|
||||
if (RhsStorageOrder==RowMajor) { \
|
||||
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
|
||||
b_tmp = rhs.adjoint(); \
|
||||
b = b_tmp.data(); \
|
||||
ldb = b_tmp.outerStride(); \
|
||||
} else b = _rhs; \
|
||||
\
|
||||
MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
|
||||
\
|
||||
} \
|
||||
};
|
||||
|
||||
|
||||
#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
|
||||
template <typename Index, \
|
||||
int LhsStorageOrder, bool ConjugateLhs, \
|
||||
int RhsStorageOrder, bool ConjugateRhs> \
|
||||
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
|
||||
{\
|
||||
static EIGEN_DONT_INLINE void run( \
|
||||
Index rows, Index cols, \
|
||||
const EIGTYPE* _lhs, Index lhsStride, \
|
||||
const EIGTYPE* _rhs, Index rhsStride, \
|
||||
EIGTYPE* res, Index resStride, \
|
||||
EIGTYPE alpha) \
|
||||
{ \
|
||||
char side='L', uplo='L'; \
|
||||
MKL_INT m, n, lda, ldb, ldc; \
|
||||
const EIGTYPE *a, *b; \
|
||||
MKLTYPE alpha_, beta_; \
|
||||
MatrixX##EIGPREFIX b_tmp; \
|
||||
Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \
|
||||
EIGTYPE myone(1); \
|
||||
\
|
||||
/* Set transpose options */ \
|
||||
/* Set m, n, k */ \
|
||||
m = (MKL_INT)rows; \
|
||||
n = (MKL_INT)cols; \
|
||||
\
|
||||
/* Set alpha_ & beta_ */ \
|
||||
assign_scalar_eig2mkl(alpha_, alpha); \
|
||||
assign_scalar_eig2mkl(beta_, myone); \
|
||||
\
|
||||
/* Set lda, ldb, ldc */ \
|
||||
lda = (MKL_INT)lhsStride; \
|
||||
ldb = (MKL_INT)rhsStride; \
|
||||
ldc = (MKL_INT)resStride; \
|
||||
\
|
||||
/* Set a, b, c */ \
|
||||
if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \
|
||||
Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \
|
||||
a_tmp = lhs.conjugate(); \
|
||||
a = a_tmp.data(); \
|
||||
lda = a_tmp.outerStride(); \
|
||||
} else a = _lhs; \
|
||||
if (LhsStorageOrder==RowMajor) uplo='U'; \
|
||||
\
|
||||
if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \
|
||||
b = _rhs; } \
|
||||
else { \
|
||||
if (RhsStorageOrder==ColMajor && ConjugateRhs) { \
|
||||
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \
|
||||
b_tmp = rhs.conjugate(); \
|
||||
} else \
|
||||
if (ConjugateRhs) { \
|
||||
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
|
||||
b_tmp = rhs.adjoint(); \
|
||||
} else { \
|
||||
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
|
||||
b_tmp = rhs.transpose(); \
|
||||
} \
|
||||
b = b_tmp.data(); \
|
||||
ldb = b_tmp.outerStride(); \
|
||||
} \
|
||||
\
|
||||
MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
|
||||
\
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_MKL_SYMM_L(double, double, d, d)
|
||||
EIGEN_MKL_SYMM_L(float, float, f, s)
|
||||
EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z)
|
||||
EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c)
|
||||
|
||||
|
||||
/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
|
||||
|
||||
#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
|
||||
template <typename Index, \
|
||||
int LhsStorageOrder, bool ConjugateLhs, \
|
||||
int RhsStorageOrder, bool ConjugateRhs> \
|
||||
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
|
||||
{\
|
||||
\
|
||||
static EIGEN_DONT_INLINE void run( \
|
||||
Index rows, Index cols, \
|
||||
const EIGTYPE* _lhs, Index lhsStride, \
|
||||
const EIGTYPE* _rhs, Index rhsStride, \
|
||||
EIGTYPE* res, Index resStride, \
|
||||
EIGTYPE alpha) \
|
||||
{ \
|
||||
char side='R', uplo='L'; \
|
||||
MKL_INT m, n, lda, ldb, ldc; \
|
||||
const EIGTYPE *a, *b; \
|
||||
MKLTYPE alpha_, beta_; \
|
||||
MatrixX##EIGPREFIX b_tmp; \
|
||||
EIGTYPE myone(1);\
|
||||
\
|
||||
/* Set m, n, k */ \
|
||||
m = (MKL_INT)rows; \
|
||||
n = (MKL_INT)cols; \
|
||||
\
|
||||
/* Set alpha_ & beta_ */ \
|
||||
assign_scalar_eig2mkl(alpha_, alpha); \
|
||||
assign_scalar_eig2mkl(beta_, myone); \
|
||||
\
|
||||
/* Set lda, ldb, ldc */ \
|
||||
lda = (MKL_INT)rhsStride; \
|
||||
ldb = (MKL_INT)lhsStride; \
|
||||
ldc = (MKL_INT)resStride; \
|
||||
\
|
||||
/* Set a, b, c */ \
|
||||
if (RhsStorageOrder==RowMajor) uplo='U'; \
|
||||
a = _rhs; \
|
||||
\
|
||||
if (LhsStorageOrder==RowMajor) { \
|
||||
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \
|
||||
b_tmp = lhs.adjoint(); \
|
||||
b = b_tmp.data(); \
|
||||
ldb = b_tmp.outerStride(); \
|
||||
} else b = _lhs; \
|
||||
\
|
||||
MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
|
||||
\
|
||||
} \
|
||||
};
|
||||
|
||||
|
||||
#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
|
||||
template <typename Index, \
|
||||
int LhsStorageOrder, bool ConjugateLhs, \
|
||||
int RhsStorageOrder, bool ConjugateRhs> \
|
||||
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
|
||||
{\
|
||||
static EIGEN_DONT_INLINE void run( \
|
||||
Index rows, Index cols, \
|
||||
const EIGTYPE* _lhs, Index lhsStride, \
|
||||
const EIGTYPE* _rhs, Index rhsStride, \
|
||||
EIGTYPE* res, Index resStride, \
|
||||
EIGTYPE alpha) \
|
||||
{ \
|
||||
char side='R', uplo='L'; \
|
||||
MKL_INT m, n, lda, ldb, ldc; \
|
||||
const EIGTYPE *a, *b; \
|
||||
MKLTYPE alpha_, beta_; \
|
||||
MatrixX##EIGPREFIX b_tmp; \
|
||||
Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \
|
||||
EIGTYPE myone(1); \
|
||||
\
|
||||
/* Set m, n, k */ \
|
||||
m = (MKL_INT)rows; \
|
||||
n = (MKL_INT)cols; \
|
||||
\
|
||||
/* Set alpha_ & beta_ */ \
|
||||
assign_scalar_eig2mkl(alpha_, alpha); \
|
||||
assign_scalar_eig2mkl(beta_, myone); \
|
||||
\
|
||||
/* Set lda, ldb, ldc */ \
|
||||
lda = (MKL_INT)rhsStride; \
|
||||
ldb = (MKL_INT)lhsStride; \
|
||||
ldc = (MKL_INT)resStride; \
|
||||
\
|
||||
/* Set a, b, c */ \
|
||||
if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \
|
||||
Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \
|
||||
a_tmp = rhs.conjugate(); \
|
||||
a = a_tmp.data(); \
|
||||
lda = a_tmp.outerStride(); \
|
||||
} else a = _rhs; \
|
||||
if (RhsStorageOrder==RowMajor) uplo='U'; \
|
||||
\
|
||||
if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \
|
||||
b = _lhs; } \
|
||||
else { \
|
||||
if (LhsStorageOrder==ColMajor && ConjugateLhs) { \
|
||||
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \
|
||||
b_tmp = lhs.conjugate(); \
|
||||
} else \
|
||||
if (ConjugateLhs) { \
|
||||
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
|
||||
b_tmp = lhs.adjoint(); \
|
||||
} else { \
|
||||
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
|
||||
b_tmp = lhs.transpose(); \
|
||||
} \
|
||||
b = b_tmp.data(); \
|
||||
ldb = b_tmp.outerStride(); \
|
||||
} \
|
||||
\
|
||||
MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_MKL_SYMM_R(double, double, d, d)
|
||||
EIGEN_MKL_SYMM_R(float, float, f, s)
|
||||
EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z)
|
||||
EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c)
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
|
||||
@@ -32,8 +32,15 @@ namespace internal {
|
||||
* the number of load/stores of the result by a factor 2 and to reduce
|
||||
* the instruction dependency.
|
||||
*/
|
||||
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
|
||||
static EIGEN_DONT_INLINE void product_selfadjoint_vector(
|
||||
|
||||
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized>
|
||||
struct selfadjoint_matrix_vector_product;
|
||||
|
||||
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
|
||||
struct selfadjoint_matrix_vector_product
|
||||
|
||||
{
|
||||
static EIGEN_DONT_INLINE void run(
|
||||
Index size,
|
||||
const Scalar* lhs, Index lhsStride,
|
||||
const Scalar* _rhs, Index rhsIncr,
|
||||
@@ -85,14 +92,14 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
|
||||
Scalar t1 = cjAlpha * rhs[j+1];
|
||||
Packet ptmp1 = pset1<Packet>(t1);
|
||||
|
||||
Scalar t2 = 0;
|
||||
Scalar t2(0);
|
||||
Packet ptmp2 = pset1<Packet>(t2);
|
||||
Scalar t3 = 0;
|
||||
Scalar t3(0);
|
||||
Packet ptmp3 = pset1<Packet>(t3);
|
||||
|
||||
size_t starti = FirstTriangular ? 0 : j+2;
|
||||
size_t endi = FirstTriangular ? j : size;
|
||||
size_t alignedStart = (starti) + first_aligned(&res[starti], endi-starti);
|
||||
size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti);
|
||||
size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
|
||||
|
||||
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
|
||||
@@ -148,7 +155,7 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
|
||||
register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
|
||||
|
||||
Scalar t1 = cjAlpha * rhs[j];
|
||||
Scalar t2 = 0;
|
||||
Scalar t2(0);
|
||||
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
|
||||
res[j] += cjd.pmul(internal::real(A0[j]), t1);
|
||||
for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
|
||||
@@ -159,6 +166,7 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
|
||||
res[j] += alpha * t2;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
@@ -193,8 +201,8 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
|
||||
|
||||
eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols());
|
||||
|
||||
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
|
||||
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
|
||||
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
|
||||
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
|
||||
|
||||
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
|
||||
* RhsBlasTraits::extractScalarFactor(m_rhs);
|
||||
@@ -232,7 +240,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
|
||||
}
|
||||
|
||||
|
||||
internal::product_selfadjoint_vector<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>
|
||||
internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
|
||||
(
|
||||
lhs.rows(), // size
|
||||
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
|
||||
|
||||
110
Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
Normal file
110
Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
Normal file
@@ -0,0 +1,110 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
********************************************************************************
|
||||
* Content : Eigen bindings to Intel(R) MKL
|
||||
* Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV.
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
|
||||
#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
|
||||
|
||||
namespace internal {
|
||||
|
||||
/**********************************************************************
|
||||
* This file implements selfadjoint matrix-vector multiplication using BLAS
|
||||
**********************************************************************/
|
||||
|
||||
// symv/hemv specialization
|
||||
|
||||
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
|
||||
struct selfadjoint_matrix_vector_product_symv :
|
||||
selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {};
|
||||
|
||||
#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
|
||||
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
|
||||
struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
|
||||
static EIGEN_DONT_INLINE void run( \
|
||||
Index size, const Scalar* lhs, Index lhsStride, \
|
||||
const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
|
||||
enum {\
|
||||
IsColMajor = StorageOrder==ColMajor \
|
||||
}; \
|
||||
if (IsColMajor == ConjugateLhs) {\
|
||||
selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \
|
||||
size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \
|
||||
} else {\
|
||||
selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \
|
||||
size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \
|
||||
}\
|
||||
} \
|
||||
}; \
|
||||
|
||||
EIGEN_MKL_SYMV_SPECIALIZE(double)
|
||||
EIGEN_MKL_SYMV_SPECIALIZE(float)
|
||||
EIGEN_MKL_SYMV_SPECIALIZE(dcomplex)
|
||||
EIGEN_MKL_SYMV_SPECIALIZE(scomplex)
|
||||
|
||||
#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \
|
||||
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
|
||||
struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \
|
||||
{ \
|
||||
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
|
||||
\
|
||||
static EIGEN_DONT_INLINE void run( \
|
||||
Index size, const EIGTYPE* lhs, Index lhsStride, \
|
||||
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
|
||||
{ \
|
||||
enum {\
|
||||
IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \
|
||||
IsLower = UpLo == Lower ? 1 : 0, \
|
||||
}; \
|
||||
MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \
|
||||
MKLTYPE alpha_, beta_; \
|
||||
const EIGTYPE *x_ptr, myone(1); \
|
||||
char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \
|
||||
assign_scalar_eig2mkl(alpha_, alpha); \
|
||||
assign_scalar_eig2mkl(beta_, myone); \
|
||||
SYMVVector x_tmp; \
|
||||
if (ConjugateRhs) { \
|
||||
Map<const SYMVVector, 0, InnerStride<> > map_x(_rhs,size,1,InnerStride<>(incx)); \
|
||||
x_tmp=map_x.conjugate(); \
|
||||
x_ptr=x_tmp.data(); \
|
||||
incx=1; \
|
||||
} else x_ptr=_rhs; \
|
||||
MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
|
||||
}\
|
||||
};
|
||||
|
||||
EIGEN_MKL_SYMV_SPECIALIZATION(double, double, dsymv)
|
||||
EIGEN_MKL_SYMV_SPECIALIZATION(float, float, ssymv)
|
||||
EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
|
||||
EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv)
|
||||
|
||||
} //end of namespase
|
||||
|
||||
#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
|
||||
@@ -72,7 +72,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
|
||||
typedef internal::blas_traits<OtherType> OtherBlasTraits;
|
||||
typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
|
||||
typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
|
||||
const ActualOtherType actualOther = OtherBlasTraits::extract(other.derived());
|
||||
typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
|
||||
|
||||
Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
|
||||
|
||||
@@ -105,12 +105,12 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
|
||||
typedef internal::blas_traits<OtherType> OtherBlasTraits;
|
||||
typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
|
||||
typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
|
||||
const ActualOtherType actualOther = OtherBlasTraits::extract(other.derived());
|
||||
typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
|
||||
|
||||
Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
|
||||
|
||||
enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
|
||||
|
||||
|
||||
internal::general_matrix_matrix_triangular_product<Index,
|
||||
Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
|
||||
Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
|
||||
|
||||
@@ -76,12 +76,12 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
|
||||
typedef internal::blas_traits<DerivedU> UBlasTraits;
|
||||
typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
|
||||
typedef typename internal::remove_all<ActualUType>::type _ActualUType;
|
||||
const ActualUType actualU = UBlasTraits::extract(u.derived());
|
||||
typename internal::add_const_on_value_type<ActualUType>::type actualU = UBlasTraits::extract(u.derived());
|
||||
|
||||
typedef internal::blas_traits<DerivedV> VBlasTraits;
|
||||
typedef typename VBlasTraits::DirectLinearAccessType ActualVType;
|
||||
typedef typename internal::remove_all<ActualVType>::type _ActualVType;
|
||||
const ActualVType actualV = VBlasTraits::extract(v.derived());
|
||||
typename internal::add_const_on_value_type<ActualVType>::type actualV = VBlasTraits::extract(v.derived());
|
||||
|
||||
// If MatrixType is row major, then we use the routine for lower triangular in the upper triangular case and
|
||||
// vice versa, and take the complex conjugate of all coefficients and vector entries.
|
||||
|
||||
@@ -58,16 +58,16 @@ template <typename Scalar, typename Index,
|
||||
int Mode, bool LhsIsTriangular,
|
||||
int LhsStorageOrder, bool ConjugateLhs,
|
||||
int RhsStorageOrder, bool ConjugateRhs,
|
||||
int ResStorageOrder>
|
||||
int ResStorageOrder, int Version = Specialized>
|
||||
struct product_triangular_matrix_matrix;
|
||||
|
||||
template <typename Scalar, typename Index,
|
||||
int Mode, bool LhsIsTriangular,
|
||||
int LhsStorageOrder, bool ConjugateLhs,
|
||||
int RhsStorageOrder, bool ConjugateRhs>
|
||||
int RhsStorageOrder, bool ConjugateRhs, int Version>
|
||||
struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
|
||||
LhsStorageOrder,ConjugateLhs,
|
||||
RhsStorageOrder,ConjugateRhs,RowMajor>
|
||||
RhsStorageOrder,ConjugateRhs,RowMajor,Version>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(
|
||||
Index rows, Index cols, Index depth,
|
||||
@@ -91,15 +91,15 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
|
||||
// implements col-major += alpha * op(triangular) * op(general)
|
||||
template <typename Scalar, typename Index, int Mode,
|
||||
int LhsStorageOrder, bool ConjugateLhs,
|
||||
int RhsStorageOrder, bool ConjugateRhs>
|
||||
int RhsStorageOrder, bool ConjugateRhs, int Version>
|
||||
struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
LhsStorageOrder,ConjugateLhs,
|
||||
RhsStorageOrder,ConjugateRhs,ColMajor>
|
||||
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
|
||||
{
|
||||
|
||||
typedef gebp_traits<Scalar,Scalar> Traits;
|
||||
enum {
|
||||
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
|
||||
SmallPanelWidth = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
|
||||
IsLower = (Mode&Lower) == Lower,
|
||||
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
|
||||
};
|
||||
@@ -220,10 +220,10 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
// implements col-major += alpha * op(general) * op(triangular)
|
||||
template <typename Scalar, typename Index, int Mode,
|
||||
int LhsStorageOrder, bool ConjugateLhs,
|
||||
int RhsStorageOrder, bool ConjugateRhs>
|
||||
int RhsStorageOrder, bool ConjugateRhs, int Version>
|
||||
struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
LhsStorageOrder,ConjugateLhs,
|
||||
RhsStorageOrder,ConjugateRhs,ColMajor>
|
||||
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
|
||||
{
|
||||
typedef gebp_traits<Scalar,Scalar> Traits;
|
||||
enum {
|
||||
@@ -378,8 +378,8 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
|
||||
|
||||
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
|
||||
{
|
||||
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
|
||||
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
|
||||
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
|
||||
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
|
||||
|
||||
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
|
||||
* RhsBlasTraits::extractScalarFactor(m_rhs);
|
||||
|
||||
305
Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
Normal file
305
Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
Normal file
@@ -0,0 +1,305 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
********************************************************************************
|
||||
* Content : Eigen bindings to Intel(R) MKL
|
||||
* Triangular matrix * matrix product functionality based on ?TRMM.
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
|
||||
#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
|
||||
|
||||
namespace internal {
|
||||
|
||||
|
||||
template <typename Scalar, typename Index,
|
||||
int Mode, bool LhsIsTriangular,
|
||||
int LhsStorageOrder, bool ConjugateLhs,
|
||||
int RhsStorageOrder, bool ConjugateRhs,
|
||||
int ResStorageOrder>
|
||||
struct product_triangular_matrix_matrix_trmm :
|
||||
product_triangular_matrix_matrix<Scalar,Index,Mode,
|
||||
LhsIsTriangular,LhsStorageOrder,ConjugateLhs,
|
||||
RhsStorageOrder, ConjugateRhs, ResStorageOrder, BuiltIn> {};
|
||||
|
||||
|
||||
// try to go to BLAS specialization
|
||||
#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \
|
||||
template <typename Index, int Mode, \
|
||||
int LhsStorageOrder, bool ConjugateLhs, \
|
||||
int RhsStorageOrder, bool ConjugateRhs> \
|
||||
struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
|
||||
LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,Specialized> { \
|
||||
static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\
|
||||
const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) { \
|
||||
product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \
|
||||
LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \
|
||||
RhsStorageOrder, ConjugateRhs, ColMajor>::run( \
|
||||
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_MKL_TRMM_SPECIALIZE(double, true)
|
||||
EIGEN_MKL_TRMM_SPECIALIZE(double, false)
|
||||
EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true)
|
||||
EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false)
|
||||
EIGEN_MKL_TRMM_SPECIALIZE(float, true)
|
||||
EIGEN_MKL_TRMM_SPECIALIZE(float, false)
|
||||
EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true)
|
||||
EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false)
|
||||
|
||||
// implements col-major += alpha * op(triangular) * op(general)
|
||||
#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
|
||||
template <typename Index, int Mode, \
|
||||
int LhsStorageOrder, bool ConjugateLhs, \
|
||||
int RhsStorageOrder, bool ConjugateRhs> \
|
||||
struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
|
||||
LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
|
||||
{ \
|
||||
enum { \
|
||||
IsLower = (Mode&Lower) == Lower, \
|
||||
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
|
||||
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
|
||||
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
|
||||
LowUp = IsLower ? Lower : Upper, \
|
||||
conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
|
||||
}; \
|
||||
\
|
||||
static EIGEN_DONT_INLINE void run( \
|
||||
Index _rows, Index _cols, Index _depth, \
|
||||
const EIGTYPE* _lhs, Index lhsStride, \
|
||||
const EIGTYPE* _rhs, Index rhsStride, \
|
||||
EIGTYPE* res, Index resStride, \
|
||||
EIGTYPE alpha) \
|
||||
{ \
|
||||
Index diagSize = (std::min)(_rows,_depth); \
|
||||
Index rows = IsLower ? _rows : diagSize; \
|
||||
Index depth = IsLower ? diagSize : _depth; \
|
||||
Index cols = _cols; \
|
||||
\
|
||||
typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
|
||||
typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
|
||||
\
|
||||
/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
|
||||
if (rows != depth) { \
|
||||
\
|
||||
int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
|
||||
\
|
||||
if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
|
||||
/* Most likely no benefit to call TRMM or GEMM from MKL*/ \
|
||||
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
|
||||
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
|
||||
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
|
||||
/*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \
|
||||
} else { \
|
||||
/* Make sense to call GEMM */ \
|
||||
Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
|
||||
MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
|
||||
MKL_INT aStride = aa_tmp.outerStride(); \
|
||||
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \
|
||||
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
|
||||
rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
|
||||
\
|
||||
/*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
|
||||
} \
|
||||
return; \
|
||||
} \
|
||||
char side = 'L', transa, uplo, diag = 'N'; \
|
||||
EIGTYPE *b; \
|
||||
const EIGTYPE *a; \
|
||||
MKL_INT m, n, k, lda, ldb, ldc; \
|
||||
MKLTYPE alpha_; \
|
||||
\
|
||||
/* Set alpha_*/ \
|
||||
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
|
||||
\
|
||||
/* Set m, n */ \
|
||||
m = (MKL_INT)diagSize; \
|
||||
n = (MKL_INT)cols; \
|
||||
\
|
||||
/* Set trans */ \
|
||||
transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
|
||||
\
|
||||
/* Set b, ldb */ \
|
||||
Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \
|
||||
MatrixX##EIGPREFIX b_tmp; \
|
||||
\
|
||||
if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \
|
||||
b = b_tmp.data(); \
|
||||
ldb = b_tmp.outerStride(); \
|
||||
\
|
||||
/* Set uplo */ \
|
||||
uplo = IsLower ? 'L' : 'U'; \
|
||||
if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
|
||||
/* Set a, lda */ \
|
||||
Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
|
||||
MatrixLhs a_tmp; \
|
||||
\
|
||||
if ((conjA!=0) || (SetDiag==0)) { \
|
||||
if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \
|
||||
if (IsZeroDiag) \
|
||||
a_tmp.diagonal().setZero(); \
|
||||
else if (IsUnitDiag) \
|
||||
a_tmp.diagonal().setOnes();\
|
||||
a = a_tmp.data(); \
|
||||
lda = a_tmp.outerStride(); \
|
||||
} else { \
|
||||
a = _lhs; \
|
||||
lda = lhsStride; \
|
||||
} \
|
||||
/*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \
|
||||
/* call ?trmm*/ \
|
||||
MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
|
||||
\
|
||||
/* Add op(a_triangular)*b into res*/ \
|
||||
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
|
||||
res_tmp=res_tmp+b_tmp; \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_MKL_TRMM_L(double, double, d, d)
|
||||
EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z)
|
||||
EIGEN_MKL_TRMM_L(float, float, f, s)
|
||||
EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c)
|
||||
|
||||
// implements col-major += alpha * op(general) * op(triangular)
|
||||
#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
|
||||
template <typename Index, int Mode, \
|
||||
int LhsStorageOrder, bool ConjugateLhs, \
|
||||
int RhsStorageOrder, bool ConjugateRhs> \
|
||||
struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
|
||||
LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
|
||||
{ \
|
||||
enum { \
|
||||
IsLower = (Mode&Lower) == Lower, \
|
||||
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
|
||||
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
|
||||
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
|
||||
LowUp = IsLower ? Lower : Upper, \
|
||||
conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
|
||||
}; \
|
||||
\
|
||||
static EIGEN_DONT_INLINE void run( \
|
||||
Index _rows, Index _cols, Index _depth, \
|
||||
const EIGTYPE* _lhs, Index lhsStride, \
|
||||
const EIGTYPE* _rhs, Index rhsStride, \
|
||||
EIGTYPE* res, Index resStride, \
|
||||
EIGTYPE alpha) \
|
||||
{ \
|
||||
Index diagSize = (std::min)(_cols,_depth); \
|
||||
Index rows = _rows; \
|
||||
Index depth = IsLower ? _depth : diagSize; \
|
||||
Index cols = IsLower ? diagSize : _cols; \
|
||||
\
|
||||
typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
|
||||
typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
|
||||
\
|
||||
/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
|
||||
if (cols != depth) { \
|
||||
\
|
||||
int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
|
||||
\
|
||||
if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
|
||||
/* Most likely no benefit to call TRMM or GEMM from MKL*/ \
|
||||
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
|
||||
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
|
||||
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
|
||||
/*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \
|
||||
} else { \
|
||||
/* Make sense to call GEMM */ \
|
||||
Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
|
||||
MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
|
||||
MKL_INT aStride = aa_tmp.outerStride(); \
|
||||
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \
|
||||
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
|
||||
rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, blocking); \
|
||||
\
|
||||
/*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
|
||||
} \
|
||||
return; \
|
||||
} \
|
||||
char side = 'R', transa, uplo, diag = 'N'; \
|
||||
EIGTYPE *b; \
|
||||
const EIGTYPE *a; \
|
||||
MKL_INT m, n, k, lda, ldb, ldc; \
|
||||
MKLTYPE alpha_; \
|
||||
\
|
||||
/* Set alpha_*/ \
|
||||
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
|
||||
\
|
||||
/* Set m, n */ \
|
||||
m = (MKL_INT)rows; \
|
||||
n = (MKL_INT)diagSize; \
|
||||
\
|
||||
/* Set trans */ \
|
||||
transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
|
||||
\
|
||||
/* Set b, ldb */ \
|
||||
Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
|
||||
MatrixX##EIGPREFIX b_tmp; \
|
||||
\
|
||||
if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \
|
||||
b = b_tmp.data(); \
|
||||
ldb = b_tmp.outerStride(); \
|
||||
\
|
||||
/* Set uplo */ \
|
||||
uplo = IsLower ? 'L' : 'U'; \
|
||||
if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
|
||||
/* Set a, lda */ \
|
||||
Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \
|
||||
MatrixRhs a_tmp; \
|
||||
\
|
||||
if ((conjA!=0) || (SetDiag==0)) { \
|
||||
if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \
|
||||
if (IsZeroDiag) \
|
||||
a_tmp.diagonal().setZero(); \
|
||||
else if (IsUnitDiag) \
|
||||
a_tmp.diagonal().setOnes();\
|
||||
a = a_tmp.data(); \
|
||||
lda = a_tmp.outerStride(); \
|
||||
} else { \
|
||||
a = _rhs; \
|
||||
lda = rhsStride; \
|
||||
} \
|
||||
/*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \
|
||||
/* call ?trmm*/ \
|
||||
MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
|
||||
\
|
||||
/* Add op(a_triangular)*b into res*/ \
|
||||
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
|
||||
res_tmp=res_tmp+b_tmp; \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_MKL_TRMM_R(double, double, d, d)
|
||||
EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z)
|
||||
EIGEN_MKL_TRMM_R(float, float, f, s)
|
||||
EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c)
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
|
||||
@@ -27,21 +27,25 @@
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder>
|
||||
struct product_triangular_matrix_vector;
|
||||
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder, int Version=Specialized>
|
||||
struct triangular_matrix_vector_product;
|
||||
|
||||
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs>
|
||||
struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor>
|
||||
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
|
||||
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
|
||||
{
|
||||
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
enum {
|
||||
IsLower = ((Mode&Lower)==Lower),
|
||||
HasUnitDiag = (Mode & UnitDiag)==UnitDiag
|
||||
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
|
||||
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
|
||||
};
|
||||
static EIGEN_DONT_INLINE void run(Index rows, Index cols, const LhsScalar* _lhs, Index lhsStride,
|
||||
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
|
||||
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
|
||||
{
|
||||
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
|
||||
Index size = (std::min)(_rows,_cols);
|
||||
Index rows = IsLower ? _rows : (std::min)(_rows,_cols);
|
||||
Index cols = IsLower ? (std::min)(_rows,_cols) : _cols;
|
||||
|
||||
typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;
|
||||
const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
|
||||
@@ -54,45 +58,57 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
|
||||
typedef Map<Matrix<ResScalar,Dynamic,1> > ResMap;
|
||||
ResMap res(_res,rows);
|
||||
|
||||
for (Index pi=0; pi<cols; pi+=PanelWidth)
|
||||
for (Index pi=0; pi<size; pi+=PanelWidth)
|
||||
{
|
||||
Index actualPanelWidth = (std::min)(PanelWidth, cols-pi);
|
||||
Index actualPanelWidth = (std::min)(PanelWidth, size-pi);
|
||||
for (Index k=0; k<actualPanelWidth; ++k)
|
||||
{
|
||||
Index i = pi + k;
|
||||
Index s = IsLower ? (HasUnitDiag ? i+1 : i ) : pi;
|
||||
Index s = IsLower ? ((HasUnitDiag||HasZeroDiag) ? i+1 : i ) : pi;
|
||||
Index r = IsLower ? actualPanelWidth-k : k+1;
|
||||
if ((!HasUnitDiag) || (--r)>0)
|
||||
if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
|
||||
res.segment(s,r) += (alpha * cjRhs.coeff(i)) * cjLhs.col(i).segment(s,r);
|
||||
if (HasUnitDiag)
|
||||
res.coeffRef(i) += alpha * cjRhs.coeff(i);
|
||||
}
|
||||
Index r = IsLower ? cols - pi - actualPanelWidth : pi;
|
||||
Index r = IsLower ? rows - pi - actualPanelWidth : pi;
|
||||
if (r>0)
|
||||
{
|
||||
Index s = IsLower ? pi+actualPanelWidth : 0;
|
||||
general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs>::run(
|
||||
general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run(
|
||||
r, actualPanelWidth,
|
||||
&lhs.coeffRef(s,pi), lhsStride,
|
||||
&rhs.coeffRef(pi), rhsIncr,
|
||||
&res.coeffRef(s), resIncr, alpha);
|
||||
}
|
||||
}
|
||||
if((!IsLower) && cols>size)
|
||||
{
|
||||
general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs>::run(
|
||||
rows, cols-size,
|
||||
&lhs.coeffRef(0,size), lhsStride,
|
||||
&rhs.coeffRef(size), rhsIncr,
|
||||
_res, resIncr, alpha);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs>
|
||||
struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor>
|
||||
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
|
||||
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
|
||||
{
|
||||
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
enum {
|
||||
IsLower = ((Mode&Lower)==Lower),
|
||||
HasUnitDiag = (Mode & UnitDiag)==UnitDiag
|
||||
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
|
||||
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
|
||||
};
|
||||
static void run(Index rows, Index cols, const LhsScalar* _lhs, Index lhsStride,
|
||||
static void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
|
||||
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
|
||||
{
|
||||
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
|
||||
Index diagSize = (std::min)(_rows,_cols);
|
||||
Index rows = IsLower ? _rows : diagSize;
|
||||
Index cols = IsLower ? diagSize : _cols;
|
||||
|
||||
typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;
|
||||
const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
|
||||
@@ -105,15 +121,15 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
|
||||
typedef Map<Matrix<ResScalar,Dynamic,1>, 0, InnerStride<> > ResMap;
|
||||
ResMap res(_res,rows,InnerStride<>(resIncr));
|
||||
|
||||
for (Index pi=0; pi<cols; pi+=PanelWidth)
|
||||
for (Index pi=0; pi<diagSize; pi+=PanelWidth)
|
||||
{
|
||||
Index actualPanelWidth = (std::min)(PanelWidth, cols-pi);
|
||||
Index actualPanelWidth = (std::min)(PanelWidth, diagSize-pi);
|
||||
for (Index k=0; k<actualPanelWidth; ++k)
|
||||
{
|
||||
Index i = pi + k;
|
||||
Index s = IsLower ? pi : (HasUnitDiag ? i+1 : i);
|
||||
Index s = IsLower ? pi : ((HasUnitDiag||HasZeroDiag) ? i+1 : i);
|
||||
Index r = IsLower ? k+1 : actualPanelWidth-k;
|
||||
if ((!HasUnitDiag) || (--r)>0)
|
||||
if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
|
||||
res.coeffRef(i) += alpha * (cjLhs.row(i).segment(s,r).cwiseProduct(cjRhs.segment(s,r).transpose())).sum();
|
||||
if (HasUnitDiag)
|
||||
res.coeffRef(i) += alpha * cjRhs.coeff(i);
|
||||
@@ -122,13 +138,21 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
|
||||
if (r>0)
|
||||
{
|
||||
Index s = IsLower ? 0 : pi + actualPanelWidth;
|
||||
general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs>::run(
|
||||
general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run(
|
||||
actualPanelWidth, r,
|
||||
&lhs.coeffRef(pi,s), lhsStride,
|
||||
&rhs.coeffRef(s), rhsIncr,
|
||||
&res.coeffRef(pi), resIncr, alpha);
|
||||
}
|
||||
}
|
||||
if(IsLower && rows>diagSize)
|
||||
{
|
||||
general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs>::run(
|
||||
rows-diagSize, cols,
|
||||
&lhs.coeffRef(diagSize,0), lhsStride,
|
||||
&rhs.coeffRef(0), rhsIncr,
|
||||
&res.coeffRef(diagSize), resIncr, alpha);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -180,7 +204,7 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
|
||||
{
|
||||
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
|
||||
|
||||
typedef TriangularProduct<(Mode & UnitDiag) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
|
||||
typedef TriangularProduct<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
|
||||
Transpose<Dest> dstT(dst);
|
||||
internal::trmv_selector<(int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>::run(
|
||||
TriangularProductTranspose(m_rhs.transpose(),m_lhs.transpose()), dstT, alpha);
|
||||
@@ -208,8 +232,8 @@ template<> struct trmv_selector<ColMajor>
|
||||
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
|
||||
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
|
||||
|
||||
const ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
|
||||
const ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
|
||||
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
|
||||
typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
|
||||
|
||||
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
|
||||
* RhsBlasTraits::extractScalarFactor(prod.rhs());
|
||||
@@ -247,7 +271,7 @@ template<> struct trmv_selector<ColMajor>
|
||||
MappedDest(actualDestPtr, dest.size()) = dest;
|
||||
}
|
||||
|
||||
internal::product_triangular_matrix_vector
|
||||
internal::triangular_matrix_vector_product
|
||||
<Index,Mode,
|
||||
LhsScalar, LhsBlasTraits::NeedToConjugate,
|
||||
RhsScalar, RhsBlasTraits::NeedToConjugate,
|
||||
@@ -307,7 +331,7 @@ template<> struct trmv_selector<RowMajor>
|
||||
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
|
||||
}
|
||||
|
||||
internal::product_triangular_matrix_vector
|
||||
internal::triangular_matrix_vector_product
|
||||
<Index,Mode,
|
||||
LhsScalar, LhsBlasTraits::NeedToConjugate,
|
||||
RhsScalar, RhsBlasTraits::NeedToConjugate,
|
||||
|
||||
245
Eigen/src/Core/products/TriangularMatrixVector_MKL.h
Normal file
245
Eigen/src/Core/products/TriangularMatrixVector_MKL.h
Normal file
@@ -0,0 +1,245 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
********************************************************************************
|
||||
* Content : Eigen bindings to Intel(R) MKL
|
||||
* Triangular matrix-vector product functionality based on ?TRMV.
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
|
||||
#define EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
|
||||
|
||||
namespace internal {
|
||||
|
||||
/**********************************************************************
|
||||
* This file implements triangular matrix-vector multiplication using BLAS
|
||||
**********************************************************************/
|
||||
|
||||
// trmv/hemv specialization
|
||||
|
||||
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder>
|
||||
struct triangular_matrix_vector_product_trmv :
|
||||
triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {};
|
||||
|
||||
#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \
|
||||
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
|
||||
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
|
||||
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
|
||||
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
|
||||
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \
|
||||
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
|
||||
} \
|
||||
}; \
|
||||
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
|
||||
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \
|
||||
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
|
||||
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
|
||||
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \
|
||||
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_MKL_TRMV_SPECIALIZE(double)
|
||||
EIGEN_MKL_TRMV_SPECIALIZE(float)
|
||||
EIGEN_MKL_TRMV_SPECIALIZE(dcomplex)
|
||||
EIGEN_MKL_TRMV_SPECIALIZE(scomplex)
|
||||
|
||||
// implements col-major: res += alpha * op(triangular) * vector
|
||||
#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
|
||||
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
|
||||
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
|
||||
enum { \
|
||||
IsLower = (Mode&Lower) == Lower, \
|
||||
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
|
||||
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
|
||||
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
|
||||
LowUp = IsLower ? Lower : Upper \
|
||||
}; \
|
||||
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
|
||||
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
|
||||
{ \
|
||||
if (ConjLhs || IsZeroDiag) { \
|
||||
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
|
||||
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
|
||||
return; \
|
||||
}\
|
||||
Index size = (std::min)(_rows,_cols); \
|
||||
Index rows = IsLower ? _rows : size; \
|
||||
Index cols = IsLower ? size : _cols; \
|
||||
\
|
||||
typedef VectorX##EIGPREFIX VectorRhs; \
|
||||
EIGTYPE *x, *y;\
|
||||
\
|
||||
/* Set x*/ \
|
||||
Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
|
||||
VectorRhs x_tmp; \
|
||||
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
|
||||
x = x_tmp.data(); \
|
||||
\
|
||||
/* Square part handling */\
|
||||
\
|
||||
char trans, uplo, diag; \
|
||||
MKL_INT m, n, k, lda, incx, incy; \
|
||||
EIGTYPE const *a; \
|
||||
MKLTYPE alpha_, beta_; \
|
||||
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
|
||||
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
|
||||
\
|
||||
/* Set m, n */ \
|
||||
n = (MKL_INT)size; \
|
||||
lda = lhsStride; \
|
||||
incx = 1; \
|
||||
incy = resIncr; \
|
||||
\
|
||||
/* Set uplo, trans and diag*/ \
|
||||
trans = 'N'; \
|
||||
uplo = IsLower ? 'L' : 'U'; \
|
||||
diag = IsUnitDiag ? 'U' : 'N'; \
|
||||
\
|
||||
/* call ?TRMV*/ \
|
||||
std::cout << "TRMV: CM\n";\
|
||||
MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
|
||||
\
|
||||
/* Add op(a_tr)rhs into res*/ \
|
||||
MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
|
||||
/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \
|
||||
if (size<(std::max)(rows,cols)) { \
|
||||
typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \
|
||||
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
|
||||
x = x_tmp.data(); \
|
||||
if (size<rows) { \
|
||||
y = _res + size*resIncr; \
|
||||
a = _lhs + size; \
|
||||
m = rows-size; \
|
||||
n = size; \
|
||||
} \
|
||||
if (size<cols) { \
|
||||
x += size; \
|
||||
y = _res; \
|
||||
a = _lhs + size*lda; \
|
||||
m = size; \
|
||||
n = cols-size; \
|
||||
} \
|
||||
MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
|
||||
} \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_MKL_TRMV_CM(double, double, d, d)
|
||||
EIGEN_MKL_TRMV_CM(dcomplex, MKL_Complex16, cd, z)
|
||||
EIGEN_MKL_TRMV_CM(float, float, f, s)
|
||||
EIGEN_MKL_TRMV_CM(scomplex, MKL_Complex8, cf, c)
|
||||
|
||||
// implements row-major: res += alpha * op(triangular) * vector
|
||||
#define EIGEN_MKL_TRMV_RM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
|
||||
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
|
||||
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
|
||||
enum { \
|
||||
IsLower = (Mode&Lower) == Lower, \
|
||||
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
|
||||
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
|
||||
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
|
||||
LowUp = IsLower ? Lower : Upper \
|
||||
}; \
|
||||
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
|
||||
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
|
||||
{ \
|
||||
if (IsZeroDiag) { \
|
||||
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
|
||||
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
|
||||
return; \
|
||||
}\
|
||||
Index size = (std::min)(_rows,_cols); \
|
||||
Index rows = IsLower ? _rows : size; \
|
||||
Index cols = IsLower ? size : _cols; \
|
||||
\
|
||||
typedef VectorX##EIGPREFIX VectorRhs; \
|
||||
EIGTYPE *x, *y;\
|
||||
\
|
||||
/* Set x*/ \
|
||||
Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
|
||||
VectorRhs x_tmp; \
|
||||
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
|
||||
x = x_tmp.data(); \
|
||||
\
|
||||
/* Square part handling */\
|
||||
\
|
||||
char trans, uplo, diag; \
|
||||
MKL_INT m, n, k, lda, incx, incy; \
|
||||
EIGTYPE const *a; \
|
||||
MKLTYPE alpha_, beta_; \
|
||||
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
|
||||
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
|
||||
\
|
||||
/* Set m, n */ \
|
||||
n = (MKL_INT)size; \
|
||||
lda = lhsStride; \
|
||||
incx = 1; \
|
||||
incy = resIncr; \
|
||||
\
|
||||
/* Set uplo, trans and diag*/ \
|
||||
trans = ConjLhs ? 'C' : 'T'; \
|
||||
uplo = IsLower ? 'U' : 'L'; \
|
||||
diag = IsUnitDiag ? 'U' : 'N'; \
|
||||
\
|
||||
/* call ?TRMV*/ \
|
||||
std::cout << "TRMV: RM\n";\
|
||||
MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
|
||||
\
|
||||
/* Add op(a_tr)rhs into res*/ \
|
||||
MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
|
||||
/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \
|
||||
if (size<(std::max)(rows,cols)) { \
|
||||
typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \
|
||||
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
|
||||
x = x_tmp.data(); \
|
||||
if (size<rows) { \
|
||||
y = _res + size*resIncr; \
|
||||
a = _lhs + size*lda; \
|
||||
m = rows-size; \
|
||||
n = size; \
|
||||
} \
|
||||
if (size<cols) { \
|
||||
x += size; \
|
||||
y = _res; \
|
||||
a = _lhs + size; \
|
||||
m = size; \
|
||||
n = cols-size; \
|
||||
} \
|
||||
MKLPREFIX##gemv(&trans, &n, &m, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
|
||||
} \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_MKL_TRMV_RM(double, double, d, d)
|
||||
EIGEN_MKL_TRMV_RM(dcomplex, MKL_Complex16, cd, z)
|
||||
EIGEN_MKL_TRMV_RM(float, float, f, s)
|
||||
EIGEN_MKL_TRMV_RM(scomplex, MKL_Complex8, cf, c)
|
||||
|
||||
} //end of namespase
|
||||
|
||||
#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
|
||||
@@ -75,12 +75,20 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
|
||||
Scalar* blockB = allocatedBlockB + sizeW;
|
||||
Scalar* blockW = allocatedBlockB;
|
||||
|
||||
conj_if<Conjugate> conj;
|
||||
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
|
||||
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
|
||||
gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs;
|
||||
|
||||
// the goal here is to subdivise the Rhs panels such that we keep some cache
|
||||
// coherence when accessing the rhs elements
|
||||
std::ptrdiff_t l1, l2;
|
||||
manage_caching_sizes(GetAction, &l1, &l2);
|
||||
Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0;
|
||||
subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);
|
||||
|
||||
for(Index k2=IsLower ? 0 : size;
|
||||
IsLower ? k2<size : k2>0;
|
||||
IsLower ? k2+=kc : k2-=kc)
|
||||
@@ -92,16 +100,18 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
|
||||
// A11 (the triangular part) and A21 the remaining rectangular part.
|
||||
// Then the high level algorithm is:
|
||||
// - B = R1 => general block copy (done during the next step)
|
||||
// - R1 = L1^-1 B => tricky part
|
||||
// - R1 = A11^-1 B => tricky part
|
||||
// - update B from the new R1 => actually this has to be performed continuously during the above step
|
||||
// - R2 = L2 * B => GEPP
|
||||
// - R2 -= A21 * B => GEPP
|
||||
|
||||
// The tricky part: compute R1 = L1^-1 B while updating B from R1
|
||||
// The idea is to split L1 into multiple small vertical panels.
|
||||
// Each panel can be split into a small triangular part A1 which is processed without optimization,
|
||||
// and the remaining small part A2 which is processed using gebp with appropriate block strides
|
||||
// The tricky part: compute R1 = A11^-1 B while updating B from R1
|
||||
// The idea is to split A11 into multiple small vertical panels.
|
||||
// Each panel can be split into a small triangular part T1k which is processed without optimization,
|
||||
// and the remaining small part T2k which is processed using gebp with appropriate block strides
|
||||
for(Index j2=0; j2<cols; j2+=subcols)
|
||||
{
|
||||
// for each small vertical panels of lhs
|
||||
Index actual_cols = (std::min)(cols-j2,subcols);
|
||||
// for each small vertical panels [T1k^T, T2k^T]^T of lhs
|
||||
for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
|
||||
{
|
||||
Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
|
||||
@@ -114,11 +124,11 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
|
||||
Index rs = actualPanelWidth - k - 1; // remaining size
|
||||
|
||||
Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
|
||||
for (Index j=0; j<cols; ++j)
|
||||
for (Index j=j2; j<j2+actual_cols; ++j)
|
||||
{
|
||||
if (TriStorageOrder==RowMajor)
|
||||
{
|
||||
Scalar b = 0;
|
||||
Scalar b(0);
|
||||
const Scalar* l = &tri(i,s);
|
||||
Scalar* r = &other(s,j);
|
||||
for (Index i3=0; i3<k; ++i3)
|
||||
@@ -143,7 +153,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
|
||||
Index blockBOffset = IsLower ? k1 : lengthTarget;
|
||||
|
||||
// update the respective rows of B from other
|
||||
pack_rhs(blockB, _other+startBlock, otherStride, actualPanelWidth, cols, actual_kc, blockBOffset);
|
||||
pack_rhs(blockB+actual_kc*j2, &other(startBlock,j2), otherStride, actualPanelWidth, actual_cols, actual_kc, blockBOffset);
|
||||
|
||||
// GEBP
|
||||
if (lengthTarget>0)
|
||||
@@ -152,13 +162,13 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
|
||||
|
||||
pack_lhs(blockA, &tri(startTarget,startBlock), triStride, actualPanelWidth, lengthTarget);
|
||||
|
||||
gebp_kernel(_other+startTarget, otherStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, Scalar(-1),
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset);
|
||||
gebp_kernel(&other(startTarget,j2), otherStride, blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1),
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// R2 = A2 * B => GEPP
|
||||
|
||||
// R2 -= A21 * B => GEPP
|
||||
{
|
||||
Index start = IsLower ? k2+kc : 0;
|
||||
Index end = IsLower ? size : k2-kc;
|
||||
|
||||
151
Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
Normal file
151
Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
Normal file
@@ -0,0 +1,151 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
********************************************************************************
|
||||
* Content : Eigen bindings to Intel(R) MKL
|
||||
* Triangular matrix * matrix product functionality based on ?TRMM.
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
|
||||
#define EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
|
||||
|
||||
namespace internal {
|
||||
|
||||
// implements LeftSide op(triangular)^-1 * general
|
||||
#define EIGEN_MKL_TRSM_L(EIGTYPE, MKLTYPE, MKLPREFIX) \
|
||||
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
|
||||
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
|
||||
{ \
|
||||
enum { \
|
||||
IsLower = (Mode&Lower) == Lower, \
|
||||
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
|
||||
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
|
||||
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
|
||||
}; \
|
||||
static EIGEN_DONT_INLINE void run( \
|
||||
Index size, Index otherSize, \
|
||||
const EIGTYPE* _tri, Index triStride, \
|
||||
EIGTYPE* _other, Index otherStride) \
|
||||
{ \
|
||||
MKL_INT m = size, n = otherSize, lda, ldb; \
|
||||
char side = 'L', uplo, diag='N', transa; \
|
||||
/* Set alpha_ */ \
|
||||
MKLTYPE alpha; \
|
||||
EIGTYPE myone(1); \
|
||||
assign_scalar_eig2mkl(alpha, myone); \
|
||||
ldb = otherStride;\
|
||||
\
|
||||
const EIGTYPE *a; \
|
||||
/* Set trans */ \
|
||||
transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
|
||||
/* Set uplo */ \
|
||||
uplo = IsLower ? 'L' : 'U'; \
|
||||
if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
|
||||
/* Set a, lda */ \
|
||||
typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
|
||||
Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
|
||||
MatrixTri a_tmp; \
|
||||
\
|
||||
if (conjA) { \
|
||||
a_tmp = tri.conjugate(); \
|
||||
a = a_tmp.data(); \
|
||||
lda = a_tmp.outerStride(); \
|
||||
} else { \
|
||||
a = _tri; \
|
||||
lda = triStride; \
|
||||
} \
|
||||
if (IsUnitDiag) diag='U'; \
|
||||
/* call ?trsm*/ \
|
||||
MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_MKL_TRSM_L(double, double, d)
|
||||
EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z)
|
||||
EIGEN_MKL_TRSM_L(float, float, s)
|
||||
EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c)
|
||||
|
||||
|
||||
// implements RightSide general * op(triangular)^-1
|
||||
#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \
|
||||
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
|
||||
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
|
||||
{ \
|
||||
enum { \
|
||||
IsLower = (Mode&Lower) == Lower, \
|
||||
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
|
||||
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
|
||||
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
|
||||
}; \
|
||||
static EIGEN_DONT_INLINE void run( \
|
||||
Index size, Index otherSize, \
|
||||
const EIGTYPE* _tri, Index triStride, \
|
||||
EIGTYPE* _other, Index otherStride) \
|
||||
{ \
|
||||
MKL_INT m = otherSize, n = size, lda, ldb; \
|
||||
char side = 'R', uplo, diag='N', transa; \
|
||||
/* Set alpha_ */ \
|
||||
MKLTYPE alpha; \
|
||||
EIGTYPE myone(1); \
|
||||
assign_scalar_eig2mkl(alpha, myone); \
|
||||
ldb = otherStride;\
|
||||
\
|
||||
const EIGTYPE *a; \
|
||||
/* Set trans */ \
|
||||
transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
|
||||
/* Set uplo */ \
|
||||
uplo = IsLower ? 'L' : 'U'; \
|
||||
if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
|
||||
/* Set a, lda */ \
|
||||
typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
|
||||
Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
|
||||
MatrixTri a_tmp; \
|
||||
\
|
||||
if (conjA) { \
|
||||
a_tmp = tri.conjugate(); \
|
||||
a = a_tmp.data(); \
|
||||
lda = a_tmp.outerStride(); \
|
||||
} else { \
|
||||
a = _tri; \
|
||||
lda = triStride; \
|
||||
} \
|
||||
if (IsUnitDiag) diag='U'; \
|
||||
/* call ?trsm*/ \
|
||||
MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
|
||||
/*std::cout << "TRMS_L specialization!\n";*/ \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_MKL_TRSM_R(double, double, d)
|
||||
EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z)
|
||||
EIGEN_MKL_TRSM_R(float, float, s)
|
||||
EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c)
|
||||
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
|
||||
@@ -47,7 +47,7 @@ template<
|
||||
int ResStorageOrder>
|
||||
struct general_matrix_matrix_product;
|
||||
|
||||
template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
|
||||
template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version=Specialized>
|
||||
struct general_matrix_vector_product;
|
||||
|
||||
|
||||
@@ -56,11 +56,15 @@ template<bool Conjugate> struct conj_if;
|
||||
template<> struct conj_if<true> {
|
||||
template<typename T>
|
||||
inline T operator()(const T& x) { return conj(x); }
|
||||
template<typename T>
|
||||
inline T pconj(const T& x) { return internal::pconj(x); }
|
||||
};
|
||||
|
||||
template<> struct conj_if<false> {
|
||||
template<typename T>
|
||||
inline const T& operator()(const T& x) { return x; }
|
||||
template<typename T>
|
||||
inline const T& pconj(const T& x) { return x; }
|
||||
};
|
||||
|
||||
template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
|
||||
@@ -118,11 +122,11 @@ template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::comp
|
||||
};
|
||||
|
||||
template<typename From,typename To> struct get_factor {
|
||||
EIGEN_STRONG_INLINE static To run(const From& x) { return x; }
|
||||
static EIGEN_STRONG_INLINE To run(const From& x) { return x; }
|
||||
};
|
||||
|
||||
template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> {
|
||||
EIGEN_STRONG_INLINE static typename NumTraits<Scalar>::Real run(const Scalar& x) { return real(x); }
|
||||
static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return real(x); }
|
||||
};
|
||||
|
||||
// Lightweight helper class to access matrix coefficients.
|
||||
@@ -175,7 +179,7 @@ template<typename XprType> struct blas_traits
|
||||
ExtractType,
|
||||
typename _ExtractType::PlainObject
|
||||
>::type DirectLinearAccessType;
|
||||
static inline const ExtractType extract(const XprType& x) { return x; }
|
||||
static inline ExtractType extract(const XprType& x) { return x; }
|
||||
static inline const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
|
||||
};
|
||||
|
||||
@@ -192,7 +196,7 @@ struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
|
||||
IsComplex = NumTraits<Scalar>::IsComplex,
|
||||
NeedToConjugate = Base::NeedToConjugate ? 0 : IsComplex
|
||||
};
|
||||
static inline const ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
|
||||
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
|
||||
static inline Scalar extractScalarFactor(const XprType& x) { return conj(Base::extractScalarFactor(x.nestedExpression())); }
|
||||
};
|
||||
|
||||
@@ -204,7 +208,7 @@ struct blas_traits<CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> >
|
||||
typedef blas_traits<NestedXpr> Base;
|
||||
typedef CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> XprType;
|
||||
typedef typename Base::ExtractType ExtractType;
|
||||
static inline const ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
|
||||
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
|
||||
static inline Scalar extractScalarFactor(const XprType& x)
|
||||
{ return x.functor().m_other * Base::extractScalarFactor(x.nestedExpression()); }
|
||||
};
|
||||
@@ -217,7 +221,7 @@ struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >
|
||||
typedef blas_traits<NestedXpr> Base;
|
||||
typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType;
|
||||
typedef typename Base::ExtractType ExtractType;
|
||||
static inline const ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
|
||||
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
|
||||
static inline Scalar extractScalarFactor(const XprType& x)
|
||||
{ return - Base::extractScalarFactor(x.nestedExpression()); }
|
||||
};
|
||||
@@ -239,7 +243,7 @@ struct blas_traits<Transpose<NestedXpr> >
|
||||
enum {
|
||||
IsTransposed = Base::IsTransposed ? 0 : 1
|
||||
};
|
||||
static inline const ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
|
||||
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
|
||||
static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
|
||||
};
|
||||
|
||||
@@ -252,7 +256,7 @@ template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectA
|
||||
struct extract_data_selector {
|
||||
static const typename T::Scalar* run(const T& m)
|
||||
{
|
||||
return const_cast<typename T::Scalar*>(&blas_traits<T>::extract(m).coeffRef(0,0)); // FIXME this should be .data()
|
||||
return blas_traits<T>::extract(m).data();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -188,7 +188,9 @@ enum {
|
||||
/** View matrix as an upper triangular matrix with zeros on the diagonal. */
|
||||
StrictlyUpper=ZeroDiag|Upper,
|
||||
/** Used in BandMatrix and SelfAdjointView to indicate that the matrix is self-adjoint. */
|
||||
SelfAdjoint=0x10
|
||||
SelfAdjoint=0x10,
|
||||
/** Used to support symmetric, non-selfadjoint, complex matrices. */
|
||||
Symmetric=0x20
|
||||
};
|
||||
|
||||
/** \ingroup enums
|
||||
@@ -200,8 +202,6 @@ enum {
|
||||
Aligned=1
|
||||
};
|
||||
|
||||
enum { ConditionalJumpCost = 5 };
|
||||
|
||||
/** \ingroup enums
|
||||
* Enum used by DenseBase::corner() in Eigen2 compatibility mode. */
|
||||
// FIXME after the corner() API change, this was not needed anymore, except by AlignedBox
|
||||
@@ -223,8 +223,6 @@ enum DirectionType {
|
||||
BothDirections
|
||||
};
|
||||
|
||||
enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct };
|
||||
|
||||
/** \internal \ingroup enums
|
||||
* Enum to specify how to traverse the entries of a matrix. */
|
||||
enum {
|
||||
@@ -257,6 +255,13 @@ enum {
|
||||
CompleteUnrolling
|
||||
};
|
||||
|
||||
/** \internal \ingroup enums
|
||||
* Enum to specify whether to use the default (built-in) implementation or the specialization. */
|
||||
enum {
|
||||
Specialized,
|
||||
BuiltIn
|
||||
};
|
||||
|
||||
/** \ingroup enums
|
||||
* Enum containing possible values for the \p _Options template parameter of
|
||||
* Matrix, Array and BandMatrix. */
|
||||
@@ -375,7 +380,7 @@ enum QRPreconditioners {
|
||||
#error The preprocessor symbol 'Success' is defined, possibly by the X11 header file X.h
|
||||
#endif
|
||||
|
||||
/** \ingroups enums
|
||||
/** \ingroup enums
|
||||
* Enum for reporting the status of a computation. */
|
||||
enum ComputationInfo {
|
||||
/** Computation was successful. */
|
||||
@@ -383,7 +388,10 @@ enum ComputationInfo {
|
||||
/** The provided data did not satisfy the prerequisites. */
|
||||
NumericalIssue = 1,
|
||||
/** Iterative procedure did not converge. */
|
||||
NoConvergence = 2
|
||||
NoConvergence = 2,
|
||||
/** The inputs are invalid, or the algorithm has been improperly called.
|
||||
* When assertions are enabled, such errors trigger an assert. */
|
||||
InvalidInput = 3
|
||||
};
|
||||
|
||||
/** \ingroup enums
|
||||
|
||||
@@ -21,15 +21,13 @@
|
||||
#elif defined __INTEL_COMPILER
|
||||
// 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
|
||||
// ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body
|
||||
// 2536 - type qualifiers are meaningless here
|
||||
// ICC 12 generates this warning when a function return type is const qualified, even if that type is a template-parameter-dependent
|
||||
// typedef that may be a reference type.
|
||||
// 279 - controlling expression is constant
|
||||
// ICC 12 generates this warning on assert(constant_expression_depending_on_template_params) and frankly this is a legitimate use case.
|
||||
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
|
||||
#pragma warning push
|
||||
#endif
|
||||
#pragma warning disable 2196 2536 279
|
||||
#pragma warning disable 2196 279
|
||||
#elif defined __clang__
|
||||
// -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant
|
||||
// this is really a stupid warning as it warns on compile-time expressions involving enums
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user