mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Compare commits
344 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
21ae2afd4e | ||
|
|
171f513ecd | ||
|
|
c310bedb29 | ||
|
|
a661812ad7 | ||
|
|
f8d653d1f9 | ||
|
|
b942bb0043 | ||
|
|
f1ffadb6e0 | ||
|
|
204d1f1456 | ||
|
|
c285ed1033 | ||
|
|
818bf74b18 | ||
|
|
9d56215db8 | ||
|
|
c4ea9a916f | ||
|
|
24d56f2e0e | ||
|
|
b9a2a8d2aa | ||
|
|
5c97b48c29 | ||
|
|
a2d6c106a4 | ||
|
|
40ddac243e | ||
|
|
065c366b40 | ||
|
|
116dbf2c28 | ||
|
|
0ee9dede55 | ||
|
|
d107a371c6 | ||
|
|
a4afa90d16 | ||
|
|
e154c87504 | ||
|
|
fcc41f1b9a | ||
|
|
9a53659b08 | ||
|
|
9ccbaaf3dd | ||
|
|
1d5581ead2 | ||
|
|
3636a64667 | ||
|
|
148e579cc0 | ||
|
|
64ec5a1a6b | ||
|
|
2c932556fc | ||
|
|
bc000deaae | ||
|
|
92cd158c01 | ||
|
|
80473b48bb | ||
|
|
3b92f547f5 | ||
|
|
718e954df4 | ||
|
|
1eef23a1eb | ||
|
|
af3656d4ca | ||
|
|
7c6ed911b3 | ||
|
|
5be00b0e29 | ||
|
|
03326d9155 | ||
|
|
6111dce0e8 | ||
|
|
f98992725c | ||
|
|
c5198249a9 | ||
|
|
e6c8d0b72d | ||
|
|
caf7e6e7a7 | ||
|
|
ea7f12ebb5 | ||
|
|
a9508054c3 | ||
|
|
7f3fff3fec | ||
|
|
6ce4be6f84 | ||
|
|
ab95a8c1ef | ||
|
|
461620668c | ||
|
|
e4127b0f7d | ||
|
|
8180e13926 | ||
|
|
6eb4ce5f8e | ||
|
|
b89d81b2a8 | ||
|
|
73b1c0a660 | ||
|
|
4d05b107cf | ||
|
|
7621bbc2a5 | ||
|
|
c15d736be3 | ||
|
|
81bdde705c | ||
|
|
06fc5761fa | ||
|
|
a185bc485c | ||
|
|
96134409fc | ||
|
|
ab3fa2e123 | ||
|
|
ae6e5caa40 | ||
|
|
483beabab9 | ||
|
|
5c59564bfb | ||
|
|
1939c971a3 | ||
|
|
c2f9e6cb37 | ||
|
|
1641a6cdd5 | ||
|
|
fea50d40ea | ||
|
|
c1128efb6c | ||
|
|
20ca86888e | ||
|
|
36a1cd87d9 | ||
|
|
523e442a7b | ||
|
|
48048172e5 | ||
|
|
e9bd839b13 | ||
|
|
3df78d5afc | ||
|
|
352489edbe | ||
|
|
450c5e5d27 | ||
|
|
64cc5f8512 | ||
|
|
656712d48f | ||
|
|
971b32440c | ||
|
|
bb87f618bf | ||
|
|
2f9de52245 | ||
|
|
2136cfa17e | ||
|
|
39125654ce | ||
|
|
927d023cea | ||
|
|
1e2d2693b9 | ||
|
|
7634a44bfe | ||
|
|
2480d04ac7 | ||
|
|
c92536d926 | ||
|
|
80af7d6a47 | ||
|
|
87f9e301f9 | ||
|
|
542fb03968 | ||
|
|
f90d136c84 | ||
|
|
877a2b64c9 | ||
|
|
e6577f3c30 | ||
|
|
69e01a2999 | ||
|
|
5f71579a2d | ||
|
|
686e0749a5 | ||
|
|
385d8b5e42 | ||
|
|
4662c610c1 | ||
|
|
906a98fe39 | ||
|
|
1c4fdad7bd | ||
|
|
3f711f3356 | ||
|
|
b02ab76847 | ||
|
|
5fec52ced1 | ||
|
|
bde2bfcee8 | ||
|
|
eab7afe252 | ||
|
|
81e94eea02 | ||
|
|
a2a2c3c865 | ||
|
|
90cd199d4b | ||
|
|
b18e2d422b | ||
|
|
892c0a79ce | ||
|
|
59398aa2bb | ||
|
|
170914dbbc | ||
|
|
866d222d60 | ||
|
|
86a939451c | ||
|
|
9ff3150243 | ||
|
|
a7144f8d6a | ||
|
|
273738ba6f | ||
|
|
3fb42ff7b2 | ||
|
|
e90a14609a | ||
|
|
ece56baba0 | ||
|
|
1724dae8b8 | ||
|
|
190b46dd1f | ||
|
|
74daf12e52 | ||
|
|
c24844195d | ||
|
|
15752027ec | ||
|
|
bfc66e8b9a | ||
|
|
b60cbbef37 | ||
|
|
33b972d8b3 | ||
|
|
bb28a2aada | ||
|
|
acd0ce11aa | ||
|
|
01fb621733 | ||
|
|
71d1198ccd | ||
|
|
95ec3232c6 | ||
|
|
243249718b | ||
|
|
32a6db0f8c | ||
|
|
6fc0f2be70 | ||
|
|
70ac6c9230 | ||
|
|
609e425166 | ||
|
|
4ead16cdd6 | ||
|
|
361102f88b | ||
|
|
5d40715db6 | ||
|
|
e7c065ec71 | ||
|
|
18868228ad | ||
|
|
fbb0c510c5 | ||
|
|
a8d2459f8e | ||
|
|
9a266e5118 | ||
|
|
51e1aa1539 | ||
|
|
0137ed4f19 | ||
|
|
9d03711df8 | ||
|
|
1ca9072b51 | ||
|
|
9fd138e2b3 | ||
|
|
55fbf4fedd | ||
|
|
b87875abf8 | ||
|
|
ac2c97edff | ||
|
|
292dea7922 | ||
|
|
070b5958e0 | ||
|
|
3108fbf767 | ||
|
|
9df7f3d8e9 | ||
|
|
782fd81dee | ||
|
|
fa77d71335 | ||
|
|
3d1795da28 | ||
|
|
d1c2d6683c | ||
|
|
d8cf158e06 | ||
|
|
bc837b7975 | ||
|
|
68e8f2b833 | ||
|
|
3dc3a0ea2d | ||
|
|
79120a4c63 | ||
|
|
e0412f18fd | ||
|
|
40b0c43bda | ||
|
|
72f3e20e74 | ||
|
|
676a7a3271 | ||
|
|
f843239452 | ||
|
|
a4ab0c6b6a | ||
|
|
ef955ea8e5 | ||
|
|
8bd392ca0e | ||
|
|
8d2ac85797 | ||
|
|
6d6e5fcd43 | ||
|
|
9c9e90f6db | ||
|
|
7ffa27f347 | ||
|
|
c20043c8fd | ||
|
|
d18877f18d | ||
|
|
02c0cef97f | ||
|
|
c8e663fe87 | ||
|
|
7a875acfb0 | ||
|
|
3ec11d8f17 | ||
|
|
ec067ac5e3 | ||
|
|
316969d839 | ||
|
|
7a0a9581b5 | ||
|
|
8880be60fa | ||
|
|
e41713d52e | ||
|
|
b69e465d7a | ||
|
|
0db83fc571 | ||
|
|
1ac703f641 | ||
|
|
2c32368642 | ||
|
|
db40309e70 | ||
|
|
e36c1f7501 | ||
|
|
3aef5c1a2f | ||
|
|
ab6bb89980 | ||
|
|
983ace99d4 | ||
|
|
72fa6775e8 | ||
|
|
9f25cdf4f6 | ||
|
|
6e5edd68d3 | ||
|
|
e8978ffa99 | ||
|
|
c753fe7cc3 | ||
|
|
e59e345720 | ||
|
|
07c2244440 | ||
|
|
1865dccd58 | ||
|
|
f2e6ee9687 | ||
|
|
9219307e13 | ||
|
|
f2e8f96151 | ||
|
|
faf8af25ed | ||
|
|
106ba41c2a | ||
|
|
87939ea0dd | ||
|
|
e813640aa1 | ||
|
|
612b8f2749 | ||
|
|
ead8e1b796 | ||
|
|
3d4265f2d5 | ||
|
|
d66586ac90 | ||
|
|
44920624fb | ||
|
|
208058b9ad | ||
|
|
b4218b8473 | ||
|
|
3c2f0812f6 | ||
|
|
17bbd82f7d | ||
|
|
e1385337ff | ||
|
|
d367ecb475 | ||
|
|
c3b658b2c9 | ||
|
|
f9d655a8c8 | ||
|
|
ad3e4d1a49 | ||
|
|
222ed66f79 | ||
|
|
6bceebfabf | ||
|
|
2ca3eb8407 | ||
|
|
698205cddf | ||
|
|
2ecb33820f | ||
|
|
a0de6eb4ce | ||
|
|
7962ac1a58 | ||
|
|
9c97b053f3 | ||
|
|
f61b0d56f0 | ||
|
|
5087e016eb | ||
|
|
fa9f5d7170 | ||
|
|
6975534cb2 | ||
|
|
95c6d8db75 | ||
|
|
e0548e9ff3 | ||
|
|
c289ef20f3 | ||
|
|
b8cf157e8c | ||
|
|
b4d2b404b0 | ||
|
|
70fcaf9bd8 | ||
|
|
2f31c6b1d8 | ||
|
|
9e55467b4c | ||
|
|
35bf99c63e | ||
|
|
f9b8729597 | ||
|
|
4b2e7f26aa | ||
|
|
5202bc92e6 | ||
|
|
9d83411cc4 | ||
|
|
556c03a09d | ||
|
|
ce463b9fa4 | ||
|
|
477d1e8192 | ||
|
|
0eaff8fdf2 | ||
|
|
582c96691b | ||
|
|
0b22158d9f | ||
|
|
dafdb0d8a8 | ||
|
|
1d1686c62b | ||
|
|
ad95b924d0 | ||
|
|
9499684320 | ||
|
|
5b6a31626b | ||
|
|
bc3fee2d8e | ||
|
|
eaa9223277 | ||
|
|
c9ba1165e7 | ||
|
|
dd2d5d67ff | ||
|
|
404322b64f | ||
|
|
ce37bae2cd | ||
|
|
3900dbc341 | ||
|
|
5f586c2bd0 | ||
|
|
215f88a417 | ||
|
|
2257f40f4a | ||
|
|
9e0fa0ef6d | ||
|
|
0fddbf3dc7 | ||
|
|
eda635bd58 | ||
|
|
26197bb467 | ||
|
|
772e59d475 | ||
|
|
e8f83cbb5d | ||
|
|
dce584d799 | ||
|
|
0bcef9557d | ||
|
|
2b3c876b2a | ||
|
|
a05f6aad0e | ||
|
|
59187285e1 | ||
|
|
1dd074ea7e | ||
|
|
24fa7a01bd | ||
|
|
e236d3443c | ||
|
|
4ec8833220 | ||
|
|
dd3685cc6a | ||
|
|
487a6e6515 | ||
|
|
75f0b8aae3 | ||
|
|
23aca8a586 | ||
|
|
28bf2bf070 | ||
|
|
0164f4c682 | ||
|
|
bbff608a42 | ||
|
|
ea56d2ff2c | ||
|
|
a4c8701e9a | ||
|
|
a9bb9796e0 | ||
|
|
449883be74 | ||
|
|
0a08d4c60b | ||
|
|
4086187e49 | ||
|
|
91864f85d3 | ||
|
|
c3597106ab | ||
|
|
aed1d6597f | ||
|
|
b6f04a2dd4 | ||
|
|
a9aa3bcf50 | ||
|
|
32b8da66e3 | ||
|
|
eb94179ea3 | ||
|
|
52a7386aef | ||
|
|
8cada1d894 | ||
|
|
6e4a664c42 | ||
|
|
1cd1a96d56 | ||
|
|
86ab00cdcf | ||
|
|
65f09be8d2 | ||
|
|
400d756b82 | ||
|
|
9d31798a84 | ||
|
|
723ed92e0e | ||
|
|
0a7de0b273 | ||
|
|
d6b9bc1ccd | ||
|
|
0eff51e2ed | ||
|
|
1b7dd46d94 | ||
|
|
b2eb1bf3dc | ||
|
|
fe48c25682 | ||
|
|
0ba6da3470 | ||
|
|
a287140f72 | ||
|
|
4d89ec8a00 | ||
|
|
441760f239 | ||
|
|
664162fb8a | ||
|
|
aa3c761002 | ||
|
|
94f2cfc9c7 | ||
|
|
4a13d79df6 | ||
|
|
463176cc44 | ||
|
|
5aab97fba6 | ||
|
|
89abc6806d | ||
|
|
baf793ebaa | ||
|
|
b4ddafcfac | ||
|
|
1079967710 |
157
CMakeLists.txt
157
CMakeLists.txt
@@ -41,10 +41,13 @@ string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen_minor_
|
||||
set(EIGEN_MINOR_VERSION "${CMAKE_MATCH_1}")
|
||||
set(EIGEN_VERSION_NUMBER ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION})
|
||||
|
||||
# if the mercurial program is absent, this will leave the EIGEN_HG_CHANGESET string empty,
|
||||
# but won't stop CMake.
|
||||
execute_process(COMMAND hg tip -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_HGTIP_OUTPUT)
|
||||
execute_process(COMMAND hg branch -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_BRANCH_OUTPUT)
|
||||
# if we are not in a mercurial clone
|
||||
if(IS_DIRECTORY ${CMAKE_SOURCE_DIR}/.hg)
|
||||
# if the mercurial program is absent or this will leave the EIGEN_HG_CHANGESET string empty,
|
||||
# but won't stop CMake.
|
||||
execute_process(COMMAND hg tip -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_HGTIP_OUTPUT)
|
||||
execute_process(COMMAND hg branch -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_BRANCH_OUTPUT)
|
||||
endif()
|
||||
|
||||
# if this is the default (aka development) branch, extract the mercurial changeset number from the hg tip output...
|
||||
if(EIGEN_BRANCH_OUTPUT MATCHES "default")
|
||||
@@ -64,6 +67,33 @@ include(GNUInstallDirs)
|
||||
|
||||
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||
|
||||
|
||||
option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tensor module)." OFF)
|
||||
|
||||
|
||||
macro(ei_add_cxx_compiler_flag FLAG)
|
||||
string(REGEX REPLACE "-" "" SFLAG1 ${FLAG})
|
||||
string(REGEX REPLACE "\\+" "p" SFLAG ${SFLAG1})
|
||||
check_cxx_compiler_flag(${FLAG} COMPILER_SUPPORT_${SFLAG})
|
||||
if(COMPILER_SUPPORT_${SFLAG})
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
|
||||
endif()
|
||||
endmacro(ei_add_cxx_compiler_flag)
|
||||
|
||||
check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11)
|
||||
|
||||
if(EIGEN_TEST_CXX11)
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
if(EIGEN_COMPILER_SUPPORT_CPP11)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
|
||||
endif()
|
||||
else()
|
||||
#set(CMAKE_CXX_STANDARD 03)
|
||||
#set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
ei_add_cxx_compiler_flag("-std=c++03")
|
||||
endif()
|
||||
|
||||
#############################################################################
|
||||
# find how to link to the standard libraries #
|
||||
#############################################################################
|
||||
@@ -115,15 +145,6 @@ endif()
|
||||
|
||||
set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320")
|
||||
|
||||
macro(ei_add_cxx_compiler_flag FLAG)
|
||||
string(REGEX REPLACE "-" "" SFLAG1 ${FLAG})
|
||||
string(REGEX REPLACE "\\+" "p" SFLAG ${SFLAG1})
|
||||
check_cxx_compiler_flag(${FLAG} COMPILER_SUPPORT_${SFLAG})
|
||||
if(COMPILER_SUPPORT_${SFLAG})
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
|
||||
endif()
|
||||
endmacro(ei_add_cxx_compiler_flag)
|
||||
|
||||
if(NOT MSVC)
|
||||
# We assume that other compilers are partly compatible with GNUCC
|
||||
|
||||
@@ -359,8 +380,6 @@ if(EIGEN_TEST_NO_EXCEPTIONS)
|
||||
message(STATUS "Disabling exceptions in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tensor module)." OFF)
|
||||
|
||||
set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture level to target when compiling CUDA code")
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
|
||||
@@ -380,7 +399,7 @@ else()
|
||||
)
|
||||
endif()
|
||||
set(CMAKEPACKAGE_INSTALL_DIR
|
||||
"${CMAKE_INSTALL_LIBDIR}/cmake/eigen3"
|
||||
"${CMAKE_INSTALL_DATADIR}/eigen3/cmake"
|
||||
CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen3Config.cmake is installed"
|
||||
)
|
||||
set(PKGCONFIG_INSTALL_DIR
|
||||
@@ -416,16 +435,15 @@ add_subdirectory(Eigen)
|
||||
|
||||
add_subdirectory(doc EXCLUDE_FROM_ALL)
|
||||
|
||||
include(EigenConfigureTesting)
|
||||
option(BUILD_TESTING "Enable creation of Eigen tests." ON)
|
||||
if(BUILD_TESTING)
|
||||
include(EigenConfigureTesting)
|
||||
|
||||
# fixme, not sure this line is still needed:
|
||||
enable_testing() # must be called from the root CMakeLists, see man page
|
||||
|
||||
|
||||
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
|
||||
add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest
|
||||
else()
|
||||
add_subdirectory(test EXCLUDE_FROM_ALL)
|
||||
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
|
||||
add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest
|
||||
else()
|
||||
add_subdirectory(test EXCLUDE_FROM_ALL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
|
||||
@@ -461,7 +479,9 @@ endif(NOT WIN32)
|
||||
|
||||
configure_file(scripts/cdashtesting.cmake.in cdashtesting.cmake @ONLY)
|
||||
|
||||
ei_testing_print_summary()
|
||||
if(BUILD_TESTING)
|
||||
ei_testing_print_summary()
|
||||
endif()
|
||||
|
||||
message(STATUS "")
|
||||
message(STATUS "Configured Eigen ${EIGEN_VERSION_NUMBER}")
|
||||
@@ -507,18 +527,89 @@ set ( EIGEN_VERSION_MINOR ${EIGEN_MAJOR_VERSION} )
|
||||
set ( EIGEN_VERSION_PATCH ${EIGEN_MINOR_VERSION} )
|
||||
set ( EIGEN_DEFINITIONS "")
|
||||
set ( EIGEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}" )
|
||||
set ( EIGEN_INCLUDE_DIRS ${EIGEN_INCLUDE_DIR} )
|
||||
set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} )
|
||||
|
||||
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
@ONLY ESCAPE_QUOTES
|
||||
)
|
||||
# Interface libraries require at least CMake 3.0
|
||||
if (NOT CMAKE_VERSION VERSION_LESS 3.0)
|
||||
include (CMakePackageConfigHelpers)
|
||||
|
||||
# Imported target support
|
||||
add_library (eigen INTERFACE)
|
||||
|
||||
target_compile_definitions (eigen INTERFACE ${EIGEN_DEFINITIONS})
|
||||
target_include_directories (eigen INTERFACE
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
|
||||
$<INSTALL_INTERFACE:${INCLUDE_INSTALL_DIR}>
|
||||
)
|
||||
|
||||
# Export as title case Eigen
|
||||
set_target_properties (eigen PROPERTIES EXPORT_NAME Eigen)
|
||||
|
||||
install (TARGETS eigen EXPORT Eigen3Targets)
|
||||
|
||||
configure_package_config_file (
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
PATH_VARS EIGEN_INCLUDE_DIR EIGEN_ROOT_DIR
|
||||
INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
|
||||
NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components
|
||||
)
|
||||
# Remove CMAKE_SIZEOF_VOID_P from Eigen3ConfigVersion.cmake since Eigen does
|
||||
# not depend on architecture specific settings or libraries. More
|
||||
# specifically, an Eigen3Config.cmake generated from a 64 bit target can be
|
||||
# used for 32 bit targets as well (and vice versa).
|
||||
set (_Eigen3_CMAKE_SIZEOF_VOID_P ${CMAKE_SIZEOF_VOID_P})
|
||||
unset (CMAKE_SIZEOF_VOID_P)
|
||||
write_basic_package_version_file (Eigen3ConfigVersion.cmake
|
||||
VERSION ${EIGEN_VERSION_NUMBER}
|
||||
COMPATIBILITY SameMajorVersion)
|
||||
set (CMAKE_SIZEOF_VOID_P ${_Eigen3_CMAKE_SIZEOF_VOID_P})
|
||||
|
||||
# The Eigen target will be located in the Eigen3 namespace. Other CMake
|
||||
# targets can refer to it using Eigen3::Eigen.
|
||||
export (TARGETS eigen NAMESPACE Eigen3:: FILE Eigen3Targets.cmake)
|
||||
# Export Eigen3 package to CMake registry such that it can be easily found by
|
||||
# CMake even if it has not been installed to a standard directory.
|
||||
export (PACKAGE Eigen3)
|
||||
|
||||
install (EXPORT Eigen3Targets NAMESPACE Eigen3:: DESTINATION ${CMAKEPACKAGE_INSTALL_DIR})
|
||||
|
||||
else (NOT CMAKE_VERSION VERSION_LESS 3.0)
|
||||
# Fallback to legacy Eigen3Config.cmake without the imported target
|
||||
|
||||
# If CMakePackageConfigHelpers module is available (CMake >= 2.8.8)
|
||||
# create a relocatable Config file, otherwise leave the hardcoded paths
|
||||
include(CMakePackageConfigHelpers OPTIONAL RESULT_VARIABLE CPCH_PATH)
|
||||
|
||||
if(CPCH_PATH)
|
||||
configure_package_config_file (
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigLegacy.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
PATH_VARS EIGEN_INCLUDE_DIR EIGEN_ROOT_DIR
|
||||
INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
|
||||
NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components
|
||||
)
|
||||
else()
|
||||
# The PACKAGE_* variables are defined by the configure_package_config_file
|
||||
# but without it we define them manually to the hardcoded paths
|
||||
set(PACKAGE_INIT "")
|
||||
set(PACKAGE_EIGEN_INCLUDE_DIR ${EIGEN_INCLUDE_DIR})
|
||||
set(PACKAGE_EIGEN_ROOT_DIR ${EIGEN_ROOT_DIR})
|
||||
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigLegacy.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
@ONLY ESCAPE_QUOTES )
|
||||
endif()
|
||||
|
||||
write_basic_package_version_file( Eigen3ConfigVersion.cmake
|
||||
VERSION ${EIGEN_VERSION_NUMBER}
|
||||
COMPATIBILITY SameMajorVersion )
|
||||
|
||||
endif (NOT CMAKE_VERSION VERSION_LESS 3.0)
|
||||
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
|
||||
)
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3ConfigVersion.cmake
|
||||
DESTINATION ${CMAKEPACKAGE_INSTALL_DIR} )
|
||||
|
||||
# Add uninstall target
|
||||
add_custom_target ( uninstall
|
||||
|
||||
@@ -4,14 +4,10 @@
|
||||
## # The following are required to uses Dart and the Cdash dashboard
|
||||
## ENABLE_TESTING()
|
||||
## INCLUDE(CTest)
|
||||
set(CTEST_PROJECT_NAME "Eigen")
|
||||
set(CTEST_PROJECT_NAME "Eigen 3.3")
|
||||
set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC")
|
||||
|
||||
set(CTEST_DROP_METHOD "http")
|
||||
set(CTEST_DROP_SITE "manao.inria.fr")
|
||||
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen")
|
||||
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen+3.3")
|
||||
set(CTEST_DROP_SITE_CDASH TRUE)
|
||||
set(CTEST_PROJECT_SUBPROJECTS
|
||||
Official
|
||||
Unsupported
|
||||
)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
|
||||
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "2000")
|
||||
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "2000")
|
||||
list(APPEND CTEST_CUSTOM_ERROR_EXCEPTION @EIGEN_CTEST_ERROR_EXCEPTION@)
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#define EIGEN_CHOLESKY_MODULE_H
|
||||
|
||||
#include "Core"
|
||||
#include "Jacobi"
|
||||
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
@@ -31,7 +32,11 @@
|
||||
#include "src/Cholesky/LLT.h"
|
||||
#include "src/Cholesky/LDLT.h"
|
||||
#ifdef EIGEN_USE_LAPACKE
|
||||
#ifdef EIGEN_USE_MKL
|
||||
#include "mkl_lapacke.h"
|
||||
#else
|
||||
#include "src/misc/lapacke.h"
|
||||
#endif
|
||||
#include "src/Cholesky/LLT_LAPACKE.h"
|
||||
#endif
|
||||
|
||||
|
||||
33
Eigen/Core
33
Eigen/Core
@@ -14,6 +14,22 @@
|
||||
// first thing Eigen does: stop the compiler from committing suicide
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA)
|
||||
#define EIGEN_CUDACC __CUDACC__
|
||||
#endif
|
||||
|
||||
#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA)
|
||||
#define EIGEN_CUDA_ARCH __CUDA_ARCH__
|
||||
#endif
|
||||
|
||||
#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
|
||||
#define EIGEN_CUDACC_VER ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100))
|
||||
#elif defined(__CUDACC_VER__)
|
||||
#define EIGEN_CUDACC_VER __CUDACC_VER__
|
||||
#else
|
||||
#define EIGEN_CUDACC_VER 0
|
||||
#endif
|
||||
|
||||
// Handle NVCC/CUDA/SYCL
|
||||
#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
|
||||
// Do not try asserts on CUDA and SYCL!
|
||||
@@ -37,9 +53,9 @@
|
||||
#endif
|
||||
|
||||
#define EIGEN_DEVICE_FUNC __host__ __device__
|
||||
// We need math_functions.hpp to ensure that that EIGEN_USING_STD_MATH macro
|
||||
// We need cuda_runtime.h to ensure that that EIGEN_USING_STD_MATH macro
|
||||
// works properly on the device side
|
||||
#include <math_functions.hpp>
|
||||
#include <cuda_runtime.h>
|
||||
#else
|
||||
#define EIGEN_DEVICE_FUNC
|
||||
#endif
|
||||
@@ -155,6 +171,9 @@
|
||||
#ifdef __AVX512DQ__
|
||||
#define EIGEN_VECTORIZE_AVX512DQ
|
||||
#endif
|
||||
#ifdef __AVX512ER__
|
||||
#define EIGEN_VECTORIZE_AVX512ER
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// include files
|
||||
@@ -229,7 +248,7 @@
|
||||
#if defined __CUDACC__
|
||||
#define EIGEN_VECTORIZE_CUDA
|
||||
#include <vector_types.h>
|
||||
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
|
||||
#if EIGEN_CUDACC_VER >= 70500
|
||||
#define EIGEN_HAS_CUDA_FP16
|
||||
#endif
|
||||
#endif
|
||||
@@ -321,12 +340,16 @@ inline static const char *SimdInstructionSetsInUse(void) {
|
||||
#error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information
|
||||
#endif
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
// we use size_t frequently and we'll never remember to prepend it with std:: everytime just to
|
||||
// ensure QNX/QCC support
|
||||
using std::size_t;
|
||||
// gcc 4.6.0 wants std:: for ptrdiff_t
|
||||
using std::ptrdiff_t;
|
||||
|
||||
}
|
||||
|
||||
/** \defgroup Core_Module Core module
|
||||
* This is the main module of Eigen providing dense matrix and vector support
|
||||
* (both fixed and dynamic size) with all the features corresponding to a BLAS library
|
||||
@@ -348,6 +371,7 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/MathFunctions.h"
|
||||
#include "src/Core/GenericPacketMath.h"
|
||||
#include "src/Core/MathFunctionsImpl.h"
|
||||
#include "src/Core/arch/Default/ConjHelper.h"
|
||||
|
||||
#if defined EIGEN_VECTORIZE_AVX512
|
||||
#include "src/Core/arch/SSE/PacketMath.h"
|
||||
@@ -363,6 +387,7 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/arch/AVX/MathFunctions.h"
|
||||
#include "src/Core/arch/AVX/Complex.h"
|
||||
#include "src/Core/arch/AVX/TypeCasting.h"
|
||||
#include "src/Core/arch/SSE/TypeCasting.h"
|
||||
#elif defined EIGEN_VECTORIZE_SSE
|
||||
#include "src/Core/arch/SSE/PacketMath.h"
|
||||
#include "src/Core/arch/SSE/MathFunctions.h"
|
||||
@@ -405,6 +430,7 @@ using std::ptrdiff_t;
|
||||
// on CUDA devices
|
||||
#include "src/Core/arch/CUDA/Complex.h"
|
||||
|
||||
#include "src/Core/IO.h"
|
||||
#include "src/Core/DenseCoeffsBase.h"
|
||||
#include "src/Core/DenseBase.h"
|
||||
#include "src/Core/MatrixBase.h"
|
||||
@@ -452,7 +478,6 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/Redux.h"
|
||||
#include "src/Core/Visitor.h"
|
||||
#include "src/Core/Fuzzy.h"
|
||||
#include "src/Core/IO.h"
|
||||
#include "src/Core/Swap.h"
|
||||
#include "src/Core/CommaInitializer.h"
|
||||
#include "src/Core/GeneralProduct.h"
|
||||
|
||||
@@ -45,7 +45,11 @@
|
||||
#include "src/Eigenvalues/GeneralizedEigenSolver.h"
|
||||
#include "src/Eigenvalues/MatrixBaseEigenvalues.h"
|
||||
#ifdef EIGEN_USE_LAPACKE
|
||||
#ifdef EIGEN_USE_MKL
|
||||
#include "mkl_lapacke.h"
|
||||
#else
|
||||
#include "src/misc/lapacke.h"
|
||||
#endif
|
||||
#include "src/Eigenvalues/RealSchur_LAPACKE.h"
|
||||
#include "src/Eigenvalues/ComplexSchur_LAPACKE.h"
|
||||
#include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h"
|
||||
|
||||
4
Eigen/LU
4
Eigen/LU
@@ -28,7 +28,11 @@
|
||||
#include "src/LU/FullPivLU.h"
|
||||
#include "src/LU/PartialPivLU.h"
|
||||
#ifdef EIGEN_USE_LAPACKE
|
||||
#ifdef EIGEN_USE_MKL
|
||||
#include "mkl_lapacke.h"
|
||||
#else
|
||||
#include "src/misc/lapacke.h"
|
||||
#endif
|
||||
#include "src/LU/PartialPivLU_LAPACKE.h"
|
||||
#endif
|
||||
#include "src/LU/Determinant.h"
|
||||
|
||||
4
Eigen/QR
4
Eigen/QR
@@ -36,7 +36,11 @@
|
||||
#include "src/QR/ColPivHouseholderQR.h"
|
||||
#include "src/QR/CompleteOrthogonalDecomposition.h"
|
||||
#ifdef EIGEN_USE_LAPACKE
|
||||
#ifdef EIGEN_USE_MKL
|
||||
#include "mkl_lapacke.h"
|
||||
#else
|
||||
#include "src/misc/lapacke.h"
|
||||
#endif
|
||||
#include "src/QR/HouseholderQR_LAPACKE.h"
|
||||
#include "src/QR/ColPivHouseholderQR_LAPACKE.h"
|
||||
#endif
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
void *qMalloc(size_t size)
|
||||
void *qMalloc(std::size_t size)
|
||||
{
|
||||
return Eigen::internal::aligned_malloc(size);
|
||||
}
|
||||
@@ -24,10 +24,10 @@ void qFree(void *ptr)
|
||||
Eigen::internal::aligned_free(ptr);
|
||||
}
|
||||
|
||||
void *qRealloc(void *ptr, size_t size)
|
||||
void *qRealloc(void *ptr, std::size_t size)
|
||||
{
|
||||
void* newPtr = Eigen::internal::aligned_malloc(size);
|
||||
memcpy(newPtr, ptr, size);
|
||||
std::memcpy(newPtr, ptr, size);
|
||||
Eigen::internal::aligned_free(ptr);
|
||||
return newPtr;
|
||||
}
|
||||
|
||||
@@ -37,7 +37,11 @@
|
||||
#include "src/SVD/JacobiSVD.h"
|
||||
#include "src/SVD/BDCSVD.h"
|
||||
#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
|
||||
#ifdef EIGEN_USE_MKL
|
||||
#include "mkl_lapacke.h"
|
||||
#else
|
||||
#include "src/misc/lapacke.h"
|
||||
#endif
|
||||
#include "src/SVD/JacobiSVD_LAPACKE.h"
|
||||
#endif
|
||||
|
||||
|
||||
@@ -25,7 +25,9 @@
|
||||
|
||||
#include "SparseCore"
|
||||
#include "OrderingMethods"
|
||||
#ifndef EIGEN_MPL2_ONLY
|
||||
#include "SparseCholesky"
|
||||
#endif
|
||||
#include "SparseLU"
|
||||
#include "SparseQR"
|
||||
#include "IterativeLinearSolvers"
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
#include "Core"
|
||||
#include <deque>
|
||||
|
||||
#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */
|
||||
#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */
|
||||
|
||||
#define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...)
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
#include "Core"
|
||||
#include <list>
|
||||
|
||||
#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */
|
||||
#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */
|
||||
|
||||
#define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...)
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
#include "Core"
|
||||
#include <vector>
|
||||
|
||||
#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */
|
||||
#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */
|
||||
|
||||
#define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...)
|
||||
|
||||
|
||||
@@ -248,7 +248,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
|
||||
/** \brief Reports whether previous computation was successful.
|
||||
*
|
||||
* \returns \c Success if computation was succesful,
|
||||
* \c NumericalIssue if the matrix.appears to be negative.
|
||||
* \c NumericalIssue if the factorization failed because of a zero pivot.
|
||||
*/
|
||||
ComputationInfo info() const
|
||||
{
|
||||
@@ -305,7 +305,8 @@ template<> struct ldlt_inplace<Lower>
|
||||
if (size <= 1)
|
||||
{
|
||||
transpositions.setIdentity();
|
||||
if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef;
|
||||
if(size==0) sign = ZeroSign;
|
||||
else if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef;
|
||||
else if (numext::real(mat.coeff(0,0)) < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
|
||||
else sign = ZeroSign;
|
||||
return true;
|
||||
@@ -376,6 +377,8 @@ template<> struct ldlt_inplace<Lower>
|
||||
|
||||
if((rs>0) && pivot_is_valid)
|
||||
A21 /= realAkk;
|
||||
else if(rs>0)
|
||||
ret = ret && (A21.array()==Scalar(0)).all();
|
||||
|
||||
if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed
|
||||
else if(!pivot_is_valid) found_zero_pivot = true;
|
||||
@@ -568,13 +571,14 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) cons
|
||||
// more precisely, use pseudo-inverse of D (see bug 241)
|
||||
using std::abs;
|
||||
const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD());
|
||||
// In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon
|
||||
// as motivated by LAPACK's xGELSS:
|
||||
// In some previous versions, tolerance was set to the max of 1/highest (or rather numeric_limits::min())
|
||||
// and the maximal diagonal entry * epsilon as motivated by LAPACK's xGELSS:
|
||||
// RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
|
||||
// However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
|
||||
// diagonal element is not well justified and leads to numerical issues in some cases.
|
||||
// Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
|
||||
RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
|
||||
// Using numeric_limits::min() gives us more robustness to denormals.
|
||||
RealScalar tolerance = (std::numeric_limits<RealScalar>::min)();
|
||||
|
||||
for (Index i = 0; i < vecD.size(); ++i)
|
||||
{
|
||||
|
||||
@@ -24,7 +24,7 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
|
||||
*
|
||||
* \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
|
||||
* \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
|
||||
* The other triangular part won't be read.
|
||||
* The other triangular part won't be read.
|
||||
*
|
||||
* This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
|
||||
* matrix A such that A = LL^* = U^*U, where L is lower triangular.
|
||||
@@ -41,14 +41,18 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
|
||||
* Example: \include LLT_example.cpp
|
||||
* Output: \verbinclude LLT_example.out
|
||||
*
|
||||
* \b Performance: for best performance, it is recommended to use a column-major storage format
|
||||
* with the Lower triangular part (the default), or, equivalently, a row-major storage format
|
||||
* with the Upper triangular part. Otherwise, you might get a 20% slowdown for the full factorization
|
||||
* step, and rank-updates can be up to 3 times slower.
|
||||
*
|
||||
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
|
||||
*
|
||||
* Note that during the decomposition, only the lower (or upper, as defined by _UpLo) triangular part of A is considered.
|
||||
* Therefore, the strict lower part does not have to store correct values.
|
||||
*
|
||||
* \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
|
||||
*/
|
||||
/* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
|
||||
* Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
|
||||
* the strict lower part does not have to store correct values.
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo> class LLT
|
||||
{
|
||||
public:
|
||||
@@ -146,7 +150,7 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
void solveInPlace(MatrixBase<Derived> &bAndX) const;
|
||||
void solveInPlace(const MatrixBase<Derived> &bAndX) const;
|
||||
|
||||
template<typename InputType>
|
||||
LLT& compute(const EigenBase<InputType>& matrix);
|
||||
@@ -177,7 +181,7 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
/** \brief Reports whether previous computation was successful.
|
||||
*
|
||||
* \returns \c Success if computation was succesful,
|
||||
* \c NumericalIssue if the matrix.appears to be negative.
|
||||
* \c NumericalIssue if the matrix.appears not to be positive definite.
|
||||
*/
|
||||
ComputationInfo info() const
|
||||
{
|
||||
@@ -425,7 +429,8 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>
|
||||
eigen_assert(a.rows()==a.cols());
|
||||
const Index size = a.rows();
|
||||
m_matrix.resize(size, size);
|
||||
m_matrix = a.derived();
|
||||
if (!internal::is_same_dense(m_matrix, a.derived()))
|
||||
m_matrix = a.derived();
|
||||
|
||||
// Compute matrix L1 norm = max abs column sum.
|
||||
m_l1_norm = RealScalar(0);
|
||||
@@ -485,11 +490,14 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
|
||||
*
|
||||
* This version avoids a copy when the right hand side matrix b is not needed anymore.
|
||||
*
|
||||
* \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
|
||||
* This function will const_cast it, so constness isn't honored here.
|
||||
*
|
||||
* \sa LLT::solve(), MatrixBase::llt()
|
||||
*/
|
||||
template<typename MatrixType, int _UpLo>
|
||||
template<typename Derived>
|
||||
void LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
|
||||
void LLT<MatrixType,_UpLo>::solveInPlace(const MatrixBase<Derived> &bAndX) const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LLT is not initialized.");
|
||||
eigen_assert(m_matrix.rows()==bAndX.rows());
|
||||
|
||||
@@ -153,8 +153,6 @@ class Array
|
||||
: Base(std::move(other))
|
||||
{
|
||||
Base::_check_template_params();
|
||||
if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
|
||||
Base::_set_noalias(other);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
|
||||
@@ -231,10 +229,16 @@ class Array
|
||||
: Base(other)
|
||||
{ }
|
||||
|
||||
private:
|
||||
struct PrivateType {};
|
||||
public:
|
||||
|
||||
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other)
|
||||
EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other,
|
||||
typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value,
|
||||
PrivateType>::type = PrivateType())
|
||||
: Base(other.derived())
|
||||
{ }
|
||||
|
||||
|
||||
@@ -175,7 +175,7 @@ template<typename Derived> class ArrayBase
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
@@ -188,7 +188,7 @@ ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
@@ -201,7 +201,7 @@ ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
@@ -214,7 +214,7 @@ ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
|
||||
@@ -32,7 +32,8 @@ struct traits<ArrayWrapper<ExpressionType> >
|
||||
// Let's remove NestByRefBit
|
||||
enum {
|
||||
Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
|
||||
Flags = Flags0 & ~NestByRefBit
|
||||
LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
|
||||
Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -129,7 +130,8 @@ struct traits<MatrixWrapper<ExpressionType> >
|
||||
// Let's remove NestByRefBit
|
||||
enum {
|
||||
Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
|
||||
Flags = Flags0 & ~NestByRefBit
|
||||
LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
|
||||
Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
@@ -39,7 +39,7 @@ public:
|
||||
enum {
|
||||
DstAlignment = DstEvaluator::Alignment,
|
||||
SrcAlignment = SrcEvaluator::Alignment,
|
||||
DstHasDirectAccess = DstFlags & DirectAccessBit,
|
||||
DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
|
||||
JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
|
||||
};
|
||||
|
||||
@@ -83,7 +83,7 @@ private:
|
||||
&& int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
|
||||
&& (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
|
||||
MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
|
||||
MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
|
||||
MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
|
||||
&& (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
|
||||
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
||||
so it's only good for large enough sizes. */
|
||||
@@ -407,7 +407,7 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
|
||||
: int(Kernel::AssignmentTraits::DstAlignment),
|
||||
srcAlignment = Kernel::AssignmentTraits::JointAlignment
|
||||
};
|
||||
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(&kernel.dstEvaluator().coeffRef(0), size);
|
||||
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
|
||||
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
|
||||
|
||||
unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
|
||||
@@ -515,7 +515,7 @@ struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::Scalar Scalar;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
@@ -527,7 +527,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
||||
dstAlignment = alignable ? int(requestedAlignment)
|
||||
: int(Kernel::AssignmentTraits::DstAlignment)
|
||||
};
|
||||
const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0);
|
||||
const Scalar *dst_ptr = kernel.dstDataPtr();
|
||||
if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
|
||||
{
|
||||
// the pointer is not aligend-on scalar, so alignment is not possible
|
||||
@@ -563,7 +563,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
@@ -683,6 +683,11 @@ public:
|
||||
: int(DstEvaluatorType::Flags)&RowMajorBit ? inner
|
||||
: outer;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
|
||||
{
|
||||
return m_dstExpr.data();
|
||||
}
|
||||
|
||||
protected:
|
||||
DstEvaluatorType& m_dst;
|
||||
@@ -696,6 +701,26 @@ protected:
|
||||
* Part 5 : Entry point for dense rectangular assignment
|
||||
***************************************************************************/
|
||||
|
||||
template<typename DstXprType,typename SrcXprType, typename Functor>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)
|
||||
{
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(dst);
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(src);
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
}
|
||||
|
||||
template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
|
||||
dst.resize(dstRows, dstCols);
|
||||
eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType, typename Functor>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
|
||||
{
|
||||
@@ -706,10 +731,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType
|
||||
|
||||
// NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
|
||||
// we need to resize the destination after the source evaluator has been created.
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
resize_if_allowed(dst, src, func);
|
||||
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
|
||||
@@ -876,6 +898,34 @@ struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
src.evalTo(dst);
|
||||
}
|
||||
|
||||
// NOTE The following two functions are templated to avoid their instanciation if not needed
|
||||
// This is needed because some expressions supports evalTo only and/or have 'void' as scalar type.
|
||||
template<typename SrcScalarType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
src.addTo(dst);
|
||||
}
|
||||
|
||||
template<typename SrcScalarType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
src.subTo(dst);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
@@ -84,7 +84,8 @@ class vml_assign_traits
|
||||
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \
|
||||
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
|
||||
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
|
||||
resize_if_allowed(dst, src, func); \
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
|
||||
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
|
||||
VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
|
||||
@@ -144,7 +145,8 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)
|
||||
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
|
||||
typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
|
||||
const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
|
||||
resize_if_allowed(dst, src, func); \
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
|
||||
VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \
|
||||
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \
|
||||
|
||||
@@ -160,7 +160,7 @@ rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Deco
|
||||
{
|
||||
typedef typename Decomposition::RealScalar RealScalar;
|
||||
eigen_assert(dec.rows() == dec.cols());
|
||||
if (dec.rows() == 0) return RealScalar(1);
|
||||
if (dec.rows() == 0) return NumTraits<RealScalar>::infinity();
|
||||
if (matrix_norm == RealScalar(0)) return RealScalar(0);
|
||||
if (dec.rows() == 1) return RealScalar(1);
|
||||
const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec);
|
||||
|
||||
@@ -977,7 +977,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
|
||||
OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
|
||||
? int(outer_stride_at_compile_time<ArgType>::ret)
|
||||
: int(inner_stride_at_compile_time<ArgType>::ret),
|
||||
MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0,
|
||||
MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0,
|
||||
|
||||
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
|
||||
FlagsRowMajorBit = XprType::Flags&RowMajorBit,
|
||||
@@ -987,7 +987,9 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
|
||||
Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
|
||||
|
||||
PacketAlignment = unpacket_traits<PacketScalar>::alignment,
|
||||
Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
|
||||
Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic)
|
||||
&& (OuterStrideAtCompileTime!=0)
|
||||
&& (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
|
||||
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
|
||||
};
|
||||
typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
|
||||
@@ -1018,14 +1020,16 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
|
||||
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block)
|
||||
: m_argImpl(block.nestedExpression()),
|
||||
m_startRow(block.startRow()),
|
||||
m_startCol(block.startCol())
|
||||
m_startCol(block.startCol()),
|
||||
m_linear_offset(InnerPanel?(XprType::IsRowMajor ? block.startRow()*block.cols() : block.startCol()*block.rows()):0)
|
||||
{ }
|
||||
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
|
||||
enum {
|
||||
RowsAtCompileTime = XprType::RowsAtCompileTime
|
||||
RowsAtCompileTime = XprType::RowsAtCompileTime,
|
||||
ForwardLinearAccess = InnerPanel && bool(evaluator<ArgType>::Flags&LinearAccessBit)
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
@@ -1037,7 +1041,10 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
|
||||
if (ForwardLinearAccess)
|
||||
return m_argImpl.coeff(m_linear_offset.value() + index);
|
||||
else
|
||||
return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
@@ -1049,7 +1056,10 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index index)
|
||||
{
|
||||
return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
|
||||
if (ForwardLinearAccess)
|
||||
return m_argImpl.coeffRef(m_linear_offset.value() + index);
|
||||
else
|
||||
return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
@@ -1063,8 +1073,11 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
|
||||
RowsAtCompileTime == 1 ? index : 0);
|
||||
if (ForwardLinearAccess)
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(m_linear_offset.value() + index);
|
||||
else
|
||||
return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
|
||||
RowsAtCompileTime == 1 ? index : 0);
|
||||
}
|
||||
|
||||
template<int StoreMode, typename PacketType>
|
||||
@@ -1078,15 +1091,19 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
|
||||
EIGEN_STRONG_INLINE
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
|
||||
RowsAtCompileTime == 1 ? index : 0,
|
||||
x);
|
||||
if (ForwardLinearAccess)
|
||||
return m_argImpl.template writePacket<StoreMode,PacketType>(m_linear_offset.value() + index, x);
|
||||
else
|
||||
return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
|
||||
RowsAtCompileTime == 1 ? index : 0,
|
||||
x);
|
||||
}
|
||||
|
||||
protected:
|
||||
evaluator<ArgType> m_argImpl;
|
||||
const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
|
||||
const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
|
||||
const variable_if_dynamic<Index, InnerPanel ? Dynamic : 0> m_linear_offset;
|
||||
};
|
||||
|
||||
// TODO: This evaluator does not actually use the child evaluator;
|
||||
@@ -1556,9 +1573,7 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
|
||||
{ }
|
||||
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
// FIXME having to check whether ArgType is sparse here i not very nice.
|
||||
typedef typename internal::conditional<!internal::is_same<typename ArgType::StorageKind,Sparse>::value,
|
||||
typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index row, Index) const
|
||||
|
||||
@@ -46,7 +46,7 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
|
||||
typedef typename remove_reference<LhsNested>::type _LhsNested;
|
||||
typedef typename remove_reference<RhsNested>::type _RhsNested;
|
||||
enum {
|
||||
Flags = _LhsNested::Flags & RowMajorBit
|
||||
Flags = cwise_promote_storage_order<typename traits<Lhs>::StorageKind,typename traits<Rhs>::StorageKind,_LhsNested::Flags & RowMajorBit,_RhsNested::Flags & RowMajorBit>::value
|
||||
};
|
||||
};
|
||||
} // end namespace internal
|
||||
@@ -84,6 +84,7 @@ class CwiseBinaryOp :
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename internal::remove_all<BinaryOp>::type Functor;
|
||||
typedef typename internal::remove_all<LhsType>::type Lhs;
|
||||
typedef typename internal::remove_all<RhsType>::type Rhs;
|
||||
|
||||
|
||||
@@ -105,7 +105,7 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename CustomNullaryOp>
|
||||
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
|
||||
DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
|
||||
{
|
||||
return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);
|
||||
@@ -150,7 +150,7 @@ DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename CustomNullaryOp>
|
||||
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
|
||||
DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
|
||||
{
|
||||
return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);
|
||||
@@ -192,7 +192,7 @@ DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Constant(Index size, const Scalar& value)
|
||||
{
|
||||
return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value));
|
||||
@@ -208,7 +208,7 @@ DenseBase<Derived>::Constant(Index size, const Scalar& value)
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Constant(const Scalar& value)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
@@ -220,7 +220,7 @@ DenseBase<Derived>::Constant(const Scalar& value)
|
||||
* \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
@@ -232,7 +232,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const
|
||||
* \sa LinSpaced(Scalar,Scalar)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
@@ -264,7 +264,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
|
||||
* \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
@@ -276,7 +276,7 @@ DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
|
||||
* Special version for fixed size types which does not require the size parameter.
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
@@ -286,7 +286,7 @@ DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
|
||||
|
||||
/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
|
||||
template<typename Derived>
|
||||
bool DenseBase<Derived>::isApproxToConstant
|
||||
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApproxToConstant
|
||||
(const Scalar& val, const RealScalar& prec) const
|
||||
{
|
||||
typename internal::nested_eval<Derived,1>::type self(derived());
|
||||
@@ -301,7 +301,7 @@ bool DenseBase<Derived>::isApproxToConstant
|
||||
*
|
||||
* \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
|
||||
template<typename Derived>
|
||||
bool DenseBase<Derived>::isConstant
|
||||
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isConstant
|
||||
(const Scalar& val, const RealScalar& prec) const
|
||||
{
|
||||
return isApproxToConstant(val, prec);
|
||||
@@ -312,7 +312,7 @@ bool DenseBase<Derived>::isConstant
|
||||
* \sa setConstant(), Constant(), class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
|
||||
{
|
||||
setConstant(val);
|
||||
}
|
||||
@@ -322,7 +322,7 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
|
||||
* \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
|
||||
{
|
||||
return derived() = Constant(rows(), cols(), val);
|
||||
}
|
||||
@@ -337,7 +337,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
|
||||
* \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
|
||||
{
|
||||
resize(size);
|
||||
@@ -356,7 +356,7 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
|
||||
* \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
|
||||
{
|
||||
resize(rows, cols);
|
||||
@@ -380,7 +380,7 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
|
||||
* \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize));
|
||||
@@ -400,7 +400,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, con
|
||||
* \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return setLinSpaced(size(), low, high);
|
||||
@@ -423,7 +423,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low,
|
||||
* \sa Zero(), Zero(Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Zero(Index rows, Index cols)
|
||||
{
|
||||
return Constant(rows, cols, Scalar(0));
|
||||
@@ -446,7 +446,7 @@ DenseBase<Derived>::Zero(Index rows, Index cols)
|
||||
* \sa Zero(), Zero(Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Zero(Index size)
|
||||
{
|
||||
return Constant(size, Scalar(0));
|
||||
@@ -463,7 +463,7 @@ DenseBase<Derived>::Zero(Index size)
|
||||
* \sa Zero(Index), Zero(Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Zero()
|
||||
{
|
||||
return Constant(Scalar(0));
|
||||
@@ -478,7 +478,7 @@ DenseBase<Derived>::Zero()
|
||||
* \sa class CwiseNullaryOp, Zero()
|
||||
*/
|
||||
template<typename Derived>
|
||||
bool DenseBase<Derived>::isZero(const RealScalar& prec) const
|
||||
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isZero(const RealScalar& prec) const
|
||||
{
|
||||
typename internal::nested_eval<Derived,1>::type self(derived());
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
@@ -496,7 +496,7 @@ bool DenseBase<Derived>::isZero(const RealScalar& prec) const
|
||||
* \sa class CwiseNullaryOp, Zero()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
|
||||
{
|
||||
return setConstant(Scalar(0));
|
||||
}
|
||||
@@ -511,7 +511,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
|
||||
* \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setZero(Index newSize)
|
||||
{
|
||||
resize(newSize);
|
||||
@@ -529,7 +529,7 @@ PlainObjectBase<Derived>::setZero(Index newSize)
|
||||
* \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setZero(Index rows, Index cols)
|
||||
{
|
||||
resize(rows, cols);
|
||||
@@ -553,7 +553,7 @@ PlainObjectBase<Derived>::setZero(Index rows, Index cols)
|
||||
* \sa Ones(), Ones(Index), isOnes(), class Ones
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Ones(Index rows, Index cols)
|
||||
{
|
||||
return Constant(rows, cols, Scalar(1));
|
||||
@@ -576,7 +576,7 @@ DenseBase<Derived>::Ones(Index rows, Index cols)
|
||||
* \sa Ones(), Ones(Index,Index), isOnes(), class Ones
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Ones(Index newSize)
|
||||
{
|
||||
return Constant(newSize, Scalar(1));
|
||||
@@ -593,7 +593,7 @@ DenseBase<Derived>::Ones(Index newSize)
|
||||
* \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Ones()
|
||||
{
|
||||
return Constant(Scalar(1));
|
||||
@@ -608,7 +608,7 @@ DenseBase<Derived>::Ones()
|
||||
* \sa class CwiseNullaryOp, Ones()
|
||||
*/
|
||||
template<typename Derived>
|
||||
bool DenseBase<Derived>::isOnes
|
||||
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isOnes
|
||||
(const RealScalar& prec) const
|
||||
{
|
||||
return isApproxToConstant(Scalar(1), prec);
|
||||
@@ -622,7 +622,7 @@ bool DenseBase<Derived>::isOnes
|
||||
* \sa class CwiseNullaryOp, Ones()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
|
||||
{
|
||||
return setConstant(Scalar(1));
|
||||
}
|
||||
@@ -637,7 +637,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
|
||||
* \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setOnes(Index newSize)
|
||||
{
|
||||
resize(newSize);
|
||||
@@ -655,7 +655,7 @@ PlainObjectBase<Derived>::setOnes(Index newSize)
|
||||
* \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
|
||||
{
|
||||
resize(rows, cols);
|
||||
@@ -679,7 +679,7 @@ PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
|
||||
* \sa Identity(), setIdentity(), isIdentity()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
|
||||
MatrixBase<Derived>::Identity(Index rows, Index cols)
|
||||
{
|
||||
return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());
|
||||
@@ -696,7 +696,7 @@ MatrixBase<Derived>::Identity(Index rows, Index cols)
|
||||
* \sa Identity(Index,Index), setIdentity(), isIdentity()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
|
||||
MatrixBase<Derived>::Identity()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
@@ -771,7 +771,7 @@ struct setIdentity_impl<Derived, true>
|
||||
* \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
|
||||
{
|
||||
return internal::setIdentity_impl<Derived>::run(derived());
|
||||
}
|
||||
@@ -787,7 +787,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
|
||||
* \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
|
||||
{
|
||||
derived().resize(rows, cols);
|
||||
return setIdentity();
|
||||
@@ -800,7 +800,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index
|
||||
* \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i);
|
||||
@@ -815,7 +815,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
|
||||
* \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return BasisReturnType(SquareMatrixType::Identity(),i);
|
||||
@@ -828,7 +828,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
|
||||
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
|
||||
{ return Derived::Unit(0); }
|
||||
|
||||
/** \returns an expression of the Y axis unit vector (0,1{,0}^*)
|
||||
@@ -838,7 +838,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
|
||||
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
|
||||
{ return Derived::Unit(1); }
|
||||
|
||||
/** \returns an expression of the Z axis unit vector (0,0,1{,0}^*)
|
||||
@@ -848,7 +848,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
|
||||
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
|
||||
{ return Derived::Unit(2); }
|
||||
|
||||
/** \returns an expression of the W axis unit vector (0,0,0,1)
|
||||
@@ -858,7 +858,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
|
||||
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
|
||||
{ return Derived::Unit(3); }
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -296,7 +296,7 @@ template<typename Derived> class DenseBase
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const ReturnByValue<OtherDerived>& func);
|
||||
|
||||
/** \ínternal
|
||||
/** \internal
|
||||
* Copies \a other into *this without evaluating other. \returns a reference to *this.
|
||||
* \deprecated */
|
||||
template<typename OtherDerived>
|
||||
@@ -463,7 +463,17 @@ template<typename Derived> class DenseBase
|
||||
EIGEN_DEVICE_FUNC
|
||||
void visit(Visitor& func) const;
|
||||
|
||||
inline const WithFormat<Derived> format(const IOFormat& fmt) const;
|
||||
/** \returns a WithFormat proxy object allowing to print a matrix the with given
|
||||
* format \a fmt.
|
||||
*
|
||||
* See class IOFormat for some examples.
|
||||
*
|
||||
* \sa class IOFormat, class WithFormat
|
||||
*/
|
||||
inline const WithFormat<Derived> format(const IOFormat& fmt) const
|
||||
{
|
||||
return WithFormat<Derived>(derived(), fmt);
|
||||
}
|
||||
|
||||
/** \returns the unique coefficient of a 1x1 expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -474,9 +484,9 @@ template<typename Derived> class DenseBase
|
||||
return derived().coeff(0,0);
|
||||
}
|
||||
|
||||
bool all() const;
|
||||
bool any() const;
|
||||
Index count() const;
|
||||
EIGEN_DEVICE_FUNC bool all() const;
|
||||
EIGEN_DEVICE_FUNC bool any() const;
|
||||
EIGEN_DEVICE_FUNC Index count() const;
|
||||
|
||||
typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
|
||||
typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType;
|
||||
|
||||
@@ -13,9 +13,9 @@
|
||||
#define EIGEN_MATRIXSTORAGE_H
|
||||
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN EIGEN_DENSE_STORAGE_CTOR_PLUGIN;
|
||||
#define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) X; EIGEN_DENSE_STORAGE_CTOR_PLUGIN;
|
||||
#else
|
||||
#define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X)
|
||||
#endif
|
||||
|
||||
namespace Eigen {
|
||||
@@ -184,12 +184,16 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
|
||||
{
|
||||
internal::plain_array<T,Size,_Options> m_data;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage() {
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()) {}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage(const DenseStorage& other) : m_data(other.m_data) {}
|
||||
DenseStorage(const DenseStorage& other) : m_data(other.m_data) {
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
@@ -197,7 +201,7 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
|
||||
return *this;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) {
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols);
|
||||
EIGEN_UNUSED_VARIABLE(size);
|
||||
EIGEN_UNUSED_VARIABLE(rows);
|
||||
@@ -343,7 +347,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
|
||||
@@ -351,6 +355,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
, m_rows(other.m_rows)
|
||||
, m_cols(other.m_cols)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*m_cols)
|
||||
internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
@@ -403,7 +408,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
|
||||
else
|
||||
m_data = 0;
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
}
|
||||
m_rows = rows;
|
||||
m_cols = cols;
|
||||
@@ -422,7 +427,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0);
|
||||
EIGEN_UNUSED_VARIABLE(rows);
|
||||
}
|
||||
@@ -430,6 +435,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
|
||||
, m_cols(other.m_cols)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols*_Rows)
|
||||
internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
@@ -477,7 +483,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
|
||||
else
|
||||
m_data = 0;
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
}
|
||||
m_cols = cols;
|
||||
}
|
||||
@@ -495,7 +501,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols);
|
||||
EIGEN_UNUSED_VARIABLE(cols);
|
||||
}
|
||||
@@ -503,6 +509,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
|
||||
, m_rows(other.m_rows)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*_Cols)
|
||||
internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
@@ -550,7 +557,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
|
||||
else
|
||||
m_data = 0;
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
}
|
||||
m_rows = rows;
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ namespace Eigen {
|
||||
* \param MatrixType the type of the object in which we are taking a sub/main/super diagonal
|
||||
* \param DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal.
|
||||
* A positive value means a superdiagonal, a negative value means a subdiagonal.
|
||||
* You can also use Dynamic so the index can be set at runtime.
|
||||
* You can also use DynamicIndex so the index can be set at runtime.
|
||||
*
|
||||
* The matrix is not required to be square.
|
||||
*
|
||||
@@ -70,7 +70,10 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}
|
||||
explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index)
|
||||
{
|
||||
eigen_assert( a_index <= m_matrix.cols() && -a_index <= m_matrix.rows() );
|
||||
}
|
||||
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
|
||||
|
||||
|
||||
@@ -31,7 +31,8 @@ struct dot_nocheck
|
||||
typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
|
||||
typedef typename conj_prod::result_type ResScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
|
||||
EIGEN_STRONG_INLINE
|
||||
static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
|
||||
{
|
||||
return a.template binaryExpr<conj_prod>(b).sum();
|
||||
}
|
||||
@@ -43,7 +44,8 @@ struct dot_nocheck<T, U, true>
|
||||
typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
|
||||
typedef typename conj_prod::result_type ResScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
|
||||
EIGEN_STRONG_INLINE
|
||||
static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
|
||||
{
|
||||
return a.transpose().template binaryExpr<conj_prod>(b).sum();
|
||||
}
|
||||
@@ -51,7 +53,8 @@ struct dot_nocheck<T, U, true>
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \returns the dot product of *this with other.
|
||||
/** \fn MatrixBase::dot
|
||||
* \returns the dot product of *this with other.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
@@ -64,15 +67,18 @@ struct dot_nocheck<T, U, true>
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE
|
||||
typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
|
||||
MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
|
||||
#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG))
|
||||
typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
|
||||
|
||||
#endif
|
||||
|
||||
eigen_assert(size() == other.size());
|
||||
|
||||
return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);
|
||||
@@ -99,7 +105,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
|
||||
* \sa lpNorm(), dot(), squaredNorm()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
|
||||
EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
|
||||
{
|
||||
return numext::sqrt(squaredNorm());
|
||||
}
|
||||
@@ -114,7 +120,7 @@ inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real Matr
|
||||
* \sa norm(), normalize()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const typename MatrixBase<Derived>::PlainObject
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
|
||||
MatrixBase<Derived>::normalized() const
|
||||
{
|
||||
typedef typename internal::nested_eval<Derived,2>::type _Nested;
|
||||
@@ -136,7 +142,7 @@ MatrixBase<Derived>::normalized() const
|
||||
* \sa norm(), normalized()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline void MatrixBase<Derived>::normalize()
|
||||
EIGEN_STRONG_INLINE void MatrixBase<Derived>::normalize()
|
||||
{
|
||||
RealScalar z = squaredNorm();
|
||||
// NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
|
||||
@@ -157,7 +163,7 @@ inline void MatrixBase<Derived>::normalize()
|
||||
* \sa stableNorm(), stableNormalize(), normalized()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const typename MatrixBase<Derived>::PlainObject
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
|
||||
MatrixBase<Derived>::stableNormalized() const
|
||||
{
|
||||
typedef typename internal::nested_eval<Derived,3>::type _Nested;
|
||||
@@ -182,7 +188,7 @@ MatrixBase<Derived>::stableNormalized() const
|
||||
* \sa stableNorm(), stableNormalized(), normalize()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline void MatrixBase<Derived>::stableNormalize()
|
||||
EIGEN_STRONG_INLINE void MatrixBase<Derived>::stableNormalize()
|
||||
{
|
||||
RealScalar w = cwiseAbs().maxCoeff();
|
||||
RealScalar z = (derived()/w).squaredNorm();
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
namespace Eigen {
|
||||
|
||||
/** \class EigenBase
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
|
||||
*
|
||||
@@ -128,6 +129,7 @@ template<typename Derived> struct EigenBase
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
call_assignment(derived(), other.derived());
|
||||
@@ -136,6 +138,7 @@ Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
@@ -144,6 +147,7 @@ Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
|
||||
@@ -24,11 +24,17 @@ template<int Rows, int Cols, int Depth> struct product_type_selector;
|
||||
|
||||
template<int Size, int MaxSize> struct product_size_category
|
||||
{
|
||||
enum { is_large = MaxSize == Dynamic ||
|
||||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
|
||||
value = is_large ? Large
|
||||
: Size == 1 ? 1
|
||||
: Small
|
||||
enum {
|
||||
#ifndef EIGEN_CUDA_ARCH
|
||||
is_large = MaxSize == Dynamic ||
|
||||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
|
||||
(Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
|
||||
#else
|
||||
is_large = 0,
|
||||
#endif
|
||||
value = is_large ? Large
|
||||
: Size == 1 ? 1
|
||||
: Small
|
||||
};
|
||||
};
|
||||
|
||||
@@ -223,50 +229,65 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
|
||||
// on, the other hand it is good for the cache to pack the vector anyways...
|
||||
EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
|
||||
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
|
||||
MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal
|
||||
MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
|
||||
};
|
||||
|
||||
gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
|
||||
|
||||
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
|
||||
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
||||
|
||||
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
|
||||
evalToDest ? dest.data() : static_dest.data());
|
||||
|
||||
if(!evalToDest)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
Index size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
if(!alphaIsCompatible)
|
||||
{
|
||||
MappedDest(actualDestPtr, dest.size()).setZero();
|
||||
compatibleAlpha = RhsScalar(1);
|
||||
}
|
||||
else
|
||||
MappedDest(actualDestPtr, dest.size()) = dest;
|
||||
}
|
||||
|
||||
typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
|
||||
typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||
actualDestPtr, 1,
|
||||
compatibleAlpha);
|
||||
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
|
||||
|
||||
if (!evalToDest)
|
||||
if(!MightCannotUseDest)
|
||||
{
|
||||
if(!alphaIsCompatible)
|
||||
dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
|
||||
else
|
||||
dest = MappedDest(actualDestPtr, dest.size());
|
||||
// shortcut if we are sure to be able to use dest directly,
|
||||
// this ease the compiler to generate cleaner and more optimzized code for most common cases
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||
dest.data(), 1,
|
||||
compatibleAlpha);
|
||||
}
|
||||
else
|
||||
{
|
||||
gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
|
||||
|
||||
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
|
||||
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
|
||||
evalToDest ? dest.data() : static_dest.data());
|
||||
|
||||
if(!evalToDest)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
Index size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
if(!alphaIsCompatible)
|
||||
{
|
||||
MappedDest(actualDestPtr, dest.size()).setZero();
|
||||
compatibleAlpha = RhsScalar(1);
|
||||
}
|
||||
else
|
||||
MappedDest(actualDestPtr, dest.size()) = dest;
|
||||
}
|
||||
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||
actualDestPtr, 1,
|
||||
compatibleAlpha);
|
||||
|
||||
if (!evalToDest)
|
||||
{
|
||||
if(!alphaIsCompatible)
|
||||
dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
|
||||
else
|
||||
dest = MappedDest(actualDestPtr, dest.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -329,6 +350,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,false>
|
||||
template<typename Lhs, typename Rhs, typename Dest>
|
||||
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
// TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp
|
||||
typename nested_eval<Rhs,1>::type actual_rhs(rhs);
|
||||
const Index size = rhs.rows();
|
||||
@@ -342,6 +364,7 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
|
||||
template<typename Lhs, typename Rhs, typename Dest>
|
||||
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
|
||||
const Index rows = dest.rows();
|
||||
for(Index i=0; i<rows; ++i)
|
||||
@@ -361,8 +384,6 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
|
||||
*
|
||||
* \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
|
||||
*/
|
||||
#ifndef __CUDACC__
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline const Product<Derived, OtherDerived>
|
||||
@@ -394,8 +415,6 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
|
||||
return Product<Derived, OtherDerived>(derived(), other.derived());
|
||||
}
|
||||
|
||||
#endif // __CUDACC__
|
||||
|
||||
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
|
||||
*
|
||||
* The returned product will behave like any other expressions: the coefficients of the product will be
|
||||
|
||||
@@ -230,7 +230,7 @@ pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(
|
||||
* duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
|
||||
* Currently, this function is only used for scalar * complex products.
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
|
||||
ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
||||
|
||||
/** \internal \returns a packet with elements of \a *from quadrupled.
|
||||
@@ -278,7 +278,7 @@ inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
|
||||
}
|
||||
|
||||
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
|
||||
plset(const typename unpacket_traits<Packet>::type& a) { return a; }
|
||||
|
||||
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
|
||||
@@ -482,7 +482,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& fro
|
||||
* by the current computation.
|
||||
*/
|
||||
template<typename Packet, int LoadMode>
|
||||
inline Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
|
||||
{
|
||||
return ploadt<Packet, LoadMode>(from);
|
||||
}
|
||||
|
||||
@@ -105,24 +105,10 @@ class WithFormat
|
||||
}
|
||||
|
||||
protected:
|
||||
const typename ExpressionType::Nested m_matrix;
|
||||
typename ExpressionType::Nested m_matrix;
|
||||
IOFormat m_format;
|
||||
};
|
||||
|
||||
/** \returns a WithFormat proxy object allowing to print a matrix the with given
|
||||
* format \a fmt.
|
||||
*
|
||||
* See class IOFormat for some examples.
|
||||
*
|
||||
* \sa class IOFormat, class WithFormat
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const WithFormat<Derived>
|
||||
DenseBase<Derived>::format(const IOFormat& fmt) const
|
||||
{
|
||||
return WithFormat<Derived>(derived(), fmt);
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
// NOTE: This helper is kept for backward compatibility with previous code specializing
|
||||
|
||||
@@ -45,6 +45,7 @@ class Inverse : public InverseImpl<XprType,typename internal::traits<XprType>::S
|
||||
public:
|
||||
typedef typename XprType::StorageIndex StorageIndex;
|
||||
typedef typename XprType::PlainObject PlainObject;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename internal::ref_selector<XprType>::type XprTypeNested;
|
||||
typedef typename internal::remove_all<XprTypeNested>::type XprTypeNestedCleaned;
|
||||
typedef typename internal::ref_selector<Inverse>::type Nested;
|
||||
|
||||
@@ -20,11 +20,17 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
|
||||
{
|
||||
typedef traits<PlainObjectType> TraitsBase;
|
||||
enum {
|
||||
PlainObjectTypeInnerSize = ((traits<PlainObjectType>::Flags&RowMajorBit)==RowMajorBit)
|
||||
? PlainObjectType::ColsAtCompileTime
|
||||
: PlainObjectType::RowsAtCompileTime,
|
||||
|
||||
InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
|
||||
? int(PlainObjectType::InnerStrideAtCompileTime)
|
||||
: int(StrideType::InnerStrideAtCompileTime),
|
||||
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
|
||||
? int(PlainObjectType::OuterStrideAtCompileTime)
|
||||
? (InnerStrideAtCompileTime==Dynamic || PlainObjectTypeInnerSize==Dynamic
|
||||
? Dynamic
|
||||
: int(InnerStrideAtCompileTime) * int(PlainObjectTypeInnerSize))
|
||||
: int(StrideType::OuterStrideAtCompileTime),
|
||||
Alignment = int(MapOptions)&int(AlignedMask),
|
||||
Flags0 = TraitsBase::Flags & (~NestByRefBit),
|
||||
@@ -107,10 +113,11 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const
|
||||
{
|
||||
return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
|
||||
: IsVectorAtCompileTime ? this->size()
|
||||
: int(Flags)&RowMajorBit ? this->cols()
|
||||
: this->rows();
|
||||
return int(StrideType::OuterStrideAtCompileTime) != 0 ? m_stride.outer()
|
||||
: int(internal::traits<Map>::OuterStrideAtCompileTime) != Dynamic ? Index(internal::traits<Map>::OuterStrideAtCompileTime)
|
||||
: IsVectorAtCompileTime ? (this->size() * innerStride())
|
||||
: (int(Flags)&RowMajorBit) ? (this->cols() * innerStride())
|
||||
: (this->rows() * innerStride());
|
||||
}
|
||||
|
||||
/** Constructor in the fixed-size case.
|
||||
|
||||
@@ -43,6 +43,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
enum {
|
||||
RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
|
||||
ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
|
||||
InnerStrideAtCompileTime = internal::traits<Derived>::InnerStrideAtCompileTime,
|
||||
SizeAtCompileTime = Base::SizeAtCompileTime
|
||||
};
|
||||
|
||||
@@ -187,8 +188,11 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
|
||||
{
|
||||
#if EIGEN_MAX_ALIGN_BYTES>0
|
||||
// innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible value:
|
||||
const Index minInnerStride = InnerStrideAtCompileTime == Dynamic ? 1 : Index(InnerStrideAtCompileTime);
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(minInnerStride);
|
||||
eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0)
|
||||
|| (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
|
||||
|| (cols() * rows() * minInnerStride * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -348,31 +348,7 @@ struct norm1_retval
|
||||
* Implementation of hypot *
|
||||
****************************************************************************/
|
||||
|
||||
template<typename Scalar>
|
||||
struct hypot_impl
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
static inline RealScalar run(const Scalar& x, const Scalar& y)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(abs);
|
||||
EIGEN_USING_STD_MATH(sqrt);
|
||||
RealScalar _x = abs(x);
|
||||
RealScalar _y = abs(y);
|
||||
Scalar p, qp;
|
||||
if(_x>_y)
|
||||
{
|
||||
p = _x;
|
||||
qp = _y / p;
|
||||
}
|
||||
else
|
||||
{
|
||||
p = _y;
|
||||
qp = _x / p;
|
||||
}
|
||||
if(p==RealScalar(0)) return RealScalar(0);
|
||||
return p * sqrt(RealScalar(1) + qp*qp);
|
||||
}
|
||||
};
|
||||
template<typename Scalar> struct hypot_impl;
|
||||
|
||||
template<typename Scalar>
|
||||
struct hypot_retval
|
||||
@@ -495,7 +471,7 @@ namespace std_fallback {
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
EIGEN_USING_STD_MATH(log);
|
||||
Scalar x1p = RealScalar(1) + x;
|
||||
return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
|
||||
return numext::equal_strict(x1p, Scalar(1)) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -640,21 +616,28 @@ template<typename Scalar>
|
||||
struct random_default_impl<Scalar, false, true>
|
||||
{
|
||||
static inline Scalar run(const Scalar& x, const Scalar& y)
|
||||
{
|
||||
typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
|
||||
if(y<x)
|
||||
{
|
||||
if (y <= x)
|
||||
return x;
|
||||
// the following difference might overflow on a 32 bits system,
|
||||
// but since y>=x the result converted to an unsigned long is still correct.
|
||||
std::size_t range = ScalarX(y)-ScalarX(x);
|
||||
std::size_t offset = 0;
|
||||
// rejection sampling
|
||||
std::size_t divisor = 1;
|
||||
std::size_t multiplier = 1;
|
||||
if(range<RAND_MAX) divisor = (std::size_t(RAND_MAX)+1)/(range+1);
|
||||
else multiplier = 1 + range/(std::size_t(RAND_MAX)+1);
|
||||
// ScalarU is the unsigned counterpart of Scalar, possibly Scalar itself.
|
||||
typedef typename make_unsigned<Scalar>::type ScalarU;
|
||||
// ScalarX is the widest of ScalarU and unsigned int.
|
||||
// We'll deal only with ScalarX and unsigned int below thus avoiding signed
|
||||
// types and arithmetic and signed overflows (which are undefined behavior).
|
||||
typedef typename conditional<(ScalarU(-1) > unsigned(-1)), ScalarU, unsigned>::type ScalarX;
|
||||
// The following difference doesn't overflow, provided our integer types are two's
|
||||
// complement and have the same number of padding bits in signed and unsigned variants.
|
||||
// This is the case in most modern implementations of C++.
|
||||
ScalarX range = ScalarX(y) - ScalarX(x);
|
||||
ScalarX offset = 0;
|
||||
ScalarX divisor = 1;
|
||||
ScalarX multiplier = 1;
|
||||
const unsigned rand_max = RAND_MAX;
|
||||
if (range <= rand_max) divisor = (rand_max + 1) / (range + 1);
|
||||
else multiplier = 1 + range / (rand_max + 1);
|
||||
// Rejection sampling.
|
||||
do {
|
||||
offset = (std::size_t(std::rand()) * multiplier) / divisor;
|
||||
offset = (unsigned(std::rand()) * multiplier) / divisor;
|
||||
} while (offset > range);
|
||||
return Scalar(ScalarX(x) + offset);
|
||||
}
|
||||
@@ -1030,7 +1013,8 @@ inline int log2(int x)
|
||||
|
||||
/** \returns the square root of \a x.
|
||||
*
|
||||
* It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode,
|
||||
* It is essentially equivalent to
|
||||
* \code using std::sqrt; return sqrt(x); \endcode
|
||||
* but slightly faster for float/double and some compilers (e.g., gcc), thanks to
|
||||
* specializations when SSE is enabled.
|
||||
*
|
||||
@@ -1061,11 +1045,24 @@ double log(const double &x) { return ::log(x); }
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
typename NumTraits<T>::Real abs(const T &x) {
|
||||
typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type
|
||||
abs(const T &x) {
|
||||
EIGEN_USING_STD_MATH(abs);
|
||||
return abs(x);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
typename internal::enable_if<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex),typename NumTraits<T>::Real>::type
|
||||
abs(const T &x) {
|
||||
return x;
|
||||
}
|
||||
|
||||
#if defined(__SYCL_DEVICE_ONLY__)
|
||||
EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); }
|
||||
EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); }
|
||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float abs(const float &x) { return ::fabsf(x); }
|
||||
|
||||
@@ -71,6 +71,29 @@ T generic_fast_tanh_float(const T& a_x)
|
||||
return pdiv(p, q);
|
||||
}
|
||||
|
||||
template<typename RealScalar>
|
||||
EIGEN_STRONG_INLINE
|
||||
RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(sqrt);
|
||||
RealScalar p, qp;
|
||||
p = numext::maxi(x,y);
|
||||
if(p==RealScalar(0)) return RealScalar(0);
|
||||
qp = numext::mini(y,x) / p;
|
||||
return p * sqrt(RealScalar(1) + qp*qp);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
struct hypot_impl
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
static inline RealScalar run(const Scalar& x, const Scalar& y)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(abs);
|
||||
return positive_real_hypot<RealScalar>(abs(x), abs(y));
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -274,8 +274,6 @@ class Matrix
|
||||
: Base(std::move(other))
|
||||
{
|
||||
Base::_check_template_params();
|
||||
if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
|
||||
Base::_set_noalias(other);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
|
||||
|
||||
@@ -160,20 +160,11 @@ template<typename Derived> class MatrixBase
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator-=(const MatrixBase<OtherDerived>& other);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<Derived,OtherDerived,LazyProduct>
|
||||
operator*(const MatrixBase<OtherDerived> &other) const
|
||||
{ return this->lazyProduct(other); }
|
||||
#else
|
||||
|
||||
template<typename OtherDerived>
|
||||
const Product<Derived,OtherDerived>
|
||||
operator*(const MatrixBase<OtherDerived> &other) const;
|
||||
|
||||
#endif
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<Derived,OtherDerived,LazyProduct>
|
||||
@@ -294,7 +285,7 @@ template<typename Derived> class MatrixBase
|
||||
* fuzzy comparison such as isApprox()
|
||||
* \sa isApprox(), operator!= */
|
||||
template<typename OtherDerived>
|
||||
inline bool operator==(const MatrixBase<OtherDerived>& other) const
|
||||
EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase<OtherDerived>& other) const
|
||||
{ return cwiseEqual(other).all(); }
|
||||
|
||||
/** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other.
|
||||
@@ -302,7 +293,7 @@ template<typename Derived> class MatrixBase
|
||||
* fuzzy comparison such as isApprox()
|
||||
* \sa isApprox(), operator== */
|
||||
template<typename OtherDerived>
|
||||
inline bool operator!=(const MatrixBase<OtherDerived>& other) const
|
||||
EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase<OtherDerived>& other) const
|
||||
{ return cwiseNotEqual(other).any(); }
|
||||
|
||||
NoAlias<Derived,Eigen::MatrixBase > noalias();
|
||||
@@ -453,16 +444,24 @@ template<typename Derived> class MatrixBase
|
||||
///////// MatrixFunctions module /////////
|
||||
|
||||
typedef typename internal::stem_function<Scalar>::type StemFunction;
|
||||
const MatrixExponentialReturnValue<Derived> exp() const;
|
||||
#define EIGEN_MATRIX_FUNCTION(ReturnType, Name, Description) \
|
||||
/** \returns an expression of the matrix Description of \c *this. \brief This function requires the <a href="unsupported/group__MatrixFunctions__Module.html"> unsupported MatrixFunctions module</a>. To compute the coefficient-wise Description use ArrayBase::##Name . */ \
|
||||
const ReturnType<Derived> Name() const;
|
||||
#define EIGEN_MATRIX_FUNCTION_1(ReturnType, Name, Description, Argument) \
|
||||
/** \returns an expression of the matrix Description of \c *this. \brief This function requires the <a href="unsupported/group__MatrixFunctions__Module.html"> unsupported MatrixFunctions module</a>. To compute the coefficient-wise Description use ArrayBase::##Name . */ \
|
||||
const ReturnType<Derived> Name(Argument) const;
|
||||
|
||||
EIGEN_MATRIX_FUNCTION(MatrixExponentialReturnValue, exp, exponential)
|
||||
/** \brief Helper function for the <a href="unsupported/group__MatrixFunctions__Module.html"> unsupported MatrixFunctions module</a>.*/
|
||||
const MatrixFunctionReturnValue<Derived> matrixFunction(StemFunction f) const;
|
||||
const MatrixFunctionReturnValue<Derived> cosh() const;
|
||||
const MatrixFunctionReturnValue<Derived> sinh() const;
|
||||
const MatrixFunctionReturnValue<Derived> cos() const;
|
||||
const MatrixFunctionReturnValue<Derived> sin() const;
|
||||
const MatrixSquareRootReturnValue<Derived> sqrt() const;
|
||||
const MatrixLogarithmReturnValue<Derived> log() const;
|
||||
const MatrixPowerReturnValue<Derived> pow(const RealScalar& p) const;
|
||||
const MatrixComplexPowerReturnValue<Derived> pow(const std::complex<RealScalar>& p) const;
|
||||
EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cosh, hyperbolic cosine)
|
||||
EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sinh, hyperbolic sine)
|
||||
EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cos, cosine)
|
||||
EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sin, sine)
|
||||
EIGEN_MATRIX_FUNCTION(MatrixSquareRootReturnValue, sqrt, square root)
|
||||
EIGEN_MATRIX_FUNCTION(MatrixLogarithmReturnValue, log, logarithm)
|
||||
EIGEN_MATRIX_FUNCTION_1(MatrixPowerReturnValue, pow, power to \c p, const RealScalar& p)
|
||||
EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const std::complex<RealScalar>& p)
|
||||
|
||||
protected:
|
||||
EIGEN_DEVICE_FUNC MatrixBase() : Base() {}
|
||||
|
||||
@@ -215,6 +215,8 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
|
||||
static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
|
||||
|
||||
static inline int digits10() { return NumTraits<Scalar>::digits10(); }
|
||||
};
|
||||
|
||||
template<> struct NumTraits<std::string>
|
||||
|
||||
@@ -41,7 +41,7 @@ template<> struct check_rows_cols_for_overflow<Dynamic> {
|
||||
{
|
||||
// http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
|
||||
// we assume Index is signed
|
||||
Index max_index = (size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed
|
||||
Index max_index = (std::size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed
|
||||
bool error = (rows == 0 || cols == 0) ? false
|
||||
: (rows > max_index / cols);
|
||||
if (error)
|
||||
@@ -58,6 +58,28 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
namespace doxygen {
|
||||
|
||||
// This is a workaround to doxygen not being able to understand the inheritance logic
|
||||
// when it is hidden by the dense_xpr_base helper struct.
|
||||
// Moreover, doxygen fails to include members that are not documented in the declaration body of
|
||||
// MatrixBase if we inherits MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >,
|
||||
// this is why we simply inherits MatrixBase, though this does not make sense.
|
||||
|
||||
/** This class is just a workaround for Doxygen and it does not not actually exist. */
|
||||
template<typename Derived> struct dense_xpr_base_dispatcher;
|
||||
/** This class is just a workaround for Doxygen and it does not not actually exist. */
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct dense_xpr_base_dispatcher<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
: public MatrixBase {};
|
||||
/** This class is just a workaround for Doxygen and it does not not actually exist. */
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct dense_xpr_base_dispatcher<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
: public ArrayBase {};
|
||||
|
||||
} // namespace doxygen
|
||||
|
||||
/** \class PlainObjectBase
|
||||
* \ingroup Core_Module
|
||||
* \brief %Dense storage base class for matrices and arrays.
|
||||
@@ -65,26 +87,10 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m
|
||||
* This class can be extended with the help of the plugin mechanism described on the page
|
||||
* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN.
|
||||
*
|
||||
* \tparam Derived is the derived type, e.g., a Matrix or Array
|
||||
*
|
||||
* \sa \ref TopicClassHierarchy
|
||||
*/
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
namespace doxygen {
|
||||
|
||||
// this is a workaround to doxygen not being able to understand the inheritance logic
|
||||
// when it is hidden by the dense_xpr_base helper struct.
|
||||
/** This class is just a workaround for Doxygen and it does not not actually exist. */
|
||||
template<typename Derived> struct dense_xpr_base_dispatcher;
|
||||
/** This class is just a workaround for Doxygen and it does not not actually exist. */
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct dense_xpr_base_dispatcher<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
: public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
|
||||
/** This class is just a workaround for Doxygen and it does not not actually exist. */
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct dense_xpr_base_dispatcher<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
: public ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
|
||||
|
||||
} // namespace doxygen
|
||||
|
||||
template<typename Derived>
|
||||
class PlainObjectBase : public doxygen::dense_xpr_base_dispatcher<Derived>
|
||||
#else
|
||||
@@ -554,7 +560,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
|
||||
public:
|
||||
|
||||
/** \copydoc DenseBase::operator=(const EigenBase<OtherDerived>&)
|
||||
/** \brief Copies the generic expression \a other into *this.
|
||||
* \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -570,6 +577,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
* while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
|
||||
* \a data pointers.
|
||||
*
|
||||
* Here is an example using strides:
|
||||
* \include Matrix_Map_stride.cpp
|
||||
* Output: \verbinclude Matrix_Map_stride.out
|
||||
*
|
||||
* \see class Map
|
||||
*/
|
||||
//@{
|
||||
@@ -763,6 +774,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
{
|
||||
// NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
|
||||
const bool is_integer = NumTraits<T>::IsInteger;
|
||||
EIGEN_UNUSED_VARIABLE(is_integer);
|
||||
EIGEN_STATIC_ASSERT(is_integer,
|
||||
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
|
||||
resize(size);
|
||||
@@ -804,6 +816,13 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
this->_set_noalias(other);
|
||||
}
|
||||
|
||||
// Initialize an arbitrary matrix from an object convertible to the Derived type.
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const Derived& other){
|
||||
this->_set_noalias(other);
|
||||
}
|
||||
|
||||
// Initialize an arbitrary matrix from a generic Eigen expression
|
||||
template<typename T, typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -826,7 +845,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
this->derived() = r;
|
||||
}
|
||||
|
||||
// For fixed -size arrays:
|
||||
// For fixed-size Array<Scalar,...>
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const Scalar& val0,
|
||||
@@ -838,6 +857,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
Base::setConstant(val0);
|
||||
}
|
||||
|
||||
// For fixed-size Array<Index,...>
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const Index& val0,
|
||||
|
||||
@@ -97,8 +97,8 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option,
|
||||
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; }
|
||||
EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; }
|
||||
@@ -127,7 +127,7 @@ public:
|
||||
using Base::derived;
|
||||
typedef typename Base::Scalar Scalar;
|
||||
|
||||
operator const Scalar() const
|
||||
EIGEN_STRONG_INLINE operator const Scalar() const
|
||||
{
|
||||
return internal::evaluator<ProductXpr>(derived()).coeff(0,0);
|
||||
}
|
||||
@@ -162,7 +162,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
|
||||
|
||||
public:
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
|
||||
eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
|
||||
@@ -170,7 +170,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
|
||||
return internal::evaluator<Derived>(derived()).coeff(row,col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar coeff(Index i) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index i) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
|
||||
eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
|
||||
|
||||
@@ -32,7 +32,7 @@ struct evaluator<Product<Lhs, Rhs, Options> >
|
||||
typedef Product<Lhs, Rhs, Options> XprType;
|
||||
typedef product_evaluator<XprType> Base;
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
|
||||
};
|
||||
|
||||
// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
|
||||
@@ -55,7 +55,7 @@ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
|
||||
const Product<Lhs, Rhs, DefaultProduct> > XprType;
|
||||
typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
|
||||
: Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
|
||||
{}
|
||||
};
|
||||
@@ -68,7 +68,7 @@ struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
|
||||
typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
|
||||
typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
|
||||
: Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
|
||||
Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
|
||||
xpr.index() ))
|
||||
@@ -158,10 +158,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
|
||||
static EIGEN_STRONG_INLINE
|
||||
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
// FIXME shall we handle nested_eval here?
|
||||
generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
|
||||
}
|
||||
@@ -176,10 +173,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
|
||||
static EIGEN_STRONG_INLINE
|
||||
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
// FIXME shall we handle nested_eval here?
|
||||
generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
|
||||
}
|
||||
@@ -213,6 +207,12 @@ struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename
|
||||
static const bool value = true;
|
||||
};
|
||||
|
||||
template<typename OtherXpr, typename Lhs, typename Rhs>
|
||||
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
|
||||
const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
|
||||
static const bool value = true;
|
||||
};
|
||||
|
||||
template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
|
||||
struct assignment_from_xpr_op_product
|
||||
{
|
||||
@@ -246,19 +246,19 @@ template<typename Lhs, typename Rhs>
|
||||
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
|
||||
{
|
||||
template<typename Dst>
|
||||
static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
|
||||
}
|
||||
|
||||
template<typename Dst>
|
||||
static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
|
||||
}
|
||||
|
||||
template<typename Dst>
|
||||
static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{ dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
|
||||
};
|
||||
|
||||
@@ -312,25 +312,25 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
|
||||
};
|
||||
|
||||
template<typename Dst>
|
||||
static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
|
||||
}
|
||||
|
||||
template<typename Dst>
|
||||
static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
|
||||
}
|
||||
|
||||
template<typename Dst>
|
||||
static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
|
||||
}
|
||||
|
||||
template<typename Dst>
|
||||
static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
||||
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
||||
{
|
||||
internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
|
||||
}
|
||||
@@ -366,17 +366,21 @@ template<typename Lhs, typename Rhs>
|
||||
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
|
||||
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
|
||||
{
|
||||
typedef typename nested_eval<Lhs,1>::type LhsNested;
|
||||
typedef typename nested_eval<Rhs,1>::type RhsNested;
|
||||
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
||||
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
|
||||
typedef typename internal::conditional<int(Side)==OnTheRight,Lhs,Rhs>::type MatrixType;
|
||||
typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
|
||||
|
||||
template<typename Dest>
|
||||
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
||||
{
|
||||
LhsNested actual_lhs(lhs);
|
||||
RhsNested actual_rhs(rhs);
|
||||
internal::gemv_dense_selector<Side,
|
||||
(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
|
||||
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
|
||||
>::run(lhs, rhs, dst, alpha);
|
||||
>::run(actual_lhs, actual_rhs, dst, alpha);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -781,7 +785,11 @@ public:
|
||||
_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
|
||||
_LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
|
||||
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
|
||||
Alignment = evaluator<MatrixType>::Alignment
|
||||
Alignment = evaluator<MatrixType>::Alignment,
|
||||
|
||||
AsScalarProduct = (DiagonalType::SizeAtCompileTime==1)
|
||||
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
|
||||
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
|
||||
};
|
||||
|
||||
diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
|
||||
@@ -793,7 +801,10 @@ public:
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
|
||||
{
|
||||
return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
|
||||
if(AsScalarProduct)
|
||||
return m_diagImpl.coeff(0) * m_matImpl.coeff(idx);
|
||||
else
|
||||
return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
@@ -407,7 +407,7 @@ protected:
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename Func>
|
||||
typename internal::traits<Derived>::Scalar
|
||||
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::redux(const Func& func) const
|
||||
{
|
||||
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
||||
|
||||
@@ -95,6 +95,8 @@ protected:
|
||||
template<typename Expression>
|
||||
EIGEN_DEVICE_FUNC void construct(Expression& expr)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(PlainObjectType,Expression);
|
||||
|
||||
if(PlainObjectType::RowsAtCompileTime==1)
|
||||
{
|
||||
eigen_assert(expr.rows()==1 || expr.cols()==1);
|
||||
|
||||
@@ -45,7 +45,7 @@ struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
|
||||
};
|
||||
}
|
||||
|
||||
// FIXME could also be called SelfAdjointWrapper to be consistent with DiagonalWrapper ??
|
||||
|
||||
template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
: public TriangularBase<SelfAdjointView<_MatrixType, UpLo> >
|
||||
{
|
||||
@@ -60,16 +60,20 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
/** \brief The type of coefficients in this matrix */
|
||||
typedef typename internal::traits<SelfAdjointView>::Scalar Scalar;
|
||||
typedef typename MatrixType::StorageIndex StorageIndex;
|
||||
typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
|
||||
|
||||
enum {
|
||||
Mode = internal::traits<SelfAdjointView>::Mode,
|
||||
Flags = internal::traits<SelfAdjointView>::Flags
|
||||
Flags = internal::traits<SelfAdjointView>::Flags,
|
||||
TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0)
|
||||
};
|
||||
typedef typename MatrixType::PlainObject PlainObject;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
|
||||
{}
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
@@ -187,6 +191,36 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2);
|
||||
}
|
||||
|
||||
typedef SelfAdjointView<const MatrixConjugateReturnType,UpLo> ConjugateReturnType;
|
||||
/** \sa MatrixBase::conjugate() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const ConjugateReturnType conjugate() const
|
||||
{ return ConjugateReturnType(m_matrix.conjugate()); }
|
||||
|
||||
typedef SelfAdjointView<const typename MatrixType::AdjointReturnType,TransposeMode> AdjointReturnType;
|
||||
/** \sa MatrixBase::adjoint() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const AdjointReturnType adjoint() const
|
||||
{ return AdjointReturnType(m_matrix.adjoint()); }
|
||||
|
||||
typedef SelfAdjointView<typename MatrixType::TransposeReturnType,TransposeMode> TransposeReturnType;
|
||||
/** \sa MatrixBase::transpose() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline TransposeReturnType transpose()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
typename MatrixType::TransposeReturnType tmp(m_matrix);
|
||||
return TransposeReturnType(tmp);
|
||||
}
|
||||
|
||||
typedef SelfAdjointView<const typename MatrixType::ConstTransposeReturnType,TransposeMode> ConstTransposeReturnType;
|
||||
/** \sa MatrixBase::transpose() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const ConstTransposeReturnType transpose() const
|
||||
{
|
||||
return ConstTransposeReturnType(m_matrix.transpose());
|
||||
}
|
||||
|
||||
/** \returns a const expression of the main diagonal of the matrix \c *this
|
||||
*
|
||||
* This method simply returns the diagonal of the nested expression, thus by-passing the SelfAdjointView decorator.
|
||||
@@ -287,6 +321,7 @@ public:
|
||||
* Implementation of MatrixBase methods
|
||||
***************************************************************************/
|
||||
|
||||
/** This is the const version of MatrixBase::selfadjointView() */
|
||||
template<typename Derived>
|
||||
template<unsigned int UpLo>
|
||||
typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type
|
||||
@@ -295,6 +330,15 @@ MatrixBase<Derived>::selfadjointView() const
|
||||
return typename ConstSelfAdjointViewReturnType<UpLo>::Type(derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of a symmetric/self-adjoint view extracted from the upper or lower triangular part of the current matrix
|
||||
*
|
||||
* The parameter \a UpLo can be either \c #Upper or \c #Lower
|
||||
*
|
||||
* Example: \include MatrixBase_selfadjointView.cpp
|
||||
* Output: \verbinclude MatrixBase_selfadjointView.out
|
||||
*
|
||||
* \sa class SelfAdjointView
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<unsigned int UpLo>
|
||||
typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type
|
||||
|
||||
@@ -15,33 +15,29 @@ namespace Eigen {
|
||||
// TODO generalize the scalar type of 'other'
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
|
||||
{
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
|
||||
{
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
|
||||
{
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
|
||||
{
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
@@ -34,12 +34,12 @@ template<typename Decomposition, typename RhsType,typename StorageKind> struct s
|
||||
template<typename Decomposition, typename RhsType>
|
||||
struct solve_traits<Decomposition,RhsType,Dense>
|
||||
{
|
||||
typedef Matrix<typename RhsType::Scalar,
|
||||
typedef typename make_proper_matrix_type<typename RhsType::Scalar,
|
||||
Decomposition::ColsAtCompileTime,
|
||||
RhsType::ColsAtCompileTime,
|
||||
RhsType::PlainObject::Options,
|
||||
Decomposition::MaxColsAtCompileTime,
|
||||
RhsType::MaxColsAtCompileTime> PlainObject;
|
||||
RhsType::MaxColsAtCompileTime>::type PlainObject;
|
||||
};
|
||||
|
||||
template<typename Decomposition, typename RhsType>
|
||||
|
||||
@@ -161,6 +161,7 @@ struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
|
||||
* TriangularView methods
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
template<int Side, typename OtherDerived>
|
||||
void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
|
||||
@@ -168,6 +169,9 @@ void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<Ot
|
||||
OtherDerived& other = _other.const_cast_derived();
|
||||
eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );
|
||||
eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
|
||||
// If solving for a 0x0 matrix, nothing to do, simply return.
|
||||
if (derived().cols() == 0)
|
||||
return;
|
||||
|
||||
enum { copy = (internal::traits<OtherDerived>::Flags & RowMajorBit) && OtherDerived::IsVectorAtCompileTime && OtherDerived::SizeAtCompileTime!=1};
|
||||
typedef typename internal::conditional<copy,
|
||||
@@ -188,6 +192,7 @@ TriangularViewImpl<Derived,Mode,Dense>::solve(const MatrixBase<Other>& other) co
|
||||
{
|
||||
return internal::triangular_solve_retval<Side,TriangularViewType,Other>(derived(), other.derived());
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace internal {
|
||||
|
||||
|
||||
@@ -165,12 +165,13 @@ MatrixBase<Derived>::stableNorm() const
|
||||
|
||||
typedef typename internal::nested_eval<Derived,2>::type DerivedCopy;
|
||||
typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean;
|
||||
DerivedCopy copy(derived());
|
||||
const DerivedCopy copy(derived());
|
||||
|
||||
enum {
|
||||
CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit)
|
||||
|| (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME Alignment)>0 might not be enough
|
||||
) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT) // ifwe cannot allocate on the stack, then let's not bother about this optimization
|
||||
) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT)
|
||||
&& (EIGEN_MAX_STATIC_ALIGN_BYTES>0) // if we cannot allocate on the stack, then let's not bother about this optimization
|
||||
};
|
||||
typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
|
||||
typename DerivedCopyClean::ConstSegmentReturnType>::type SegmentWrapper;
|
||||
|
||||
@@ -384,7 +384,7 @@ class Transpose<TranspositionsBase<TranspositionsDerived> >
|
||||
const Product<OtherDerived, Transpose, AliasFreeProduct>
|
||||
operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trt)
|
||||
{
|
||||
return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt.derived());
|
||||
return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt);
|
||||
}
|
||||
|
||||
/** \returns the \a matrix with the inverse transpositions applied to the rows.
|
||||
|
||||
@@ -470,6 +470,8 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
|
||||
* \a Side==OnTheLeft (the default), or the right-inverse-multiply \a other * inverse(\c *this) if
|
||||
* \a Side==OnTheRight.
|
||||
*
|
||||
* Note that the template parameter \c Side can be ommitted, in which case \c Side==OnTheLeft
|
||||
*
|
||||
* The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the
|
||||
* diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this
|
||||
* is an upper (resp. lower) triangular matrix.
|
||||
@@ -495,6 +497,8 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
|
||||
* \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
|
||||
* This function will const_cast it, so constness isn't honored here.
|
||||
*
|
||||
* Note that the template parameter \c Side can be ommitted, in which case \c Side==OnTheLeft
|
||||
*
|
||||
* See TriangularView:solve() for the details.
|
||||
*/
|
||||
template<int Side, typename OtherDerived>
|
||||
@@ -539,13 +543,14 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
|
||||
|
||||
template<typename ProductType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha);
|
||||
EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha, bool beta);
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of triangular evaluation/assignment
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
// FIXME should we keep that possibility
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
template<typename OtherDerived>
|
||||
@@ -583,6 +588,7 @@ void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBas
|
||||
eigen_assert(Mode == int(OtherDerived::Mode));
|
||||
internal::call_assignment_no_alias(derived(), other.derived());
|
||||
}
|
||||
#endif
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of TriangularBase methods
|
||||
@@ -944,8 +950,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
dst.setZero();
|
||||
dst._assignProduct(src, 1);
|
||||
dst._assignProduct(src, 1, 0);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -956,7 +961,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_ass
|
||||
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,typename SrcXprType::Scalar> &)
|
||||
{
|
||||
dst._assignProduct(src, 1);
|
||||
dst._assignProduct(src, 1, 1);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -967,7 +972,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_ass
|
||||
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,typename SrcXprType::Scalar> &)
|
||||
{
|
||||
dst._assignProduct(src, -1);
|
||||
dst._assignProduct(src, -1, 1);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -194,7 +194,8 @@ struct functor_traits<max_coeff_visitor<Scalar> > {
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \returns the minimum of all coefficients of *this and puts in *row and *col its location.
|
||||
/** \fn DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
|
||||
* \returns the minimum of all coefficients of *this and puts in *row and *col its location.
|
||||
* \warning the result is undefined if \c *this contains NaN.
|
||||
*
|
||||
* \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff()
|
||||
@@ -230,7 +231,8 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
|
||||
return minVisitor.res;
|
||||
}
|
||||
|
||||
/** \returns the maximum of all coefficients of *this and puts in *row and *col its location.
|
||||
/** \fn DenseBase<Derived>::maxCoeff(IndexType* rowId, IndexType* colId) const
|
||||
* \returns the maximum of all coefficients of *this and puts in *row and *col its location.
|
||||
* \warning the result is undefined if \c *this contains NaN.
|
||||
*
|
||||
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff()
|
||||
|
||||
@@ -204,23 +204,7 @@ template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet8f, Packet4cf, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const
|
||||
{ return Packet4cf(Eigen::internal::pmul(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4cf, Packet8f, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const
|
||||
{ return Packet4cf(Eigen::internal::pmul(x.v, y)); }
|
||||
};
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
|
||||
{
|
||||
@@ -400,23 +384,7 @@ template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4d, Packet2cd, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const
|
||||
{ return Packet2cd(Eigen::internal::pmul(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cd, Packet4d, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const
|
||||
{ return Packet2cd(Eigen::internal::pmul(x.v, y)); }
|
||||
};
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
|
||||
{
|
||||
|
||||
@@ -159,11 +159,12 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co
|
||||
|
||||
#ifdef __FMA__
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
|
||||
#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
|
||||
// clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
|
||||
// and gcc stupidly generates a vfmadd132ps instruction,
|
||||
// so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
|
||||
// the result of the product.
|
||||
#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
|
||||
// Clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
|
||||
// and even register spilling with clang>=6.0 (bug 1637).
|
||||
// Gcc stupidly generates a vfmadd132ps instruction.
|
||||
// So let's enforce it to generate a vfmadd231ps instruction since the most common use
|
||||
// case is to accumulate the result of the product.
|
||||
Packet8f res = c;
|
||||
__asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
|
||||
return res;
|
||||
@@ -172,7 +173,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f&
|
||||
#endif
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
|
||||
#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
|
||||
#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
|
||||
// see above
|
||||
Packet4d res = c;
|
||||
__asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
|
||||
@@ -308,9 +309,9 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
|
||||
}
|
||||
|
||||
#ifndef EIGEN_VECTORIZE_AVX512
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
|
||||
@@ -333,9 +334,12 @@ template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
|
||||
{
|
||||
__m256d tmp = _mm256_shuffle_pd(a,a,5);
|
||||
return _mm256_permute2f128_pd(tmp, tmp, 1);
|
||||
|
||||
#if 0
|
||||
// This version is unlikely to be faster as _mm256_shuffle_ps and _mm256_permute_pd
|
||||
// exhibit the same latency/throughput, but it is here for future reference/benchmarking...
|
||||
__m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
|
||||
return _mm256_permute_pd(swap_halves,5);
|
||||
#endif
|
||||
}
|
||||
|
||||
// pabs should be ok
|
||||
@@ -395,14 +399,11 @@ template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
|
||||
{
|
||||
Packet8f tmp0 = _mm256_hadd_ps(a,_mm256_permute2f128_ps(a,a,1));
|
||||
tmp0 = _mm256_hadd_ps(tmp0,tmp0);
|
||||
return pfirst(_mm256_hadd_ps(tmp0, tmp0));
|
||||
return predux(Packet4f(_mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1))));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
|
||||
{
|
||||
Packet4d tmp0 = _mm256_hadd_pd(a,_mm256_permute2f128_pd(a,a,1));
|
||||
return pfirst(_mm256_hadd_pd(tmp0,tmp0));
|
||||
return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f predux_downto4<Packet8f>(const Packet8f& a)
|
||||
|
||||
@@ -88,9 +88,9 @@ plog<Packet16f>(const Packet16f& _x) {
|
||||
// x = x + x - 1.0;
|
||||
// } else { x = x - 1.0; }
|
||||
__mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ);
|
||||
Packet16f tmp = _mm512_mask_blend_ps(mask, x, _mm512_setzero_ps());
|
||||
Packet16f tmp = _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), x);
|
||||
x = psub(x, p16f_1);
|
||||
e = psub(e, _mm512_mask_blend_ps(mask, p16f_1, _mm512_setzero_ps()));
|
||||
e = psub(e, _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), p16f_1));
|
||||
x = padd(x, tmp);
|
||||
|
||||
Packet16f x2 = pmul(x, x);
|
||||
@@ -119,8 +119,9 @@ plog<Packet16f>(const Packet16f& _x) {
|
||||
x = padd(x, y2);
|
||||
|
||||
// Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
|
||||
return _mm512_mask_blend_ps(iszero_mask, p16f_minus_inf,
|
||||
_mm512_mask_blend_ps(invalid_mask, p16f_nan, x));
|
||||
return _mm512_mask_blend_ps(iszero_mask,
|
||||
_mm512_mask_blend_ps(invalid_mask, x, p16f_nan),
|
||||
p16f_minus_inf);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -266,8 +267,7 @@ psqrt<Packet16f>(const Packet16f& _x) {
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
__mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ);
|
||||
Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_rsqrt14_ps(_x),
|
||||
_mm512_setzero_ps());
|
||||
Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_setzero_ps(), _mm512_rsqrt14_ps(_x));
|
||||
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
|
||||
@@ -289,8 +289,7 @@ psqrt<Packet8d>(const Packet8d& _x) {
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
__mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ);
|
||||
Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_rsqrt14_pd(_x),
|
||||
_mm512_setzero_pd());
|
||||
Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_setzero_pd(), _mm512_rsqrt14_pd(_x));
|
||||
|
||||
// Do a first step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
|
||||
@@ -333,20 +332,18 @@ prsqrt<Packet16f>(const Packet16f& _x) {
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
__mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ);
|
||||
Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(),
|
||||
_mm512_rsqrt14_ps(_x));
|
||||
Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_rsqrt14_ps(_x), _mm512_setzero_ps());
|
||||
|
||||
// Fill in NaNs and Infs for the negative/zero entries.
|
||||
__mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ);
|
||||
Packet16f infs_and_nans = _mm512_mask_blend_ps(
|
||||
neg_mask, p16f_nan,
|
||||
_mm512_mask_blend_ps(le_zero_mask, p16f_inf, _mm512_setzero_ps()));
|
||||
neg_mask, _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), p16f_inf), p16f_nan);
|
||||
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
|
||||
|
||||
// Insert NaNs and Infs in all the right places.
|
||||
return _mm512_mask_blend_ps(le_zero_mask, infs_and_nans, x);
|
||||
return _mm512_mask_blend_ps(le_zero_mask, x, infs_and_nans);
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -363,14 +360,12 @@ prsqrt<Packet8d>(const Packet8d& _x) {
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
__mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ);
|
||||
Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(),
|
||||
_mm512_rsqrt14_pd(_x));
|
||||
Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_rsqrt14_pd(_x), _mm512_setzero_pd());
|
||||
|
||||
// Fill in NaNs and Infs for the negative/zero entries.
|
||||
__mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ);
|
||||
Packet8d infs_and_nans = _mm512_mask_blend_pd(
|
||||
neg_mask, p8d_nan,
|
||||
_mm512_mask_blend_pd(le_zero_mask, p8d_inf, _mm512_setzero_pd()));
|
||||
neg_mask, _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), p8d_inf), p8d_nan);
|
||||
|
||||
// Do a first step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
|
||||
@@ -379,9 +374,9 @@ prsqrt<Packet8d>(const Packet8d& _x) {
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
|
||||
|
||||
// Insert NaNs and Infs in all the right places.
|
||||
return _mm512_mask_blend_pd(le_zero_mask, infs_and_nans, x);
|
||||
return _mm512_mask_blend_pd(le_zero_mask, x, infs_and_nans);
|
||||
}
|
||||
#else
|
||||
#elif defined(EIGEN_VECTORIZE_AVX512ER)
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
|
||||
return _mm512_rsqrt28_ps(x);
|
||||
|
||||
@@ -618,9 +618,9 @@ EIGEN_STRONG_INLINE void pstore1<Packet16i>(int* to, const int& a) {
|
||||
pstore(to, pa);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) {
|
||||
@@ -648,13 +648,13 @@ template<> EIGEN_STRONG_INLINE Packet8d preverse(const Packet8d& a)
|
||||
template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a)
|
||||
{
|
||||
// _mm512_abs_ps intrinsic not found, so hack around it
|
||||
return (__m512)_mm512_and_si512((__m512i)a, _mm512_set1_epi32(0x7fffffff));
|
||||
return _mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(a), _mm512_set1_epi32(0x7fffffff)));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {
|
||||
// _mm512_abs_ps intrinsic not found, so hack around it
|
||||
return (__m512d)_mm512_and_si512((__m512i)a,
|
||||
_mm512_set1_epi64(0x7fffffffffffffff));
|
||||
return _mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(a),
|
||||
_mm512_set1_epi64(0x7fffffffffffffff)));
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_AVX512DQ
|
||||
|
||||
@@ -15,14 +15,14 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
||||
static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
||||
#ifdef __VSX__
|
||||
#if defined(_BIG_ENDIAN)
|
||||
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
#else
|
||||
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -65,7 +65,7 @@ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type;
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
Packet2cf res;
|
||||
if((ptrdiff_t(&from) % 16) == 0)
|
||||
if((std::ptrdiff_t(&from) % 16) == 0)
|
||||
res.v = pload<Packet4f>((const float *)&from);
|
||||
else
|
||||
res.v = ploadu<Packet4f>((const float *)&from);
|
||||
@@ -224,23 +224,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4f, Packet2cf, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
|
||||
{ return Packet2cf(internal::pmul<Packet4f>(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet4f, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
|
||||
{ return Packet2cf(internal::pmul<Packet4f>(x.v, y)); }
|
||||
};
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
@@ -416,23 +400,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
|
||||
return pconj(internal::pmul(a, b));
|
||||
}
|
||||
};
|
||||
template<> struct conj_helper<Packet2d, Packet1cd, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
|
||||
{ return Packet1cd(internal::pmul<Packet2d>(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet2d, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
|
||||
{ return Packet1cd(internal::pmul<Packet2d>(x.v, y)); }
|
||||
};
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
|
||||
@@ -84,8 +84,10 @@ static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
|
||||
#ifdef __POWER8_VECTOR__
|
||||
static Packet2l p2l_1023 = { 1023, 1023 };
|
||||
static Packet2ul p2ul_52 = { 52, 52 };
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -72,7 +72,7 @@ static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}
|
||||
static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000}
|
||||
static Packet4f p4f_MZERO = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000}
|
||||
#ifndef __VSX__
|
||||
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}
|
||||
#endif
|
||||
@@ -90,7 +90,7 @@ static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
|
||||
#define _EIGEN_MASK_ALIGNMENT 0xfffffff0
|
||||
#endif
|
||||
|
||||
#define _EIGEN_ALIGNED_PTR(x) ((ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
|
||||
#define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
|
||||
|
||||
// Handle endianness properly while loading constants
|
||||
// Define global static constants:
|
||||
@@ -103,7 +103,7 @@ static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4u
|
||||
static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
|
||||
static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
|
||||
#else
|
||||
static Packet16uc p16uc_FORWARD = p16uc_REVERSE32;
|
||||
static Packet16uc p16uc_FORWARD = p16uc_REVERSE32;
|
||||
static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
|
||||
static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
|
||||
static Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
|
||||
@@ -358,7 +358,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return p4i_
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b, p4f_ZERO); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b, p4f_MZERO); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return a * b; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
@@ -373,7 +373,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const
|
||||
t = vec_nmsub(y_0, b, p4f_ONE);
|
||||
y_1 = vec_madd(y_0, t, y_0);
|
||||
|
||||
return vec_madd(a, y_1, p4f_ZERO);
|
||||
return vec_madd(a, y_1, p4f_MZERO);
|
||||
#else
|
||||
return vec_div(a, b);
|
||||
#endif
|
||||
@@ -388,10 +388,28 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
#ifdef __VSX__
|
||||
Packet4f ret;
|
||||
__asm__ ("xvcmpgesp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
|
||||
return ret;
|
||||
#else
|
||||
return vec_min(a, b);
|
||||
#endif
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
#ifdef __VSX__
|
||||
Packet4f ret;
|
||||
__asm__ ("xvcmpgtsp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
|
||||
return ret;
|
||||
#else
|
||||
return vec_max(a, b);
|
||||
#endif
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
|
||||
@@ -450,15 +468,15 @@ template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
|
||||
{
|
||||
Packet4f p;
|
||||
if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from);
|
||||
else p = ploadu<Packet4f>(from);
|
||||
if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from);
|
||||
else p = ploadu<Packet4f>(from);
|
||||
return vec_perm(p, p, p16uc_DUPLICATE32_HI);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
|
||||
{
|
||||
Packet4i p;
|
||||
if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from);
|
||||
else p = ploadu<Packet4i>(from);
|
||||
if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from);
|
||||
else p = ploadu<Packet4i>(from);
|
||||
return vec_perm(p, p, p16uc_DUPLICATE32_HI);
|
||||
}
|
||||
|
||||
@@ -764,9 +782,9 @@ typedef __vector __bool long Packet2bl;
|
||||
|
||||
static Packet2l p2l_ONE = { 1, 1 };
|
||||
static Packet2l p2l_ZERO = reinterpret_cast<Packet2l>(p4i_ZERO);
|
||||
static Packet2d p2d_ONE = { 1.0, 1.0 };
|
||||
static Packet2d p2d_ONE = { 1.0, 1.0 };
|
||||
static Packet2d p2d_ZERO = reinterpret_cast<Packet2d>(p4f_ZERO);
|
||||
static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
|
||||
static Packet2d p2d_MZERO = { -0.0, -0.0 };
|
||||
|
||||
#ifdef _BIG_ENDIAN
|
||||
static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ZERO), reinterpret_cast<Packet4f>(p2d_ONE), 8));
|
||||
@@ -904,15 +922,25 @@ template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return p2d_
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_ZERO); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_MZERO); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_div(a,b); }
|
||||
|
||||
// for some weird raisons, it has to be overloaded for packet of integers
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b)
|
||||
{
|
||||
Packet2d ret;
|
||||
__asm__ ("xvcmpgedp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b)
|
||||
{
|
||||
Packet2d ret;
|
||||
__asm__ ("xvcmpgtdp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
|
||||
|
||||
@@ -935,8 +963,8 @@ template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
|
||||
{
|
||||
Packet2d p;
|
||||
if((ptrdiff_t(from) % 16) == 0) p = pload<Packet2d>(from);
|
||||
else p = ploadu<Packet2d>(from);
|
||||
if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet2d>(from);
|
||||
else p = ploadu<Packet2d>(from);
|
||||
return vec_splat_dbl<0>(p);
|
||||
}
|
||||
|
||||
@@ -969,7 +997,7 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
Packet2d v[2], sum;
|
||||
v[0] = vecs[0] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[0]), reinterpret_cast<Packet4f>(vecs[0]), 8));
|
||||
v[1] = vecs[1] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[1]), reinterpret_cast<Packet4f>(vecs[1]), 8));
|
||||
|
||||
|
||||
#ifdef _BIG_ENDIAN
|
||||
sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[0]), reinterpret_cast<Packet4f>(v[1]), 8));
|
||||
#else
|
||||
@@ -1022,7 +1050,7 @@ ptranspose(PacketBlock<Packet2d,2>& kernel) {
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
|
||||
Packet2l select = { ifPacket.select[0], ifPacket.select[1] };
|
||||
Packet2bl mask = vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE));
|
||||
Packet2bl mask = reinterpret_cast<Packet2bl>( vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE)) );
|
||||
return vec_sel(elsePacket, thenPacket, mask);
|
||||
}
|
||||
#endif // __VSX__
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted.
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
@@ -29,7 +29,7 @@
|
||||
// type Eigen::half (inheriting from CUDA's __half struct) with
|
||||
// operator overloads such that it behaves basically as an arithmetic
|
||||
// type. It will be quite slow on CPUs (so it is recommended to stay
|
||||
// in fp32 for CPUs, except for simple parameter conversions, I/O
|
||||
// in float32_bits for CPUs, except for simple parameter conversions, I/O
|
||||
// to disk and the likes), but fast on GPUs.
|
||||
|
||||
|
||||
@@ -50,38 +50,45 @@ struct half;
|
||||
namespace half_impl {
|
||||
|
||||
#if !defined(EIGEN_HAS_CUDA_FP16)
|
||||
|
||||
// Make our own __half definition that is similar to CUDA's.
|
||||
struct __half {
|
||||
EIGEN_DEVICE_FUNC __half() {}
|
||||
explicit EIGEN_DEVICE_FUNC __half(unsigned short raw) : x(raw) {}
|
||||
// Make our own __half_raw definition that is similar to CUDA's.
|
||||
struct __half_raw {
|
||||
EIGEN_DEVICE_FUNC __half_raw() : x(0) {}
|
||||
explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
|
||||
unsigned short x;
|
||||
};
|
||||
|
||||
#elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
|
||||
// In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw
|
||||
typedef __half __half_raw;
|
||||
#endif
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x);
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff);
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h);
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x);
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff);
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h);
|
||||
|
||||
struct half_base : public __half {
|
||||
struct half_base : public __half_raw {
|
||||
EIGEN_DEVICE_FUNC half_base() {}
|
||||
EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half(h) {}
|
||||
EIGEN_DEVICE_FUNC half_base(const __half& h) : __half(h) {}
|
||||
EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half_raw(h) {}
|
||||
EIGEN_DEVICE_FUNC half_base(const __half_raw& h) : __half_raw(h) {}
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
|
||||
EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {}
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace half_impl
|
||||
|
||||
// Class definition.
|
||||
struct half : public half_impl::half_base {
|
||||
#if !defined(EIGEN_HAS_CUDA_FP16)
|
||||
typedef half_impl::__half __half;
|
||||
#if !defined(EIGEN_HAS_CUDA_FP16) || (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000)
|
||||
typedef half_impl::__half_raw __half_raw;
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC half() {}
|
||||
|
||||
EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
|
||||
EIGEN_DEVICE_FUNC half(const __half_raw& h) : half_impl::half_base(h) {}
|
||||
EIGEN_DEVICE_FUNC half(const half& h) : half_impl::half_base(h) {}
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
|
||||
EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
|
||||
#endif
|
||||
|
||||
explicit EIGEN_DEVICE_FUNC half(bool b)
|
||||
: half_impl::half_base(half_impl::raw_uint16_to_half(b ? 0x3c00 : 0)) {}
|
||||
@@ -138,71 +145,125 @@ struct half : public half_impl::half_base {
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct numeric_limits<Eigen::half> {
|
||||
static const bool is_specialized = true;
|
||||
static const bool is_signed = true;
|
||||
static const bool is_integer = false;
|
||||
static const bool is_exact = false;
|
||||
static const bool has_infinity = true;
|
||||
static const bool has_quiet_NaN = true;
|
||||
static const bool has_signaling_NaN = true;
|
||||
static const float_denorm_style has_denorm = denorm_present;
|
||||
static const bool has_denorm_loss = false;
|
||||
static const std::float_round_style round_style = std::round_to_nearest;
|
||||
static const bool is_iec559 = false;
|
||||
static const bool is_bounded = false;
|
||||
static const bool is_modulo = false;
|
||||
static const int digits = 11;
|
||||
static const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
|
||||
static const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
|
||||
static const int radix = 2;
|
||||
static const int min_exponent = -13;
|
||||
static const int min_exponent10 = -4;
|
||||
static const int max_exponent = 16;
|
||||
static const int max_exponent10 = 4;
|
||||
static const bool traps = true;
|
||||
static const bool tinyness_before = false;
|
||||
|
||||
static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); }
|
||||
static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }
|
||||
static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }
|
||||
static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); }
|
||||
static Eigen::half round_error() { return Eigen::half(0.5); }
|
||||
static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }
|
||||
static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
|
||||
static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
|
||||
static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); }
|
||||
};
|
||||
|
||||
// If std::numeric_limits<T> is specialized, should also specialize
|
||||
// std::numeric_limits<const T>, std::numeric_limits<volatile T>, and
|
||||
// std::numeric_limits<const volatile T>
|
||||
// https://stackoverflow.com/a/16519653/
|
||||
template<>
|
||||
struct numeric_limits<const Eigen::half> : numeric_limits<Eigen::half> {};
|
||||
template<>
|
||||
struct numeric_limits<volatile Eigen::half> : numeric_limits<Eigen::half> {};
|
||||
template<>
|
||||
struct numeric_limits<const volatile Eigen::half> : numeric_limits<Eigen::half> {};
|
||||
} // end namespace std
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace half_impl {
|
||||
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
|
||||
|
||||
// Intrinsics for native fp16 support. Note that on current hardware,
|
||||
// these are no faster than fp32 arithmetic (you need to use the half2
|
||||
// these are no faster than float32_bits arithmetic (you need to use the half2
|
||||
// versions to get the ALU speed increased), but you do save the
|
||||
// conversion steps back and forth.
|
||||
|
||||
__device__ half operator + (const half& a, const half& b) {
|
||||
EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) {
|
||||
return __hadd(a, b);
|
||||
}
|
||||
__device__ half operator * (const half& a, const half& b) {
|
||||
EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) {
|
||||
return __hmul(a, b);
|
||||
}
|
||||
__device__ half operator - (const half& a, const half& b) {
|
||||
EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) {
|
||||
return __hsub(a, b);
|
||||
}
|
||||
__device__ half operator / (const half& a, const half& b) {
|
||||
EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {
|
||||
float num = __half2float(a);
|
||||
float denom = __half2float(b);
|
||||
return __float2half(num / denom);
|
||||
}
|
||||
__device__ half operator - (const half& a) {
|
||||
EIGEN_STRONG_INLINE __device__ half operator - (const half& a) {
|
||||
return __hneg(a);
|
||||
}
|
||||
__device__ half& operator += (half& a, const half& b) {
|
||||
EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) {
|
||||
a = a + b;
|
||||
return a;
|
||||
}
|
||||
__device__ half& operator *= (half& a, const half& b) {
|
||||
EIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) {
|
||||
a = a * b;
|
||||
return a;
|
||||
}
|
||||
__device__ half& operator -= (half& a, const half& b) {
|
||||
EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) {
|
||||
a = a - b;
|
||||
return a;
|
||||
}
|
||||
__device__ half& operator /= (half& a, const half& b) {
|
||||
EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) {
|
||||
a = a / b;
|
||||
return a;
|
||||
}
|
||||
__device__ bool operator == (const half& a, const half& b) {
|
||||
EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) {
|
||||
return __heq(a, b);
|
||||
}
|
||||
__device__ bool operator != (const half& a, const half& b) {
|
||||
EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) {
|
||||
return __hne(a, b);
|
||||
}
|
||||
__device__ bool operator < (const half& a, const half& b) {
|
||||
EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) {
|
||||
return __hlt(a, b);
|
||||
}
|
||||
__device__ bool operator <= (const half& a, const half& b) {
|
||||
EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) {
|
||||
return __hle(a, b);
|
||||
}
|
||||
__device__ bool operator > (const half& a, const half& b) {
|
||||
EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) {
|
||||
return __hgt(a, b);
|
||||
}
|
||||
__device__ bool operator >= (const half& a, const half& b) {
|
||||
EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) {
|
||||
return __hge(a, b);
|
||||
}
|
||||
|
||||
#else // Emulate support for half floats
|
||||
|
||||
// Definitions for CPUs and older CUDA, mostly working through conversion
|
||||
// to/from fp32.
|
||||
// to/from float32_bits.
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {
|
||||
return half(float(a) + float(b));
|
||||
@@ -238,10 +299,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b)
|
||||
return a;
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
|
||||
return float(a) == float(b);
|
||||
return numext::equal_strict(float(a),float(b));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
|
||||
return float(a) != float(b);
|
||||
return numext::not_equal_strict(float(a), float(b));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
|
||||
return float(a) < float(b);
|
||||
@@ -269,34 +330,35 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) {
|
||||
// these in hardware. If we need more performance on older/other CPUs, they are
|
||||
// also possible to vectorize directly.
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x) {
|
||||
__half h;
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x) {
|
||||
__half_raw h;
|
||||
h.x = x;
|
||||
return h;
|
||||
}
|
||||
|
||||
union FP32 {
|
||||
union float32_bits {
|
||||
unsigned int u;
|
||||
float f;
|
||||
};
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||
return __float2half(ff);
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff) {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
|
||||
__half tmp_ff = __float2half(ff);
|
||||
return *(__half_raw*)&tmp_ff;
|
||||
|
||||
#elif defined(EIGEN_HAS_FP16_C)
|
||||
__half h;
|
||||
__half_raw h;
|
||||
h.x = _cvtss_sh(ff, 0);
|
||||
return h;
|
||||
|
||||
#else
|
||||
FP32 f; f.f = ff;
|
||||
float32_bits f; f.f = ff;
|
||||
|
||||
const FP32 f32infty = { 255 << 23 };
|
||||
const FP32 f16max = { (127 + 16) << 23 };
|
||||
const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
|
||||
const float32_bits f32infty = { 255 << 23 };
|
||||
const float32_bits f16max = { (127 + 16) << 23 };
|
||||
const float32_bits denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
|
||||
unsigned int sign_mask = 0x80000000u;
|
||||
__half o;
|
||||
__half_raw o;
|
||||
o.x = static_cast<unsigned short>(0x0u);
|
||||
|
||||
unsigned int sign = f.u & sign_mask;
|
||||
@@ -335,17 +397,17 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h) {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
|
||||
return __half2float(h);
|
||||
|
||||
#elif defined(EIGEN_HAS_FP16_C)
|
||||
return _cvtsh_ss(h.x);
|
||||
|
||||
#else
|
||||
const FP32 magic = { 113 << 23 };
|
||||
const float32_bits magic = { 113 << 23 };
|
||||
const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
|
||||
FP32 o;
|
||||
float32_bits o;
|
||||
|
||||
o.u = (h.x & 0x7fff) << 13; // exponent/mantissa bits
|
||||
unsigned int exp = shifted_exp & o.u; // just the exponent
|
||||
@@ -370,7 +432,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) {
|
||||
return (a.x & 0x7fff) == 0x7c00;
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
|
||||
return __hisnan(a);
|
||||
#else
|
||||
return (a.x & 0x7fff) > 0x7c00;
|
||||
@@ -386,11 +448,15 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
|
||||
return result;
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
|
||||
return half(::expf(float(a)));
|
||||
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
|
||||
return half(hexp(a));
|
||||
#else
|
||||
return half(::expf(float(a)));
|
||||
#endif
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
|
||||
return Eigen::half(::hlog(a));
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
|
||||
return half(::hlog(a));
|
||||
#else
|
||||
return half(::logf(float(a)));
|
||||
#endif
|
||||
@@ -402,7 +468,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
|
||||
return half(::log10f(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
|
||||
return half(::sqrtf(float(a)));
|
||||
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
|
||||
return half(hsqrt(a));
|
||||
#else
|
||||
return half(::sqrtf(float(a)));
|
||||
#endif
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) {
|
||||
return half(::powf(float(a), float(b)));
|
||||
@@ -420,14 +490,22 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
|
||||
return half(::tanhf(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
|
||||
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
|
||||
return half(hfloor(a));
|
||||
#else
|
||||
return half(::floorf(float(a)));
|
||||
#endif
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
|
||||
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
|
||||
return half(hceil(a));
|
||||
#else
|
||||
return half(::ceilf(float(a)));
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
|
||||
return __hlt(b, a) ? b : a;
|
||||
#else
|
||||
const float f1 = static_cast<float>(a);
|
||||
@@ -436,7 +514,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
|
||||
#endif
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
|
||||
return __hlt(a, b) ? b : a;
|
||||
#else
|
||||
const float f1 = static_cast<float>(a);
|
||||
@@ -477,6 +555,13 @@ template<> struct is_arithmetic<half> { enum { value = true }; };
|
||||
template<> struct NumTraits<Eigen::half>
|
||||
: GenericNumTraits<Eigen::half>
|
||||
{
|
||||
enum {
|
||||
IsSigned = true,
|
||||
IsInteger = false,
|
||||
IsComplex = false,
|
||||
RequireInitialization = false
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
|
||||
return half_impl::raw_uint16_to_half(0x0800);
|
||||
}
|
||||
@@ -507,7 +592,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
|
||||
return Eigen::half(::expf(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
|
||||
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
|
||||
#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
|
||||
return Eigen::half(::hlog(a));
|
||||
#else
|
||||
return Eigen::half(::logf(float(a)));
|
||||
@@ -541,14 +626,18 @@ struct hash<Eigen::half> {
|
||||
|
||||
|
||||
// Add the missing shfl_xor intrinsic
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
|
||||
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
|
||||
#if EIGEN_CUDACC_VER < 90000
|
||||
return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
|
||||
#else
|
||||
return static_cast<Eigen::half>(__shfl_xor_sync(0xFFFFFFFF, static_cast<float>(var), laneMask, width));
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
// ldg() has an overload for __half, but we also need one for Eigen::half.
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
||||
// ldg() has an overload for __half_raw, but we also need one for Eigen::half.
|
||||
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
|
||||
return Eigen::half_impl::raw_uint16_to_half(
|
||||
__ldg(reinterpret_cast<const unsigned short*>(ptr)));
|
||||
@@ -556,7 +645,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__CUDA_ARCH__)
|
||||
#if defined(EIGEN_CUDA_ARCH)
|
||||
namespace Eigen {
|
||||
namespace numext {
|
||||
|
||||
|
||||
@@ -291,7 +291,7 @@ template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<float4,4>& kernel) {
|
||||
double tmp = kernel.packet[0].y;
|
||||
float tmp = kernel.packet[0].y;
|
||||
kernel.packet[0].y = kernel.packet[1].x;
|
||||
kernel.packet[1].x = tmp;
|
||||
|
||||
|
||||
@@ -99,7 +99,8 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half pfirst<half2>(const half2&
|
||||
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& a) {
|
||||
half2 result;
|
||||
result.x = a.x & 0x7FFF7FFF;
|
||||
unsigned temp = *(reinterpret_cast<const unsigned*>(&(a)));
|
||||
*(reinterpret_cast<unsigned*>(&(result))) = temp & 0x7FFF7FFF;
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -275,7 +276,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
|
||||
return __floats2half2_rn(r1, r2);
|
||||
}
|
||||
|
||||
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
|
||||
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
|
||||
|
||||
template<> __device__ EIGEN_STRONG_INLINE
|
||||
half2 plog<half2>(const half2& a) {
|
||||
|
||||
29
Eigen/src/Core/arch/Default/ConjHelper.h
Normal file
29
Eigen/src/Core/arch/Default/ConjHelper.h
Normal file
@@ -0,0 +1,29 @@
|
||||
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_ARCH_CONJ_HELPER_H
|
||||
#define EIGEN_ARCH_CONJ_HELPER_H
|
||||
|
||||
#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL) \
|
||||
template<> struct conj_helper<PACKET_REAL, PACKET_CPLX, false,false> { \
|
||||
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, const PACKET_CPLX& y, const PACKET_CPLX& c) const \
|
||||
{ return padd(c, pmul(x,y)); } \
|
||||
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, const PACKET_CPLX& y) const \
|
||||
{ return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v)); } \
|
||||
}; \
|
||||
\
|
||||
template<> struct conj_helper<PACKET_CPLX, PACKET_REAL, false,false> { \
|
||||
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, const PACKET_REAL& y, const PACKET_CPLX& c) const \
|
||||
{ return padd(c, pmul(x,y)); } \
|
||||
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, const PACKET_REAL& y) const \
|
||||
{ return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y)); } \
|
||||
};
|
||||
|
||||
#endif // EIGEN_ARCH_CONJ_HELPER_H
|
||||
@@ -67,7 +67,7 @@ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type;
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
float32x2_t r64;
|
||||
r64 = vld1_f32((float *)&from);
|
||||
r64 = vld1_f32((const float *)&from);
|
||||
|
||||
return Packet2cf(vcombine_f32(r64, r64));
|
||||
}
|
||||
@@ -142,7 +142,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
|
||||
to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((const float *)addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
@@ -265,6 +265,8 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
|
||||
}
|
||||
};
|
||||
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
// TODO optimize it for NEON
|
||||
@@ -275,7 +277,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
|
||||
s = vmulq_f32(b.v, b.v);
|
||||
rev_s = vrev64q_f32(s);
|
||||
|
||||
return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
|
||||
return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s,rev_s)));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
@@ -381,7 +383,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((double *)addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((const double *)addr); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
|
||||
{
|
||||
@@ -456,6 +458,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
|
||||
}
|
||||
};
|
||||
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
// TODO optimize it for NEON
|
||||
|
||||
@@ -28,11 +28,42 @@ namespace internal {
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#endif
|
||||
|
||||
// FIXME NEON has 16 quad registers, but since the current register allocator
|
||||
// is so bad, it is much better to reduce it to 8
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#if EIGEN_ARCH_ARM64
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
||||
#else
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if EIGEN_COMP_MSVC
|
||||
|
||||
// In MSVC's arm_neon.h header file, all NEON vector types
|
||||
// are aliases to the same underlying type __n128.
|
||||
// We thus have to wrap them to make them different C++ types.
|
||||
// (See also bug 1428)
|
||||
|
||||
template<typename T,int unique_id>
|
||||
struct eigen_packet_wrapper
|
||||
{
|
||||
operator T&() { return m_val; }
|
||||
operator const T&() const { return m_val; }
|
||||
eigen_packet_wrapper() {}
|
||||
eigen_packet_wrapper(const T &v) : m_val(v) {}
|
||||
eigen_packet_wrapper& operator=(const T &v) {
|
||||
m_val = v;
|
||||
return *this;
|
||||
}
|
||||
|
||||
T m_val;
|
||||
};
|
||||
typedef eigen_packet_wrapper<float32x2_t,0> Packet2f;
|
||||
typedef eigen_packet_wrapper<float32x4_t,1> Packet4f;
|
||||
typedef eigen_packet_wrapper<int32x4_t ,2> Packet4i;
|
||||
typedef eigen_packet_wrapper<int32x2_t ,3> Packet2i;
|
||||
typedef eigen_packet_wrapper<uint32x4_t ,4> Packet4ui;
|
||||
|
||||
#else
|
||||
|
||||
typedef float32x2_t Packet2f;
|
||||
typedef float32x4_t Packet4f;
|
||||
@@ -40,23 +71,28 @@ typedef int32x4_t Packet4i;
|
||||
typedef int32x2_t Packet2i;
|
||||
typedef uint32x4_t Packet4ui;
|
||||
|
||||
#endif // EIGEN_COMP_MSVC
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
|
||||
const Packet4f p4f_##NAME = pset1<Packet4f>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
|
||||
const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int>(X))
|
||||
const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int32_t>(X))
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
|
||||
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
|
||||
|
||||
// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function
|
||||
// which available on LLVM and GCC (at least)
|
||||
#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
|
||||
#if EIGEN_ARCH_ARM64
|
||||
// __builtin_prefetch tends to do nothing on ARM64 compilers because the
|
||||
// prefetch instructions there are too detailed for __builtin_prefetch to map
|
||||
// meaningfully to them.
|
||||
#define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__("prfm pldl1keep, [%[addr]]\n" ::[addr] "r"(ADDR) : );
|
||||
#elif EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
|
||||
#define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
|
||||
#elif defined __pld
|
||||
#define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
|
||||
#elif !EIGEN_ARCH_ARM64
|
||||
#define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
|
||||
#elif EIGEN_ARCH_ARM32
|
||||
#define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ("pld [%[addr]]\n" :: [addr] "r" (ADDR) : );
|
||||
#else
|
||||
// by default no explicit prefetching
|
||||
#define EIGEN_ARM_PREFETCH(ADDR)
|
||||
@@ -81,7 +117,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasSqrt = 0
|
||||
};
|
||||
};
|
||||
template<> struct packet_traits<int> : default_packet_traits
|
||||
template<> struct packet_traits<int32_t> : default_packet_traits
|
||||
{
|
||||
typedef Packet4i type;
|
||||
typedef Packet4i half; // Packet2i intrinsics not implemented yet
|
||||
@@ -103,19 +139,19 @@ EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q
|
||||
EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
|
||||
#endif
|
||||
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int32_t type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) { return vdupq_n_s32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)
|
||||
{
|
||||
const float32_t f[] = {0, 1, 2, 3};
|
||||
const float f[] = {0, 1, 2, 3};
|
||||
Packet4f countdown = vld1q_f32(f);
|
||||
return vaddq_f32(pset1<Packet4f>(a), countdown);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a)
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int32_t& a)
|
||||
{
|
||||
const int32_t i[] = {0, 1, 2, 3};
|
||||
Packet4i countdown = vld1q_s32(i);
|
||||
@@ -238,20 +274,20 @@ template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, con
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int32_t* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
|
||||
{
|
||||
float32x2_t lo, hi;
|
||||
lo = vld1_dup_f32(from);
|
||||
hi = vld1_dup_f32(from+1);
|
||||
return vcombine_f32(lo, hi);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int32_t* from)
|
||||
{
|
||||
int32x2_t lo, hi;
|
||||
lo = vld1_dup_s32(from);
|
||||
@@ -259,11 +295,11 @@ template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
|
||||
return vcombine_s32(lo, hi);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<float> (float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float> (float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
|
||||
{
|
||||
@@ -274,7 +310,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const floa
|
||||
res = vsetq_lane_f32(from[3*stride], res, 3);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int32_t, Packet4i>(const int32_t* from, Index stride)
|
||||
{
|
||||
Packet4i res = pset1<Packet4i>(0);
|
||||
res = vsetq_lane_s32(from[0*stride], res, 0);
|
||||
@@ -291,7 +327,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, co
|
||||
to[stride*2] = vgetq_lane_f32(from, 2);
|
||||
to[stride*3] = vgetq_lane_f32(from, 3);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<int32_t, Packet4i>(int32_t* to, const Packet4i& from, Index stride)
|
||||
{
|
||||
to[stride*0] = vgetq_lane_s32(from, 0);
|
||||
to[stride*1] = vgetq_lane_s32(from, 1);
|
||||
@@ -299,12 +335,12 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const
|
||||
to[stride*3] = vgetq_lane_s32(from, 3);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ARM_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ARM_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float> (const float* addr) { EIGEN_ARM_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t* addr) { EIGEN_ARM_PREFETCH(addr); }
|
||||
|
||||
// FIXME only store the 2 first elements ?
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) { int32_t EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
|
||||
float32x2_t a_lo, a_hi;
|
||||
@@ -359,7 +395,7 @@ template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
|
||||
return sum;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
|
||||
template<> EIGEN_STRONG_INLINE int32_t predux<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
int32x2_t a_lo, a_hi, sum;
|
||||
|
||||
@@ -406,7 +442,7 @@ template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
|
||||
|
||||
return vget_lane_f32(prod, 0);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
|
||||
template<> EIGEN_STRONG_INLINE int32_t predux_mul<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
int32x2_t a_lo, a_hi, prod;
|
||||
|
||||
@@ -434,7 +470,7 @@ template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
|
||||
return vget_lane_f32(min, 0);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
|
||||
template<> EIGEN_STRONG_INLINE int32_t predux_min<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
int32x2_t a_lo, a_hi, min;
|
||||
|
||||
@@ -459,7 +495,7 @@ template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
|
||||
return vget_lane_f32(max, 0);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
|
||||
template<> EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
int32x2_t a_lo, a_hi, max;
|
||||
|
||||
|
||||
@@ -128,7 +128,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
@@ -229,23 +229,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4f, Packet2cf, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
|
||||
{ return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet4f, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
|
||||
{ return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); }
|
||||
};
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
@@ -340,7 +324,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
@@ -430,23 +414,7 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2d, Packet1cd, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
|
||||
{ return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet2d, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
|
||||
{ return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); }
|
||||
};
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
|
||||
@@ -28,7 +28,7 @@ namespace internal {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if (defined EIGEN_VECTORIZE_AVX) && EIGEN_COMP_GNUC_STRICT && (__GXX_ABI_VERSION < 1004)
|
||||
#if ((defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)) || EIGEN_OS_QNX
|
||||
// With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
|
||||
// have overloads for both types without linking error.
|
||||
// One solution is to increase ABI version using -fabi-version=4 (or greater).
|
||||
@@ -409,10 +409,16 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double&
|
||||
pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
|
||||
}
|
||||
|
||||
#if EIGEN_COMP_PGI
|
||||
typedef const void * SsePrefetchPtrType;
|
||||
#else
|
||||
typedef const char * SsePrefetchPtrType;
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
#endif
|
||||
|
||||
#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
|
||||
@@ -504,30 +510,13 @@ template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
|
||||
{
|
||||
return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
{
|
||||
return _mm_hadd_pd(vecs[0], vecs[1]);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f tmp0 = _mm_hadd_ps(a,a);
|
||||
return pfirst<Packet4f>(_mm_hadd_ps(tmp0, tmp0));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return pfirst<Packet2d>(_mm_hadd_pd(a, a)); }
|
||||
#else
|
||||
// SSE2 versions
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
|
||||
return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
|
||||
{
|
||||
Packet4f tmp0, tmp1, tmp2;
|
||||
@@ -548,6 +537,29 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
}
|
||||
#endif // SSE3
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
// Disable SSE3 _mm_hadd_pd that is extremely slow on all existing Intel's architectures
|
||||
// (from Nehalem to Haswell)
|
||||
// #ifdef EIGEN_VECTORIZE_SSE3
|
||||
// Packet4f tmp = _mm_add_ps(a, vec4f_swizzle1(a,2,3,2,3));
|
||||
// return pfirst<Packet4f>(_mm_hadd_ps(tmp, tmp));
|
||||
// #else
|
||||
Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
|
||||
return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
// #endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
// Disable SSE3 _mm_hadd_pd that is extremely slow on all existing Intel's architectures
|
||||
// (from Nehalem to Haswell)
|
||||
// #ifdef EIGEN_VECTORIZE_SSE3
|
||||
// return pfirst<Packet2d>(_mm_hadd_pd(a, a));
|
||||
// #else
|
||||
return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
// #endif
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSSE3
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
|
||||
@@ -870,4 +882,14 @@ template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, co
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#if EIGEN_COMP_PGI
|
||||
// PGI++ does not define the following intrinsics in C++ mode.
|
||||
static inline __m128 _mm_castpd_ps (__m128d x) { return reinterpret_cast<__m128&>(x); }
|
||||
static inline __m128i _mm_castpd_si128(__m128d x) { return reinterpret_cast<__m128i&>(x); }
|
||||
static inline __m128d _mm_castps_pd (__m128 x) { return reinterpret_cast<__m128d&>(x); }
|
||||
static inline __m128i _mm_castps_si128(__m128 x) { return reinterpret_cast<__m128i&>(x); }
|
||||
static inline __m128 _mm_castsi128_ps(__m128i x) { return reinterpret_cast<__m128&>(x); }
|
||||
static inline __m128d _mm_castsi128_pd(__m128i x) { return reinterpret_cast<__m128d&>(x); }
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_PACKET_MATH_SSE_H
|
||||
|
||||
@@ -14,6 +14,7 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template <>
|
||||
struct type_casting_traits<float, int> {
|
||||
enum {
|
||||
@@ -23,11 +24,6 @@ struct type_casting_traits<float, int> {
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
|
||||
return _mm_cvttps_epi32(a);
|
||||
}
|
||||
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<int, float> {
|
||||
enum {
|
||||
@@ -37,11 +33,6 @@ struct type_casting_traits<int, float> {
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
|
||||
return _mm_cvtepi32_ps(a);
|
||||
}
|
||||
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<double, float> {
|
||||
enum {
|
||||
@@ -51,10 +42,6 @@ struct type_casting_traits<double, float> {
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
|
||||
return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
|
||||
}
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<float, double> {
|
||||
enum {
|
||||
@@ -63,6 +50,19 @@ struct type_casting_traits<float, double> {
|
||||
TgtCoeffRatio = 2
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
|
||||
return _mm_cvttps_epi32(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
|
||||
return _mm_cvtepi32_ps(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
|
||||
return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
|
||||
// Simply discard the second half of the input
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
@@ -24,13 +25,48 @@ struct Packet1cd
|
||||
Packet2d v;
|
||||
};
|
||||
|
||||
struct Packet2cf
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
||||
union {
|
||||
Packet4f v;
|
||||
Packet1cd cd[2];
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
{
|
||||
typedef Packet2cf type;
|
||||
typedef Packet2cf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 2,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasBlend = 1,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
{
|
||||
typedef Packet1cd type;
|
||||
typedef Packet1cd half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 0,
|
||||
AlignedOnScalar = 1,
|
||||
size = 1,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
@@ -47,20 +83,68 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
|
||||
|
||||
/* Forward declaration */
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel);
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
|
||||
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.cd[0] = Packet1cd(vec_ld2f((const float *)&from));
|
||||
res.cd[1] = res.cd[0];
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||
af[0] = from[0*stride];
|
||||
af[1] = from[1*stride];
|
||||
return pload<Packet2cf>(af);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride EIGEN_UNUSED)
|
||||
{
|
||||
return pload<Packet1cd>(from);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||
pstore<std::complex<float> >((std::complex<float> *) af, from);
|
||||
to[0*stride] = af[0];
|
||||
to[1*stride] = af[1];
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride EIGEN_UNUSED)
|
||||
{
|
||||
pstore<std::complex<double> >(to, from);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v, b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v, b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(Packet4f(a.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.v.v4f[0] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0]))).v;
|
||||
res.v.v4f[1] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1]))).v;
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
@@ -79,43 +163,90 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
|
||||
|
||||
return Packet1cd(v1 + v2);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
return pset1<Packet1cd>(*from);
|
||||
Packet2cf res;
|
||||
res.v.v4f[0] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[0]))).v;
|
||||
res.v.v4f[1] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[1]))).v;
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v,b.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
std::complex<double> EIGEN_ALIGN16 res[2];
|
||||
pstore<std::complex<double> >(res, a);
|
||||
std::complex<double> EIGEN_ALIGN16 res;
|
||||
pstore<std::complex<double> >(&res, a);
|
||||
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 res[2];
|
||||
pstore<std::complex<float> >(res, a);
|
||||
|
||||
return res[0];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.cd[0] = a.cd[1];
|
||||
res.cd[1] = a.cd[0];
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
return pfirst(a);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
std::complex<float> res;
|
||||
Packet1cd b = padd<Packet1cd>(a.cd[0], a.cd[1]);
|
||||
vec_st2f(b.v, (float*)&res);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
|
||||
{
|
||||
return vecs[0];
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
|
||||
{
|
||||
PacketBlock<Packet2cf,2> transpose;
|
||||
transpose.packet[0] = vecs[0];
|
||||
transpose.packet[1] = vecs[1];
|
||||
ptranspose(transpose);
|
||||
|
||||
return padd<Packet2cf>(transpose.packet[0], transpose.packet[1]);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
return pfirst(a);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
std::complex<float> res;
|
||||
Packet1cd b = pmul<Packet1cd>(a.cd[0], a.cd[1]);
|
||||
vec_st2f(b.v, (float*)&res);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet1cd>
|
||||
@@ -127,6 +258,18 @@ struct palign_impl<Offset,Packet1cd>
|
||||
}
|
||||
};
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2cf>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
|
||||
{
|
||||
if (Offset == 1) {
|
||||
first.cd[0] = first.cd[1];
|
||||
first.cd[1] = second.cd[0];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
@@ -160,6 +303,42 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return internal::pmul(a, pconj(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return internal::pmul(pconj(a), b);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return pconj(internal::pmul(a, b));
|
||||
}
|
||||
};
|
||||
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
// TODO optimize it for AltiVec
|
||||
@@ -168,17 +347,49 @@ template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, con
|
||||
return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
// TODO optimize it for AltiVec
|
||||
Packet2cf res;
|
||||
res.cd[0] = pdiv<Packet1cd>(a.cd[0], b.cd[0]);
|
||||
res.cd[1] = pdiv<Packet1cd>(a.cd[1], b.cd[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
|
||||
{
|
||||
return Packet1cd(preverse(Packet2d(x.v)));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.cd[0] = pcplxflip(x.cd[0]);
|
||||
res.cd[1] = pcplxflip(x.cd[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
|
||||
{
|
||||
Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
|
||||
kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
|
||||
kernel.packet[0].v = tmp;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
|
||||
{
|
||||
Packet1cd tmp = kernel.packet[0].cd[1];
|
||||
kernel.packet[0].cd[1] = kernel.packet[1].cd[0];
|
||||
kernel.packet[1].cd[0] = tmp;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
|
||||
Packet2cf result;
|
||||
const Selector<4> ifPacket4 = { ifPacket.select[0], ifPacket.select[0], ifPacket.select[1], ifPacket.select[1] };
|
||||
result.v = pblend<Packet4f>(ifPacket4, thenPacket.v, elsePacket.v);
|
||||
return result;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
//
|
||||
// Copyright (C) 2007 Julien Pommier
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
@@ -19,32 +20,32 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
{
|
||||
Packet2d x = _x;
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
|
||||
Packet2d tmp, fx;
|
||||
Packet2l emm0;
|
||||
|
||||
@@ -91,18 +92,44 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
isnumber_mask);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f pexp<Packet4f>(const Packet4f& x)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pexp<Packet2d>(x.v4f[0]);
|
||||
res.v4f[1] = pexp<Packet2d>(x.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d psqrt<Packet2d>(const Packet2d& x)
|
||||
{
|
||||
return __builtin_s390_vfsqdb(x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f psqrt<Packet4f>(const Packet4f& x)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = psqrt<Packet2d>(x.v4f[0]);
|
||||
res.v4f[1] = psqrt<Packet2d>(x.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d prsqrt<Packet2d>(const Packet2d& x) {
|
||||
// Unfortunately we can't use the much faster mm_rqsrt_pd since it only provides an approximation.
|
||||
return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f prsqrt<Packet4f>(const Packet4f& x) {
|
||||
Packet4f res;
|
||||
res.v4f[0] = prsqrt<Packet2d>(x.v4f[0]);
|
||||
res.v4f[1] = prsqrt<Packet2d>(x.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -28,9 +28,8 @@ namespace internal {
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#endif
|
||||
|
||||
// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
|
||||
#endif
|
||||
|
||||
typedef __vector int Packet4i;
|
||||
@@ -42,6 +41,10 @@ typedef __vector double Packet2d;
|
||||
typedef __vector unsigned long long Packet2ul;
|
||||
typedef __vector long long Packet2l;
|
||||
|
||||
typedef struct {
|
||||
Packet2d v4f[2];
|
||||
} Packet4f;
|
||||
|
||||
typedef union {
|
||||
int32_t i[4];
|
||||
uint32_t ui[4];
|
||||
@@ -88,6 +91,7 @@ static Packet2d p2d_ONE = { 1.0, 1.0 };
|
||||
static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
|
||||
|
||||
static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
|
||||
static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
|
||||
static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet16uc>(p2d_ZERO), reinterpret_cast<Packet16uc>(p2d_ONE), 8));
|
||||
|
||||
static Packet16uc p16uc_PSET64_HI = { 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
|
||||
@@ -96,7 +100,7 @@ static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
|
||||
// Mask alignment
|
||||
#define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0
|
||||
|
||||
#define _EIGEN_ALIGNED_PTR(x) ((ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
|
||||
#define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
|
||||
|
||||
// Handle endianness properly while loading constants
|
||||
// Define global static constants:
|
||||
@@ -132,13 +136,11 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
typedef Packet4i type;
|
||||
typedef Packet4i half;
|
||||
enum {
|
||||
// FIXME check the Has*
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 4,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
// FIXME check the Has*
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
@@ -147,6 +149,37 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet4f type;
|
||||
typedef Packet4f half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasMin = 1,
|
||||
HasMax = 1,
|
||||
HasAbs = 1,
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1,
|
||||
HasNegate = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
{
|
||||
typedef Packet2d type;
|
||||
@@ -157,7 +190,6 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
size=2,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
// FIXME check the Has*
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
@@ -180,8 +212,12 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
|
||||
|
||||
/* Forward declaration */
|
||||
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f,4>& kernel);
|
||||
|
||||
inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
|
||||
{
|
||||
Packet vt;
|
||||
@@ -222,6 +258,32 @@ inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
|
||||
return s;
|
||||
}
|
||||
|
||||
/* Helper function to simulate a vec_splat_packet4f
|
||||
*/
|
||||
template<int element> EIGEN_STRONG_INLINE Packet4f vec_splat_packet4f(const Packet4f& from)
|
||||
{
|
||||
Packet4f splat;
|
||||
switch (element) {
|
||||
case 0:
|
||||
splat.v4f[0] = vec_splat(from.v4f[0], 0);
|
||||
splat.v4f[1] = splat.v4f[0];
|
||||
break;
|
||||
case 1:
|
||||
splat.v4f[0] = vec_splat(from.v4f[0], 1);
|
||||
splat.v4f[1] = splat.v4f[0];
|
||||
break;
|
||||
case 2:
|
||||
splat.v4f[0] = vec_splat(from.v4f[1], 0);
|
||||
splat.v4f[1] = splat.v4f[0];
|
||||
break;
|
||||
case 3:
|
||||
splat.v4f[0] = vec_splat(from.v4f[1], 1);
|
||||
splat.v4f[1] = splat.v4f[0];
|
||||
break;
|
||||
}
|
||||
return splat;
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4i>
|
||||
{
|
||||
@@ -238,6 +300,31 @@ struct palign_impl<Offset,Packet4i>
|
||||
}
|
||||
};
|
||||
|
||||
/* This is a tricky one, we have to translate float alignment to vector elements of sizeof double
|
||||
*/
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4f>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
|
||||
{
|
||||
switch (Offset % 4) {
|
||||
case 1:
|
||||
first.v4f[0] = vec_sld(first.v4f[0], first.v4f[1], 8);
|
||||
first.v4f[1] = vec_sld(first.v4f[1], second.v4f[0], 8);
|
||||
break;
|
||||
case 2:
|
||||
first.v4f[0] = first.v4f[1];
|
||||
first.v4f[1] = second.v4f[0];
|
||||
break;
|
||||
case 3:
|
||||
first.v4f[0] = vec_sld(first.v4f[1], second.v4f[0], 8);
|
||||
first.v4f[1] = vec_sld(second.v4f[0], second.v4f[1], 8);
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2d>
|
||||
{
|
||||
@@ -257,6 +344,16 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
|
||||
return vfrom->v4i;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_LOAD
|
||||
Packet4f vfrom;
|
||||
vfrom.v4f[0] = vec_ld2f(&from[0]);
|
||||
vfrom.v4f[1] = vec_ld2f(&from[2]);
|
||||
return vfrom;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
@@ -275,6 +372,15 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& f
|
||||
vto->v4i = from;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_STORE
|
||||
vec_st2f(from.v4f[0], &to[0]);
|
||||
vec_st2f(from.v4f[1], &to[2]);
|
||||
}
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
@@ -288,10 +394,16 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from)
|
||||
{
|
||||
return vec_splats(from);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
|
||||
return vec_splats(from);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from)
|
||||
{
|
||||
Packet4f to;
|
||||
to.v4f[0] = pset1<Packet2d>(static_cast<const double&>(from));
|
||||
to.v4f[1] = to.v4f[0];
|
||||
return to;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet4i>(const int *a,
|
||||
@@ -304,6 +416,17 @@ pbroadcast4<Packet4i>(const int *a,
|
||||
a3 = vec_splat(a3, 3);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet4f>(const float *a,
|
||||
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
|
||||
{
|
||||
a3 = pload<Packet4f>(a);
|
||||
a0 = vec_splat_packet4f<0>(a3);
|
||||
a1 = vec_splat_packet4f<1>(a3);
|
||||
a2 = vec_splat_packet4f<2>(a3);
|
||||
a3 = vec_splat_packet4f<3>(a3);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet2d>(const double *a,
|
||||
Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
|
||||
@@ -326,6 +449,16 @@ template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* f
|
||||
return pload<Packet4i>(ai);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
|
||||
{
|
||||
float EIGEN_ALIGN16 ai[4];
|
||||
ai[0] = from[0*stride];
|
||||
ai[1] = from[1*stride];
|
||||
ai[2] = from[2*stride];
|
||||
ai[3] = from[3*stride];
|
||||
return pload<Packet4f>(ai);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
|
||||
{
|
||||
double EIGEN_ALIGN16 af[2];
|
||||
@@ -344,6 +477,16 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const
|
||||
to[3*stride] = ai[3];
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
|
||||
{
|
||||
float EIGEN_ALIGN16 ai[4];
|
||||
pstore<float>((float *)ai, from);
|
||||
to[0*stride] = ai[0];
|
||||
to[1*stride] = ai[1];
|
||||
to[2*stride] = ai[2];
|
||||
to[3*stride] = ai[3];
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
|
||||
{
|
||||
double EIGEN_ALIGN16 af[2];
|
||||
@@ -353,52 +496,160 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to,
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a + b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = a.v4f[0] + b.v4f[0];
|
||||
c.v4f[1] = a.v4f[1] + b.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a + b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a - b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = a.v4f[0] - b.v4f[0];
|
||||
c.v4f[1] = a.v4f[1] - b.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a - b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a * b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = a.v4f[0] * b.v4f[0];
|
||||
c.v4f[1] = a.v4f[1] * b.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a * b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a / b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = a.v4f[0] / b.v4f[0];
|
||||
c.v4f[1] = a.v4f[1] / b.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a / b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return (-a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = -a.v4f[0];
|
||||
c.v4f[1] = -a.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return (-a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd<Packet4i>(pmul<Packet4i>(a, b), c); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = vec_madd(a.v4f[0], b.v4f[0], c.v4f[0]);
|
||||
res.v4f[1] = vec_madd(a.v4f[1], b.v4f[1], c.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return padd<Packet4i>(pset1<Packet4i>(a), p4i_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return padd<Packet4f>(pset1<Packet4f>(a), p4f_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return padd<Packet2d>(pset1<Packet2d>(a), p2d_COUNTDOWN); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pmin(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pmin(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pmax(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pmax(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return pand<Packet4i>(a, vec_nor(b, b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pandnot(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pandnot(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = vec_round(a.v4f[0]);
|
||||
res.v4f[1] = vec_round(a.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = vec_ceil(a.v4f[0]);
|
||||
res.v4f[1] = vec_ceil(a.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = vec_floor(a.v4f[0]);
|
||||
res.v4f[1] = vec_floor(a.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { return pload<Packet4i>(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { return pload<Packet4f>(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { return pload<Packet2d>(from); }
|
||||
|
||||
|
||||
@@ -408,6 +659,14 @@ template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
|
||||
return vec_perm(p, p, p16uc_DUPLICATE32_HI);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
|
||||
{
|
||||
Packet4f p = pload<Packet4f>(from);
|
||||
p.v4f[1] = vec_splat(p.v4f[0], 1);
|
||||
p.v4f[0] = vec_splat(p.v4f[0], 0);
|
||||
return p;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
|
||||
{
|
||||
Packet2d p = pload<Packet2d>(from);
|
||||
@@ -415,12 +674,15 @@ template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { pstore<int>(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { pstore<float>(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { pstore<double>(to, from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[2]; vec_st2f(a.v4f[0], &x[0]); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
|
||||
@@ -433,8 +695,23 @@ template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
|
||||
return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
|
||||
{
|
||||
Packet4f rev;
|
||||
rev.v4f[0] = preverse<Packet2d>(a.v4f[1]);
|
||||
rev.v4f[1] = preverse<Packet2d>(a.v4f[0]);
|
||||
return rev;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pabs<Packet4i>(const Packet4i& a) { return vec_abs(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pabs<Packet2d>(const Packet2d& a) { return vec_abs(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pabs<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pabs(a.v4f[0]);
|
||||
res.v4f[1] = pabs(a.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
@@ -453,6 +730,13 @@ template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
|
||||
sum = padd<Packet2d>(a, b);
|
||||
return pfirst(sum);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet2d sum;
|
||||
sum = padd<Packet2d>(a.v4f[0], a.v4f[1]);
|
||||
double first = predux<Packet2d>(sum);
|
||||
return static_cast<float>(first);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
|
||||
{
|
||||
@@ -493,6 +777,21 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
return sum;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
|
||||
{
|
||||
PacketBlock<Packet4f,4> transpose;
|
||||
transpose.packet[0] = vecs[0];
|
||||
transpose.packet[1] = vecs[1];
|
||||
transpose.packet[2] = vecs[2];
|
||||
transpose.packet[3] = vecs[3];
|
||||
ptranspose(transpose);
|
||||
|
||||
Packet4f sum = padd(transpose.packet[0], transpose.packet[1]);
|
||||
sum = padd(sum, transpose.packet[2]);
|
||||
sum = padd(sum, transpose.packet[3]);
|
||||
return sum;
|
||||
}
|
||||
|
||||
// Other reduction functions:
|
||||
// mul
|
||||
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
|
||||
@@ -507,6 +806,12 @@ template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
|
||||
return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
// Return predux_mul<Packet2d> of the subvectors product
|
||||
return static_cast<float>(pfirst(predux_mul(pmul(a.v4f[0], a.v4f[1]))));
|
||||
}
|
||||
|
||||
// min
|
||||
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
@@ -521,6 +826,14 @@ template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
|
||||
return pfirst(pmin<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet2d b, res;
|
||||
b = pmin<Packet2d>(a.v4f[0], a.v4f[1]);
|
||||
res = pmin<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
|
||||
return static_cast<float>(pfirst(res));
|
||||
}
|
||||
|
||||
// max
|
||||
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
@@ -536,6 +849,14 @@ template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
|
||||
return pfirst(pmax<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet2d b, res;
|
||||
b = pmax<Packet2d>(a.v4f[0], a.v4f[1]);
|
||||
res = pmax<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
|
||||
return static_cast<float>(pfirst(res));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4i,4>& kernel) {
|
||||
Packet4i t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
|
||||
@@ -556,12 +877,61 @@ ptranspose(PacketBlock<Packet2d,2>& kernel) {
|
||||
kernel.packet[1] = t1;
|
||||
}
|
||||
|
||||
/* Split the Packet4f PacketBlock into 4 Packet2d PacketBlocks and transpose each one
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4f,4>& kernel) {
|
||||
PacketBlock<Packet2d,2> t0,t1,t2,t3;
|
||||
// copy top-left 2x2 Packet2d block
|
||||
t0.packet[0] = kernel.packet[0].v4f[0];
|
||||
t0.packet[1] = kernel.packet[1].v4f[0];
|
||||
|
||||
// copy top-right 2x2 Packet2d block
|
||||
t1.packet[0] = kernel.packet[0].v4f[1];
|
||||
t1.packet[1] = kernel.packet[1].v4f[1];
|
||||
|
||||
// copy bottom-left 2x2 Packet2d block
|
||||
t2.packet[0] = kernel.packet[2].v4f[0];
|
||||
t2.packet[1] = kernel.packet[3].v4f[0];
|
||||
|
||||
// copy bottom-right 2x2 Packet2d block
|
||||
t3.packet[0] = kernel.packet[2].v4f[1];
|
||||
t3.packet[1] = kernel.packet[3].v4f[1];
|
||||
|
||||
// Transpose all 2x2 blocks
|
||||
ptranspose(t0);
|
||||
ptranspose(t1);
|
||||
ptranspose(t2);
|
||||
ptranspose(t3);
|
||||
|
||||
// Copy back transposed blocks, but exchange t1 and t2 due to transposition
|
||||
kernel.packet[0].v4f[0] = t0.packet[0];
|
||||
kernel.packet[0].v4f[1] = t2.packet[0];
|
||||
kernel.packet[1].v4f[0] = t0.packet[1];
|
||||
kernel.packet[1].v4f[1] = t2.packet[1];
|
||||
kernel.packet[2].v4f[0] = t1.packet[0];
|
||||
kernel.packet[2].v4f[1] = t3.packet[0];
|
||||
kernel.packet[3].v4f[0] = t1.packet[1];
|
||||
kernel.packet[3].v4f[1] = t3.packet[1];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
|
||||
Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
|
||||
Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE));
|
||||
return vec_sel(elsePacket, thenPacket, mask);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
|
||||
Packet2ul select_hi = { ifPacket.select[0], ifPacket.select[1] };
|
||||
Packet2ul select_lo = { ifPacket.select[2], ifPacket.select[3] };
|
||||
Packet2ul mask_hi = vec_cmpeq(select_hi, reinterpret_cast<Packet2ul>(p2l_ONE));
|
||||
Packet2ul mask_lo = vec_cmpeq(select_lo, reinterpret_cast<Packet2ul>(p2l_ONE));
|
||||
Packet4f result;
|
||||
result.v4f[0] = vec_sel(elsePacket.v4f[0], thenPacket.v4f[0], mask_hi);
|
||||
result.v4f[1] = vec_sel(elsePacket.v4f[1], thenPacket.v4f[1], mask_lo);
|
||||
return result;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
|
||||
Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };
|
||||
Packet2ul mask = vec_cmpeq(select, reinterpret_cast<Packet2ul>(p2l_ONE));
|
||||
|
||||
@@ -28,7 +28,7 @@ template<typename DstScalar,typename SrcScalar> struct assign_op {
|
||||
{ internal::pstoret<DstScalar,Packet,Alignment>(a,b); }
|
||||
};
|
||||
|
||||
// Empty overload for void type (used by PermutationMatrix
|
||||
// Empty overload for void type (used by PermutationMatrix)
|
||||
template<typename DstScalar> struct assign_op<DstScalar,void> {};
|
||||
|
||||
template<typename DstScalar,typename SrcScalar>
|
||||
|
||||
@@ -255,7 +255,7 @@ struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,Rh
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the hypot of two scalars
|
||||
* \brief Template functor to compute the hypot of two \b positive \b and \b real scalars
|
||||
*
|
||||
* \sa MatrixBase::stableNorm(), class Redux
|
||||
*/
|
||||
@@ -263,22 +263,15 @@ template<typename Scalar>
|
||||
struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
|
||||
{
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
|
||||
// typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar &x, const Scalar &y) const
|
||||
{
|
||||
EIGEN_USING_STD_MATH(sqrt)
|
||||
Scalar p, qp;
|
||||
if(_x>_y)
|
||||
{
|
||||
p = _x;
|
||||
qp = _y / p;
|
||||
}
|
||||
else
|
||||
{
|
||||
p = _y;
|
||||
qp = _x / p;
|
||||
}
|
||||
return p * sqrt(Scalar(1) + qp*qp);
|
||||
// This functor is used by hypotNorm only for which it is faster to first apply abs
|
||||
// on all coefficients prior to reduction through hypot.
|
||||
// This way we avoid calling abs on positive and real entries, and this also permits
|
||||
// to seamlessly handle complexes. Otherwise we would have to handle both real and complexes
|
||||
// through the same functor...
|
||||
return internal::positive_real_hypot(x,y);
|
||||
}
|
||||
};
|
||||
template<typename Scalar>
|
||||
|
||||
@@ -44,16 +44,16 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
|
||||
{
|
||||
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
|
||||
m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
|
||||
m_interPacket(plset<Packet>(0)),
|
||||
m_flip(std::abs(high)<std::abs(low))
|
||||
m_flip(numext::abs(high)<numext::abs(low))
|
||||
{}
|
||||
|
||||
template<typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const {
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
if(m_flip)
|
||||
return (i==0)? m_low : (m_high - (m_size1-i)*m_step);
|
||||
return (i==0)? m_low : (m_high - RealScalar(m_size1-i)*m_step);
|
||||
else
|
||||
return (i==m_size1)? m_high : (m_low + i*m_step);
|
||||
return (i==m_size1)? m_high : (m_low + RealScalar(i)*m_step);
|
||||
}
|
||||
|
||||
template<typename IndexType>
|
||||
@@ -63,7 +63,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
|
||||
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
|
||||
if(m_flip)
|
||||
{
|
||||
Packet pi = padd(pset1<Packet>(Scalar(i-m_size1)),m_interPacket);
|
||||
Packet pi = plset<Packet>(Scalar(i-m_size1));
|
||||
Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi));
|
||||
if(i==0)
|
||||
res = pinsertfirst(res, m_low);
|
||||
@@ -71,7 +71,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
|
||||
}
|
||||
else
|
||||
{
|
||||
Packet pi = padd(pset1<Packet>(Scalar(i)),m_interPacket);
|
||||
Packet pi = plset<Packet>(Scalar(i));
|
||||
Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi));
|
||||
if(i==m_size1-unpacket_traits<Packet>::size+1)
|
||||
res = pinsertlast(res, m_high);
|
||||
@@ -83,7 +83,6 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
|
||||
const Scalar m_high;
|
||||
const Index m_size1;
|
||||
const Scalar m_step;
|
||||
const Packet m_interPacket;
|
||||
const bool m_flip;
|
||||
};
|
||||
|
||||
@@ -93,8 +92,8 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/true>
|
||||
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
|
||||
m_low(low),
|
||||
m_multiplier((high-low)/convert_index<Scalar>(num_steps<=1 ? 1 : num_steps-1)),
|
||||
m_divisor(convert_index<Scalar>(num_steps+high-low)/(high-low+1)),
|
||||
m_use_divisor((high-low+1)<num_steps)
|
||||
m_divisor(convert_index<Scalar>((high>=low?num_steps:-num_steps)+(high-low))/((numext::abs(high-low)+1)==0?1:(numext::abs(high-low)+1))),
|
||||
m_use_divisor(num_steps>1 && (numext::abs(high-low)+1)<num_steps)
|
||||
{}
|
||||
|
||||
template<typename IndexType>
|
||||
@@ -173,6 +172,13 @@ template<typename Scalar, typename PacketType,typename IndexType>
|
||||
struct has_unary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 1}; };
|
||||
template<typename Scalar, typename PacketType,typename IndexType>
|
||||
struct has_binary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
|
||||
|
||||
template<typename Scalar,typename IndexType>
|
||||
struct has_nullary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 1}; };
|
||||
template<typename Scalar,typename IndexType>
|
||||
struct has_unary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };
|
||||
template<typename Scalar,typename IndexType>
|
||||
struct has_binary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
@@ -72,7 +72,7 @@ template<typename T>
|
||||
struct functor_traits<std::not_equal_to<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
#if(__cplusplus < 201103L)
|
||||
#if (__cplusplus < 201103L) && (EIGEN_COMP_MSVC <= 1900)
|
||||
// std::binder* are deprecated since c++11 and will be removed in c++17
|
||||
template<typename T>
|
||||
struct functor_traits<std::binder2nd<T> >
|
||||
@@ -83,13 +83,17 @@ struct functor_traits<std::binder1st<T> >
|
||||
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
#endif
|
||||
|
||||
#if (__cplusplus < 201703L) && (EIGEN_COMP_MSVC < 1910)
|
||||
// std::unary_negate is deprecated since c++17 and will be removed in c++20
|
||||
template<typename T>
|
||||
struct functor_traits<std::unary_negate<T> >
|
||||
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
|
||||
// std::binary_negate is deprecated since c++17 and will be removed in c++20
|
||||
template<typename T>
|
||||
struct functor_traits<std::binary_negate<T> >
|
||||
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_STDEXT_SUPPORT
|
||||
|
||||
|
||||
@@ -972,7 +972,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
|
||||
EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
|
||||
internal::prefetch(blA+(3*K+16)*LhsProgress); \
|
||||
if (EIGEN_ARCH_ARM) internal::prefetch(blB+(4*K+16)*RhsProgress); /* Bug 953 */ \
|
||||
if (EIGEN_ARCH_ARM) { internal::prefetch(blB+(4*K+16)*RhsProgress); } /* Bug 953 */ \
|
||||
traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
|
||||
traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
|
||||
traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
|
||||
@@ -1197,10 +1197,16 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX4");
|
||||
RhsPacket B_0, B1, B2, B3, T0;
|
||||
|
||||
#define EIGEN_GEBGP_ONESTEP(K) \
|
||||
// NOTE: the begin/end asm comments below work around bug 935!
|
||||
// but they are not enough for gcc>=6 without FMA (bug 1637)
|
||||
#if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE)
|
||||
#define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND __asm__ ("" : [a0] "+x,m" (A0),[a1] "+x,m" (A1));
|
||||
#else
|
||||
#define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND
|
||||
#endif
|
||||
#define EIGEN_GEBGP_ONESTEP(K) \
|
||||
do { \
|
||||
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX4"); \
|
||||
EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
|
||||
traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
|
||||
traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
|
||||
traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
|
||||
@@ -1212,6 +1218,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
traits.madd(A1, B2, C6, B2); \
|
||||
traits.madd(A0, B3, C3, T0); \
|
||||
traits.madd(A1, B3, C7, B3); \
|
||||
EIGEN_GEBP_2PX4_SPILLING_WORKAROUND \
|
||||
EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX4"); \
|
||||
} while(false)
|
||||
|
||||
@@ -1526,10 +1533,10 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
// The following piece of code wont work for 512 bit registers
|
||||
// Moreover, if LhsProgress==8 it assumes that there is a half packet of the same size
|
||||
// as nr (which is currently 4) for the return type.
|
||||
typedef typename unpacket_traits<SResPacket>::half SResPacketHalf;
|
||||
const int SResPacketHalfSize = unpacket_traits<typename unpacket_traits<SResPacket>::half>::size;
|
||||
if ((SwappedTraits::LhsProgress % 4) == 0 &&
|
||||
(SwappedTraits::LhsProgress <= 8) &&
|
||||
(SwappedTraits::LhsProgress!=8 || unpacket_traits<SResPacketHalf>::size==nr))
|
||||
(SwappedTraits::LhsProgress!=8 || SResPacketHalfSize==nr))
|
||||
{
|
||||
SAccPacket C0, C1, C2, C3;
|
||||
straits.initAcc(C0);
|
||||
|
||||
@@ -83,8 +83,8 @@ static void run(Index rows, Index cols, Index depth,
|
||||
if(info)
|
||||
{
|
||||
// this is the parallel version!
|
||||
Index tid = omp_get_thread_num();
|
||||
Index threads = omp_get_num_threads();
|
||||
int tid = omp_get_thread_num();
|
||||
int threads = omp_get_num_threads();
|
||||
|
||||
LhsScalar* blockA = blocking.blockA();
|
||||
eigen_internal_assert(blockA!=0);
|
||||
@@ -116,9 +116,9 @@ static void run(Index rows, Index cols, Index depth,
|
||||
info[tid].sync = k;
|
||||
|
||||
// Computes C_i += A' * B' per A'_i
|
||||
for(Index shift=0; shift<threads; ++shift)
|
||||
for(int shift=0; shift<threads; ++shift)
|
||||
{
|
||||
Index i = (tid+shift)%threads;
|
||||
int i = (tid+shift)%threads;
|
||||
|
||||
// At this point we have to make sure that A'_i has been updated by the thread i,
|
||||
// we use testAndSetOrdered to mimic a volatile access.
|
||||
|
||||
@@ -148,7 +148,7 @@ struct tribb_kernel
|
||||
ResMapper res(_res, resStride);
|
||||
gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
|
||||
|
||||
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer;
|
||||
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer((internal::constructor_without_unaligned_array_assert()));
|
||||
|
||||
// let's process the block per panel of actual_mc x BlockSize,
|
||||
// again, each is split into three parts, etc.
|
||||
@@ -199,7 +199,7 @@ struct general_product_to_triangular_selector;
|
||||
template<typename MatrixType, typename ProductType, int UpLo>
|
||||
struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>
|
||||
{
|
||||
static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
|
||||
static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta)
|
||||
{
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
|
||||
@@ -217,6 +217,9 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>
|
||||
|
||||
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
|
||||
|
||||
if(!beta)
|
||||
mat.template triangularView<UpLo>().setZero();
|
||||
|
||||
enum {
|
||||
StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,
|
||||
UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1,
|
||||
@@ -244,7 +247,7 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>
|
||||
template<typename MatrixType, typename ProductType, int UpLo>
|
||||
struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
|
||||
{
|
||||
static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
|
||||
static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta)
|
||||
{
|
||||
typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
|
||||
typedef internal::blas_traits<Lhs> LhsBlasTraits;
|
||||
@@ -260,13 +263,19 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
|
||||
|
||||
typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
|
||||
|
||||
if(!beta)
|
||||
mat.template triangularView<UpLo>().setZero();
|
||||
|
||||
enum {
|
||||
IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
|
||||
LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0,
|
||||
RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0
|
||||
RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0,
|
||||
SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0
|
||||
};
|
||||
|
||||
Index size = mat.cols();
|
||||
if(SkipDiag)
|
||||
size--;
|
||||
Index depth = actualLhs.cols();
|
||||
|
||||
typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar,
|
||||
@@ -277,20 +286,22 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
|
||||
internal::general_matrix_matrix_triangular_product<Index,
|
||||
typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
|
||||
typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
|
||||
IsRowMajor ? RowMajor : ColMajor, UpLo>
|
||||
IsRowMajor ? RowMajor : ColMajor, UpLo&(Lower|Upper)>
|
||||
::run(size, depth,
|
||||
&actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
|
||||
mat.data(), mat.outerStride(), actualAlpha, blocking);
|
||||
&actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(),
|
||||
&actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(),
|
||||
mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 1 : mat.outerStride() ) : 0), mat.outerStride(), actualAlpha, blocking);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename MatrixType, unsigned int UpLo>
|
||||
template<typename ProductType>
|
||||
TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha)
|
||||
TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED);
|
||||
eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols());
|
||||
|
||||
general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha);
|
||||
general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta);
|
||||
|
||||
return derived();
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H
|
||||
#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H
|
||||
|
||||
namespace Eigen {
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
@@ -52,7 +52,7 @@ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,Con
|
||||
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
|
||||
const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \
|
||||
{ \
|
||||
if (lhs==rhs) { \
|
||||
if ( lhs==rhs && ((UpLo&(Lower|Upper))==UpLo) ) { \
|
||||
general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
|
||||
::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \
|
||||
} else { \
|
||||
@@ -86,9 +86,9 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
|
||||
/* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \
|
||||
\
|
||||
BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
|
||||
char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \
|
||||
EIGTYPE beta; \
|
||||
BLASFUNC(&uplo, &trans, &n, &k, &numext::real_ref(alpha), lhs, &lda, &numext::real_ref(beta), res, &ldc); \
|
||||
char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'T':'N'); \
|
||||
EIGTYPE beta(1); \
|
||||
BLASFUNC(&uplo, &trans, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), lhs, &lda, (const BLASTYPE*)&numext::real_ref(beta), res, &ldc); \
|
||||
} \
|
||||
};
|
||||
|
||||
@@ -107,7 +107,7 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
|
||||
typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \
|
||||
\
|
||||
BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
|
||||
char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \
|
||||
char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'C':'N'); \
|
||||
RTYPE alpha_, beta_; \
|
||||
const EIGTYPE* a_ptr; \
|
||||
\
|
||||
@@ -125,9 +125,13 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
|
||||
} \
|
||||
};
|
||||
|
||||
|
||||
#ifdef EIGEN_USE_MKL
|
||||
EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk)
|
||||
EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk)
|
||||
#else
|
||||
EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_)
|
||||
EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk_)
|
||||
#endif
|
||||
|
||||
// TODO hanlde complex cases
|
||||
// EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_)
|
||||
|
||||
@@ -46,7 +46,7 @@ namespace internal {
|
||||
|
||||
// gemm specialization
|
||||
|
||||
#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASPREFIX) \
|
||||
#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASFUNC) \
|
||||
template< \
|
||||
typename Index, \
|
||||
int LhsStorageOrder, bool ConjugateLhs, \
|
||||
@@ -100,13 +100,20 @@ static void run(Index rows, Index cols, Index depth, \
|
||||
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
|
||||
} else b = _rhs; \
|
||||
\
|
||||
BLASPREFIX##gemm_(&transa, &transb, &m, &n, &k, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
|
||||
BLASFUNC(&transa, &transb, &m, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
|
||||
}};
|
||||
|
||||
GEMM_SPECIALIZATION(double, d, double, d)
|
||||
GEMM_SPECIALIZATION(float, f, float, s)
|
||||
GEMM_SPECIALIZATION(dcomplex, cd, double, z)
|
||||
GEMM_SPECIALIZATION(scomplex, cf, float, c)
|
||||
#ifdef EIGEN_USE_MKL
|
||||
GEMM_SPECIALIZATION(double, d, double, dgemm)
|
||||
GEMM_SPECIALIZATION(float, f, float, sgemm)
|
||||
GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, zgemm)
|
||||
GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, cgemm)
|
||||
#else
|
||||
GEMM_SPECIALIZATION(double, d, double, dgemm_)
|
||||
GEMM_SPECIALIZATION(float, f, float, sgemm_)
|
||||
GEMM_SPECIALIZATION(dcomplex, cd, double, zgemm_)
|
||||
GEMM_SPECIALIZATION(scomplex, cf, float, cgemm_)
|
||||
#endif
|
||||
|
||||
} // end namespase internal
|
||||
|
||||
|
||||
@@ -183,8 +183,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,C
|
||||
alignmentPattern = AllAligned;
|
||||
}
|
||||
|
||||
const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
|
||||
const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
|
||||
const Index offset1 = (alignmentPattern==FirstAligned && alignmentStep==1)?3:1;
|
||||
const Index offset3 = (alignmentPattern==FirstAligned && alignmentStep==1)?1:3;
|
||||
|
||||
Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
|
||||
for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
|
||||
@@ -457,8 +457,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R
|
||||
alignmentPattern = AllAligned;
|
||||
}
|
||||
|
||||
const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
|
||||
const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
|
||||
const Index offset1 = (alignmentPattern==FirstAligned && alignmentStep==1)?3:1;
|
||||
const Index offset3 = (alignmentPattern==FirstAligned && alignmentStep==1)?1:3;
|
||||
|
||||
Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
|
||||
for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
|
||||
|
||||
@@ -85,7 +85,7 @@ EIGEN_BLAS_GEMV_SPECIALIZE(float)
|
||||
EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex)
|
||||
EIGEN_BLAS_GEMV_SPECIALIZE(scomplex)
|
||||
|
||||
#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASPREFIX) \
|
||||
#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \
|
||||
template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
|
||||
struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
|
||||
{ \
|
||||
@@ -113,14 +113,21 @@ static void run( \
|
||||
x_ptr=x_tmp.data(); \
|
||||
incx=1; \
|
||||
} else x_ptr=rhs; \
|
||||
BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
|
||||
BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
|
||||
}\
|
||||
};
|
||||
|
||||
EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, d)
|
||||
EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, s)
|
||||
EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, z)
|
||||
EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, c)
|
||||
#ifdef EIGEN_USE_MKL
|
||||
EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv)
|
||||
EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv)
|
||||
EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv)
|
||||
EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8 , cgemv)
|
||||
#else
|
||||
EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv_)
|
||||
EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv_)
|
||||
EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_)
|
||||
EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, cgemv_)
|
||||
#endif
|
||||
|
||||
} // end namespase internal
|
||||
|
||||
|
||||
@@ -75,7 +75,7 @@ template<typename Index> struct GemmParallelInfo
|
||||
{
|
||||
GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
|
||||
|
||||
int volatile sync;
|
||||
Index volatile sync;
|
||||
int volatile users;
|
||||
|
||||
Index lhs_start;
|
||||
@@ -104,13 +104,14 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth,
|
||||
// - the sizes are large enough
|
||||
|
||||
// compute the maximal number of threads from the size of the product:
|
||||
// FIXME this has to be fine tuned
|
||||
// This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at once.
|
||||
Index size = transpose ? rows : cols;
|
||||
Index pb_max_threads = std::max<Index>(1,size / 32);
|
||||
Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);
|
||||
|
||||
// compute the maximal number of threads from the total amount of work:
|
||||
double work = static_cast<double>(rows) * static_cast<double>(cols) *
|
||||
static_cast<double>(depth);
|
||||
double kMinTaskSize = 50000; // Heuristic.
|
||||
double kMinTaskSize = 50000; // FIXME improve this heuristic.
|
||||
pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
|
||||
|
||||
// compute the number of threads we are going to use
|
||||
|
||||
@@ -40,7 +40,7 @@ namespace internal {
|
||||
|
||||
/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
|
||||
|
||||
#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
|
||||
#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
|
||||
template <typename Index, \
|
||||
int LhsStorageOrder, bool ConjugateLhs, \
|
||||
int RhsStorageOrder, bool ConjugateRhs> \
|
||||
@@ -81,13 +81,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
|
||||
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
|
||||
} else b = _rhs; \
|
||||
\
|
||||
BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
|
||||
BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
|
||||
\
|
||||
} \
|
||||
};
|
||||
|
||||
|
||||
#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
|
||||
#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
|
||||
template <typename Index, \
|
||||
int LhsStorageOrder, bool ConjugateLhs, \
|
||||
int RhsStorageOrder, bool ConjugateRhs> \
|
||||
@@ -144,20 +144,26 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
|
||||
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
|
||||
} \
|
||||
\
|
||||
BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
|
||||
BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
|
||||
\
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_BLAS_SYMM_L(double, double, d, d)
|
||||
EIGEN_BLAS_SYMM_L(float, float, f, s)
|
||||
EIGEN_BLAS_HEMM_L(dcomplex, double, cd, z)
|
||||
EIGEN_BLAS_HEMM_L(scomplex, float, cf, c)
|
||||
|
||||
#ifdef EIGEN_USE_MKL
|
||||
EIGEN_BLAS_SYMM_L(double, double, d, dsymm)
|
||||
EIGEN_BLAS_SYMM_L(float, float, f, ssymm)
|
||||
EIGEN_BLAS_HEMM_L(dcomplex, MKL_Complex16, cd, zhemm)
|
||||
EIGEN_BLAS_HEMM_L(scomplex, MKL_Complex8, cf, chemm)
|
||||
#else
|
||||
EIGEN_BLAS_SYMM_L(double, double, d, dsymm_)
|
||||
EIGEN_BLAS_SYMM_L(float, float, f, ssymm_)
|
||||
EIGEN_BLAS_HEMM_L(dcomplex, double, cd, zhemm_)
|
||||
EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_)
|
||||
#endif
|
||||
|
||||
/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
|
||||
|
||||
#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
|
||||
#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
|
||||
template <typename Index, \
|
||||
int LhsStorageOrder, bool ConjugateLhs, \
|
||||
int RhsStorageOrder, bool ConjugateRhs> \
|
||||
@@ -197,13 +203,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
|
||||
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
|
||||
} else b = _lhs; \
|
||||
\
|
||||
BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
|
||||
BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
|
||||
\
|
||||
} \
|
||||
};
|
||||
|
||||
|
||||
#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
|
||||
#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
|
||||
template <typename Index, \
|
||||
int LhsStorageOrder, bool ConjugateLhs, \
|
||||
int RhsStorageOrder, bool ConjugateRhs> \
|
||||
@@ -259,15 +265,21 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
|
||||
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
|
||||
} \
|
||||
\
|
||||
BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
|
||||
BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_BLAS_SYMM_R(double, double, d, d)
|
||||
EIGEN_BLAS_SYMM_R(float, float, f, s)
|
||||
EIGEN_BLAS_HEMM_R(dcomplex, double, cd, z)
|
||||
EIGEN_BLAS_HEMM_R(scomplex, float, cf, c)
|
||||
|
||||
#ifdef EIGEN_USE_MKL
|
||||
EIGEN_BLAS_SYMM_R(double, double, d, dsymm)
|
||||
EIGEN_BLAS_SYMM_R(float, float, f, ssymm)
|
||||
EIGEN_BLAS_HEMM_R(dcomplex, MKL_Complex16, cd, zhemm)
|
||||
EIGEN_BLAS_HEMM_R(scomplex, MKL_Complex8, cf, chemm)
|
||||
#else
|
||||
EIGEN_BLAS_SYMM_R(double, double, d, dsymm_)
|
||||
EIGEN_BLAS_SYMM_R(float, float, f, ssymm_)
|
||||
EIGEN_BLAS_HEMM_R(dcomplex, double, cd, zhemm_)
|
||||
EIGEN_BLAS_HEMM_R(scomplex, float, cf, chemm_)
|
||||
#endif
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -83,10 +83,10 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
|
||||
Scalar t3(0);
|
||||
Packet ptmp3 = pset1<Packet>(t3);
|
||||
|
||||
size_t starti = FirstTriangular ? 0 : j+2;
|
||||
size_t endi = FirstTriangular ? j : size;
|
||||
size_t alignedStart = (starti) + internal::first_default_aligned(&res[starti], endi-starti);
|
||||
size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
|
||||
Index starti = FirstTriangular ? 0 : j+2;
|
||||
Index endi = FirstTriangular ? j : size;
|
||||
Index alignedStart = (starti) + internal::first_default_aligned(&res[starti], endi-starti);
|
||||
Index alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
|
||||
|
||||
res[j] += cjd.pmul(numext::real(A0[j]), t0);
|
||||
res[j+1] += cjd.pmul(numext::real(A1[j+1]), t1);
|
||||
@@ -101,7 +101,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
|
||||
t2 += cj1.pmul(A0[j+1], rhs[j+1]);
|
||||
}
|
||||
|
||||
for (size_t i=starti; i<alignedStart; ++i)
|
||||
for (Index i=starti; i<alignedStart; ++i)
|
||||
{
|
||||
res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
|
||||
t2 += cj1.pmul(A0[i], rhs[i]);
|
||||
@@ -113,7 +113,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
|
||||
const Scalar* EIGEN_RESTRICT a1It = A1 + alignedStart;
|
||||
const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart;
|
||||
Scalar* EIGEN_RESTRICT resIt = res + alignedStart;
|
||||
for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize)
|
||||
for (Index i=alignedStart; i<alignedEnd; i+=PacketSize)
|
||||
{
|
||||
Packet A0i = ploadu<Packet>(a0It); a0It += PacketSize;
|
||||
Packet A1i = ploadu<Packet>(a1It); a1It += PacketSize;
|
||||
@@ -125,7 +125,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
|
||||
ptmp3 = pcj1.pmadd(A1i, Bi, ptmp3);
|
||||
pstore(resIt,Xi); resIt += PacketSize;
|
||||
}
|
||||
for (size_t i=alignedEnd; i<endi; i++)
|
||||
for (Index i=alignedEnd; i<endi; i++)
|
||||
{
|
||||
res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
|
||||
t2 += cj1.pmul(A0[i], rhs[i]);
|
||||
|
||||
@@ -95,14 +95,21 @@ const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \
|
||||
x_tmp=map_x.conjugate(); \
|
||||
x_ptr=x_tmp.data(); \
|
||||
} else x_ptr=_rhs; \
|
||||
BLASFUNC(&uplo, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
|
||||
BLASFUNC(&uplo, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
|
||||
}\
|
||||
};
|
||||
|
||||
#ifdef EIGEN_USE_MKL
|
||||
EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv)
|
||||
EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv)
|
||||
EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
|
||||
EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv)
|
||||
#else
|
||||
EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv_)
|
||||
EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv_)
|
||||
EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_)
|
||||
EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float, chemv_)
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
@@ -137,7 +137,13 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
||||
|
||||
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
|
||||
// To work around an "error: member reference base type 'Matrix<...>
|
||||
// (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is
|
||||
// not a structure or union" compilation error in nvcc (tested V8.0.61),
|
||||
// create a dummy internal::constructor_without_unaligned_array_assert
|
||||
// object to pass to the Matrix constructor.
|
||||
internal::constructor_without_unaligned_array_assert a;
|
||||
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer(a);
|
||||
triangularBuffer.setZero();
|
||||
if((Mode&ZeroDiag)==ZeroDiag)
|
||||
triangularBuffer.diagonal().setZero();
|
||||
@@ -284,7 +290,8 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
||||
|
||||
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
|
||||
internal::constructor_without_unaligned_array_assert a;
|
||||
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer(a);
|
||||
triangularBuffer.setZero();
|
||||
if((Mode&ZeroDiag)==ZeroDiag)
|
||||
triangularBuffer.diagonal().setZero();
|
||||
@@ -393,7 +400,9 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
|
||||
{
|
||||
template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha)
|
||||
{
|
||||
typedef typename Dest::Scalar Scalar;
|
||||
typedef typename Lhs::Scalar LhsScalar;
|
||||
typedef typename Rhs::Scalar RhsScalar;
|
||||
typedef typename Dest::Scalar Scalar;
|
||||
|
||||
typedef internal::blas_traits<Lhs> LhsBlasTraits;
|
||||
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
|
||||
@@ -405,8 +414,9 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
|
||||
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
|
||||
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
|
||||
|
||||
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
|
||||
* RhsBlasTraits::extractScalarFactor(a_rhs);
|
||||
LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);
|
||||
RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);
|
||||
Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
|
||||
|
||||
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
|
||||
Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
|
||||
@@ -431,6 +441,21 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
|
||||
&dst.coeffRef(0,0), dst.outerStride(), // result info
|
||||
actualAlpha, blocking
|
||||
);
|
||||
|
||||
// Apply correction if the diagonal is unit and a scalar factor was nested:
|
||||
if ((Mode&UnitDiag)==UnitDiag)
|
||||
{
|
||||
if (LhsIsTriangular && lhs_alpha!=LhsScalar(1))
|
||||
{
|
||||
Index diagSize = (std::min)(lhs.rows(),lhs.cols());
|
||||
dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize);
|
||||
}
|
||||
else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1))
|
||||
{
|
||||
Index diagSize = (std::min)(rhs.rows(),rhs.cols());
|
||||
dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -75,7 +75,7 @@ EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true)
|
||||
EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false)
|
||||
|
||||
// implements col-major += alpha * op(triangular) * op(general)
|
||||
#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
|
||||
#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
|
||||
template <typename Index, int Mode, \
|
||||
int LhsStorageOrder, bool ConjugateLhs, \
|
||||
int RhsStorageOrder, bool ConjugateRhs> \
|
||||
@@ -172,7 +172,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
|
||||
} \
|
||||
/*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \
|
||||
/* call ?trmm*/ \
|
||||
BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
|
||||
BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
|
||||
\
|
||||
/* Add op(a_triangular)*b into res*/ \
|
||||
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
|
||||
@@ -180,13 +180,20 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_BLAS_TRMM_L(double, double, d, d)
|
||||
EIGEN_BLAS_TRMM_L(dcomplex, double, cd, z)
|
||||
EIGEN_BLAS_TRMM_L(float, float, f, s)
|
||||
EIGEN_BLAS_TRMM_L(scomplex, float, cf, c)
|
||||
#ifdef EIGEN_USE_MKL
|
||||
EIGEN_BLAS_TRMM_L(double, double, d, dtrmm)
|
||||
EIGEN_BLAS_TRMM_L(dcomplex, MKL_Complex16, cd, ztrmm)
|
||||
EIGEN_BLAS_TRMM_L(float, float, f, strmm)
|
||||
EIGEN_BLAS_TRMM_L(scomplex, MKL_Complex8, cf, ctrmm)
|
||||
#else
|
||||
EIGEN_BLAS_TRMM_L(double, double, d, dtrmm_)
|
||||
EIGEN_BLAS_TRMM_L(dcomplex, double, cd, ztrmm_)
|
||||
EIGEN_BLAS_TRMM_L(float, float, f, strmm_)
|
||||
EIGEN_BLAS_TRMM_L(scomplex, float, cf, ctrmm_)
|
||||
#endif
|
||||
|
||||
// implements col-major += alpha * op(general) * op(triangular)
|
||||
#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
|
||||
#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
|
||||
template <typename Index, int Mode, \
|
||||
int LhsStorageOrder, bool ConjugateLhs, \
|
||||
int RhsStorageOrder, bool ConjugateRhs> \
|
||||
@@ -282,7 +289,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
|
||||
} \
|
||||
/*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! \n";*/ \
|
||||
/* call ?trmm*/ \
|
||||
BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
|
||||
BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
|
||||
\
|
||||
/* Add op(a_triangular)*b into res*/ \
|
||||
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
|
||||
@@ -290,11 +297,17 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_BLAS_TRMM_R(double, double, d, d)
|
||||
EIGEN_BLAS_TRMM_R(dcomplex, double, cd, z)
|
||||
EIGEN_BLAS_TRMM_R(float, float, f, s)
|
||||
EIGEN_BLAS_TRMM_R(scomplex, float, cf, c)
|
||||
|
||||
#ifdef EIGEN_USE_MKL
|
||||
EIGEN_BLAS_TRMM_R(double, double, d, dtrmm)
|
||||
EIGEN_BLAS_TRMM_R(dcomplex, MKL_Complex16, cd, ztrmm)
|
||||
EIGEN_BLAS_TRMM_R(float, float, f, strmm)
|
||||
EIGEN_BLAS_TRMM_R(scomplex, MKL_Complex8, cf, ctrmm)
|
||||
#else
|
||||
EIGEN_BLAS_TRMM_R(double, double, d, dtrmm_)
|
||||
EIGEN_BLAS_TRMM_R(dcomplex, double, cd, ztrmm_)
|
||||
EIGEN_BLAS_TRMM_R(float, float, f, strmm_)
|
||||
EIGEN_BLAS_TRMM_R(scomplex, float, cf, ctrmm_)
|
||||
#endif
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -221,8 +221,9 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
|
||||
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
|
||||
typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
|
||||
|
||||
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
|
||||
* RhsBlasTraits::extractScalarFactor(rhs);
|
||||
LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
|
||||
RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
|
||||
ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
|
||||
|
||||
enum {
|
||||
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
|
||||
@@ -274,6 +275,12 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
|
||||
else
|
||||
dest = MappedDest(actualDestPtr, dest.size());
|
||||
}
|
||||
|
||||
if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
|
||||
{
|
||||
Index diagSize = (std::min)(lhs.rows(),lhs.cols());
|
||||
dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -295,8 +302,9 @@ template<int Mode> struct trmv_selector<Mode,RowMajor>
|
||||
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
|
||||
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
|
||||
|
||||
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
|
||||
* RhsBlasTraits::extractScalarFactor(rhs);
|
||||
LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
|
||||
RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
|
||||
ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
|
||||
|
||||
enum {
|
||||
DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
|
||||
@@ -326,6 +334,12 @@ template<int Mode> struct trmv_selector<Mode,RowMajor>
|
||||
actualRhsPtr,1,
|
||||
dest.data(),dest.innerStride(),
|
||||
actualAlpha);
|
||||
|
||||
if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
|
||||
{
|
||||
Index diagSize = (std::min)(lhs.rows(),lhs.cols());
|
||||
dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -71,7 +71,7 @@ EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex)
|
||||
EIGEN_BLAS_TRMV_SPECIALIZE(scomplex)
|
||||
|
||||
// implements col-major: res += alpha * op(triangular) * vector
|
||||
#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
|
||||
#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
|
||||
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
|
||||
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
|
||||
enum { \
|
||||
@@ -121,10 +121,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
|
||||
diag = IsUnitDiag ? 'U' : 'N'; \
|
||||
\
|
||||
/* call ?TRMV*/ \
|
||||
BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
|
||||
BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
|
||||
\
|
||||
/* Add op(a_tr)rhs into res*/ \
|
||||
BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
|
||||
BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
|
||||
/* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
|
||||
if (size<(std::max)(rows,cols)) { \
|
||||
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
|
||||
@@ -142,18 +142,25 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
|
||||
m = convert_index<BlasIndex>(size); \
|
||||
n = convert_index<BlasIndex>(cols-size); \
|
||||
} \
|
||||
BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
|
||||
BLASPREFIX##gemv##BLASPOSTFIX(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
|
||||
} \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_BLAS_TRMV_CM(double, double, d, d)
|
||||
EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z)
|
||||
EIGEN_BLAS_TRMV_CM(float, float, f, s)
|
||||
EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c)
|
||||
#ifdef EIGEN_USE_MKL
|
||||
EIGEN_BLAS_TRMV_CM(double, double, d, d,)
|
||||
EIGEN_BLAS_TRMV_CM(dcomplex, MKL_Complex16, cd, z,)
|
||||
EIGEN_BLAS_TRMV_CM(float, float, f, s,)
|
||||
EIGEN_BLAS_TRMV_CM(scomplex, MKL_Complex8, cf, c,)
|
||||
#else
|
||||
EIGEN_BLAS_TRMV_CM(double, double, d, d, _)
|
||||
EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z, _)
|
||||
EIGEN_BLAS_TRMV_CM(float, float, f, s, _)
|
||||
EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c, _)
|
||||
#endif
|
||||
|
||||
// implements row-major: res += alpha * op(triangular) * vector
|
||||
#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
|
||||
#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
|
||||
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
|
||||
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
|
||||
enum { \
|
||||
@@ -203,10 +210,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
|
||||
diag = IsUnitDiag ? 'U' : 'N'; \
|
||||
\
|
||||
/* call ?TRMV*/ \
|
||||
BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
|
||||
BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
|
||||
\
|
||||
/* Add op(a_tr)rhs into res*/ \
|
||||
BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
|
||||
BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
|
||||
/* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
|
||||
if (size<(std::max)(rows,cols)) { \
|
||||
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
|
||||
@@ -224,15 +231,22 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
|
||||
m = convert_index<BlasIndex>(size); \
|
||||
n = convert_index<BlasIndex>(cols-size); \
|
||||
} \
|
||||
BLASPREFIX##gemv_(&trans, &n, &m, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
|
||||
BLASPREFIX##gemv##BLASPOSTFIX(&trans, &n, &m, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
|
||||
} \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_BLAS_TRMV_RM(double, double, d, d)
|
||||
EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z)
|
||||
EIGEN_BLAS_TRMV_RM(float, float, f, s)
|
||||
EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c)
|
||||
#ifdef EIGEN_USE_MKL
|
||||
EIGEN_BLAS_TRMV_RM(double, double, d, d,)
|
||||
EIGEN_BLAS_TRMV_RM(dcomplex, MKL_Complex16, cd, z,)
|
||||
EIGEN_BLAS_TRMV_RM(float, float, f, s,)
|
||||
EIGEN_BLAS_TRMV_RM(scomplex, MKL_Complex8, cf, c,)
|
||||
#else
|
||||
EIGEN_BLAS_TRMV_RM(double, double, d, d,_)
|
||||
EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z,_)
|
||||
EIGEN_BLAS_TRMV_RM(float, float, f, s,_)
|
||||
EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c,_)
|
||||
#endif
|
||||
|
||||
} // end namespase internal
|
||||
|
||||
|
||||
@@ -183,7 +183,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
|
||||
}
|
||||
}
|
||||
|
||||
/* Optimized triangular solver with multiple left hand sides and the trinagular matrix on the right
|
||||
/* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right
|
||||
*/
|
||||
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
|
||||
struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>
|
||||
@@ -202,6 +202,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conj
|
||||
level3_blocking<Scalar,Scalar>& blocking)
|
||||
{
|
||||
Index rows = otherSize;
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
|
||||
typedef blas_data_mapper<Scalar, Index, ColMajor> LhsMapper;
|
||||
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper;
|
||||
@@ -306,9 +307,9 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conj
|
||||
}
|
||||
if((Mode & UnitDiag)==0)
|
||||
{
|
||||
Scalar b = conj(rhs(j,j));
|
||||
Scalar inv_rjj = RealScalar(1)/conj(rhs(j,j));
|
||||
for (Index i=0; i<actual_mc; ++i)
|
||||
r[i] /= b;
|
||||
r[i] *= inv_rjj;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
// implements LeftSide op(triangular)^-1 * general
|
||||
#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASPREFIX) \
|
||||
#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \
|
||||
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
|
||||
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
|
||||
{ \
|
||||
@@ -80,18 +80,24 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
|
||||
} \
|
||||
if (IsUnitDiag) diag='U'; \
|
||||
/* call ?trsm*/ \
|
||||
BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
|
||||
BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_BLAS_TRSM_L(double, double, d)
|
||||
EIGEN_BLAS_TRSM_L(dcomplex, double, z)
|
||||
EIGEN_BLAS_TRSM_L(float, float, s)
|
||||
EIGEN_BLAS_TRSM_L(scomplex, float, c)
|
||||
|
||||
#ifdef EIGEN_USE_MKL
|
||||
EIGEN_BLAS_TRSM_L(double, double, dtrsm)
|
||||
EIGEN_BLAS_TRSM_L(dcomplex, MKL_Complex16, ztrsm)
|
||||
EIGEN_BLAS_TRSM_L(float, float, strsm)
|
||||
EIGEN_BLAS_TRSM_L(scomplex, MKL_Complex8, ctrsm)
|
||||
#else
|
||||
EIGEN_BLAS_TRSM_L(double, double, dtrsm_)
|
||||
EIGEN_BLAS_TRSM_L(dcomplex, double, ztrsm_)
|
||||
EIGEN_BLAS_TRSM_L(float, float, strsm_)
|
||||
EIGEN_BLAS_TRSM_L(scomplex, float, ctrsm_)
|
||||
#endif
|
||||
|
||||
// implements RightSide general * op(triangular)^-1
|
||||
#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASPREFIX) \
|
||||
#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \
|
||||
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
|
||||
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
|
||||
{ \
|
||||
@@ -133,16 +139,22 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
|
||||
} \
|
||||
if (IsUnitDiag) diag='U'; \
|
||||
/* call ?trsm*/ \
|
||||
BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
|
||||
BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
|
||||
/*std::cout << "TRMS_L specialization!\n";*/ \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_BLAS_TRSM_R(double, double, d)
|
||||
EIGEN_BLAS_TRSM_R(dcomplex, double, z)
|
||||
EIGEN_BLAS_TRSM_R(float, float, s)
|
||||
EIGEN_BLAS_TRSM_R(scomplex, float, c)
|
||||
|
||||
#ifdef EIGEN_USE_MKL
|
||||
EIGEN_BLAS_TRSM_R(double, double, dtrsm)
|
||||
EIGEN_BLAS_TRSM_R(dcomplex, MKL_Complex16, ztrsm)
|
||||
EIGEN_BLAS_TRSM_R(float, float, strsm)
|
||||
EIGEN_BLAS_TRSM_R(scomplex, MKL_Complex8, ctrsm)
|
||||
#else
|
||||
EIGEN_BLAS_TRSM_R(double, double, dtrsm_)
|
||||
EIGEN_BLAS_TRSM_R(dcomplex, double, ztrsm_)
|
||||
EIGEN_BLAS_TRSM_R(float, float, strsm_)
|
||||
EIGEN_BLAS_TRSM_R(scomplex, float, ctrsm_)
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
@@ -43,12 +43,20 @@
|
||||
#endif
|
||||
#pragma clang diagnostic ignored "-Wconstant-logical-operand"
|
||||
|
||||
#elif defined __GNUC__ && __GNUC__>=6
|
||||
#elif defined __GNUC__
|
||||
|
||||
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
|
||||
#if (!defined(EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS)) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
|
||||
#pragma GCC diagnostic push
|
||||
#endif
|
||||
#pragma GCC diagnostic ignored "-Wignored-attributes"
|
||||
// g++ warns about local variables shadowing member functions, which is too strict
|
||||
#pragma GCC diagnostic ignored "-Wshadow"
|
||||
#if __GNUC__ == 4 && __GNUC_MINOR__ < 8
|
||||
// Until g++-4.7 there are warnings when comparing unsigned int vs 0, even in templated functions:
|
||||
#pragma GCC diagnostic ignored "-Wtype-limits"
|
||||
#endif
|
||||
#if __GNUC__>=6
|
||||
#pragma GCC diagnostic ignored "-Wignored-attributes"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -49,10 +49,11 @@
|
||||
#define EIGEN_USE_LAPACKE
|
||||
#endif
|
||||
|
||||
#if defined(EIGEN_USE_MKL_VML)
|
||||
#if defined(EIGEN_USE_MKL_VML) && !defined(EIGEN_USE_MKL)
|
||||
#define EIGEN_USE_MKL
|
||||
#endif
|
||||
|
||||
|
||||
#if defined EIGEN_USE_MKL
|
||||
# include <mkl.h>
|
||||
/*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/
|
||||
@@ -108,6 +109,10 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(EIGEN_USE_BLAS) && !defined(EIGEN_USE_MKL)
|
||||
#include "../../misc/blas.h"
|
||||
#endif
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
typedef std::complex<double> dcomplex;
|
||||
@@ -121,8 +126,5 @@ typedef int BlasIndex;
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#if defined(EIGEN_USE_BLAS)
|
||||
#include "../../misc/blas.h"
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_MKL_SUPPORT_H
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
#define EIGEN_WORLD_VERSION 3
|
||||
#define EIGEN_MAJOR_VERSION 3
|
||||
#define EIGEN_MINOR_VERSION 0
|
||||
#define EIGEN_MINOR_VERSION 7
|
||||
|
||||
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
|
||||
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
|
||||
@@ -80,8 +80,8 @@
|
||||
// 2015 14 1900
|
||||
// "15" 15 1900
|
||||
|
||||
/// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC
|
||||
#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC)
|
||||
/// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC or clang-cl
|
||||
#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC || EIGEN_COMP_LLVM || EIGEN_COMP_CLANG)
|
||||
#define EIGEN_COMP_MSVC_STRICT _MSC_VER
|
||||
#else
|
||||
#define EIGEN_COMP_MSVC_STRICT 0
|
||||
@@ -356,7 +356,7 @@
|
||||
#define EIGEN_MAX_CPP_VER 99
|
||||
#endif
|
||||
|
||||
#if EIGEN_MAX_CPP_VER>=11 && defined(__cplusplus) && (__cplusplus >= 201103L)
|
||||
#if EIGEN_MAX_CPP_VER>=11 && (defined(__cplusplus) && (__cplusplus >= 201103L) || EIGEN_COMP_MSVC >= 1900)
|
||||
#define EIGEN_HAS_CXX11 1
|
||||
#else
|
||||
#define EIGEN_HAS_CXX11 0
|
||||
@@ -399,7 +399,7 @@
|
||||
// Does the compiler support variadic templates?
|
||||
#ifndef EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
#if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
|
||||
&& ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) )
|
||||
&& (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_CUDACC_VER >= 80000) )
|
||||
// ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices:
|
||||
// this prevents nvcc from crashing when compiling Eigen on Tegra X1
|
||||
#define EIGEN_HAS_VARIADIC_TEMPLATES 1
|
||||
@@ -413,7 +413,7 @@
|
||||
|
||||
#ifdef __CUDACC__
|
||||
// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
|
||||
#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500))
|
||||
#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500))
|
||||
#define EIGEN_HAS_CONSTEXPR 1
|
||||
#endif
|
||||
#elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
|
||||
@@ -487,20 +487,23 @@
|
||||
// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC,
|
||||
// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
|
||||
// but GCC is still doing fine with just inline.
|
||||
#ifndef EIGEN_STRONG_INLINE
|
||||
#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
|
||||
#define EIGEN_STRONG_INLINE __forceinline
|
||||
#else
|
||||
#define EIGEN_STRONG_INLINE inline
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// EIGEN_ALWAYS_INLINE is the stronget, it has the effect of making the function inline and adding every possible
|
||||
// attribute to maximize inlining. This should only be used when really necessary: in particular,
|
||||
// it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times.
|
||||
// FIXME with the always_inline attribute,
|
||||
// gcc 3.4.x reports the following compilation error:
|
||||
// gcc 3.4.x and 4.1 reports the following compilation error:
|
||||
// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
|
||||
// : function body not available
|
||||
#if EIGEN_GNUC_AT_LEAST(4,0)
|
||||
// See also bug 1367
|
||||
#if EIGEN_GNUC_AT_LEAST(4,2)
|
||||
#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
|
||||
#else
|
||||
#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
|
||||
@@ -811,7 +814,8 @@ namespace Eigen {
|
||||
// just an empty macro !
|
||||
#define EIGEN_EMPTY
|
||||
|
||||
#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || __CUDACC_VER__) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
|
||||
#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0)
|
||||
// for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
|
||||
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
|
||||
using Base::operator =;
|
||||
#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
|
||||
@@ -985,7 +989,13 @@ namespace Eigen {
|
||||
# define EIGEN_NOEXCEPT
|
||||
# define EIGEN_NOEXCEPT_IF(x)
|
||||
# define EIGEN_NO_THROW throw()
|
||||
# define EIGEN_EXCEPTION_SPEC(X) throw(X)
|
||||
# if EIGEN_COMP_MSVC
|
||||
// MSVC does not support exception specifications (warning C4290),
|
||||
// and they are deprecated in c++11 anyway.
|
||||
# define EIGEN_EXCEPTION_SPEC(X) throw()
|
||||
# else
|
||||
# define EIGEN_EXCEPTION_SPEC(X) throw(X)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_MACROS_H
|
||||
|
||||
@@ -70,7 +70,7 @@ inline void throw_std_bad_alloc()
|
||||
throw std::bad_alloc();
|
||||
#else
|
||||
std::size_t huge = static_cast<std::size_t>(-1);
|
||||
new int[huge];
|
||||
::operator new(huge);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -150,7 +150,7 @@ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
|
||||
/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements.
|
||||
* On allocation error, the returned pointer is null, and std::bad_alloc is thrown.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
|
||||
EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
|
||||
{
|
||||
check_that_malloc_is_allowed();
|
||||
|
||||
@@ -185,7 +185,7 @@ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
|
||||
* \brief Reallocates an aligned block of memory.
|
||||
* \throws std::bad_alloc on allocation failure
|
||||
*/
|
||||
inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
|
||||
inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
|
||||
{
|
||||
EIGEN_UNUSED_VARIABLE(old_size);
|
||||
|
||||
@@ -209,12 +209,12 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
|
||||
/** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
|
||||
* On allocation error, the returned pointer is null, and a std::bad_alloc is thrown.
|
||||
*/
|
||||
template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(size_t size)
|
||||
template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size)
|
||||
{
|
||||
return aligned_malloc(size);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(size_t size)
|
||||
template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size)
|
||||
{
|
||||
check_that_malloc_is_allowed();
|
||||
|
||||
@@ -235,12 +235,12 @@ template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *p
|
||||
std::free(ptr);
|
||||
}
|
||||
|
||||
template<bool Align> inline void* conditional_aligned_realloc(void* ptr, size_t new_size, size_t old_size)
|
||||
template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
|
||||
{
|
||||
return aligned_realloc(ptr, new_size, old_size);
|
||||
}
|
||||
|
||||
template<> inline void* conditional_aligned_realloc<false>(void* ptr, size_t new_size, size_t)
|
||||
template<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t)
|
||||
{
|
||||
return std::realloc(ptr, new_size);
|
||||
}
|
||||
@@ -252,7 +252,7 @@ template<> inline void* conditional_aligned_realloc<false>(void* ptr, size_t new
|
||||
/** \internal Destructs the elements of an array.
|
||||
* The \a size parameters tells on how many objects to call the destructor of T.
|
||||
*/
|
||||
template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, size_t size)
|
||||
template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, std::size_t size)
|
||||
{
|
||||
// always destruct an array starting from the end.
|
||||
if(ptr)
|
||||
@@ -262,9 +262,9 @@ template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T
|
||||
/** \internal Constructs the elements of an array.
|
||||
* The \a size parameter tells on how many objects to call the constructor of T.
|
||||
*/
|
||||
template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, size_t size)
|
||||
template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, std::size_t size)
|
||||
{
|
||||
size_t i;
|
||||
std::size_t i;
|
||||
EIGEN_TRY
|
||||
{
|
||||
for (i = 0; i < size; ++i) ::new (ptr + i) T;
|
||||
@@ -283,9 +283,9 @@ template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *
|
||||
*****************************************************************************/
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size)
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size)
|
||||
{
|
||||
if(size > size_t(-1) / sizeof(T))
|
||||
if(size > std::size_t(-1) / sizeof(T))
|
||||
throw_std_bad_alloc();
|
||||
}
|
||||
|
||||
@@ -293,7 +293,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size)
|
||||
* On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown.
|
||||
* The default constructor of T is called.
|
||||
*/
|
||||
template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(size_t size)
|
||||
template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size)
|
||||
{
|
||||
check_size_for_overflow<T>(size);
|
||||
T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));
|
||||
@@ -309,7 +309,7 @@ template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(size_t size)
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(size_t size)
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size)
|
||||
{
|
||||
check_size_for_overflow<T>(size);
|
||||
T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
|
||||
@@ -328,7 +328,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
|
||||
/** \internal Deletes objects constructed with aligned_new
|
||||
* The \a size parameters tells on how many objects to call the destructor of T.
|
||||
*/
|
||||
template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, size_t size)
|
||||
template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size)
|
||||
{
|
||||
destruct_elements_of_array<T>(ptr, size);
|
||||
aligned_free(ptr);
|
||||
@@ -337,13 +337,13 @@ template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, size_t
|
||||
/** \internal Deletes objects constructed with conditional_aligned_new
|
||||
* The \a size parameters tells on how many objects to call the destructor of T.
|
||||
*/
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, size_t size)
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, std::size_t size)
|
||||
{
|
||||
destruct_elements_of_array<T>(ptr, size);
|
||||
conditional_aligned_free<Align>(ptr);
|
||||
}
|
||||
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size)
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size)
|
||||
{
|
||||
check_size_for_overflow<T>(new_size);
|
||||
check_size_for_overflow<T>(old_size);
|
||||
@@ -366,7 +366,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
|
||||
}
|
||||
|
||||
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(size_t size)
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size)
|
||||
{
|
||||
if(size==0)
|
||||
return 0; // short-cut. Also fixes Bug 884
|
||||
@@ -387,7 +387,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, size_t new_size, size_t old_size)
|
||||
template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size)
|
||||
{
|
||||
check_size_for_overflow<T>(new_size);
|
||||
check_size_for_overflow<T>(old_size);
|
||||
@@ -409,7 +409,7 @@ template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, size_t size)
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size)
|
||||
{
|
||||
if(NumTraits<T>::RequireInitialization)
|
||||
destruct_elements_of_array<T>(ptr, size);
|
||||
@@ -493,7 +493,7 @@ template<typename T> struct smart_copy_helper<T,true> {
|
||||
IntPtr size = IntPtr(end)-IntPtr(start);
|
||||
if(size==0) return;
|
||||
eigen_internal_assert(start!=0 && end!=0 && target!=0);
|
||||
memcpy(target, start, size);
|
||||
std::memcpy(target, start, size);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -523,7 +523,7 @@ template<typename T> struct smart_memmove_helper<T,true> {
|
||||
template<typename T> struct smart_memmove_helper<T,false> {
|
||||
static inline void run(const T* start, const T* end, T* target)
|
||||
{
|
||||
if (uintptr_t(target) < uintptr_t(start))
|
||||
if (UIntPtr(target) < UIntPtr(start))
|
||||
{
|
||||
std::copy(start, end, target);
|
||||
}
|
||||
@@ -561,7 +561,7 @@ template<typename T> class aligned_stack_memory_handler : noncopyable
|
||||
* In this case, the buffer elements will also be destructed when this handler will be destructed.
|
||||
* Finally, if \a dealloc is true, then the pointer \a ptr is freed.
|
||||
**/
|
||||
aligned_stack_memory_handler(T* ptr, size_t size, bool dealloc)
|
||||
aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
|
||||
: m_ptr(ptr), m_size(size), m_deallocate(dealloc)
|
||||
{
|
||||
if(NumTraits<T>::RequireInitialization && m_ptr)
|
||||
@@ -576,7 +576,7 @@ template<typename T> class aligned_stack_memory_handler : noncopyable
|
||||
}
|
||||
protected:
|
||||
T* m_ptr;
|
||||
size_t m_size;
|
||||
std::size_t m_size;
|
||||
bool m_deallocate;
|
||||
};
|
||||
|
||||
@@ -655,15 +655,15 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
|
||||
|
||||
#if EIGEN_MAX_ALIGN_BYTES!=0
|
||||
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
|
||||
void* operator new(size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
|
||||
void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
|
||||
EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
|
||||
EIGEN_CATCH (...) { return 0; } \
|
||||
}
|
||||
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
|
||||
void *operator new(size_t size) { \
|
||||
void *operator new(std::size_t size) { \
|
||||
return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
|
||||
} \
|
||||
void *operator new[](size_t size) { \
|
||||
void *operator new[](std::size_t size) { \
|
||||
return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
|
||||
} \
|
||||
void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
|
||||
@@ -673,8 +673,8 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
|
||||
/* in-place new and delete. since (at least afaik) there is no actual */ \
|
||||
/* memory allocated we can safely let the default implementation handle */ \
|
||||
/* this particular case. */ \
|
||||
static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \
|
||||
static void *operator new[](size_t size, void* ptr) { return ::operator new[](size,ptr); } \
|
||||
static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
|
||||
static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
|
||||
void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
|
||||
void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
|
||||
/* nothrow-new (returns zero instead of std::bad_alloc) */ \
|
||||
@@ -696,7 +696,15 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
|
||||
/** \class aligned_allocator
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief STL compatible allocator to use with with 16 byte aligned types
|
||||
* \brief STL compatible allocator to use with types requiring a non standrad alignment.
|
||||
*
|
||||
* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
|
||||
* By default, it will thus provide at least 16 bytes alignment and more in following cases:
|
||||
* - 32 bytes alignment if AVX is enabled.
|
||||
* - 64 bytes alignment if AVX512 is enabled.
|
||||
*
|
||||
* This can be controled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
|
||||
* \link TopicPreprocessorDirectivesPerformance there \endlink.
|
||||
*
|
||||
* Example:
|
||||
* \code
|
||||
@@ -713,7 +721,7 @@ template<class T>
|
||||
class aligned_allocator : public std::allocator<T>
|
||||
{
|
||||
public:
|
||||
typedef size_t size_type;
|
||||
typedef std::size_t size_type;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
typedef T* pointer;
|
||||
typedef const T* const_pointer;
|
||||
@@ -739,7 +747,15 @@ public:
|
||||
pointer allocate(size_type num, const void* /*hint*/ = 0)
|
||||
{
|
||||
internal::check_size_for_overflow<T>(num);
|
||||
return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
|
||||
size_type size = num * sizeof(T);
|
||||
#if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)
|
||||
// workaround gcc bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
|
||||
// It triggered eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
|
||||
if(size>=std::size_t((std::numeric_limits<std::ptrdiff_t>::max)()))
|
||||
return 0;
|
||||
else
|
||||
#endif
|
||||
return static_cast<pointer>( internal::aligned_malloc(size) );
|
||||
}
|
||||
|
||||
void deallocate(pointer p, size_type /*num*/)
|
||||
|
||||
@@ -109,6 +109,28 @@ template<> struct is_integral<unsigned int> { enum { value = true }; };
|
||||
template<> struct is_integral<signed long> { enum { value = true }; };
|
||||
template<> struct is_integral<unsigned long> { enum { value = true }; };
|
||||
|
||||
#if EIGEN_HAS_CXX11
|
||||
using std::make_unsigned;
|
||||
#else
|
||||
// TODO: Possibly improve this implementation of make_unsigned.
|
||||
// It is currently used only by
|
||||
// template<typename Scalar> struct random_default_impl<Scalar, false, true>.
|
||||
template<typename> struct make_unsigned;
|
||||
template<> struct make_unsigned<char> { typedef unsigned char type; };
|
||||
template<> struct make_unsigned<signed char> { typedef unsigned char type; };
|
||||
template<> struct make_unsigned<unsigned char> { typedef unsigned char type; };
|
||||
template<> struct make_unsigned<signed short> { typedef unsigned short type; };
|
||||
template<> struct make_unsigned<unsigned short> { typedef unsigned short type; };
|
||||
template<> struct make_unsigned<signed int> { typedef unsigned int type; };
|
||||
template<> struct make_unsigned<unsigned int> { typedef unsigned int type; };
|
||||
template<> struct make_unsigned<signed long> { typedef unsigned long type; };
|
||||
template<> struct make_unsigned<unsigned long> { typedef unsigned long type; };
|
||||
#if EIGEN_COMP_MSVC
|
||||
template<> struct make_unsigned<signed __int64> { typedef unsigned __int64 type; };
|
||||
template<> struct make_unsigned<unsigned __int64> { typedef unsigned __int64 type; };
|
||||
#endif
|
||||
#endif
|
||||
|
||||
template <typename T> struct add_const { typedef const T type; };
|
||||
template <typename T> struct add_const<T&> { typedef T& type; };
|
||||
|
||||
@@ -381,12 +403,12 @@ struct has_ReturnType
|
||||
enum { value = sizeof(testFunctor<T>(0)) == sizeof(meta_yes) };
|
||||
};
|
||||
|
||||
template<typename T> const T& return_ref();
|
||||
template<typename T> const T* return_ptr();
|
||||
|
||||
template <typename T, typename IndexType=Index>
|
||||
struct has_nullary_operator
|
||||
{
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()())>0)>::type * = 0);
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()())>0)>::type * = 0);
|
||||
static meta_no testFunctor(...);
|
||||
|
||||
enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
|
||||
@@ -395,7 +417,7 @@ struct has_nullary_operator
|
||||
template <typename T, typename IndexType=Index>
|
||||
struct has_unary_operator
|
||||
{
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()(IndexType(0)))>0)>::type * = 0);
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0)))>0)>::type * = 0);
|
||||
static meta_no testFunctor(...);
|
||||
|
||||
enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
|
||||
@@ -404,7 +426,7 @@ struct has_unary_operator
|
||||
template <typename T, typename IndexType=Index>
|
||||
struct has_binary_operator
|
||||
{
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()(IndexType(0),IndexType(0)))>0)>::type * = 0);
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0),IndexType(0)))>0)>::type * = 0);
|
||||
static meta_no testFunctor(...);
|
||||
|
||||
enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
|
||||
@@ -485,6 +507,26 @@ T div_ceil(const T &a, const T &b)
|
||||
return (a+b-1) / b;
|
||||
}
|
||||
|
||||
// The aim of the following functions is to bypass -Wfloat-equal warnings
|
||||
// when we really want a strict equality comparison on floating points.
|
||||
template<typename X, typename Y> EIGEN_STRONG_INLINE
|
||||
bool equal_strict(const X& x,const Y& y) { return x == y; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE
|
||||
bool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE
|
||||
bool equal_strict(const double& x,const double& y) { return std::equal_to<double>()(x,y); }
|
||||
|
||||
template<typename X, typename Y> EIGEN_STRONG_INLINE
|
||||
bool not_equal_strict(const X& x,const Y& y) { return x != y; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE
|
||||
bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE
|
||||
bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); }
|
||||
|
||||
} // end namespace numext
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user