mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Compare commits
361 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3dc3a0ea2d | ||
|
|
79120a4c63 | ||
|
|
e0412f18fd | ||
|
|
40b0c43bda | ||
|
|
72f3e20e74 | ||
|
|
676a7a3271 | ||
|
|
f843239452 | ||
|
|
a4ab0c6b6a | ||
|
|
ef955ea8e5 | ||
|
|
3ec11d8f17 | ||
|
|
ec067ac5e3 | ||
|
|
316969d839 | ||
|
|
7a0a9581b5 | ||
|
|
8880be60fa | ||
|
|
e41713d52e | ||
|
|
b69e465d7a | ||
|
|
0db83fc571 | ||
|
|
1ac703f641 | ||
|
|
2c32368642 | ||
|
|
db40309e70 | ||
|
|
e36c1f7501 | ||
|
|
3aef5c1a2f | ||
|
|
ab6bb89980 | ||
|
|
983ace99d4 | ||
|
|
72fa6775e8 | ||
|
|
9f25cdf4f6 | ||
|
|
6e5edd68d3 | ||
|
|
e8978ffa99 | ||
|
|
c753fe7cc3 | ||
|
|
e59e345720 | ||
|
|
07c2244440 | ||
|
|
1865dccd58 | ||
|
|
f2e6ee9687 | ||
|
|
9219307e13 | ||
|
|
f2e8f96151 | ||
|
|
faf8af25ed | ||
|
|
106ba41c2a | ||
|
|
87939ea0dd | ||
|
|
e813640aa1 | ||
|
|
612b8f2749 | ||
|
|
ead8e1b796 | ||
|
|
3d4265f2d5 | ||
|
|
d66586ac90 | ||
|
|
44920624fb | ||
|
|
208058b9ad | ||
|
|
b4218b8473 | ||
|
|
3c2f0812f6 | ||
|
|
17bbd82f7d | ||
|
|
e1385337ff | ||
|
|
d367ecb475 | ||
|
|
c3b658b2c9 | ||
|
|
f9d655a8c8 | ||
|
|
ad3e4d1a49 | ||
|
|
222ed66f79 | ||
|
|
6bceebfabf | ||
|
|
2ca3eb8407 | ||
|
|
698205cddf | ||
|
|
2ecb33820f | ||
|
|
a0de6eb4ce | ||
|
|
7962ac1a58 | ||
|
|
9c97b053f3 | ||
|
|
f61b0d56f0 | ||
|
|
5087e016eb | ||
|
|
fa9f5d7170 | ||
|
|
6975534cb2 | ||
|
|
95c6d8db75 | ||
|
|
e0548e9ff3 | ||
|
|
c289ef20f3 | ||
|
|
b8cf157e8c | ||
|
|
b4d2b404b0 | ||
|
|
70fcaf9bd8 | ||
|
|
2f31c6b1d8 | ||
|
|
9e55467b4c | ||
|
|
35bf99c63e | ||
|
|
f9b8729597 | ||
|
|
4b2e7f26aa | ||
|
|
5202bc92e6 | ||
|
|
9d83411cc4 | ||
|
|
556c03a09d | ||
|
|
ce463b9fa4 | ||
|
|
477d1e8192 | ||
|
|
0eaff8fdf2 | ||
|
|
582c96691b | ||
|
|
0b22158d9f | ||
|
|
dafdb0d8a8 | ||
|
|
1d1686c62b | ||
|
|
ad95b924d0 | ||
|
|
9499684320 | ||
|
|
5b6a31626b | ||
|
|
bc3fee2d8e | ||
|
|
eaa9223277 | ||
|
|
c9ba1165e7 | ||
|
|
dd2d5d67ff | ||
|
|
404322b64f | ||
|
|
ce37bae2cd | ||
|
|
3900dbc341 | ||
|
|
5f586c2bd0 | ||
|
|
215f88a417 | ||
|
|
2257f40f4a | ||
|
|
9e0fa0ef6d | ||
|
|
0fddbf3dc7 | ||
|
|
eda635bd58 | ||
|
|
26197bb467 | ||
|
|
772e59d475 | ||
|
|
e8f83cbb5d | ||
|
|
dce584d799 | ||
|
|
0bcef9557d | ||
|
|
2b3c876b2a | ||
|
|
a05f6aad0e | ||
|
|
59187285e1 | ||
|
|
1dd074ea7e | ||
|
|
24fa7a01bd | ||
|
|
e236d3443c | ||
|
|
4ec8833220 | ||
|
|
dd3685cc6a | ||
|
|
487a6e6515 | ||
|
|
75f0b8aae3 | ||
|
|
23aca8a586 | ||
|
|
28bf2bf070 | ||
|
|
0164f4c682 | ||
|
|
bbff608a42 | ||
|
|
ea56d2ff2c | ||
|
|
a4c8701e9a | ||
|
|
a9bb9796e0 | ||
|
|
449883be74 | ||
|
|
0a08d4c60b | ||
|
|
4086187e49 | ||
|
|
91864f85d3 | ||
|
|
c3597106ab | ||
|
|
aed1d6597f | ||
|
|
b6f04a2dd4 | ||
|
|
a9aa3bcf50 | ||
|
|
32b8da66e3 | ||
|
|
eb94179ea3 | ||
|
|
52a7386aef | ||
|
|
8cada1d894 | ||
|
|
6e4a664c42 | ||
|
|
1cd1a96d56 | ||
|
|
86ab00cdcf | ||
|
|
65f09be8d2 | ||
|
|
400d756b82 | ||
|
|
9d31798a84 | ||
|
|
723ed92e0e | ||
|
|
0a7de0b273 | ||
|
|
d6b9bc1ccd | ||
|
|
0eff51e2ed | ||
|
|
1b7dd46d94 | ||
|
|
b2eb1bf3dc | ||
|
|
fe48c25682 | ||
|
|
0ba6da3470 | ||
|
|
a287140f72 | ||
|
|
4d89ec8a00 | ||
|
|
441760f239 | ||
|
|
664162fb8a | ||
|
|
aa3c761002 | ||
|
|
94f2cfc9c7 | ||
|
|
4a13d79df6 | ||
|
|
463176cc44 | ||
|
|
5aab97fba6 | ||
|
|
89abc6806d | ||
|
|
baf793ebaa | ||
|
|
b4ddafcfac | ||
|
|
1079967710 | ||
|
|
eeac81b8c0 | ||
|
|
e80bc2ddb0 | ||
|
|
db3903498d | ||
|
|
dcc14bee64 | ||
|
|
b88c1117d4 | ||
|
|
912cb3d660 | ||
|
|
1b345b0895 | ||
|
|
1b95717358 | ||
|
|
d57430dd73 | ||
|
|
73985ead27 | ||
|
|
436a111792 | ||
|
|
afc55b1885 | ||
|
|
a5c2d8a3cc | ||
|
|
f8bfe10613 | ||
|
|
fc7180cda8 | ||
|
|
4d226ab5b5 | ||
|
|
ad086b03e4 | ||
|
|
dad177be01 | ||
|
|
55b4fd1d40 | ||
|
|
a354c3ca59 | ||
|
|
d46a36cc84 | ||
|
|
0ebe3808ca | ||
|
|
47d1b4a609 | ||
|
|
ba05572dcb | ||
|
|
5c3995769c | ||
|
|
fbe672d599 | ||
|
|
ca0ba0d9a4 | ||
|
|
c80587c92b | ||
|
|
3f1d0cdc22 | ||
|
|
78e93ac1ad | ||
|
|
3e37166d0b | ||
|
|
0585b2965d | ||
|
|
e6e77ed08b | ||
|
|
b238f387b4 | ||
|
|
c8db17301e | ||
|
|
a07bb428df | ||
|
|
598de8b193 | ||
|
|
e44519744e | ||
|
|
0a6ae41555 | ||
|
|
b730952414 | ||
|
|
7a0e96b80d | ||
|
|
51af6ae971 | ||
|
|
0a9ad6fc72 | ||
|
|
d5f88e2357 | ||
|
|
0b4b0f11e8 | ||
|
|
306daa24a3 | ||
|
|
8471cf1996 | ||
|
|
b0c5bfdf78 | ||
|
|
2ebb314fa7 | ||
|
|
530f20c21a | ||
|
|
c3ce4f9ac0 | ||
|
|
7d64e6752c | ||
|
|
0a4c4d40b4 | ||
|
|
3ecb343dc3 | ||
|
|
6ed571744b | ||
|
|
97feea9d39 | ||
|
|
ca6a2a5248 | ||
|
|
5f2dd503ff | ||
|
|
1644bafe29 | ||
|
|
b15a5dc3f4 | ||
|
|
aad72f3c6d | ||
|
|
3e194a6a73 | ||
|
|
58146be99b | ||
|
|
13fc18d3a2 | ||
|
|
2634f9386c | ||
|
|
9e8f07d7b5 | ||
|
|
b027d7a8cf | ||
|
|
b11aab5fcc | ||
|
|
53c77061f0 | ||
|
|
e8e56c7642 | ||
|
|
40f62974b7 | ||
|
|
cf20b30d65 | ||
|
|
03b63e182c | ||
|
|
d3943cd50c | ||
|
|
8fb162fc85 | ||
|
|
e36cb91c99 | ||
|
|
2e188dd4d4 | ||
|
|
15380f9a87 | ||
|
|
692b30ca95 | ||
|
|
050c681bdd | ||
|
|
e742da8b28 | ||
|
|
524fa4c46f | ||
|
|
737e4152c3 | ||
|
|
d0ee2267d6 | ||
|
|
a94791b69a | ||
|
|
ac63d6891c | ||
|
|
7e4a6754b2 | ||
|
|
38b6048e14 | ||
|
|
e74612b9a0 | ||
|
|
78d2926508 | ||
|
|
2e2f48e30e | ||
|
|
f939c351cb | ||
|
|
091d373ee9 | ||
|
|
471075f7ad | ||
|
|
5c366fe1d7 | ||
|
|
86711497c4 | ||
|
|
47150af1c8 | ||
|
|
89e315152c | ||
|
|
7f0599b6eb | ||
|
|
5727e4d89c | ||
|
|
5266ff8966 | ||
|
|
5c68051cd7 | ||
|
|
4860727ac2 | ||
|
|
507b661106 | ||
|
|
a498ff7df6 | ||
|
|
8ba3c41fcf | ||
|
|
a7473d6d5a | ||
|
|
5e64cea896 | ||
|
|
33fba3f08d | ||
|
|
bfc264abe8 | ||
|
|
e2e9cdd169 | ||
|
|
d485d12c51 | ||
|
|
48c635e223 | ||
|
|
9f3276981c | ||
|
|
80b5133789 | ||
|
|
4131074818 | ||
|
|
cb5cd69872 | ||
|
|
78b569f685 | ||
|
|
9c2b6c049b | ||
|
|
6f3cd529af | ||
|
|
d7f9679a34 | ||
|
|
ae1385c7e4 | ||
|
|
73b0012945 | ||
|
|
c84084c0c0 | ||
|
|
4387433acf | ||
|
|
aad20d700d | ||
|
|
8b69d5d730 | ||
|
|
ed7a220b04 | ||
|
|
ceee1c008b | ||
|
|
698ff69450 | ||
|
|
7f67e6dfdb | ||
|
|
765615609d | ||
|
|
3ed67cb0bb | ||
|
|
6af5ac7e27 | ||
|
|
2f6d1607c8 | ||
|
|
881b90e984 | ||
|
|
616a7a1912 | ||
|
|
409e887d78 | ||
|
|
9d6d0dff8f | ||
|
|
8b84801f7f | ||
|
|
422530946f | ||
|
|
67b4f45836 | ||
|
|
27f3970453 | ||
|
|
3860a0bc8f | ||
|
|
33500050c3 | ||
|
|
27d7628f16 | ||
|
|
2bda1b0d93 | ||
|
|
dd602e62c8 | ||
|
|
f3a00dd2b5 | ||
|
|
892afb9416 | ||
|
|
779774f98c | ||
|
|
6565f8d60f | ||
|
|
48dfe98abd | ||
|
|
fe29157d02 | ||
|
|
f6ac51a054 | ||
|
|
00d4e65f00 | ||
|
|
86caba838d | ||
|
|
b9f7a17e47 | ||
|
|
1301d744f8 | ||
|
|
2a69290ddb | ||
|
|
3946768916 | ||
|
|
c771df6bc3 | ||
|
|
b91e021172 | ||
|
|
cb81975714 | ||
|
|
f899e08946 | ||
|
|
4c05fb03a3 | ||
|
|
577a07a86e | ||
|
|
3b8da4be5a | ||
|
|
2f28ccbea3 | ||
|
|
7a4bd337d9 | ||
|
|
07a247dcf4 | ||
|
|
fa5a8f055a | ||
|
|
ef3ac9d05a | ||
|
|
d7b75e8d86 | ||
|
|
5e89ded685 | ||
|
|
5f85662ad8 | ||
|
|
d37ee89ca8 | ||
|
|
8bfe739cd2 | ||
|
|
d6e596174d | ||
|
|
3ca1ae2bb7 | ||
|
|
23f69ab936 | ||
|
|
6c9cf117c1 | ||
|
|
d93b71a301 | ||
|
|
ef66f2887b | ||
|
|
85b6d82b49 | ||
|
|
c1a42c2d0d | ||
|
|
0366478df8 | ||
|
|
3cfd16f3af | ||
|
|
67f44365ea | ||
|
|
a282eb1363 | ||
|
|
7832485575 | ||
|
|
99093c0fe0 | ||
|
|
9f9d8d2f62 | ||
|
|
b74887d5f2 | ||
|
|
6ffb208c77 | ||
|
|
994d1c60b9 | ||
|
|
b8861b0c25 | ||
|
|
9a415fb1e2 |
101
CMakeLists.txt
101
CMakeLists.txt
@@ -133,7 +133,6 @@ if(NOT MSVC)
|
||||
if(COMPILER_SUPPORT_WERROR)
|
||||
set(CMAKE_REQUIRED_FLAGS "-Werror")
|
||||
endif()
|
||||
|
||||
ei_add_cxx_compiler_flag("-pedantic")
|
||||
ei_add_cxx_compiler_flag("-Wall")
|
||||
ei_add_cxx_compiler_flag("-Wextra")
|
||||
@@ -231,6 +230,12 @@ if(NOT MSVC)
|
||||
message(STATUS "Enabling FMA in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_AVX512 "Enable/Disable AVX512 in tests/examples" OFF)
|
||||
if(EIGEN_TEST_AVX512)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -fabi-version=6 -DEIGEN_ENABLE_AVX512")
|
||||
message(STATUS "Enabling AVX512 in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_F16C "Enable/Disable F16C in tests/examples" OFF)
|
||||
if(EIGEN_TEST_F16C)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c")
|
||||
@@ -375,7 +380,7 @@ else()
|
||||
)
|
||||
endif()
|
||||
set(CMAKEPACKAGE_INSTALL_DIR
|
||||
"${CMAKE_INSTALL_LIBDIR}/cmake/eigen3"
|
||||
"${CMAKE_INSTALL_DATADIR}/eigen3/cmake"
|
||||
CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen3Config.cmake is installed"
|
||||
)
|
||||
set(PKGCONFIG_INSTALL_DIR
|
||||
@@ -431,6 +436,13 @@ else()
|
||||
add_subdirectory(lapack EXCLUDE_FROM_ALL)
|
||||
endif()
|
||||
|
||||
# add SYCL
|
||||
option(EIGEN_TEST_SYCL "Add Sycl support." OFF)
|
||||
if(EIGEN_TEST_SYCL)
|
||||
set (CMAKE_MODULE_PATH "${CMAKE_ROOT}/Modules" "cmake/Modules/" "${CMAKE_MODULE_PATH}")
|
||||
include(FindComputeCpp)
|
||||
endif()
|
||||
|
||||
add_subdirectory(unsupported)
|
||||
|
||||
add_subdirectory(demos EXCLUDE_FROM_ALL)
|
||||
@@ -495,18 +507,89 @@ set ( EIGEN_VERSION_MINOR ${EIGEN_MAJOR_VERSION} )
|
||||
set ( EIGEN_VERSION_PATCH ${EIGEN_MINOR_VERSION} )
|
||||
set ( EIGEN_DEFINITIONS "")
|
||||
set ( EIGEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}" )
|
||||
set ( EIGEN_INCLUDE_DIRS ${EIGEN_INCLUDE_DIR} )
|
||||
set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} )
|
||||
|
||||
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
@ONLY ESCAPE_QUOTES
|
||||
)
|
||||
# Interface libraries require at least CMake 3.0
|
||||
if (NOT CMAKE_VERSION VERSION_LESS 3.0)
|
||||
include (CMakePackageConfigHelpers)
|
||||
|
||||
# Imported target support
|
||||
add_library (eigen INTERFACE)
|
||||
|
||||
target_compile_definitions (eigen INTERFACE ${EIGEN_DEFINITIONS})
|
||||
target_include_directories (eigen INTERFACE
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
|
||||
$<INSTALL_INTERFACE:${INCLUDE_INSTALL_DIR}>
|
||||
)
|
||||
|
||||
# Export as title case Eigen
|
||||
set_target_properties (eigen PROPERTIES EXPORT_NAME Eigen)
|
||||
|
||||
install (TARGETS eigen EXPORT Eigen3Targets)
|
||||
|
||||
configure_package_config_file (
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
PATH_VARS EIGEN_INCLUDE_DIR EIGEN_ROOT_DIR
|
||||
INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
|
||||
NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components
|
||||
)
|
||||
# Remove CMAKE_SIZEOF_VOID_P from Eigen3ConfigVersion.cmake since Eigen does
|
||||
# not depend on architecture specific settings or libraries. More
|
||||
# specifically, an Eigen3Config.cmake generated from a 64 bit target can be
|
||||
# used for 32 bit targets as well (and vice versa).
|
||||
set (_Eigen3_CMAKE_SIZEOF_VOID_P ${CMAKE_SIZEOF_VOID_P})
|
||||
unset (CMAKE_SIZEOF_VOID_P)
|
||||
write_basic_package_version_file (Eigen3ConfigVersion.cmake
|
||||
VERSION ${EIGEN_VERSION_NUMBER}
|
||||
COMPATIBILITY SameMajorVersion)
|
||||
set (CMAKE_SIZEOF_VOID_P ${_Eigen3_CMAKE_SIZEOF_VOID_P})
|
||||
|
||||
# The Eigen target will be located in the Eigen3 namespace. Other CMake
|
||||
# targets can refer to it using Eigen3::Eigen.
|
||||
export (TARGETS eigen NAMESPACE Eigen3:: FILE Eigen3Targets.cmake)
|
||||
# Export Eigen3 package to CMake registry such that it can be easily found by
|
||||
# CMake even if it has not been installed to a standard directory.
|
||||
export (PACKAGE Eigen3)
|
||||
|
||||
install (EXPORT Eigen3Targets NAMESPACE Eigen3:: DESTINATION ${CMAKEPACKAGE_INSTALL_DIR})
|
||||
|
||||
else (NOT CMAKE_VERSION VERSION_LESS 3.0)
|
||||
# Fallback to legacy Eigen3Config.cmake without the imported target
|
||||
|
||||
# If CMakePackageConfigHelpers module is available (CMake >= 2.8.8)
|
||||
# create a relocatable Config file, otherwise leave the hardcoded paths
|
||||
include(CMakePackageConfigHelpers OPTIONAL RESULT_VARIABLE CPCH_PATH)
|
||||
|
||||
if(CPCH_PATH)
|
||||
configure_package_config_file (
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigLegacy.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
PATH_VARS EIGEN_INCLUDE_DIR EIGEN_ROOT_DIR
|
||||
INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
|
||||
NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components
|
||||
)
|
||||
else()
|
||||
# The PACKAGE_* variables are defined by the configure_package_config_file
|
||||
# but without it we define them manually to the hardcoded paths
|
||||
set(PACKAGE_INIT "")
|
||||
set(PACKAGE_EIGEN_INCLUDE_DIR ${EIGEN_INCLUDE_DIR})
|
||||
set(PACKAGE_EIGEN_ROOT_DIR ${EIGEN_ROOT_DIR})
|
||||
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigLegacy.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
@ONLY ESCAPE_QUOTES )
|
||||
endif()
|
||||
|
||||
write_basic_package_version_file( Eigen3ConfigVersion.cmake
|
||||
VERSION ${EIGEN_VERSION_NUMBER}
|
||||
COMPATIBILITY SameMajorVersion )
|
||||
|
||||
endif (NOT CMAKE_VERSION VERSION_LESS 3.0)
|
||||
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
|
||||
DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
|
||||
)
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Eigen3ConfigVersion.cmake
|
||||
DESTINATION ${CMAKEPACKAGE_INSTALL_DIR} )
|
||||
|
||||
# Add uninstall target
|
||||
add_custom_target ( uninstall
|
||||
|
||||
@@ -4,14 +4,10 @@
|
||||
## # The following are required to uses Dart and the Cdash dashboard
|
||||
## ENABLE_TESTING()
|
||||
## INCLUDE(CTest)
|
||||
set(CTEST_PROJECT_NAME "Eigen")
|
||||
set(CTEST_PROJECT_NAME "Eigen3.3")
|
||||
set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC")
|
||||
|
||||
set(CTEST_DROP_METHOD "http")
|
||||
set(CTEST_DROP_SITE "manao.inria.fr")
|
||||
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen")
|
||||
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen3.3")
|
||||
set(CTEST_DROP_SITE_CDASH TRUE)
|
||||
set(CTEST_PROJECT_SUBPROJECTS
|
||||
Official
|
||||
Unsupported
|
||||
)
|
||||
|
||||
60
Eigen/Core
60
Eigen/Core
@@ -14,9 +14,9 @@
|
||||
// first thing Eigen does: stop the compiler from committing suicide
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
// Handle NVCC/CUDA
|
||||
#ifdef __CUDACC__
|
||||
// Do not try asserts on CUDA!
|
||||
// Handle NVCC/CUDA/SYCL
|
||||
#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
|
||||
// Do not try asserts on CUDA and SYCL!
|
||||
#ifndef EIGEN_NO_DEBUG
|
||||
#define EIGEN_NO_DEBUG
|
||||
#endif
|
||||
@@ -25,17 +25,24 @@
|
||||
#undef EIGEN_INTERNAL_DEBUGGING
|
||||
#endif
|
||||
|
||||
// Do not try to vectorize on CUDA!
|
||||
#ifndef EIGEN_DONT_VECTORIZE
|
||||
#define EIGEN_DONT_VECTORIZE
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_EXCEPTIONS
|
||||
#undef EIGEN_EXCEPTIONS
|
||||
#endif
|
||||
|
||||
// All functions callable from CUDA code must be qualified with __device__
|
||||
#define EIGEN_DEVICE_FUNC __host__ __device__
|
||||
#ifdef __CUDACC__
|
||||
// Do not try to vectorize on CUDA and SYCL!
|
||||
#ifndef EIGEN_DONT_VECTORIZE
|
||||
#define EIGEN_DONT_VECTORIZE
|
||||
#endif
|
||||
|
||||
#define EIGEN_DEVICE_FUNC __host__ __device__
|
||||
// We need math_functions.hpp to ensure that that EIGEN_USING_STD_MATH macro
|
||||
// works properly on the device side
|
||||
#include <math_functions.hpp>
|
||||
#else
|
||||
#define EIGEN_DEVICE_FUNC
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define EIGEN_DEVICE_FUNC
|
||||
@@ -51,7 +58,7 @@
|
||||
#define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
|
||||
#endif
|
||||
|
||||
#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS)
|
||||
#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL)
|
||||
#define EIGEN_EXCEPTIONS
|
||||
#endif
|
||||
|
||||
@@ -140,6 +147,15 @@
|
||||
#ifdef __FMA__
|
||||
#define EIGEN_VECTORIZE_FMA
|
||||
#endif
|
||||
#if defined(__AVX512F__) && defined(EIGEN_ENABLE_AVX512)
|
||||
#define EIGEN_VECTORIZE_AVX512
|
||||
#define EIGEN_VECTORIZE_AVX2
|
||||
#define EIGEN_VECTORIZE_AVX
|
||||
#define EIGEN_VECTORIZE_FMA
|
||||
#ifdef __AVX512DQ__
|
||||
#define EIGEN_VECTORIZE_AVX512DQ
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// include files
|
||||
|
||||
@@ -171,7 +187,7 @@
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_2
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
#ifdef EIGEN_VECTORIZE_AVX
|
||||
#if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#endif
|
||||
@@ -271,7 +287,9 @@
|
||||
namespace Eigen {
|
||||
|
||||
inline static const char *SimdInstructionSetsInUse(void) {
|
||||
#if defined(EIGEN_VECTORIZE_AVX)
|
||||
#if defined(EIGEN_VECTORIZE_AVX512)
|
||||
return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
|
||||
#elif defined(EIGEN_VECTORIZE_AVX)
|
||||
return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
|
||||
#elif defined(EIGEN_VECTORIZE_SSE4_2)
|
||||
return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
|
||||
@@ -303,12 +321,16 @@ inline static const char *SimdInstructionSetsInUse(void) {
|
||||
#error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information
|
||||
#endif
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
// we use size_t frequently and we'll never remember to prepend it with std:: everytime just to
|
||||
// ensure QNX/QCC support
|
||||
using std::size_t;
|
||||
// gcc 4.6.0 wants std:: for ptrdiff_t
|
||||
using std::ptrdiff_t;
|
||||
|
||||
}
|
||||
|
||||
/** \defgroup Core_Module Core module
|
||||
* This is the main module of Eigen providing dense matrix and vector support
|
||||
* (both fixed and dynamic size) with all the features corresponding to a BLAS library
|
||||
@@ -331,7 +353,12 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/GenericPacketMath.h"
|
||||
#include "src/Core/MathFunctionsImpl.h"
|
||||
|
||||
#if defined EIGEN_VECTORIZE_AVX
|
||||
#if defined EIGEN_VECTORIZE_AVX512
|
||||
#include "src/Core/arch/SSE/PacketMath.h"
|
||||
#include "src/Core/arch/AVX/PacketMath.h"
|
||||
#include "src/Core/arch/AVX512/PacketMath.h"
|
||||
#include "src/Core/arch/AVX512/MathFunctions.h"
|
||||
#elif defined EIGEN_VECTORIZE_AVX
|
||||
// Use AVX for floats and doubles, SSE for integers
|
||||
#include "src/Core/arch/SSE/PacketMath.h"
|
||||
#include "src/Core/arch/SSE/Complex.h"
|
||||
@@ -359,7 +386,6 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/arch/ZVector/Complex.h"
|
||||
#endif
|
||||
|
||||
#include "src/Core/arch/CUDA/Complex.h"
|
||||
// Half float support
|
||||
#include "src/Core/arch/CUDA/Half.h"
|
||||
#include "src/Core/arch/CUDA/PacketMathHalf.h"
|
||||
@@ -379,6 +405,11 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/functors/StlFunctors.h"
|
||||
#include "src/Core/functors/AssignmentFunctors.h"
|
||||
|
||||
// Specialized functors to enable the processing of complex numbers
|
||||
// on CUDA devices
|
||||
#include "src/Core/arch/CUDA/Complex.h"
|
||||
|
||||
#include "src/Core/IO.h"
|
||||
#include "src/Core/DenseCoeffsBase.h"
|
||||
#include "src/Core/DenseBase.h"
|
||||
#include "src/Core/MatrixBase.h"
|
||||
@@ -426,7 +457,6 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/Redux.h"
|
||||
#include "src/Core/Visitor.h"
|
||||
#include "src/Core/Fuzzy.h"
|
||||
#include "src/Core/IO.h"
|
||||
#include "src/Core/Swap.h"
|
||||
#include "src/Core/CommaInitializer.h"
|
||||
#include "src/Core/GeneralProduct.h"
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
#include "src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
void *qMalloc(size_t size)
|
||||
void *qMalloc(std::size_t size)
|
||||
{
|
||||
return Eigen::internal::aligned_malloc(size);
|
||||
}
|
||||
@@ -24,7 +24,7 @@ void qFree(void *ptr)
|
||||
Eigen::internal::aligned_free(ptr);
|
||||
}
|
||||
|
||||
void *qRealloc(void *ptr, size_t size)
|
||||
void *qRealloc(void *ptr, std::size_t size)
|
||||
{
|
||||
void* newPtr = Eigen::internal::aligned_malloc(size);
|
||||
memcpy(newPtr, ptr, size);
|
||||
|
||||
@@ -25,7 +25,9 @@
|
||||
|
||||
#include "SparseCore"
|
||||
#include "OrderingMethods"
|
||||
#ifndef EIGEN_MPL2_ONLY
|
||||
#include "SparseCholesky"
|
||||
#endif
|
||||
#include "SparseLU"
|
||||
#include "SparseQR"
|
||||
#include "IterativeLinearSolvers"
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
#include "Core"
|
||||
#include <deque>
|
||||
|
||||
#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */
|
||||
#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */
|
||||
|
||||
#define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...)
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
#include "Core"
|
||||
#include <list>
|
||||
|
||||
#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */
|
||||
#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */
|
||||
|
||||
#define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...)
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
#include "Core"
|
||||
#include <vector>
|
||||
|
||||
#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */
|
||||
#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */
|
||||
|
||||
#define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...)
|
||||
|
||||
|
||||
@@ -351,7 +351,7 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
|
||||
Index ret;
|
||||
if((ret=unblocked(A11))>=0) return k+ret;
|
||||
if(rs>0) A11.adjoint().template triangularView<Upper>().template solveInPlace<OnTheRight>(A21);
|
||||
if(rs>0) A22.template selfadjointView<Lower>().rankUpdate(A21,-1); // bottleneck
|
||||
if(rs>0) A22.template selfadjointView<Lower>().rankUpdate(A21,typename NumTraits<RealScalar>::Literal(-1)); // bottleneck
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -14,34 +14,40 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Scalar, typename CholmodType>
|
||||
void cholmod_configure_matrix(CholmodType& mat)
|
||||
{
|
||||
if (internal::is_same<Scalar,float>::value)
|
||||
{
|
||||
mat.xtype = CHOLMOD_REAL;
|
||||
mat.dtype = CHOLMOD_SINGLE;
|
||||
}
|
||||
else if (internal::is_same<Scalar,double>::value)
|
||||
{
|
||||
template<typename Scalar> struct cholmod_configure_matrix;
|
||||
|
||||
template<> struct cholmod_configure_matrix<double> {
|
||||
template<typename CholmodType>
|
||||
static void run(CholmodType& mat) {
|
||||
mat.xtype = CHOLMOD_REAL;
|
||||
mat.dtype = CHOLMOD_DOUBLE;
|
||||
}
|
||||
else if (internal::is_same<Scalar,std::complex<float> >::value)
|
||||
{
|
||||
mat.xtype = CHOLMOD_COMPLEX;
|
||||
mat.dtype = CHOLMOD_SINGLE;
|
||||
}
|
||||
else if (internal::is_same<Scalar,std::complex<double> >::value)
|
||||
{
|
||||
};
|
||||
|
||||
template<> struct cholmod_configure_matrix<std::complex<double> > {
|
||||
template<typename CholmodType>
|
||||
static void run(CholmodType& mat) {
|
||||
mat.xtype = CHOLMOD_COMPLEX;
|
||||
mat.dtype = CHOLMOD_DOUBLE;
|
||||
}
|
||||
else
|
||||
{
|
||||
eigen_assert(false && "Scalar type not supported by CHOLMOD");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Other scalar types are not yet suppotred by Cholmod
|
||||
// template<> struct cholmod_configure_matrix<float> {
|
||||
// template<typename CholmodType>
|
||||
// static void run(CholmodType& mat) {
|
||||
// mat.xtype = CHOLMOD_REAL;
|
||||
// mat.dtype = CHOLMOD_SINGLE;
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// template<> struct cholmod_configure_matrix<std::complex<float> > {
|
||||
// template<typename CholmodType>
|
||||
// static void run(CholmodType& mat) {
|
||||
// mat.xtype = CHOLMOD_COMPLEX;
|
||||
// mat.dtype = CHOLMOD_SINGLE;
|
||||
// }
|
||||
// };
|
||||
|
||||
} // namespace internal
|
||||
|
||||
@@ -49,11 +55,11 @@ void cholmod_configure_matrix(CholmodType& mat)
|
||||
* Note that the data are shared.
|
||||
*/
|
||||
template<typename _Scalar, int _Options, typename _StorageIndex>
|
||||
cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
|
||||
cholmod_sparse viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_StorageIndex> > mat)
|
||||
{
|
||||
cholmod_sparse res;
|
||||
res.nzmax = mat.nonZeros();
|
||||
res.nrow = mat.rows();;
|
||||
res.nrow = mat.rows();
|
||||
res.ncol = mat.cols();
|
||||
res.p = mat.outerIndexPtr();
|
||||
res.i = mat.innerIndexPtr();
|
||||
@@ -88,7 +94,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
|
||||
}
|
||||
|
||||
// setup res.xtype
|
||||
internal::cholmod_configure_matrix<_Scalar>(res);
|
||||
internal::cholmod_configure_matrix<_Scalar>::run(res);
|
||||
|
||||
res.stype = 0;
|
||||
|
||||
@@ -98,7 +104,14 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
|
||||
template<typename _Scalar, int _Options, typename _Index>
|
||||
const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& mat)
|
||||
{
|
||||
cholmod_sparse res = viewAsCholmod(mat.const_cast_derived());
|
||||
cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.const_cast_derived()));
|
||||
return res;
|
||||
}
|
||||
|
||||
template<typename _Scalar, int _Options, typename _Index>
|
||||
const cholmod_sparse viewAsCholmod(const SparseVector<_Scalar,_Options,_Index>& mat)
|
||||
{
|
||||
cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.const_cast_derived()));
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -107,7 +120,7 @@ const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>&
|
||||
template<typename _Scalar, int _Options, typename _Index, unsigned int UpLo>
|
||||
cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<const SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat)
|
||||
{
|
||||
cholmod_sparse res = viewAsCholmod(mat.matrix().const_cast_derived());
|
||||
cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.matrix().const_cast_derived()));
|
||||
|
||||
if(UpLo==Upper) res.stype = 1;
|
||||
if(UpLo==Lower) res.stype = -1;
|
||||
@@ -131,7 +144,7 @@ cholmod_dense viewAsCholmod(MatrixBase<Derived>& mat)
|
||||
res.x = (void*)(mat.derived().data());
|
||||
res.z = 0;
|
||||
|
||||
internal::cholmod_configure_matrix<Scalar>(res);
|
||||
internal::cholmod_configure_matrix<Scalar>::run(res);
|
||||
|
||||
return res;
|
||||
}
|
||||
@@ -180,14 +193,16 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
CholmodBase()
|
||||
: m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
|
||||
{
|
||||
m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);
|
||||
m_shiftOffset[0] = m_shiftOffset[1] = 0.0;
|
||||
cholmod_start(&m_cholmod);
|
||||
}
|
||||
|
||||
explicit CholmodBase(const MatrixType& matrix)
|
||||
: m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
|
||||
{
|
||||
m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);
|
||||
m_shiftOffset[0] = m_shiftOffset[1] = 0.0;
|
||||
cholmod_start(&m_cholmod);
|
||||
compute(matrix);
|
||||
}
|
||||
@@ -254,7 +269,7 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
|
||||
cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>());
|
||||
cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod);
|
||||
|
||||
|
||||
// If the factorization failed, minor is the column at which it did. On success minor == n.
|
||||
this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue);
|
||||
m_factorizationIsOk = true;
|
||||
@@ -290,8 +305,8 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
template<typename RhsScalar, int RhsOptions, typename RhsIndex, typename DestScalar, int DestOptions, typename DestIndex>
|
||||
void _solve_impl(const SparseMatrix<RhsScalar,RhsOptions,RhsIndex> &b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
|
||||
template<typename RhsDerived, typename DestDerived>
|
||||
void _solve_impl(const SparseMatrixBase<RhsDerived> &b, SparseMatrixBase<DestDerived> &dest) const
|
||||
{
|
||||
eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
|
||||
const Index size = m_cholmodFactor->n;
|
||||
@@ -299,7 +314,8 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
eigen_assert(size==b.rows());
|
||||
|
||||
// note: cs stands for Cholmod Sparse
|
||||
cholmod_sparse b_cs = viewAsCholmod(b);
|
||||
Ref<SparseMatrix<typename RhsDerived::Scalar,ColMajor,typename RhsDerived::StorageIndex> > b_ref(b.const_cast_derived());
|
||||
cholmod_sparse b_cs = viewAsCholmod(b_ref);
|
||||
cholmod_sparse* x_cs = cholmod_spsolve(CHOLMOD_A, m_cholmodFactor, &b_cs, &m_cholmod);
|
||||
if(!x_cs)
|
||||
{
|
||||
@@ -307,7 +323,7 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
return;
|
||||
}
|
||||
// TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
|
||||
dest = viewAsEigen<DestScalar,DestOptions,DestIndex>(*x_cs);
|
||||
dest.derived() = viewAsEigen<typename DestDerived::Scalar,ColMajor,typename DestDerived::StorageIndex>(*x_cs);
|
||||
cholmod_free_sparse(&x_cs, &m_cholmod);
|
||||
}
|
||||
#endif // EIGEN_PARSED_BY_DOXYGEN
|
||||
@@ -324,7 +340,7 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
*/
|
||||
Derived& setShift(const RealScalar& offset)
|
||||
{
|
||||
m_shiftOffset[0] = offset;
|
||||
m_shiftOffset[0] = double(offset);
|
||||
return derived();
|
||||
}
|
||||
|
||||
@@ -386,7 +402,7 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
protected:
|
||||
mutable cholmod_common m_cholmod;
|
||||
cholmod_factor* m_cholmodFactor;
|
||||
RealScalar m_shiftOffset[2];
|
||||
double m_shiftOffset[2];
|
||||
mutable ComputationInfo m_info;
|
||||
int m_factorizationIsOk;
|
||||
int m_analysisIsOk;
|
||||
@@ -410,6 +426,8 @@ class CholmodBase : public SparseSolverBase<Derived>
|
||||
*
|
||||
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
|
||||
*
|
||||
* \warning Only double precision real and complex scalar types are supported by Cholmod.
|
||||
*
|
||||
* \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLLT
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo = Lower>
|
||||
@@ -459,6 +477,8 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
|
||||
*
|
||||
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
|
||||
*
|
||||
* \warning Only double precision real and complex scalar types are supported by Cholmod.
|
||||
*
|
||||
* \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLDLT
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo = Lower>
|
||||
@@ -506,6 +526,8 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
|
||||
*
|
||||
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
|
||||
*
|
||||
* \warning Only double precision real and complex scalar types are supported by Cholmod.
|
||||
*
|
||||
* \sa \ref TutorialSparseSolverConcept
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo = Lower>
|
||||
@@ -555,6 +577,8 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
|
||||
*
|
||||
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
|
||||
*
|
||||
* \warning Only double precision real and complex scalar types are supported by Cholmod.
|
||||
*
|
||||
* \sa \ref TutorialSparseSolverConcept
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo = Lower>
|
||||
|
||||
@@ -231,10 +231,16 @@ class Array
|
||||
: Base(other)
|
||||
{ }
|
||||
|
||||
private:
|
||||
struct PrivateType {};
|
||||
public:
|
||||
|
||||
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other)
|
||||
EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other,
|
||||
typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value,
|
||||
PrivateType>::type = PrivateType())
|
||||
: Base(other.derived())
|
||||
{ }
|
||||
|
||||
|
||||
@@ -175,7 +175,7 @@ template<typename Derived> class ArrayBase
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
@@ -188,7 +188,7 @@ ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
@@ -201,7 +201,7 @@ ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
@@ -214,7 +214,7 @@ ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
|
||||
@@ -32,7 +32,8 @@ struct traits<ArrayWrapper<ExpressionType> >
|
||||
// Let's remove NestByRefBit
|
||||
enum {
|
||||
Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
|
||||
Flags = Flags0 & ~NestByRefBit
|
||||
LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
|
||||
Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -54,6 +55,8 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
|
||||
typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
|
||||
|
||||
using Base::coeffRef;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
||||
|
||||
@@ -71,66 +74,18 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return m_expression.data(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.coeff(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
return m_expression.coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_expression.coeff(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_expression.coeffRef(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_expression.coeffRef(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar packet(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.template packet<LoadMode>(rowId, colId);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
|
||||
{
|
||||
m_expression.template writePacket<LoadMode>(rowId, colId, val);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar packet(Index index) const
|
||||
{
|
||||
return m_expression.template packet<LoadMode>(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index index, const PacketScalar& val)
|
||||
{
|
||||
m_expression.template writePacket<LoadMode>(index, val);
|
||||
}
|
||||
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void evalTo(Dest& dst) const { dst = m_expression; }
|
||||
@@ -175,7 +130,8 @@ struct traits<MatrixWrapper<ExpressionType> >
|
||||
// Let's remove NestByRefBit
|
||||
enum {
|
||||
Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
|
||||
Flags = Flags0 & ~NestByRefBit
|
||||
LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
|
||||
Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -197,6 +153,8 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
|
||||
typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
|
||||
|
||||
using Base::coeffRef;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
||||
|
||||
@@ -214,66 +172,18 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return m_expression.data(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.coeff(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
return m_expression.coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.derived().coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_expression.coeff(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_expression.coeffRef(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_expression.coeffRef(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar packet(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.template packet<LoadMode>(rowId, colId);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
|
||||
{
|
||||
m_expression.template writePacket<LoadMode>(rowId, colId, val);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar packet(Index index) const
|
||||
{
|
||||
return m_expression.template packet<LoadMode>(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index index, const PacketScalar& val)
|
||||
{
|
||||
m_expression.template writePacket<LoadMode>(index, val);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<NestedExpressionType>::type&
|
||||
nestedExpression() const
|
||||
|
||||
@@ -407,7 +407,7 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
|
||||
: int(Kernel::AssignmentTraits::DstAlignment),
|
||||
srcAlignment = Kernel::AssignmentTraits::JointAlignment
|
||||
};
|
||||
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(&kernel.dstEvaluator().coeffRef(0), size);
|
||||
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
|
||||
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
|
||||
|
||||
unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
|
||||
@@ -515,7 +515,7 @@ struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::Scalar Scalar;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
@@ -527,7 +527,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
||||
dstAlignment = alignable ? int(requestedAlignment)
|
||||
: int(Kernel::AssignmentTraits::DstAlignment)
|
||||
};
|
||||
const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0);
|
||||
const Scalar *dst_ptr = kernel.dstDataPtr();
|
||||
if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
|
||||
{
|
||||
// the pointer is not aligend-on scalar, so alignment is not possible
|
||||
@@ -554,7 +554,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
||||
for(Index inner = alignedEnd; inner<innerSize ; ++inner)
|
||||
kernel.assignCoeffByOuterInner(outer, inner);
|
||||
|
||||
alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
|
||||
alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -563,7 +563,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
@@ -683,6 +683,11 @@ public:
|
||||
: int(DstEvaluatorType::Flags)&RowMajorBit ? inner
|
||||
: outer;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
|
||||
{
|
||||
return m_dstExpr.data();
|
||||
}
|
||||
|
||||
protected:
|
||||
DstEvaluatorType& m_dst;
|
||||
@@ -696,16 +701,39 @@ protected:
|
||||
* Part 5 : Entry point for dense rectangular assignment
|
||||
***************************************************************************/
|
||||
|
||||
template<typename DstXprType, typename SrcXprType, typename Functor>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
|
||||
template<typename DstXprType,typename SrcXprType, typename Functor>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)
|
||||
{
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(dst);
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(src);
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
|
||||
}
|
||||
|
||||
template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
|
||||
dst.resize(dstRows, dstCols);
|
||||
eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType, typename Functor>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
|
||||
{
|
||||
typedef evaluator<DstXprType> DstEvaluatorType;
|
||||
typedef evaluator<SrcXprType> SrcEvaluatorType;
|
||||
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
SrcEvaluatorType srcEvaluator(src);
|
||||
|
||||
// NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
|
||||
// we need to resize the destination after the source evaluator has been created.
|
||||
resize_if_allowed(dst, src, func);
|
||||
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
|
||||
typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
|
||||
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
|
||||
@@ -714,7 +742,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstX
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
|
||||
{
|
||||
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
|
||||
}
|
||||
@@ -796,11 +824,6 @@ void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
|
||||
) && int(Dst::SizeAtCompileTime) != 1
|
||||
};
|
||||
|
||||
Index dstRows = NeedToTranspose ? src.cols() : src.rows();
|
||||
Index dstCols = NeedToTranspose ? src.rows() : src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
|
||||
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
|
||||
ActualDstType actualDst(dst);
|
||||
@@ -823,15 +846,11 @@ template<typename Dst, typename Src, typename Func>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
// TODO check whether this is the right place to perform these checks:
|
||||
EIGEN_STATIC_ASSERT_LVALUE(Dst)
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
|
||||
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
|
||||
|
||||
Assignment<Dst,Src,Func>::run(dst, src, func);
|
||||
}
|
||||
template<typename Dst, typename Src>
|
||||
@@ -853,8 +872,6 @@ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
|
||||
{
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
|
||||
#ifndef EIGEN_NO_DEBUG
|
||||
internal::check_for_aliasing(dst, src);
|
||||
#endif
|
||||
@@ -873,9 +890,42 @@ struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
src.evalTo(dst);
|
||||
}
|
||||
|
||||
// NOTE The following two functions are templated to avoid their instanciation if not needed
|
||||
// This is needed because some expressions supports evalTo only and/or have 'void' as scalar type.
|
||||
template<typename SrcScalarType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
src.addTo(dst);
|
||||
}
|
||||
|
||||
template<typename SrcScalarType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
src.subTo(dst);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
@@ -817,73 +817,79 @@ struct mapbase_evaluator : evaluator_base<Derived>
|
||||
ColsAtCompileTime = XprType::ColsAtCompileTime,
|
||||
CoeffReadCost = NumTraits<Scalar>::ReadCost
|
||||
};
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map)
|
||||
: m_data(const_cast<PointerType>(map.data())),
|
||||
m_xpr(map)
|
||||
: m_data(const_cast<PointerType>(map.data())),
|
||||
m_innerStride(map.innerStride()),
|
||||
m_outerStride(map.outerStride())
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1),
|
||||
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
|
||||
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
||||
}
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
|
||||
return m_data[col * colStride() + row * rowStride()];
|
||||
}
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_data[index * m_xpr.innerStride()];
|
||||
return m_data[index * m_innerStride.value()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
|
||||
return m_data[col * colStride() + row * rowStride()];
|
||||
}
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_data[index * m_xpr.innerStride()];
|
||||
return m_data[index * m_innerStride.value()];
|
||||
}
|
||||
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index row, Index col) const
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
|
||||
PointerType ptr = m_data + row * rowStride() + col * colStride();
|
||||
return internal::ploadt<PacketType, LoadMode>(ptr);
|
||||
}
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
PacketType packet(Index index) const
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return internal::ploadt<PacketType, LoadMode>(m_data + index * m_xpr.innerStride());
|
||||
return internal::ploadt<PacketType, LoadMode>(m_data + index * m_innerStride.value());
|
||||
}
|
||||
|
||||
|
||||
template<int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
|
||||
PointerType ptr = m_data + row * rowStride() + col * colStride();
|
||||
return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
|
||||
}
|
||||
|
||||
|
||||
template<int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_xpr.innerStride(), x);
|
||||
internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x);
|
||||
}
|
||||
|
||||
protected:
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); }
|
||||
|
||||
PointerType m_data;
|
||||
const XprType& m_xpr;
|
||||
const internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_innerStride;
|
||||
const internal::variable_if_dynamic<Index, XprType::OuterStrideAtCompileTime> m_outerStride;
|
||||
};
|
||||
|
||||
template<typename PlainObjectType, int MapOptions, typename StrideType>
|
||||
@@ -1296,7 +1302,7 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
|
||||
}
|
||||
|
||||
protected:
|
||||
const ArgTypeNested m_arg;
|
||||
typename internal::add_const_on_value_type<ArgTypeNested>::type m_arg;
|
||||
const MemberOp m_functor;
|
||||
};
|
||||
|
||||
@@ -1550,9 +1556,7 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
|
||||
{ }
|
||||
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
// FIXME having to check whether ArgType is sparse here i not very nice.
|
||||
typedef typename internal::conditional<!internal::is_same<typename ArgType::StorageKind,Sparse>::value,
|
||||
typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
CoeffReturnType coeff(Index row, Index) const
|
||||
|
||||
@@ -46,7 +46,7 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
|
||||
typedef typename remove_reference<LhsNested>::type _LhsNested;
|
||||
typedef typename remove_reference<RhsNested>::type _RhsNested;
|
||||
enum {
|
||||
Flags = _LhsNested::Flags & RowMajorBit
|
||||
Flags = cwise_promote_storage_order<typename traits<Lhs>::StorageKind,typename traits<Rhs>::StorageKind,_LhsNested::Flags & RowMajorBit,_RhsNested::Flags & RowMajorBit>::value
|
||||
};
|
||||
};
|
||||
} // end namespace internal
|
||||
@@ -84,6 +84,7 @@ class CwiseBinaryOp :
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename internal::remove_all<BinaryOp>::type Functor;
|
||||
typedef typename internal::remove_all<LhsType>::type Lhs;
|
||||
typedef typename internal::remove_all<RhsType>::type Rhs;
|
||||
|
||||
|
||||
@@ -105,7 +105,7 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename CustomNullaryOp>
|
||||
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
|
||||
DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
|
||||
{
|
||||
return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);
|
||||
@@ -150,7 +150,7 @@ DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename CustomNullaryOp>
|
||||
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
|
||||
DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
|
||||
{
|
||||
return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);
|
||||
@@ -192,7 +192,7 @@ DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Constant(Index size, const Scalar& value)
|
||||
{
|
||||
return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value));
|
||||
@@ -208,49 +208,36 @@ DenseBase<Derived>::Constant(Index size, const Scalar& value)
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Constant(const Scalar& value)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op<Scalar>(value));
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Sets a linearly spaced vector.
|
||||
/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(Index,const Scalar&,const Scalar&)
|
||||
*
|
||||
* The function generates 'size' equally spaced values in the closed interval [low,high].
|
||||
* This particular version of LinSpaced() uses sequential access, i.e. vector access is
|
||||
* assumed to be a(0), a(1), ..., a(size-1). This assumption allows for better vectorization
|
||||
* and yields faster code than the random access version.
|
||||
*
|
||||
* When size is set to 1, a vector of length 1 containing 'high' is returned.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* Example: \include DenseBase_LinSpaced_seq.cpp
|
||||
* Output: \verbinclude DenseBase_LinSpaced_seq.out
|
||||
*
|
||||
* \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Index,Scalar,Scalar), CwiseNullaryOp
|
||||
* \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,size));
|
||||
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
|
||||
}
|
||||
|
||||
/**
|
||||
* \copydoc DenseBase::LinSpaced(Sequential_t, Index, const Scalar&, const Scalar&)
|
||||
* Special version for fixed size types which does not require the size parameter.
|
||||
/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(const Scalar&,const Scalar&)
|
||||
*
|
||||
* \sa LinSpaced(Scalar,Scalar)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,Derived::SizeAtCompileTime));
|
||||
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -264,14 +251,24 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
|
||||
* Example: \include DenseBase_LinSpaced.cpp
|
||||
* Output: \verbinclude DenseBase_LinSpaced.out
|
||||
*
|
||||
* \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Sequential_t,Index,const Scalar&,const Scalar&,Index), CwiseNullaryOp
|
||||
* For integer scalar types, an even spacing is possible if and only if the length of the range,
|
||||
* i.e., \c high-low is a scalar multiple of \c size-1, or if \c size is a scalar multiple of the
|
||||
* number of values \c high-low+1 (meaning each value can be repeated the same number of time).
|
||||
* If one of these two considions is not satisfied, then \c high is lowered to the largest value
|
||||
* satisfying one of this constraint.
|
||||
* Here are some examples:
|
||||
*
|
||||
* Example: \include DenseBase_LinSpacedInt.cpp
|
||||
* Output: \verbinclude DenseBase_LinSpacedInt.out
|
||||
*
|
||||
* \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar,true>(low,high,size));
|
||||
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -279,17 +276,17 @@ DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
|
||||
* Special version for fixed size types which does not require the size parameter.
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar,true>(low,high,Derived::SizeAtCompileTime));
|
||||
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
|
||||
}
|
||||
|
||||
/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
|
||||
template<typename Derived>
|
||||
bool DenseBase<Derived>::isApproxToConstant
|
||||
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApproxToConstant
|
||||
(const Scalar& val, const RealScalar& prec) const
|
||||
{
|
||||
typename internal::nested_eval<Derived,1>::type self(derived());
|
||||
@@ -304,7 +301,7 @@ bool DenseBase<Derived>::isApproxToConstant
|
||||
*
|
||||
* \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
|
||||
template<typename Derived>
|
||||
bool DenseBase<Derived>::isConstant
|
||||
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isConstant
|
||||
(const Scalar& val, const RealScalar& prec) const
|
||||
{
|
||||
return isApproxToConstant(val, prec);
|
||||
@@ -315,7 +312,7 @@ bool DenseBase<Derived>::isConstant
|
||||
* \sa setConstant(), Constant(), class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
|
||||
{
|
||||
setConstant(val);
|
||||
}
|
||||
@@ -325,7 +322,7 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
|
||||
* \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
|
||||
{
|
||||
return derived() = Constant(rows(), cols(), val);
|
||||
}
|
||||
@@ -340,7 +337,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
|
||||
* \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
|
||||
{
|
||||
resize(size);
|
||||
@@ -359,7 +356,7 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
|
||||
* \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
|
||||
{
|
||||
resize(rows, cols);
|
||||
@@ -377,27 +374,33 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
|
||||
* Example: \include DenseBase_setLinSpaced.cpp
|
||||
* Output: \verbinclude DenseBase_setLinSpaced.out
|
||||
*
|
||||
* \sa CwiseNullaryOp
|
||||
* For integer scalar types, do not miss the explanations on the definition
|
||||
* of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink.
|
||||
*
|
||||
* \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,newSize));
|
||||
return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize));
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Sets a linearly spaced vector.
|
||||
*
|
||||
* The function fills *this with equally spaced values in the closed interval [low,high].
|
||||
* The function fills \c *this with equally spaced values in the closed interval [low,high].
|
||||
* When size is set to 1, a vector of length 1 containing 'high' is returned.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* \sa setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
|
||||
* For integer scalar types, do not miss the explanations on the definition
|
||||
* of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink.
|
||||
*
|
||||
* \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return setLinSpaced(size(), low, high);
|
||||
@@ -420,7 +423,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low,
|
||||
* \sa Zero(), Zero(Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Zero(Index rows, Index cols)
|
||||
{
|
||||
return Constant(rows, cols, Scalar(0));
|
||||
@@ -443,7 +446,7 @@ DenseBase<Derived>::Zero(Index rows, Index cols)
|
||||
* \sa Zero(), Zero(Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Zero(Index size)
|
||||
{
|
||||
return Constant(size, Scalar(0));
|
||||
@@ -460,7 +463,7 @@ DenseBase<Derived>::Zero(Index size)
|
||||
* \sa Zero(Index), Zero(Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Zero()
|
||||
{
|
||||
return Constant(Scalar(0));
|
||||
@@ -475,7 +478,7 @@ DenseBase<Derived>::Zero()
|
||||
* \sa class CwiseNullaryOp, Zero()
|
||||
*/
|
||||
template<typename Derived>
|
||||
bool DenseBase<Derived>::isZero(const RealScalar& prec) const
|
||||
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isZero(const RealScalar& prec) const
|
||||
{
|
||||
typename internal::nested_eval<Derived,1>::type self(derived());
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
@@ -493,7 +496,7 @@ bool DenseBase<Derived>::isZero(const RealScalar& prec) const
|
||||
* \sa class CwiseNullaryOp, Zero()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
|
||||
{
|
||||
return setConstant(Scalar(0));
|
||||
}
|
||||
@@ -508,7 +511,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
|
||||
* \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setZero(Index newSize)
|
||||
{
|
||||
resize(newSize);
|
||||
@@ -526,7 +529,7 @@ PlainObjectBase<Derived>::setZero(Index newSize)
|
||||
* \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setZero(Index rows, Index cols)
|
||||
{
|
||||
resize(rows, cols);
|
||||
@@ -550,7 +553,7 @@ PlainObjectBase<Derived>::setZero(Index rows, Index cols)
|
||||
* \sa Ones(), Ones(Index), isOnes(), class Ones
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Ones(Index rows, Index cols)
|
||||
{
|
||||
return Constant(rows, cols, Scalar(1));
|
||||
@@ -573,7 +576,7 @@ DenseBase<Derived>::Ones(Index rows, Index cols)
|
||||
* \sa Ones(), Ones(Index,Index), isOnes(), class Ones
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Ones(Index newSize)
|
||||
{
|
||||
return Constant(newSize, Scalar(1));
|
||||
@@ -590,7 +593,7 @@ DenseBase<Derived>::Ones(Index newSize)
|
||||
* \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Ones()
|
||||
{
|
||||
return Constant(Scalar(1));
|
||||
@@ -605,7 +608,7 @@ DenseBase<Derived>::Ones()
|
||||
* \sa class CwiseNullaryOp, Ones()
|
||||
*/
|
||||
template<typename Derived>
|
||||
bool DenseBase<Derived>::isOnes
|
||||
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isOnes
|
||||
(const RealScalar& prec) const
|
||||
{
|
||||
return isApproxToConstant(Scalar(1), prec);
|
||||
@@ -619,7 +622,7 @@ bool DenseBase<Derived>::isOnes
|
||||
* \sa class CwiseNullaryOp, Ones()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
|
||||
{
|
||||
return setConstant(Scalar(1));
|
||||
}
|
||||
@@ -634,7 +637,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
|
||||
* \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setOnes(Index newSize)
|
||||
{
|
||||
resize(newSize);
|
||||
@@ -652,7 +655,7 @@ PlainObjectBase<Derived>::setOnes(Index newSize)
|
||||
* \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
|
||||
{
|
||||
resize(rows, cols);
|
||||
@@ -676,7 +679,7 @@ PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
|
||||
* \sa Identity(), setIdentity(), isIdentity()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
|
||||
MatrixBase<Derived>::Identity(Index rows, Index cols)
|
||||
{
|
||||
return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());
|
||||
@@ -693,7 +696,7 @@ MatrixBase<Derived>::Identity(Index rows, Index cols)
|
||||
* \sa Identity(Index,Index), setIdentity(), isIdentity()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
|
||||
MatrixBase<Derived>::Identity()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
@@ -752,7 +755,7 @@ struct setIdentity_impl<Derived, true>
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& m)
|
||||
{
|
||||
m.setZero();
|
||||
const Index size = (std::min)(m.rows(), m.cols());
|
||||
const Index size = numext::mini(m.rows(), m.cols());
|
||||
for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
|
||||
return m;
|
||||
}
|
||||
@@ -768,7 +771,7 @@ struct setIdentity_impl<Derived, true>
|
||||
* \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
|
||||
{
|
||||
return internal::setIdentity_impl<Derived>::run(derived());
|
||||
}
|
||||
@@ -784,7 +787,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
|
||||
* \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
|
||||
{
|
||||
derived().resize(rows, cols);
|
||||
return setIdentity();
|
||||
@@ -797,7 +800,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index
|
||||
* \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i);
|
||||
@@ -812,7 +815,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
|
||||
* \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return BasisReturnType(SquareMatrixType::Identity(),i);
|
||||
@@ -825,7 +828,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
|
||||
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
|
||||
{ return Derived::Unit(0); }
|
||||
|
||||
/** \returns an expression of the Y axis unit vector (0,1{,0}^*)
|
||||
@@ -835,7 +838,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
|
||||
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
|
||||
{ return Derived::Unit(1); }
|
||||
|
||||
/** \returns an expression of the Z axis unit vector (0,0,1{,0}^*)
|
||||
@@ -845,7 +848,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
|
||||
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
|
||||
{ return Derived::Unit(2); }
|
||||
|
||||
/** \returns an expression of the W axis unit vector (0,0,0,1)
|
||||
@@ -855,7 +858,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
|
||||
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
|
||||
{ return Derived::Unit(3); }
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -260,10 +260,10 @@ template<typename Derived> class DenseBase
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \internal Represents a matrix with all coefficients equal to one another*/
|
||||
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
|
||||
/** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */
|
||||
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar,false>,PlainObject> SequentialLinSpacedReturnType;
|
||||
/** \internal \deprecated Represents a vector with linearly spaced coefficients that allows sequential access only. */
|
||||
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> SequentialLinSpacedReturnType;
|
||||
/** \internal Represents a vector with linearly spaced coefficients that allows random access. */
|
||||
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar,true>,PlainObject> RandomAccessLinSpacedReturnType;
|
||||
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> RandomAccessLinSpacedReturnType;
|
||||
/** \internal the return type of MatrixBase::eigenvalues() */
|
||||
typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
|
||||
|
||||
@@ -296,7 +296,7 @@ template<typename Derived> class DenseBase
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const ReturnByValue<OtherDerived>& func);
|
||||
|
||||
/** \ínternal
|
||||
/** \internal
|
||||
* Copies \a other into *this without evaluating other. \returns a reference to *this.
|
||||
* \deprecated */
|
||||
template<typename OtherDerived>
|
||||
@@ -463,7 +463,17 @@ template<typename Derived> class DenseBase
|
||||
EIGEN_DEVICE_FUNC
|
||||
void visit(Visitor& func) const;
|
||||
|
||||
inline const WithFormat<Derived> format(const IOFormat& fmt) const;
|
||||
/** \returns a WithFormat proxy object allowing to print a matrix the with given
|
||||
* format \a fmt.
|
||||
*
|
||||
* See class IOFormat for some examples.
|
||||
*
|
||||
* \sa class IOFormat, class WithFormat
|
||||
*/
|
||||
inline const WithFormat<Derived> format(const IOFormat& fmt) const
|
||||
{
|
||||
return WithFormat<Derived>(derived(), fmt);
|
||||
}
|
||||
|
||||
/** \returns the unique coefficient of a 1x1 expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -474,9 +484,9 @@ template<typename Derived> class DenseBase
|
||||
return derived().coeff(0,0);
|
||||
}
|
||||
|
||||
bool all() const;
|
||||
bool any() const;
|
||||
Index count() const;
|
||||
EIGEN_DEVICE_FUNC bool all() const;
|
||||
EIGEN_DEVICE_FUNC bool any() const;
|
||||
EIGEN_DEVICE_FUNC Index count() const;
|
||||
|
||||
typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
|
||||
typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType;
|
||||
|
||||
@@ -624,7 +624,7 @@ struct first_aligned_impl<Alignment, Derived, false>
|
||||
{
|
||||
static inline Index run(const Derived& m)
|
||||
{
|
||||
return internal::first_aligned<Alignment>(&m.const_cast_derived().coeffRef(0,0), m.size());
|
||||
return internal::first_aligned<Alignment>(m.data(), m.size());
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -13,9 +13,9 @@
|
||||
#define EIGEN_MATRIXSTORAGE_H
|
||||
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN EIGEN_DENSE_STORAGE_CTOR_PLUGIN;
|
||||
#define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) X; EIGEN_DENSE_STORAGE_CTOR_PLUGIN;
|
||||
#else
|
||||
#define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X)
|
||||
#endif
|
||||
|
||||
namespace Eigen {
|
||||
@@ -184,12 +184,16 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
|
||||
{
|
||||
internal::plain_array<T,Size,_Options> m_data;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage() {
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()) {}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage(const DenseStorage& other) : m_data(other.m_data) {}
|
||||
DenseStorage(const DenseStorage& other) : m_data(other.m_data) {
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
@@ -197,7 +201,7 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
|
||||
return *this;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) {
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols);
|
||||
EIGEN_UNUSED_VARIABLE(size);
|
||||
EIGEN_UNUSED_VARIABLE(rows);
|
||||
@@ -343,7 +347,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
|
||||
@@ -351,6 +355,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
, m_rows(other.m_rows)
|
||||
, m_cols(other.m_cols)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*m_cols)
|
||||
internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
@@ -403,7 +408,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
|
||||
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
|
||||
else
|
||||
m_data = 0;
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
}
|
||||
m_rows = rows;
|
||||
m_cols = cols;
|
||||
@@ -422,7 +427,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0);
|
||||
EIGEN_UNUSED_VARIABLE(rows);
|
||||
}
|
||||
@@ -430,6 +435,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
|
||||
, m_cols(other.m_cols)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols*_Rows)
|
||||
internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
@@ -477,7 +483,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
|
||||
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
|
||||
else
|
||||
m_data = 0;
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
}
|
||||
m_cols = cols;
|
||||
}
|
||||
@@ -495,7 +501,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols);
|
||||
EIGEN_UNUSED_VARIABLE(cols);
|
||||
}
|
||||
@@ -503,6 +509,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
|
||||
, m_rows(other.m_rows)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*_Cols)
|
||||
internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
@@ -550,7 +557,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
|
||||
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
|
||||
else
|
||||
m_data = 0;
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
}
|
||||
m_rows = rows;
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ namespace Eigen {
|
||||
* \param MatrixType the type of the object in which we are taking a sub/main/super diagonal
|
||||
* \param DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal.
|
||||
* A positive value means a superdiagonal, a negative value means a subdiagonal.
|
||||
* You can also use Dynamic so the index can be set at runtime.
|
||||
* You can also use DynamicIndex so the index can be set at runtime.
|
||||
*
|
||||
* The matrix is not required to be square.
|
||||
*
|
||||
|
||||
@@ -290,12 +290,11 @@ MatrixBase<Derived>::asDiagonal() const
|
||||
template<typename Derived>
|
||||
bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
|
||||
{
|
||||
using std::abs;
|
||||
if(cols() != rows()) return false;
|
||||
RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1);
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
{
|
||||
RealScalar absOnDiagonal = abs(coeff(j,j));
|
||||
RealScalar absOnDiagonal = numext::abs(coeff(j,j));
|
||||
if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal;
|
||||
}
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
@@ -321,6 +320,11 @@ struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense>
|
||||
{
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
dst.setZero();
|
||||
dst.diagonal() = src.diagonal();
|
||||
}
|
||||
|
||||
@@ -51,7 +51,8 @@ struct dot_nocheck<T, U, true>
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \returns the dot product of *this with other.
|
||||
/** \fn MatrixBase::dot
|
||||
* \returns the dot product of *this with other.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
@@ -70,9 +71,11 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
|
||||
#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG))
|
||||
typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
|
||||
|
||||
#endif
|
||||
|
||||
eigen_assert(size() == other.size());
|
||||
|
||||
return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
namespace Eigen {
|
||||
|
||||
/** \class EigenBase
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
|
||||
*
|
||||
@@ -128,6 +129,7 @@ template<typename Derived> struct EigenBase
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
call_assignment(derived(), other.derived());
|
||||
@@ -136,6 +138,7 @@ Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
@@ -144,6 +147,7 @@ Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
|
||||
@@ -25,7 +25,8 @@ template<int Rows, int Cols, int Depth> struct product_type_selector;
|
||||
template<int Size, int MaxSize> struct product_size_category
|
||||
{
|
||||
enum { is_large = MaxSize == Dynamic ||
|
||||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
|
||||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
|
||||
(Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
|
||||
value = is_large ? Large
|
||||
: Size == 1 ? 1
|
||||
: Small
|
||||
@@ -223,50 +224,65 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
|
||||
// on, the other hand it is good for the cache to pack the vector anyways...
|
||||
EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
|
||||
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
|
||||
MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal
|
||||
MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
|
||||
};
|
||||
|
||||
gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
|
||||
|
||||
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
|
||||
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
||||
|
||||
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
|
||||
evalToDest ? dest.data() : static_dest.data());
|
||||
|
||||
if(!evalToDest)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
Index size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
if(!alphaIsCompatible)
|
||||
{
|
||||
MappedDest(actualDestPtr, dest.size()).setZero();
|
||||
compatibleAlpha = RhsScalar(1);
|
||||
}
|
||||
else
|
||||
MappedDest(actualDestPtr, dest.size()) = dest;
|
||||
}
|
||||
|
||||
typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
|
||||
typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||
actualDestPtr, 1,
|
||||
compatibleAlpha);
|
||||
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
|
||||
|
||||
if (!evalToDest)
|
||||
if(!MightCannotUseDest)
|
||||
{
|
||||
if(!alphaIsCompatible)
|
||||
dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
|
||||
else
|
||||
dest = MappedDest(actualDestPtr, dest.size());
|
||||
// shortcut if we are sure to be able to use dest directly,
|
||||
// this ease the compiler to generate cleaner and more optimzized code for most common cases
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||
dest.data(), 1,
|
||||
compatibleAlpha);
|
||||
}
|
||||
else
|
||||
{
|
||||
gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
|
||||
|
||||
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
|
||||
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
|
||||
evalToDest ? dest.data() : static_dest.data());
|
||||
|
||||
if(!evalToDest)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
Index size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
if(!alphaIsCompatible)
|
||||
{
|
||||
MappedDest(actualDestPtr, dest.size()).setZero();
|
||||
compatibleAlpha = RhsScalar(1);
|
||||
}
|
||||
else
|
||||
MappedDest(actualDestPtr, dest.size()) = dest;
|
||||
}
|
||||
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||
actualDestPtr, 1,
|
||||
compatibleAlpha);
|
||||
|
||||
if (!evalToDest)
|
||||
{
|
||||
if(!alphaIsCompatible)
|
||||
dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
|
||||
else
|
||||
dest = MappedDest(actualDestPtr, dest.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -329,6 +345,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,false>
|
||||
template<typename Lhs, typename Rhs, typename Dest>
|
||||
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
// TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp
|
||||
typename nested_eval<Rhs,1>::type actual_rhs(rhs);
|
||||
const Index size = rhs.rows();
|
||||
@@ -342,6 +359,7 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
|
||||
template<typename Lhs, typename Rhs, typename Dest>
|
||||
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
|
||||
const Index rows = dest.rows();
|
||||
for(Index i=0; i<rows; ++i)
|
||||
|
||||
@@ -230,7 +230,7 @@ pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(
|
||||
* duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
|
||||
* Currently, this function is only used for scalar * complex products.
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
|
||||
ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
||||
|
||||
/** \internal \returns a packet with elements of \a *from quadrupled.
|
||||
@@ -278,7 +278,7 @@ inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
|
||||
}
|
||||
|
||||
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
|
||||
template<typename Packet> inline Packet
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
|
||||
plset(const typename unpacket_traits<Packet>::type& a) { return a; }
|
||||
|
||||
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
|
||||
@@ -329,7 +329,7 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Pack
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline
|
||||
typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
|
||||
predux4(const Packet& a)
|
||||
predux_downto4(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the product of the elements of \a a*/
|
||||
@@ -482,7 +482,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& fro
|
||||
* by the current computation.
|
||||
*/
|
||||
template<typename Packet, int LoadMode>
|
||||
inline Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
|
||||
{
|
||||
return ploadt<Packet, LoadMode>(from);
|
||||
}
|
||||
@@ -558,6 +558,34 @@ pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& th
|
||||
return ifPacket.select[0] ? thenPacket : elsePacket;
|
||||
}
|
||||
|
||||
/** \internal \returns \a a with the first coefficient replaced by the scalar b */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
|
||||
{
|
||||
// Default implementation based on pblend.
|
||||
// It must be specialized for higher performance.
|
||||
Selector<unpacket_traits<Packet>::size> mask;
|
||||
mask.select[0] = true;
|
||||
// This for loop should be optimized away by the compiler.
|
||||
for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
|
||||
mask.select[i] = false;
|
||||
return pblend(mask, pset1<Packet>(b), a);
|
||||
}
|
||||
|
||||
/** \internal \returns \a a with the last coefficient replaced by the scalar b */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
|
||||
{
|
||||
// Default implementation based on pblend.
|
||||
// It must be specialized for higher performance.
|
||||
Selector<unpacket_traits<Packet>::size> mask;
|
||||
// This for loop should be optimized away by the compiler.
|
||||
for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
|
||||
mask.select[i] = false;
|
||||
mask.select[unpacket_traits<Packet>::size-1] = true;
|
||||
return pblend(mask, pset1<Packet>(b), a);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -105,24 +105,10 @@ class WithFormat
|
||||
}
|
||||
|
||||
protected:
|
||||
const typename ExpressionType::Nested m_matrix;
|
||||
typename ExpressionType::Nested m_matrix;
|
||||
IOFormat m_format;
|
||||
};
|
||||
|
||||
/** \returns a WithFormat proxy object allowing to print a matrix the with given
|
||||
* format \a fmt.
|
||||
*
|
||||
* See class IOFormat for some examples.
|
||||
*
|
||||
* \sa class IOFormat, class WithFormat
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const WithFormat<Derived>
|
||||
DenseBase<Derived>::format(const IOFormat& fmt) const
|
||||
{
|
||||
return WithFormat<Derived>(derived(), fmt);
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
// NOTE: This helper is kept for backward compatibility with previous code specializing
|
||||
|
||||
@@ -45,6 +45,7 @@ class Inverse : public InverseImpl<XprType,typename internal::traits<XprType>::S
|
||||
public:
|
||||
typedef typename XprType::StorageIndex StorageIndex;
|
||||
typedef typename XprType::PlainObject PlainObject;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename internal::ref_selector<XprType>::type XprTypeNested;
|
||||
typedef typename internal::remove_all<XprTypeNested>::type XprTypeNestedCleaned;
|
||||
typedef typename internal::ref_selector<Inverse>::type Nested;
|
||||
|
||||
@@ -97,6 +97,19 @@ struct real_default_impl<Scalar,true>
|
||||
|
||||
template<typename Scalar> struct real_impl : real_default_impl<Scalar> {};
|
||||
|
||||
#ifdef __CUDA_ARCH__
|
||||
template<typename T>
|
||||
struct real_impl<std::complex<T> >
|
||||
{
|
||||
typedef T RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline T run(const std::complex<T>& x)
|
||||
{
|
||||
return x.real();
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
template<typename Scalar>
|
||||
struct real_retval
|
||||
{
|
||||
@@ -132,6 +145,19 @@ struct imag_default_impl<Scalar,true>
|
||||
|
||||
template<typename Scalar> struct imag_impl : imag_default_impl<Scalar> {};
|
||||
|
||||
#ifdef __CUDA_ARCH__
|
||||
template<typename T>
|
||||
struct imag_impl<std::complex<T> >
|
||||
{
|
||||
typedef T RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline T run(const std::complex<T>& x)
|
||||
{
|
||||
return x.imag();
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
template<typename Scalar>
|
||||
struct imag_retval
|
||||
{
|
||||
@@ -1035,11 +1061,24 @@ double log(const double &x) { return ::log(x); }
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
typename NumTraits<T>::Real abs(const T &x) {
|
||||
typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type
|
||||
abs(const T &x) {
|
||||
EIGEN_USING_STD_MATH(abs);
|
||||
return abs(x);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
typename internal::enable_if<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex),typename NumTraits<T>::Real>::type
|
||||
abs(const T &x) {
|
||||
return x;
|
||||
}
|
||||
|
||||
#if defined(__SYCL_DEVICE_ONLY__)
|
||||
EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); }
|
||||
EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); }
|
||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float abs(const float &x) { return ::fabsf(x); }
|
||||
|
||||
@@ -294,7 +294,7 @@ template<typename Derived> class MatrixBase
|
||||
* fuzzy comparison such as isApprox()
|
||||
* \sa isApprox(), operator!= */
|
||||
template<typename OtherDerived>
|
||||
inline bool operator==(const MatrixBase<OtherDerived>& other) const
|
||||
EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase<OtherDerived>& other) const
|
||||
{ return cwiseEqual(other).all(); }
|
||||
|
||||
/** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other.
|
||||
@@ -302,7 +302,7 @@ template<typename Derived> class MatrixBase
|
||||
* fuzzy comparison such as isApprox()
|
||||
* \sa isApprox(), operator== */
|
||||
template<typename OtherDerived>
|
||||
inline bool operator!=(const MatrixBase<OtherDerived>& other) const
|
||||
EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase<OtherDerived>& other) const
|
||||
{ return cwiseNotEqual(other).any(); }
|
||||
|
||||
NoAlias<Derived,Eigen::MatrixBase > noalias();
|
||||
@@ -399,12 +399,14 @@ template<typename Derived> class MatrixBase
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline PlainObject unitOrthogonal(void) const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
|
||||
|
||||
// put this as separate enum value to work around possible GCC 4.3 bug (?)
|
||||
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
|
||||
: ColsAtCompileTime==1 ? Vertical : Horizontal };
|
||||
typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline HomogeneousReturnType homogeneous() const;
|
||||
|
||||
enum {
|
||||
@@ -414,7 +416,7 @@ template<typename Derived> class MatrixBase
|
||||
internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
|
||||
internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;
|
||||
typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const HNormalizedReturnType hnormalized() const;
|
||||
|
||||
////////// Householder module ///////////
|
||||
|
||||
@@ -215,6 +215,8 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
|
||||
static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
|
||||
|
||||
static inline int digits10() { return NumTraits<Scalar>::digits10(); }
|
||||
};
|
||||
|
||||
template<> struct NumTraits<std::string>
|
||||
|
||||
@@ -41,7 +41,7 @@ template<> struct check_rows_cols_for_overflow<Dynamic> {
|
||||
{
|
||||
// http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
|
||||
// we assume Index is signed
|
||||
Index max_index = (size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed
|
||||
Index max_index = (std::size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed
|
||||
bool error = (rows == 0 || cols == 0) ? false
|
||||
: (rows > max_index / cols);
|
||||
if (error)
|
||||
@@ -58,6 +58,28 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
namespace doxygen {
|
||||
|
||||
// This is a workaround to doxygen not being able to understand the inheritance logic
|
||||
// when it is hidden by the dense_xpr_base helper struct.
|
||||
// Moreover, doxygen fails to include members that are not documented in the declaration body of
|
||||
// MatrixBase if we inherits MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >,
|
||||
// this is why we simply inherits MatrixBase, though this does not make sense.
|
||||
|
||||
/** This class is just a workaround for Doxygen and it does not not actually exist. */
|
||||
template<typename Derived> struct dense_xpr_base_dispatcher;
|
||||
/** This class is just a workaround for Doxygen and it does not not actually exist. */
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct dense_xpr_base_dispatcher<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
: public MatrixBase {};
|
||||
/** This class is just a workaround for Doxygen and it does not not actually exist. */
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct dense_xpr_base_dispatcher<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
: public ArrayBase {};
|
||||
|
||||
} // namespace doxygen
|
||||
|
||||
/** \class PlainObjectBase
|
||||
* \ingroup Core_Module
|
||||
* \brief %Dense storage base class for matrices and arrays.
|
||||
@@ -65,26 +87,10 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m
|
||||
* This class can be extended with the help of the plugin mechanism described on the page
|
||||
* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN.
|
||||
*
|
||||
* \tparam Derived is the derived type, e.g., a Matrix or Array
|
||||
*
|
||||
* \sa \ref TopicClassHierarchy
|
||||
*/
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
namespace doxygen {
|
||||
|
||||
// this is a workaround to doxygen not being able to understand the inheritance logic
|
||||
// when it is hidden by the dense_xpr_base helper struct.
|
||||
/** This class is just a workaround for Doxygen and it does not not actually exist. */
|
||||
template<typename Derived> struct dense_xpr_base_dispatcher;
|
||||
/** This class is just a workaround for Doxygen and it does not not actually exist. */
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct dense_xpr_base_dispatcher<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
: public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
|
||||
/** This class is just a workaround for Doxygen and it does not not actually exist. */
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct dense_xpr_base_dispatcher<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
: public ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
|
||||
|
||||
} // namespace doxygen
|
||||
|
||||
template<typename Derived>
|
||||
class PlainObjectBase : public doxygen::dense_xpr_base_dispatcher<Derived>
|
||||
#else
|
||||
@@ -554,7 +560,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
|
||||
public:
|
||||
|
||||
/** \copydoc DenseBase::operator=(const EigenBase<OtherDerived>&)
|
||||
/** \brief Copies the generic expression \a other into *this.
|
||||
* \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -763,6 +770,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
{
|
||||
// NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
|
||||
const bool is_integer = NumTraits<T>::IsInteger;
|
||||
EIGEN_UNUSED_VARIABLE(is_integer);
|
||||
EIGEN_STATIC_ASSERT(is_integer,
|
||||
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
|
||||
resize(size);
|
||||
@@ -804,6 +812,13 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
this->_set_noalias(other);
|
||||
}
|
||||
|
||||
// Initialize an arbitrary matrix from an object convertible to the Derived type.
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const Derived& other){
|
||||
this->_set_noalias(other);
|
||||
}
|
||||
|
||||
// Initialize an arbitrary matrix from a generic Eigen expression
|
||||
template<typename T, typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -826,7 +841,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
this->derived() = r;
|
||||
}
|
||||
|
||||
// For fixed -size arrays:
|
||||
// For fixed-size Array<Scalar,...>
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const Scalar& val0,
|
||||
@@ -838,6 +853,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
Base::setConstant(val0);
|
||||
}
|
||||
|
||||
// For fixed-size Array<Index,...>
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _init1(const Index& val0,
|
||||
@@ -916,8 +932,8 @@ struct conservative_resize_like_impl
|
||||
{
|
||||
// The storage order does not allow us to use reallocation.
|
||||
typename Derived::PlainObject tmp(rows,cols);
|
||||
const Index common_rows = (std::min)(rows, _this.rows());
|
||||
const Index common_cols = (std::min)(cols, _this.cols());
|
||||
const Index common_rows = numext::mini(rows, _this.rows());
|
||||
const Index common_cols = numext::mini(cols, _this.cols());
|
||||
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
|
||||
_this.derived().swap(tmp);
|
||||
}
|
||||
@@ -950,8 +966,8 @@ struct conservative_resize_like_impl
|
||||
{
|
||||
// The storage order does not allow us to use reallocation.
|
||||
typename Derived::PlainObject tmp(other);
|
||||
const Index common_rows = (std::min)(tmp.rows(), _this.rows());
|
||||
const Index common_cols = (std::min)(tmp.cols(), _this.cols());
|
||||
const Index common_rows = numext::mini(tmp.rows(), _this.rows());
|
||||
const Index common_cols = numext::mini(tmp.cols(), _this.cols());
|
||||
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
|
||||
_this.derived().swap(tmp);
|
||||
}
|
||||
|
||||
@@ -140,6 +140,10 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
|
||||
static EIGEN_STRONG_INLINE
|
||||
void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
// FIXME shall we handle nested_eval here?
|
||||
generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
|
||||
}
|
||||
@@ -154,6 +158,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
|
||||
static EIGEN_STRONG_INLINE
|
||||
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
// FIXME shall we handle nested_eval here?
|
||||
generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
|
||||
}
|
||||
@@ -168,6 +173,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
|
||||
static EIGEN_STRONG_INLINE
|
||||
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
// FIXME shall we handle nested_eval here?
|
||||
generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
|
||||
}
|
||||
@@ -201,6 +207,12 @@ struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename
|
||||
static const bool value = true;
|
||||
};
|
||||
|
||||
template<typename OtherXpr, typename Lhs, typename Rhs>
|
||||
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
|
||||
const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
|
||||
static const bool value = true;
|
||||
};
|
||||
|
||||
template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
|
||||
struct assignment_from_xpr_op_product
|
||||
{
|
||||
@@ -265,7 +277,7 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const
|
||||
// FIXME not very good if rhs is real and lhs complex while alpha is real too
|
||||
const Index cols = dst.cols();
|
||||
for (Index j=0; j<cols; ++j)
|
||||
func(dst.col(j), rhsEval.coeff(0,j) * actual_lhs);
|
||||
func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);
|
||||
}
|
||||
|
||||
// Row major result
|
||||
@@ -278,7 +290,7 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const
|
||||
// FIXME not very good if lhs is real and rhs complex while alpha is real too
|
||||
const Index rows = dst.rows();
|
||||
for (Index i=0; i<rows; ++i)
|
||||
func(dst.row(i), lhsEval.coeff(i,0) * actual_rhs);
|
||||
func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs);
|
||||
}
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
@@ -354,17 +366,21 @@ template<typename Lhs, typename Rhs>
|
||||
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
|
||||
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
|
||||
{
|
||||
typedef typename nested_eval<Lhs,1>::type LhsNested;
|
||||
typedef typename nested_eval<Rhs,1>::type RhsNested;
|
||||
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
||||
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
|
||||
typedef typename internal::conditional<int(Side)==OnTheRight,Lhs,Rhs>::type MatrixType;
|
||||
typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
|
||||
|
||||
template<typename Dest>
|
||||
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
||||
{
|
||||
LhsNested actual_lhs(lhs);
|
||||
RhsNested actual_rhs(rhs);
|
||||
internal::gemv_dense_selector<Side,
|
||||
(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
|
||||
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
|
||||
>::run(lhs, rhs, dst, alpha);
|
||||
>::run(actual_lhs, actual_rhs, dst, alpha);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -437,6 +453,18 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
||||
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
|
||||
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
|
||||
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
||||
#if 0
|
||||
std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n";
|
||||
std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n";
|
||||
std::cerr << "LhsAlignment= " << LhsAlignment << "\n";
|
||||
std::cerr << "RhsAlignment= " << RhsAlignment << "\n";
|
||||
std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n";
|
||||
std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n";
|
||||
std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n";
|
||||
std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n";
|
||||
std::cerr << "Alignment= " << Alignment << "\n";
|
||||
std::cerr << "Flags= " << Flags << "\n";
|
||||
#endif
|
||||
}
|
||||
|
||||
// Everything below here is taken from CoeffBasedProduct.h
|
||||
@@ -503,8 +531,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
||||
LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
|
||||
RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
|
||||
|
||||
Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
|
||||
: bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
|
||||
Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
|
||||
: bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
|
||||
: 0,
|
||||
|
||||
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
|
||||
@@ -555,8 +583,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
||||
}
|
||||
|
||||
protected:
|
||||
const LhsNested m_lhs;
|
||||
const RhsNested m_rhs;
|
||||
typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
|
||||
typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
|
||||
|
||||
LhsEtorType m_lhsImpl;
|
||||
RhsEtorType m_rhsImpl;
|
||||
@@ -590,7 +618,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
||||
{
|
||||
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode,Packet>(UnrollingIndex-1, col), res);
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -600,7 +628,7 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
||||
{
|
||||
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
||||
res = pmadd(lhs.template packet<LoadMode,Packet>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
|
||||
res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -609,7 +637,7 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode,Packet>(0, col));
|
||||
res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -618,7 +646,7 @@ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pmul(lhs.template packet<LoadMode,Packet>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
|
||||
res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -627,7 +655,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pset1<Packet>(0);
|
||||
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -636,7 +664,7 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pset1<Packet>(0);
|
||||
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -645,7 +673,7 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
||||
{
|
||||
res = pset1<Packet>(0);
|
||||
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
||||
for(Index i = 0; i < innerDim; ++i)
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
|
||||
}
|
||||
@@ -656,7 +684,7 @@ struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
||||
{
|
||||
res = pset1<Packet>(0);
|
||||
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
||||
for(Index i = 0; i < innerDim; ++i)
|
||||
res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
|
||||
}
|
||||
|
||||
@@ -45,7 +45,7 @@ struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
|
||||
};
|
||||
}
|
||||
|
||||
// FIXME could also be called SelfAdjointWrapper to be consistent with DiagonalWrapper ??
|
||||
|
||||
template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
: public TriangularBase<SelfAdjointView<_MatrixType, UpLo> >
|
||||
{
|
||||
@@ -60,10 +60,12 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
/** \brief The type of coefficients in this matrix */
|
||||
typedef typename internal::traits<SelfAdjointView>::Scalar Scalar;
|
||||
typedef typename MatrixType::StorageIndex StorageIndex;
|
||||
typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
|
||||
|
||||
enum {
|
||||
Mode = internal::traits<SelfAdjointView>::Mode,
|
||||
Flags = internal::traits<SelfAdjointView>::Flags
|
||||
Flags = internal::traits<SelfAdjointView>::Flags,
|
||||
TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0)
|
||||
};
|
||||
typedef typename MatrixType::PlainObject PlainObject;
|
||||
|
||||
@@ -187,6 +189,36 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2);
|
||||
}
|
||||
|
||||
typedef SelfAdjointView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
|
||||
/** \sa MatrixBase::conjugate() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const ConjugateReturnType conjugate() const
|
||||
{ return ConjugateReturnType(m_matrix.conjugate()); }
|
||||
|
||||
typedef SelfAdjointView<const typename MatrixType::AdjointReturnType,TransposeMode> AdjointReturnType;
|
||||
/** \sa MatrixBase::adjoint() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const AdjointReturnType adjoint() const
|
||||
{ return AdjointReturnType(m_matrix.adjoint()); }
|
||||
|
||||
typedef SelfAdjointView<typename MatrixType::TransposeReturnType,TransposeMode> TransposeReturnType;
|
||||
/** \sa MatrixBase::transpose() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline TransposeReturnType transpose()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
typename MatrixType::TransposeReturnType tmp(m_matrix);
|
||||
return TransposeReturnType(tmp);
|
||||
}
|
||||
|
||||
typedef SelfAdjointView<const typename MatrixType::ConstTransposeReturnType,TransposeMode> ConstTransposeReturnType;
|
||||
/** \sa MatrixBase::transpose() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const ConstTransposeReturnType transpose() const
|
||||
{
|
||||
return ConstTransposeReturnType(m_matrix.transpose());
|
||||
}
|
||||
|
||||
/** \returns a const expression of the main diagonal of the matrix \c *this
|
||||
*
|
||||
* This method simply returns the diagonal of the nested expression, thus by-passing the SelfAdjointView decorator.
|
||||
@@ -287,6 +319,7 @@ public:
|
||||
* Implementation of MatrixBase methods
|
||||
***************************************************************************/
|
||||
|
||||
/** This is the const version of MatrixBase::selfadjointView() */
|
||||
template<typename Derived>
|
||||
template<unsigned int UpLo>
|
||||
typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type
|
||||
@@ -295,6 +328,15 @@ MatrixBase<Derived>::selfadjointView() const
|
||||
return typename ConstSelfAdjointViewReturnType<UpLo>::Type(derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of a symmetric/self-adjoint view extracted from the upper or lower triangular part of the current matrix
|
||||
*
|
||||
* The parameter \a UpLo can be either \c #Upper or \c #Lower
|
||||
*
|
||||
* Example: \include MatrixBase_selfadjointView.cpp
|
||||
* Output: \verbinclude MatrixBase_selfadjointView.out
|
||||
*
|
||||
* \sa class SelfAdjointView
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<unsigned int UpLo>
|
||||
typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type
|
||||
|
||||
@@ -15,7 +15,7 @@ namespace Eigen {
|
||||
// TODO generalize the scalar type of 'other'
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
|
||||
{
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
|
||||
@@ -23,7 +23,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
|
||||
{
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());
|
||||
@@ -31,7 +31,7 @@ EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
|
||||
{
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());
|
||||
@@ -39,7 +39,7 @@ EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
|
||||
{
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());
|
||||
|
||||
@@ -34,12 +34,12 @@ template<typename Decomposition, typename RhsType,typename StorageKind> struct s
|
||||
template<typename Decomposition, typename RhsType>
|
||||
struct solve_traits<Decomposition,RhsType,Dense>
|
||||
{
|
||||
typedef Matrix<typename RhsType::Scalar,
|
||||
typedef typename make_proper_matrix_type<typename RhsType::Scalar,
|
||||
Decomposition::ColsAtCompileTime,
|
||||
RhsType::ColsAtCompileTime,
|
||||
RhsType::PlainObject::Options,
|
||||
Decomposition::MaxColsAtCompileTime,
|
||||
RhsType::MaxColsAtCompileTime> PlainObject;
|
||||
RhsType::MaxColsAtCompileTime>::type PlainObject;
|
||||
};
|
||||
|
||||
template<typename Decomposition, typename RhsType>
|
||||
@@ -139,7 +139,11 @@ struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar
|
||||
typedef Solve<DecType,RhsType> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
// FIXME shall we resize dst here?
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
src.dec()._solve_impl(src.rhs(), dst);
|
||||
}
|
||||
};
|
||||
@@ -151,6 +155,11 @@ struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal:
|
||||
typedef Solve<Transpose<const DecType>,RhsType> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
src.dec().nestedExpression().template _solve_impl_transposed<false>(src.rhs(), dst);
|
||||
}
|
||||
};
|
||||
@@ -163,6 +172,11 @@ struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<t
|
||||
typedef Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed<true>(src.rhs(), dst);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -161,6 +161,7 @@ struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
|
||||
* TriangularView methods
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
template<int Side, typename OtherDerived>
|
||||
void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
|
||||
@@ -188,6 +189,7 @@ TriangularViewImpl<Derived,Mode,Dense>::solve(const MatrixBase<Other>& other) co
|
||||
{
|
||||
return internal::triangular_solve_retval<Side,TriangularViewType,Other>(derived(), other.derived());
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace internal {
|
||||
|
||||
|
||||
@@ -170,7 +170,8 @@ MatrixBase<Derived>::stableNorm() const
|
||||
enum {
|
||||
CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit)
|
||||
|| (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME Alignment)>0 might not be enough
|
||||
) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT) // ifwe cannot allocate on the stack, then let's not bother about this optimization
|
||||
) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT)
|
||||
&& (EIGEN_MAX_STATIC_ALIGN_BYTES>0) // if we cannot allocate on the stack, then let's not bother about this optimization
|
||||
};
|
||||
typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
|
||||
typename DerivedCopyClean::ConstSegmentReturnType>::type SegmentWrapper;
|
||||
|
||||
@@ -78,6 +78,11 @@ template<typename MatrixType> class Transpose
|
||||
typename internal::remove_reference<MatrixTypeNested>::type&
|
||||
nestedExpression() { return m_matrix; }
|
||||
|
||||
/** \internal */
|
||||
void resize(Index nrows, Index ncols) {
|
||||
m_matrix.resize(ncols,nrows);
|
||||
}
|
||||
|
||||
protected:
|
||||
typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
|
||||
};
|
||||
|
||||
@@ -470,6 +470,8 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
|
||||
* \a Side==OnTheLeft (the default), or the right-inverse-multiply \a other * inverse(\c *this) if
|
||||
* \a Side==OnTheRight.
|
||||
*
|
||||
* Note that the template parameter \c Side can be ommitted, in which case \c Side==OnTheLeft
|
||||
*
|
||||
* The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the
|
||||
* diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this
|
||||
* is an upper (resp. lower) triangular matrix.
|
||||
@@ -495,6 +497,8 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
|
||||
* \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
|
||||
* This function will const_cast it, so constness isn't honored here.
|
||||
*
|
||||
* Note that the template parameter \c Side can be ommitted, in which case \c Side==OnTheLeft
|
||||
*
|
||||
* See TriangularView:solve() for the details.
|
||||
*/
|
||||
template<int Side, typename OtherDerived>
|
||||
@@ -539,13 +543,14 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
|
||||
|
||||
template<typename ProductType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha);
|
||||
EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha, bool beta);
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of triangular evaluation/assignment
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
// FIXME should we keep that possibility
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
template<typename OtherDerived>
|
||||
@@ -583,6 +588,7 @@ void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBas
|
||||
eigen_assert(Mode == int(OtherDerived::Mode));
|
||||
internal::call_assignment_no_alias(derived(), other.derived());
|
||||
}
|
||||
#endif
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of TriangularBase methods
|
||||
@@ -641,21 +647,20 @@ MatrixBase<Derived>::triangularView() const
|
||||
template<typename Derived>
|
||||
bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
|
||||
{
|
||||
using std::abs;
|
||||
RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
{
|
||||
Index maxi = (std::min)(j, rows()-1);
|
||||
Index maxi = numext::mini(j, rows()-1);
|
||||
for(Index i = 0; i <= maxi; ++i)
|
||||
{
|
||||
RealScalar absValue = abs(coeff(i,j));
|
||||
RealScalar absValue = numext::abs(coeff(i,j));
|
||||
if(absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue;
|
||||
}
|
||||
}
|
||||
RealScalar threshold = maxAbsOnUpperPart * prec;
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
for(Index i = j+1; i < rows(); ++i)
|
||||
if(abs(coeff(i, j)) > threshold) return false;
|
||||
if(numext::abs(coeff(i, j)) > threshold) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -667,20 +672,19 @@ bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
|
||||
template<typename Derived>
|
||||
bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const
|
||||
{
|
||||
using std::abs;
|
||||
RealScalar maxAbsOnLowerPart = static_cast<RealScalar>(-1);
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
for(Index i = j; i < rows(); ++i)
|
||||
{
|
||||
RealScalar absValue = abs(coeff(i,j));
|
||||
RealScalar absValue = numext::abs(coeff(i,j));
|
||||
if(absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue;
|
||||
}
|
||||
RealScalar threshold = maxAbsOnLowerPart * prec;
|
||||
for(Index j = 1; j < cols(); ++j)
|
||||
{
|
||||
Index maxi = (std::min)(j, rows()-1);
|
||||
Index maxi = numext::mini(j, rows()-1);
|
||||
for(Index i = 0; i < maxi; ++i)
|
||||
if(abs(coeff(i, j)) > threshold) return false;
|
||||
if(numext::abs(coeff(i, j)) > threshold) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@@ -777,15 +781,18 @@ public:
|
||||
|
||||
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType, typename Functor>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
|
||||
void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
|
||||
{
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
|
||||
typedef evaluator<DstXprType> DstEvaluatorType;
|
||||
typedef evaluator<SrcXprType> SrcEvaluatorType;
|
||||
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
SrcEvaluatorType srcEvaluator(src);
|
||||
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
|
||||
typedef triangular_dense_assignment_kernel< Mode&(Lower|Upper),Mode&(UnitDiag|ZeroDiag|SelfAdjoint),SetOpposite,
|
||||
DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
|
||||
@@ -802,7 +809,7 @@ void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& sr
|
||||
|
||||
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src)
|
||||
void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src)
|
||||
{
|
||||
call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
|
||||
}
|
||||
@@ -893,7 +900,7 @@ struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>
|
||||
{
|
||||
for(Index j = 0; j < kernel.cols(); ++j)
|
||||
{
|
||||
Index maxi = (std::min)(j, kernel.rows());
|
||||
Index maxi = numext::mini(j, kernel.rows());
|
||||
Index i = 0;
|
||||
if (((Mode&Lower) && SetOpposite) || (Mode&Upper))
|
||||
{
|
||||
@@ -938,8 +945,12 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_
|
||||
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename SrcXprType::Scalar> &)
|
||||
{
|
||||
dst.setZero();
|
||||
dst._assignProduct(src, 1);
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
dst._assignProduct(src, 1, 0);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -950,7 +961,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_ass
|
||||
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,typename SrcXprType::Scalar> &)
|
||||
{
|
||||
dst._assignProduct(src, 1);
|
||||
dst._assignProduct(src, 1, 1);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -961,7 +972,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_ass
|
||||
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,typename SrcXprType::Scalar> &)
|
||||
{
|
||||
dst._assignProduct(src, -1);
|
||||
dst._assignProduct(src, -1, 1);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -602,7 +602,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
return m_matrix / extendedTo(other.derived());
|
||||
}
|
||||
|
||||
/** \returns an expression where each column of row of the referenced matrix are normalized.
|
||||
/** \returns an expression where each column (or row) of the referenced matrix are normalized.
|
||||
* The referenced matrix is \b not modified.
|
||||
* \sa MatrixBase::normalized(), normalize()
|
||||
*/
|
||||
@@ -625,6 +625,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
/////////// Geometry module ///////////
|
||||
|
||||
typedef Homogeneous<ExpressionType,Direction> HomogeneousReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
HomogeneousReturnType homogeneous() const;
|
||||
|
||||
typedef typename ExpressionType::PlainObject CrossReturnType;
|
||||
@@ -654,6 +655,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >
|
||||
HNormalizedReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const HNormalizedReturnType hnormalized() const;
|
||||
|
||||
protected:
|
||||
|
||||
@@ -194,7 +194,8 @@ struct functor_traits<max_coeff_visitor<Scalar> > {
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \returns the minimum of all coefficients of *this and puts in *row and *col its location.
|
||||
/** \fn DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
|
||||
* \returns the minimum of all coefficients of *this and puts in *row and *col its location.
|
||||
* \warning the result is undefined if \c *this contains NaN.
|
||||
*
|
||||
* \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff()
|
||||
@@ -230,7 +231,8 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
|
||||
return minVisitor.res;
|
||||
}
|
||||
|
||||
/** \returns the maximum of all coefficients of *this and puts in *row and *col its location.
|
||||
/** \fn DenseBase<Derived>::maxCoeff(IndexType* rowId, IndexType* colId) const
|
||||
* \returns the maximum of all coefficients of *this and puts in *row and *col its location.
|
||||
* \warning the result is undefined if \c *this contains NaN.
|
||||
*
|
||||
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff()
|
||||
|
||||
@@ -456,6 +456,26 @@ ptranspose(PacketBlock<Packet2cd,2>& kernel) {
|
||||
kernel.packet[0].v = tmp;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex<float> b)
|
||||
{
|
||||
return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,1|2));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex<double> b)
|
||||
{
|
||||
return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,1|2));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex<float> b)
|
||||
{
|
||||
return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,(1<<7)|(1<<6)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex<double> b)
|
||||
{
|
||||
return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,(1<<3)|(1<<2)));
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -355,30 +355,27 @@ pexp<Packet4d>(const Packet4d& _x) {
|
||||
// Functions for sqrt.
|
||||
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
|
||||
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
|
||||
// exact solution. The main advantage of this approach is not just speed, but
|
||||
// also the fact that it can be inlined and pipelined with other computations,
|
||||
// further reducing its effective latency.
|
||||
// exact solution. It does not handle +inf, or denormalized numbers correctly.
|
||||
// The main advantage of this approach is not just speed, but also the fact that
|
||||
// it can be inlined and pipelined with other computations, further reducing its
|
||||
// effective latency. This is similar to Quake3's fast inverse square root.
|
||||
// For detail see here: http://www.beyond3d.com/content/articles/8/
|
||||
#if EIGEN_FAST_MATH
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||
psqrt<Packet8f>(const Packet8f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
|
||||
|
||||
Packet8f neg_half = pmul(_x, p8f_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
Packet8f non_zero_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_GE_OQ);
|
||||
Packet8f x = _mm256_and_ps(non_zero_mask, _mm256_rsqrt_ps(_x));
|
||||
Packet8f half = pmul(_x, pset1<Packet8f>(.5f));
|
||||
Packet8f denormal_mask = _mm256_and_ps(
|
||||
_mm256_cmp_ps(_x, pset1<Packet8f>((std::numeric_limits<float>::min)()),
|
||||
_CMP_LT_OQ),
|
||||
_mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_GE_OQ));
|
||||
|
||||
// Compute approximate reciprocal sqrt.
|
||||
Packet8f x = _mm256_rsqrt_ps(_x);
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8f_one_point_five));
|
||||
|
||||
// Multiply the original _x by it's reciprocal square root to extract the
|
||||
// square root.
|
||||
return pmul(_x, x);
|
||||
x = pmul(x, psub(pset1<Packet8f>(1.5f), pmul(half, pmul(x,x))));
|
||||
// Flush results for denormals to zero.
|
||||
return _mm256_andnot_ps(denormal_mask, pmul(_x,x));
|
||||
}
|
||||
#else
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
|
||||
@@ -48,7 +48,9 @@ template<> struct is_arithmetic<__m256d> { enum { value = true }; };
|
||||
#define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
|
||||
const Packet8i p8i_##NAME = pset1<Packet8i>(X)
|
||||
|
||||
|
||||
// Use the packet_traits defined in AVX512/PacketMath.h instead if we're going
|
||||
// to leverage AVX512 instructions.
|
||||
#ifndef EIGEN_VECTORIZE_AVX512
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet8f type;
|
||||
@@ -93,6 +95,7 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasCeil = 1
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
template<> struct scalar_div_cost<float,true> { enum { value = 14 }; };
|
||||
template<> struct scalar_div_cost<double,true> { enum { value = 16 }; };
|
||||
@@ -304,9 +307,11 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
|
||||
pstore(to, pa);
|
||||
}
|
||||
|
||||
#ifndef EIGEN_VECTORIZE_AVX512
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
|
||||
return _mm_cvtss_f32(_mm256_castps256_ps128(a));
|
||||
@@ -390,17 +395,14 @@ template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
|
||||
{
|
||||
Packet8f tmp0 = _mm256_hadd_ps(a,_mm256_permute2f128_ps(a,a,1));
|
||||
tmp0 = _mm256_hadd_ps(tmp0,tmp0);
|
||||
return pfirst(_mm256_hadd_ps(tmp0, tmp0));
|
||||
return predux(Packet4f(_mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1))));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
|
||||
{
|
||||
Packet4d tmp0 = _mm256_hadd_pd(a,_mm256_permute2f128_pd(a,a,1));
|
||||
return pfirst(_mm256_hadd_pd(tmp0,tmp0));
|
||||
return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f predux4<Packet8f>(const Packet8f& a)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f predux_downto4<Packet8f>(const Packet8f& a)
|
||||
{
|
||||
return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
|
||||
}
|
||||
@@ -604,6 +606,26 @@ template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, cons
|
||||
return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b)
|
||||
{
|
||||
return _mm256_blend_ps(a,pset1<Packet8f>(b),1);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b)
|
||||
{
|
||||
return _mm256_blend_pd(a,pset1<Packet4d>(b),1);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
|
||||
{
|
||||
return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pinsertlast(const Packet4d& a, double b)
|
||||
{
|
||||
return _mm256_blend_pd(a,pset1<Packet4d>(b),(1<<3));
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
396
Eigen/src/Core/arch/AVX512/MathFunctions.h
Normal file
396
Eigen/src/Core/arch/AVX512/MathFunctions.h
Normal file
@@ -0,0 +1,396 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2016 Pedro Gonnet (pedro.gonnet@gmail.com)
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
|
||||
#define THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Disable the code for older versions of gcc that don't support many of the required avx512 instrinsics.
|
||||
#if EIGEN_GNUC_AT_LEAST(5, 3)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
|
||||
const Packet16f p16f_##NAME = pset1<Packet16f>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
|
||||
const Packet16f p16f_##NAME = (__m512)pset1<Packet16i>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
|
||||
const Packet8d p8d_##NAME = pset1<Packet8d>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
|
||||
const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
|
||||
|
||||
// Natural logarithm
|
||||
// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
|
||||
// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
|
||||
// be easily approximated by a polynomial centered on m=1 for stability.
|
||||
#if defined(EIGEN_VECTORIZE_AVX512DQ)
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||
plog<Packet16f>(const Packet16f& _x) {
|
||||
Packet16f x = _x;
|
||||
_EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(126f, 126.0f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inv_mant_mask, ~0x7f800000);
|
||||
|
||||
// The smallest non denormalized float number.
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(min_norm_pos, 0x00800000);
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(minus_inf, 0xff800000);
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
|
||||
|
||||
// Polynomial coefficients.
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_SQRTHF, 0.707106781186547524f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p0, 7.0376836292E-2f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p1, -1.1514610310E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p2, 1.1676998740E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p3, -1.2420140846E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p4, +1.4249322787E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p5, -1.6668057665E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p6, +2.0000714765E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p7, -2.4999993993E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p8, +3.3333331174E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_q1, -2.12194440e-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_q2, 0.693359375f);
|
||||
|
||||
// invalid_mask is set to true when x is NaN
|
||||
__mmask16 invalid_mask =
|
||||
_mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_NGE_UQ);
|
||||
__mmask16 iszero_mask =
|
||||
_mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_EQ_UQ);
|
||||
|
||||
// Truncate input values to the minimum positive normal.
|
||||
x = pmax(x, p16f_min_norm_pos);
|
||||
|
||||
// Extract the shifted exponents.
|
||||
Packet16f emm0 = _mm512_cvtepi32_ps(_mm512_srli_epi32((__m512i)x, 23));
|
||||
Packet16f e = _mm512_sub_ps(emm0, p16f_126f);
|
||||
|
||||
// Set the exponents to -1, i.e. x are in the range [0.5,1).
|
||||
x = _mm512_and_ps(x, p16f_inv_mant_mask);
|
||||
x = _mm512_or_ps(x, p16f_half);
|
||||
|
||||
// part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
|
||||
// and shift by -1. The values are then centered around 0, which improves
|
||||
// the stability of the polynomial evaluation.
|
||||
// if( x < SQRTHF ) {
|
||||
// e -= 1;
|
||||
// x = x + x - 1.0;
|
||||
// } else { x = x - 1.0; }
|
||||
__mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ);
|
||||
Packet16f tmp = _mm512_mask_blend_ps(mask, x, _mm512_setzero_ps());
|
||||
x = psub(x, p16f_1);
|
||||
e = psub(e, _mm512_mask_blend_ps(mask, p16f_1, _mm512_setzero_ps()));
|
||||
x = padd(x, tmp);
|
||||
|
||||
Packet16f x2 = pmul(x, x);
|
||||
Packet16f x3 = pmul(x2, x);
|
||||
|
||||
// Evaluate the polynomial approximant of degree 8 in three parts, probably
|
||||
// to improve instruction-level parallelism.
|
||||
Packet16f y, y1, y2;
|
||||
y = pmadd(p16f_cephes_log_p0, x, p16f_cephes_log_p1);
|
||||
y1 = pmadd(p16f_cephes_log_p3, x, p16f_cephes_log_p4);
|
||||
y2 = pmadd(p16f_cephes_log_p6, x, p16f_cephes_log_p7);
|
||||
y = pmadd(y, x, p16f_cephes_log_p2);
|
||||
y1 = pmadd(y1, x, p16f_cephes_log_p5);
|
||||
y2 = pmadd(y2, x, p16f_cephes_log_p8);
|
||||
y = pmadd(y, x3, y1);
|
||||
y = pmadd(y, x3, y2);
|
||||
y = pmul(y, x3);
|
||||
|
||||
// Add the logarithm of the exponent back to the result of the interpolation.
|
||||
y1 = pmul(e, p16f_cephes_log_q1);
|
||||
tmp = pmul(x2, p16f_half);
|
||||
y = padd(y, y1);
|
||||
x = psub(x, tmp);
|
||||
y2 = pmul(e, p16f_cephes_log_q2);
|
||||
x = padd(x, y);
|
||||
x = padd(x, y2);
|
||||
|
||||
// Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
|
||||
return _mm512_mask_blend_ps(iszero_mask, p16f_minus_inf,
|
||||
_mm512_mask_blend_ps(invalid_mask, p16f_nan, x));
|
||||
}
|
||||
#endif
|
||||
|
||||
// Exponential function. Works by writing "x = m*log(2) + r" where
|
||||
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
|
||||
// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||
pexp<Packet16f>(const Packet16f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
|
||||
|
||||
// Clamp x.
|
||||
Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo);
|
||||
|
||||
// Express exp(x) as exp(m*ln(2) + r), start by extracting
|
||||
// m = floor(x/ln(2) + 0.5).
|
||||
Packet16f m = _mm512_floor_ps(pmadd(x, p16f_cephes_LOG2EF, p16f_half));
|
||||
|
||||
// Get r = x - m*ln(2). Note that we can do this without losing more than one
|
||||
// ulp precision due to the FMA instruction.
|
||||
_EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
|
||||
Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
|
||||
Packet16f r2 = pmul(r, r);
|
||||
|
||||
// TODO(gonnet): Split into odd/even polynomials and try to exploit
|
||||
// instruction-level parallelism.
|
||||
Packet16f y = p16f_cephes_exp_p0;
|
||||
y = pmadd(y, r, p16f_cephes_exp_p1);
|
||||
y = pmadd(y, r, p16f_cephes_exp_p2);
|
||||
y = pmadd(y, r, p16f_cephes_exp_p3);
|
||||
y = pmadd(y, r, p16f_cephes_exp_p4);
|
||||
y = pmadd(y, r, p16f_cephes_exp_p5);
|
||||
y = pmadd(y, r2, r);
|
||||
y = padd(y, p16f_1);
|
||||
|
||||
// Build emm0 = 2^m.
|
||||
Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127));
|
||||
emm0 = _mm512_slli_epi32(emm0, 23);
|
||||
|
||||
// Return 2^m * exp(r).
|
||||
return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x);
|
||||
}
|
||||
|
||||
/*template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
|
||||
pexp<Packet8d>(const Packet8d& _x) {
|
||||
Packet8d x = _x;
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(1, 1.0);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(2, 2.0);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(exp_hi, 709.437);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(exp_lo, -709.436139303);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_LOG2EF, 1.4426950408889634073599);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p0, 1.26177193074810590878e-4);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p1, 3.02994407707441961300e-2);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p2, 9.99999999999999999910e-1);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q0, 3.00198505138664455042e-6);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q1, 2.52448340349684104192e-3);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q2, 2.27265548208155028766e-1);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_C1, 0.693145751953125);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
|
||||
// clamp x
|
||||
x = pmax(pmin(x, p8d_exp_hi), p8d_exp_lo);
|
||||
|
||||
// Express exp(x) as exp(g + n*log(2)).
|
||||
const Packet8d n =
|
||||
_mm512_mul_round_pd(p8d_cephes_LOG2EF, x, _MM_FROUND_TO_NEAREST_INT);
|
||||
|
||||
// Get the remainder modulo log(2), i.e. the "g" described above. Subtract
|
||||
// n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
|
||||
// digits right.
|
||||
const Packet8d nC1 = pmul(n, p8d_cephes_exp_C1);
|
||||
const Packet8d nC2 = pmul(n, p8d_cephes_exp_C2);
|
||||
x = psub(x, nC1);
|
||||
x = psub(x, nC2);
|
||||
|
||||
const Packet8d x2 = pmul(x, x);
|
||||
|
||||
// Evaluate the numerator polynomial of the rational interpolant.
|
||||
Packet8d px = p8d_cephes_exp_p0;
|
||||
px = pmadd(px, x2, p8d_cephes_exp_p1);
|
||||
px = pmadd(px, x2, p8d_cephes_exp_p2);
|
||||
px = pmul(px, x);
|
||||
|
||||
// Evaluate the denominator polynomial of the rational interpolant.
|
||||
Packet8d qx = p8d_cephes_exp_q0;
|
||||
qx = pmadd(qx, x2, p8d_cephes_exp_q1);
|
||||
qx = pmadd(qx, x2, p8d_cephes_exp_q2);
|
||||
qx = pmadd(qx, x2, p8d_cephes_exp_q3);
|
||||
|
||||
// I don't really get this bit, copied from the SSE2 routines, so...
|
||||
// TODO(gonnet): Figure out what is going on here, perhaps find a better
|
||||
// rational interpolant?
|
||||
x = _mm512_div_pd(px, psub(qx, px));
|
||||
x = pmadd(p8d_2, x, p8d_1);
|
||||
|
||||
// Build e=2^n.
|
||||
const Packet8d e = _mm512_castsi512_pd(_mm512_slli_epi64(
|
||||
_mm512_add_epi64(_mm512_cvtpd_epi64(n), _mm512_set1_epi64(1023)), 52));
|
||||
|
||||
// Construct the result 2^n * exp(g) = e * x. The max is used to catch
|
||||
// non-finite values in the input.
|
||||
return pmax(pmul(x, e), _x);
|
||||
}*/
|
||||
|
||||
// Functions for sqrt.
|
||||
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
|
||||
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
|
||||
// exact solution. The main advantage of this approach is not just speed, but
|
||||
// also the fact that it can be inlined and pipelined with other computations,
|
||||
// further reducing its effective latency.
|
||||
#if EIGEN_FAST_MATH
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||
psqrt<Packet16f>(const Packet16f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
|
||||
|
||||
Packet16f neg_half = pmul(_x, p16f_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
__mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ);
|
||||
Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_rsqrt14_ps(_x),
|
||||
_mm512_setzero_ps());
|
||||
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
|
||||
|
||||
// Multiply the original _x by it's reciprocal square root to extract the
|
||||
// square root.
|
||||
return pmul(_x, x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
|
||||
psqrt<Packet8d>(const Packet8d& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
|
||||
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
|
||||
|
||||
Packet8d neg_half = pmul(_x, p8d_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
__mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ);
|
||||
Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_rsqrt14_pd(_x),
|
||||
_mm512_setzero_pd());
|
||||
|
||||
// Do a first step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
|
||||
|
||||
// Do a second step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
|
||||
|
||||
// Multiply the original _x by it's reciprocal square root to extract the
|
||||
// square root.
|
||||
return pmul(_x, x);
|
||||
}
|
||||
#else
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16f psqrt<Packet16f>(const Packet16f& x) {
|
||||
return _mm512_sqrt_ps(x);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8d psqrt<Packet8d>(const Packet8d& x) {
|
||||
return _mm512_sqrt_pd(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Functions for rsqrt.
|
||||
// Almost identical to the sqrt routine, just leave out the last multiplication
|
||||
// and fill in NaN/Inf where needed. Note that this function only exists as an
|
||||
// iterative version for doubles since there is no instruction for diretly
|
||||
// computing the reciprocal square root in AVX-512.
|
||||
#ifdef EIGEN_FAST_MATH
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||
prsqrt<Packet16f>(const Packet16f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
|
||||
|
||||
Packet16f neg_half = pmul(_x, p16f_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
__mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ);
|
||||
Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(),
|
||||
_mm512_rsqrt14_ps(_x));
|
||||
|
||||
// Fill in NaNs and Infs for the negative/zero entries.
|
||||
__mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ);
|
||||
Packet16f infs_and_nans = _mm512_mask_blend_ps(
|
||||
neg_mask, p16f_nan,
|
||||
_mm512_mask_blend_ps(le_zero_mask, p16f_inf, _mm512_setzero_ps()));
|
||||
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
|
||||
|
||||
// Insert NaNs and Infs in all the right places.
|
||||
return _mm512_mask_blend_ps(le_zero_mask, infs_and_nans, x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
|
||||
prsqrt<Packet8d>(const Packet8d& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);
|
||||
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(nan, 0x7ff1000000000000LL);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
|
||||
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
|
||||
|
||||
Packet8d neg_half = pmul(_x, p8d_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
__mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ);
|
||||
Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(),
|
||||
_mm512_rsqrt14_pd(_x));
|
||||
|
||||
// Fill in NaNs and Infs for the negative/zero entries.
|
||||
__mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ);
|
||||
Packet8d infs_and_nans = _mm512_mask_blend_pd(
|
||||
neg_mask, p8d_nan,
|
||||
_mm512_mask_blend_pd(le_zero_mask, p8d_inf, _mm512_setzero_pd()));
|
||||
|
||||
// Do a first step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
|
||||
|
||||
// Do a second step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
|
||||
|
||||
// Insert NaNs and Infs in all the right places.
|
||||
return _mm512_mask_blend_pd(le_zero_mask, infs_and_nans, x);
|
||||
}
|
||||
#else
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
|
||||
return _mm512_rsqrt28_ps(x);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
|
||||
1316
Eigen/src/Core/arch/AVX512/PacketMath.h
Normal file
1316
Eigen/src/Core/arch/AVX512/PacketMath.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -15,14 +15,14 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
||||
static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
||||
#ifdef __VSX__
|
||||
#if defined(_BIG_ENDIAN)
|
||||
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
#else
|
||||
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -65,7 +65,7 @@ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type;
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
Packet2cf res;
|
||||
if((ptrdiff_t(&from) % 16) == 0)
|
||||
if((std::ptrdiff_t(&from) % 16) == 0)
|
||||
res.v = pload<Packet4f>((const float *)&from);
|
||||
else
|
||||
res.v = ploadu<Packet4f>((const float *)&from);
|
||||
|
||||
@@ -84,8 +84,10 @@ static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
|
||||
#ifdef __POWER8_VECTOR__
|
||||
static Packet2l p2l_1023 = { 1023, 1023 };
|
||||
static Packet2ul p2ul_52 = { 52, 52 };
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -72,7 +72,7 @@ static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}
|
||||
static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000}
|
||||
static Packet4f p4f_MZERO = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000}
|
||||
#ifndef __VSX__
|
||||
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}
|
||||
#endif
|
||||
@@ -90,7 +90,7 @@ static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
|
||||
#define _EIGEN_MASK_ALIGNMENT 0xfffffff0
|
||||
#endif
|
||||
|
||||
#define _EIGEN_ALIGNED_PTR(x) ((ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
|
||||
#define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
|
||||
|
||||
// Handle endianness properly while loading constants
|
||||
// Define global static constants:
|
||||
@@ -358,7 +358,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return p4i_
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b, p4f_ZERO); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b, p4f_MZERO); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return a * b; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
@@ -373,7 +373,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const
|
||||
t = vec_nmsub(y_0, b, p4f_ONE);
|
||||
y_1 = vec_madd(y_0, t, y_0);
|
||||
|
||||
return vec_madd(a, y_1, p4f_ZERO);
|
||||
return vec_madd(a, y_1, p4f_MZERO);
|
||||
#else
|
||||
return vec_div(a, b);
|
||||
#endif
|
||||
@@ -450,15 +450,15 @@ template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
|
||||
{
|
||||
Packet4f p;
|
||||
if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from);
|
||||
else p = ploadu<Packet4f>(from);
|
||||
if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from);
|
||||
else p = ploadu<Packet4f>(from);
|
||||
return vec_perm(p, p, p16uc_DUPLICATE32_HI);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
|
||||
{
|
||||
Packet4i p;
|
||||
if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from);
|
||||
else p = ploadu<Packet4i>(from);
|
||||
if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from);
|
||||
else p = ploadu<Packet4i>(from);
|
||||
return vec_perm(p, p, p16uc_DUPLICATE32_HI);
|
||||
}
|
||||
|
||||
@@ -766,7 +766,7 @@ static Packet2l p2l_ONE = { 1, 1 };
|
||||
static Packet2l p2l_ZERO = reinterpret_cast<Packet2l>(p4i_ZERO);
|
||||
static Packet2d p2d_ONE = { 1.0, 1.0 };
|
||||
static Packet2d p2d_ZERO = reinterpret_cast<Packet2d>(p4f_ZERO);
|
||||
static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
|
||||
static Packet2d p2d_MZERO = { -0.0, -0.0 };
|
||||
|
||||
#ifdef _BIG_ENDIAN
|
||||
static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ZERO), reinterpret_cast<Packet4f>(p2d_ONE), 8));
|
||||
@@ -904,7 +904,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return p2d_
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_ZERO); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_MZERO); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_div(a,b); }
|
||||
|
||||
// for some weird raisons, it has to be overloaded for packet of integers
|
||||
@@ -935,8 +935,8 @@ template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
|
||||
{
|
||||
Packet2d p;
|
||||
if((ptrdiff_t(from) % 16) == 0) p = pload<Packet2d>(from);
|
||||
else p = ploadu<Packet2d>(from);
|
||||
if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet2d>(from);
|
||||
else p = ploadu<Packet2d>(from);
|
||||
return vec_splat_dbl<0>(p);
|
||||
}
|
||||
|
||||
|
||||
@@ -24,34 +24,43 @@ namespace internal {
|
||||
// compile. Here, we manually specialize these functors for complex types when
|
||||
// building for CUDA to avoid non-constexpr methods.
|
||||
|
||||
template<typename T> struct scalar_sum_op<std::complex<T>> {
|
||||
// Sum
|
||||
template<typename T> struct scalar_sum_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
|
||||
typedef typename std::complex<T> result_type;
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||
return std::complex<T>(numext::real(a) + numext::real(b),
|
||||
numext::imag(a) + numext::imag(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T> struct scalar_difference_op<std::complex<T>> {
|
||||
template<typename T> struct scalar_sum_op<std::complex<T>, std::complex<T> > : scalar_sum_op<const std::complex<T>, const std::complex<T> > {};
|
||||
|
||||
|
||||
// Difference
|
||||
template<typename T> struct scalar_difference_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
|
||||
typedef typename std::complex<T> result_type;
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||
return std::complex<T>(numext::real(a) - numext::real(b),
|
||||
numext::imag(a) - numext::imag(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T>> {
|
||||
template<typename T> struct scalar_difference_op<std::complex<T>, std::complex<T> > : scalar_difference_op<const std::complex<T>, const std::complex<T> > {};
|
||||
|
||||
|
||||
// Product
|
||||
template<typename T> struct scalar_product_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
|
||||
enum {
|
||||
Vectorizable = packet_traits<std::complex<T>>::HasMul
|
||||
};
|
||||
typedef typename std::complex<T> result_type;
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||
const T a_real = numext::real(a);
|
||||
const T a_imag = numext::imag(a);
|
||||
const T b_real = numext::real(b);
|
||||
@@ -61,14 +70,18 @@ template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T>>
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T> struct scalar_quotient_op<std::complex<T>, std::complex<T>> {
|
||||
template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T> > : scalar_product_op<const std::complex<T>, const std::complex<T> > {};
|
||||
|
||||
|
||||
// Quotient
|
||||
template<typename T> struct scalar_quotient_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
|
||||
enum {
|
||||
Vectorizable = packet_traits<std::complex<T>>::HasDiv
|
||||
};
|
||||
typedef typename std::complex<T> result_type;
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||
const T a_real = numext::real(a);
|
||||
const T a_imag = numext::imag(a);
|
||||
const T b_real = numext::real(b);
|
||||
@@ -79,6 +92,8 @@ template<typename T> struct scalar_quotient_op<std::complex<T>, std::complex<T>>
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T> struct scalar_quotient_op<std::complex<T>, std::complex<T> > : scalar_quotient_op<const std::complex<T>, const std::complex<T> > {};
|
||||
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted.
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
@@ -474,9 +474,59 @@ template<> struct is_arithmetic<half> { enum { value = true }; };
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct numeric_limits<Eigen::half> {
|
||||
static const bool is_specialized = true;
|
||||
static const bool is_signed = true;
|
||||
static const bool is_integer = false;
|
||||
static const bool is_exact = false;
|
||||
static const bool has_infinity = true;
|
||||
static const bool has_quiet_NaN = true;
|
||||
static const bool has_signaling_NaN = true;
|
||||
static const float_denorm_style has_denorm = denorm_present;
|
||||
static const bool has_denorm_loss = false;
|
||||
static const std::float_round_style round_style = std::round_to_nearest;
|
||||
static const bool is_iec559 = false;
|
||||
static const bool is_bounded = false;
|
||||
static const bool is_modulo = false;
|
||||
static const int digits = 11;
|
||||
static const int digits10 = 2;
|
||||
//static const int max_digits10 = ;
|
||||
static const int radix = 2;
|
||||
static const int min_exponent = -13;
|
||||
static const int min_exponent10 = -4;
|
||||
static const int max_exponent = 16;
|
||||
static const int max_exponent10 = 4;
|
||||
static const bool traps = true;
|
||||
static const bool tinyness_before = false;
|
||||
|
||||
static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); }
|
||||
static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }
|
||||
static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }
|
||||
static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); }
|
||||
static Eigen::half round_error() { return Eigen::half(0.5); }
|
||||
static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }
|
||||
static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
|
||||
static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
|
||||
static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); }
|
||||
};
|
||||
}
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
template<> struct NumTraits<Eigen::half>
|
||||
: GenericNumTraits<Eigen::half>
|
||||
{
|
||||
enum {
|
||||
IsSigned = true,
|
||||
IsInteger = false,
|
||||
IsComplex = false,
|
||||
RequireInitialization = false
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
|
||||
return half_impl::raw_uint16_to_half(0x0800);
|
||||
}
|
||||
|
||||
@@ -291,7 +291,7 @@ template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<float4,4>& kernel) {
|
||||
double tmp = kernel.packet[0].y;
|
||||
float tmp = kernel.packet[0].y;
|
||||
kernel.packet[0].y = kernel.packet[1].x;
|
||||
kernel.packet[1].x = tmp;
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
// Most of the following operations require arch >= 3.0
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDACC__) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||
|
||||
template<> struct is_arithmetic<half2> { enum { value = true }; };
|
||||
|
||||
@@ -41,15 +41,15 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits
|
||||
|
||||
template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16}; typedef half2 half; };
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
|
||||
return __half2half2(from);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload<half2>(const Eigen::half* from) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 pload<half2>(const Eigen::half* from) {
|
||||
return *reinterpret_cast<const half2*>(from);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu<half2>(const Eigen::half* from) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 ploadu<half2>(const Eigen::half* from) {
|
||||
return __halves2half2(from[0], from[1]);
|
||||
}
|
||||
|
||||
@@ -57,17 +57,17 @@ template<> EIGEN_STRONG_INLINE half2 ploaddup<half2>(const Eigen::half* from) {
|
||||
return __halves2half2(from[0], from[0]);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const half2& from) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const half2& from) {
|
||||
*reinterpret_cast<half2*>(to) = from;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const half2& from) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const half2& from) {
|
||||
to[0] = __low2half(from);
|
||||
to[1] = __high2half(from);
|
||||
}
|
||||
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const Eigen::half* from) {
|
||||
__device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const Eigen::half* from) {
|
||||
#if __CUDA_ARCH__ >= 350
|
||||
return __ldg((const half2*)from);
|
||||
#else
|
||||
@@ -76,7 +76,7 @@ template<>
|
||||
}
|
||||
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Eigen::half* from) {
|
||||
__device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Eigen::half* from) {
|
||||
#if __CUDA_ARCH__ >= 350
|
||||
return __halves2half2(__ldg(from+0), __ldg(from+1));
|
||||
#else
|
||||
@@ -84,27 +84,27 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Ei
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pgather<Eigen::half, half2>(const Eigen::half* from, Index stride) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 pgather<Eigen::half, half2>(const Eigen::half* from, Index stride) {
|
||||
return __halves2half2(from[0*stride], from[1*stride]);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<Eigen::half, half2>(Eigen::half* to, const half2& from, Index stride) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE void pscatter<Eigen::half, half2>(Eigen::half* to, const half2& from, Index stride) {
|
||||
to[stride*0] = __low2half(from);
|
||||
to[stride*1] = __high2half(from);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst<half2>(const half2& a) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE Eigen::half pfirst<half2>(const half2& a) {
|
||||
return __low2half(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& a) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& a) {
|
||||
half2 result;
|
||||
result.x = a.x & 0x7FFF7FFF;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
|
||||
__device__ EIGEN_STRONG_INLINE void
|
||||
ptranspose(PacketBlock<half2,2>& kernel) {
|
||||
__half a1 = __low2half(kernel.packet[0]);
|
||||
__half a2 = __high2half(kernel.packet[0]);
|
||||
@@ -114,7 +114,7 @@ ptranspose(PacketBlock<half2,2>& kernel) {
|
||||
kernel.packet[1] = __halves2half2(a2, b2);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half& a) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half& a) {
|
||||
#if __CUDA_ARCH__ >= 530
|
||||
return __halves2half2(a, __hadd(a, __float2half(1.0f)));
|
||||
#else
|
||||
@@ -123,7 +123,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen:
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) {
|
||||
#if __CUDA_ARCH__ >= 530
|
||||
return __hadd2(a, b);
|
||||
#else
|
||||
@@ -137,7 +137,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2&
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) {
|
||||
#if __CUDA_ARCH__ >= 530
|
||||
return __hsub2(a, b);
|
||||
#else
|
||||
@@ -151,7 +151,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub<half2>(const half2&
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
|
||||
#if __CUDA_ARCH__ >= 530
|
||||
return __hneg2(a);
|
||||
#else
|
||||
@@ -161,9 +161,9 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) {
|
||||
#if __CUDA_ARCH__ >= 530
|
||||
return __hmul2(a, b);
|
||||
#else
|
||||
@@ -177,7 +177,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2&
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) {
|
||||
#if __CUDA_ARCH__ >= 530
|
||||
return __hfma2(a, b, c);
|
||||
#else
|
||||
@@ -193,7 +193,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2&
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) {
|
||||
float a1 = __low2float(a);
|
||||
float a2 = __high2float(a);
|
||||
float b1 = __low2float(b);
|
||||
@@ -203,7 +203,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2&
|
||||
return __floats2half2_rn(r1, r2);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) {
|
||||
float a1 = __low2float(a);
|
||||
float a2 = __high2float(a);
|
||||
float b1 = __low2float(b);
|
||||
@@ -213,7 +213,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2&
|
||||
return __halves2half2(r1, r2);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) {
|
||||
float a1 = __low2float(a);
|
||||
float a2 = __high2float(a);
|
||||
float b1 = __low2float(b);
|
||||
@@ -223,7 +223,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2&
|
||||
return __halves2half2(r1, r2);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2& a) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2& a) {
|
||||
#if __CUDA_ARCH__ >= 530
|
||||
return __hadd(__low2half(a), __high2half(a));
|
||||
#else
|
||||
@@ -233,7 +233,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux<half2>(const
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const half2& a) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const half2& a) {
|
||||
#if __CUDA_ARCH__ >= 530
|
||||
__half first = __low2half(a);
|
||||
__half second = __high2half(a);
|
||||
@@ -245,7 +245,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(c
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const half2& a) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const half2& a) {
|
||||
#if __CUDA_ARCH__ >= 530
|
||||
__half first = __low2half(a);
|
||||
__half second = __high2half(a);
|
||||
@@ -257,7 +257,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(c
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const half2& a) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const half2& a) {
|
||||
#if __CUDA_ARCH__ >= 530
|
||||
return __hmul(__low2half(a), __high2half(a));
|
||||
#else
|
||||
@@ -267,7 +267,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(c
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
|
||||
float a1 = __low2float(a);
|
||||
float a2 = __high2float(a);
|
||||
float r1 = log1pf(a1);
|
||||
@@ -277,29 +277,29 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2
|
||||
|
||||
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
template<> __device__ EIGEN_STRONG_INLINE
|
||||
half2 plog<half2>(const half2& a) {
|
||||
return h2log(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
template<> __device__ EIGEN_STRONG_INLINE
|
||||
half2 pexp<half2>(const half2& a) {
|
||||
return h2exp(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
template<> __device__ EIGEN_STRONG_INLINE
|
||||
half2 psqrt<half2>(const half2& a) {
|
||||
return h2sqrt(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
template<> __device__ EIGEN_STRONG_INLINE
|
||||
half2 prsqrt<half2>(const half2& a) {
|
||||
return h2rsqrt(a);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog<half2>(const half2& a) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 plog<half2>(const half2& a) {
|
||||
float a1 = __low2float(a);
|
||||
float a2 = __high2float(a);
|
||||
float r1 = logf(a1);
|
||||
@@ -307,7 +307,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog<half2>(const half2&
|
||||
return __floats2half2_rn(r1, r2);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexp<half2>(const half2& a) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 pexp<half2>(const half2& a) {
|
||||
float a1 = __low2float(a);
|
||||
float a2 = __high2float(a);
|
||||
float r1 = expf(a1);
|
||||
@@ -315,7 +315,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexp<half2>(const half2&
|
||||
return __floats2half2_rn(r1, r2);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2& a) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2& a) {
|
||||
float a1 = __low2float(a);
|
||||
float a2 = __high2float(a);
|
||||
float r1 = sqrtf(a1);
|
||||
@@ -323,7 +323,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2&
|
||||
return __floats2half2_rn(r1, r2);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 prsqrt<half2>(const half2& a) {
|
||||
template<> __device__ EIGEN_STRONG_INLINE half2 prsqrt<half2>(const half2& a) {
|
||||
float a1 = __low2float(a);
|
||||
float a2 = __high2float(a);
|
||||
float r1 = rsqrtf(a1);
|
||||
@@ -333,6 +333,374 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 prsqrt<half2>(const half2
|
||||
|
||||
#endif
|
||||
|
||||
#elif defined EIGEN_VECTORIZE_AVX512
|
||||
|
||||
typedef struct {
|
||||
__m256i x;
|
||||
} Packet16h;
|
||||
|
||||
|
||||
template<> struct is_arithmetic<Packet16h> { enum { value = true }; };
|
||||
|
||||
template <>
|
||||
struct packet_traits<half> : default_packet_traits {
|
||||
typedef Packet16h type;
|
||||
// There is no half-size packet for Packet16h.
|
||||
typedef Packet16h half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 16,
|
||||
HasHalfPacket = 0,
|
||||
HasAdd = 0,
|
||||
HasSub = 0,
|
||||
HasMul = 0,
|
||||
HasNegate = 0,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasConj = 0,
|
||||
HasSetLinear = 0,
|
||||
HasDiv = 0,
|
||||
HasSqrt = 0,
|
||||
HasRsqrt = 0,
|
||||
HasExp = 0,
|
||||
HasLog = 0,
|
||||
HasBlend = 0
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
template<> struct unpacket_traits<Packet16h> { typedef Eigen::half type; enum {size=16, alignment=Aligned32}; typedef Packet16h half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet16h pset1<Packet16h>(const Eigen::half& from) {
|
||||
Packet16h result;
|
||||
result.x = _mm256_set1_epi16(from.x);
|
||||
return result;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Eigen::half pfirst<Packet16h>(const Packet16h& from) {
|
||||
return half_impl::raw_uint16_to_half(static_cast<unsigned short>(_mm256_extract_epi16(from.x, 0)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet16h pload<Packet16h>(const Eigen::half* from) {
|
||||
Packet16h result;
|
||||
result.x = _mm256_load_si256(reinterpret_cast<const __m256i*>(from));
|
||||
return result;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet16h ploadu<Packet16h>(const Eigen::half* from) {
|
||||
Packet16h result;
|
||||
result.x = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from));
|
||||
return result;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet16h& from) {
|
||||
_mm256_store_si256((__m256i*)to, from.x);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet16h& from) {
|
||||
_mm256_storeu_si256((__m256i*)to, from.x);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet16h
|
||||
ploadquad(const Eigen::half* from) {
|
||||
Packet16h result;
|
||||
unsigned short a = from[0].x;
|
||||
unsigned short b = from[1].x;
|
||||
unsigned short c = from[2].x;
|
||||
unsigned short d = from[3].x;
|
||||
result.x = _mm256_set_epi16(d, d, d, d, c, c, c, c, b, b, b, b, a, a, a, a);
|
||||
return result;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet16f half2float(const Packet16h& a) {
|
||||
#ifdef EIGEN_HAS_FP16_C
|
||||
return _mm512_cvtph_ps(a.x);
|
||||
#else
|
||||
EIGEN_ALIGN64 half aux[16];
|
||||
pstore(aux, a);
|
||||
float f0(aux[0]);
|
||||
float f1(aux[1]);
|
||||
float f2(aux[2]);
|
||||
float f3(aux[3]);
|
||||
float f4(aux[4]);
|
||||
float f5(aux[5]);
|
||||
float f6(aux[6]);
|
||||
float f7(aux[7]);
|
||||
float f8(aux[8]);
|
||||
float f9(aux[9]);
|
||||
float fa(aux[10]);
|
||||
float fb(aux[11]);
|
||||
float fc(aux[12]);
|
||||
float fd(aux[13]);
|
||||
float fe(aux[14]);
|
||||
float ff(aux[15]);
|
||||
|
||||
return _mm512_set_ps(
|
||||
ff, fe, fd, fc, fb, fa, f9, f8, f7, f6, f5, f4, f3, f2, f1, f0);
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet16h float2half(const Packet16f& a) {
|
||||
#ifdef EIGEN_HAS_FP16_C
|
||||
Packet16h result;
|
||||
result.x = _mm512_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC);
|
||||
return result;
|
||||
#else
|
||||
EIGEN_ALIGN64 float aux[16];
|
||||
pstore(aux, a);
|
||||
half h0(aux[0]);
|
||||
half h1(aux[1]);
|
||||
half h2(aux[2]);
|
||||
half h3(aux[3]);
|
||||
half h4(aux[4]);
|
||||
half h5(aux[5]);
|
||||
half h6(aux[6]);
|
||||
half h7(aux[7]);
|
||||
half h8(aux[8]);
|
||||
half h9(aux[9]);
|
||||
half ha(aux[10]);
|
||||
half hb(aux[11]);
|
||||
half hc(aux[12]);
|
||||
half hd(aux[13]);
|
||||
half he(aux[14]);
|
||||
half hf(aux[15]);
|
||||
|
||||
Packet16h result;
|
||||
result.x = _mm256_set_epi16(
|
||||
hf.x, he.x, hd.x, hc.x, hb.x, ha.x, h9.x, h8.x,
|
||||
h7.x, h6.x, h5.x, h4.x, h3.x, h2.x, h1.x, h0.x);
|
||||
return result;
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) {
|
||||
Packet16f af = half2float(a);
|
||||
Packet16f bf = half2float(b);
|
||||
Packet16f rf = padd(af, bf);
|
||||
return float2half(rf);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) {
|
||||
Packet16f af = half2float(a);
|
||||
Packet16f bf = half2float(b);
|
||||
Packet16f rf = pmul(af, bf);
|
||||
return float2half(rf);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& from) {
|
||||
Packet16f from_float = half2float(from);
|
||||
return half(predux(from_float));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet16h pgather<Eigen::half, Packet16h>(const Eigen::half* from, Index stride)
|
||||
{
|
||||
Packet16h result;
|
||||
result.x = _mm256_set_epi16(
|
||||
from[15*stride].x, from[14*stride].x, from[13*stride].x, from[12*stride].x,
|
||||
from[11*stride].x, from[10*stride].x, from[9*stride].x, from[8*stride].x,
|
||||
from[7*stride].x, from[6*stride].x, from[5*stride].x, from[4*stride].x,
|
||||
from[3*stride].x, from[2*stride].x, from[1*stride].x, from[0*stride].x);
|
||||
return result;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pscatter<half, Packet16h>(half* to, const Packet16h& from, Index stride)
|
||||
{
|
||||
EIGEN_ALIGN64 half aux[16];
|
||||
pstore(aux, from);
|
||||
to[stride*0].x = aux[0].x;
|
||||
to[stride*1].x = aux[1].x;
|
||||
to[stride*2].x = aux[2].x;
|
||||
to[stride*3].x = aux[3].x;
|
||||
to[stride*4].x = aux[4].x;
|
||||
to[stride*5].x = aux[5].x;
|
||||
to[stride*6].x = aux[6].x;
|
||||
to[stride*7].x = aux[7].x;
|
||||
to[stride*8].x = aux[8].x;
|
||||
to[stride*9].x = aux[9].x;
|
||||
to[stride*10].x = aux[10].x;
|
||||
to[stride*11].x = aux[11].x;
|
||||
to[stride*12].x = aux[12].x;
|
||||
to[stride*13].x = aux[13].x;
|
||||
to[stride*14].x = aux[14].x;
|
||||
to[stride*15].x = aux[15].x;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void
|
||||
ptranspose(PacketBlock<Packet16h,16>& kernel) {
|
||||
__m256i a = kernel.packet[0].x;
|
||||
__m256i b = kernel.packet[1].x;
|
||||
__m256i c = kernel.packet[2].x;
|
||||
__m256i d = kernel.packet[3].x;
|
||||
__m256i e = kernel.packet[4].x;
|
||||
__m256i f = kernel.packet[5].x;
|
||||
__m256i g = kernel.packet[6].x;
|
||||
__m256i h = kernel.packet[7].x;
|
||||
__m256i i = kernel.packet[8].x;
|
||||
__m256i j = kernel.packet[9].x;
|
||||
__m256i k = kernel.packet[10].x;
|
||||
__m256i l = kernel.packet[11].x;
|
||||
__m256i m = kernel.packet[12].x;
|
||||
__m256i n = kernel.packet[13].x;
|
||||
__m256i o = kernel.packet[14].x;
|
||||
__m256i p = kernel.packet[15].x;
|
||||
|
||||
__m256i ab_07 = _mm256_unpacklo_epi16(a, b);
|
||||
__m256i cd_07 = _mm256_unpacklo_epi16(c, d);
|
||||
__m256i ef_07 = _mm256_unpacklo_epi16(e, f);
|
||||
__m256i gh_07 = _mm256_unpacklo_epi16(g, h);
|
||||
__m256i ij_07 = _mm256_unpacklo_epi16(i, j);
|
||||
__m256i kl_07 = _mm256_unpacklo_epi16(k, l);
|
||||
__m256i mn_07 = _mm256_unpacklo_epi16(m, n);
|
||||
__m256i op_07 = _mm256_unpacklo_epi16(o, p);
|
||||
|
||||
__m256i ab_8f = _mm256_unpackhi_epi16(a, b);
|
||||
__m256i cd_8f = _mm256_unpackhi_epi16(c, d);
|
||||
__m256i ef_8f = _mm256_unpackhi_epi16(e, f);
|
||||
__m256i gh_8f = _mm256_unpackhi_epi16(g, h);
|
||||
__m256i ij_8f = _mm256_unpackhi_epi16(i, j);
|
||||
__m256i kl_8f = _mm256_unpackhi_epi16(k, l);
|
||||
__m256i mn_8f = _mm256_unpackhi_epi16(m, n);
|
||||
__m256i op_8f = _mm256_unpackhi_epi16(o, p);
|
||||
|
||||
__m256i abcd_03 = _mm256_unpacklo_epi32(ab_07, cd_07);
|
||||
__m256i abcd_47 = _mm256_unpackhi_epi32(ab_07, cd_07);
|
||||
__m256i efgh_03 = _mm256_unpacklo_epi32(ef_07, gh_07);
|
||||
__m256i efgh_47 = _mm256_unpackhi_epi32(ef_07, gh_07);
|
||||
__m256i ijkl_03 = _mm256_unpacklo_epi32(ij_07, kl_07);
|
||||
__m256i ijkl_47 = _mm256_unpackhi_epi32(ij_07, kl_07);
|
||||
__m256i mnop_03 = _mm256_unpacklo_epi32(mn_07, op_07);
|
||||
__m256i mnop_47 = _mm256_unpackhi_epi32(mn_07, op_07);
|
||||
|
||||
__m256i abcd_8b = _mm256_unpacklo_epi32(ab_8f, cd_8f);
|
||||
__m256i abcd_cf = _mm256_unpackhi_epi32(ab_8f, cd_8f);
|
||||
__m256i efgh_8b = _mm256_unpacklo_epi32(ef_8f, gh_8f);
|
||||
__m256i efgh_cf = _mm256_unpackhi_epi32(ef_8f, gh_8f);
|
||||
__m256i ijkl_8b = _mm256_unpacklo_epi32(ij_8f, kl_8f);
|
||||
__m256i ijkl_cf = _mm256_unpackhi_epi32(ij_8f, kl_8f);
|
||||
__m256i mnop_8b = _mm256_unpacklo_epi32(mn_8f, op_8f);
|
||||
__m256i mnop_cf = _mm256_unpackhi_epi32(mn_8f, op_8f);
|
||||
|
||||
__m256i abcdefgh_01 = _mm256_unpacklo_epi64(abcd_03, efgh_03);
|
||||
__m256i abcdefgh_23 = _mm256_unpackhi_epi64(abcd_03, efgh_03);
|
||||
__m256i ijklmnop_01 = _mm256_unpacklo_epi64(ijkl_03, mnop_03);
|
||||
__m256i ijklmnop_23 = _mm256_unpackhi_epi64(ijkl_03, mnop_03);
|
||||
__m256i abcdefgh_45 = _mm256_unpacklo_epi64(abcd_47, efgh_47);
|
||||
__m256i abcdefgh_67 = _mm256_unpackhi_epi64(abcd_47, efgh_47);
|
||||
__m256i ijklmnop_45 = _mm256_unpacklo_epi64(ijkl_47, mnop_47);
|
||||
__m256i ijklmnop_67 = _mm256_unpackhi_epi64(ijkl_47, mnop_47);
|
||||
__m256i abcdefgh_89 = _mm256_unpacklo_epi64(abcd_8b, efgh_8b);
|
||||
__m256i abcdefgh_ab = _mm256_unpackhi_epi64(abcd_8b, efgh_8b);
|
||||
__m256i ijklmnop_89 = _mm256_unpacklo_epi64(ijkl_8b, mnop_8b);
|
||||
__m256i ijklmnop_ab = _mm256_unpackhi_epi64(ijkl_8b, mnop_8b);
|
||||
__m256i abcdefgh_cd = _mm256_unpacklo_epi64(abcd_cf, efgh_cf);
|
||||
__m256i abcdefgh_ef = _mm256_unpackhi_epi64(abcd_cf, efgh_cf);
|
||||
__m256i ijklmnop_cd = _mm256_unpacklo_epi64(ijkl_cf, mnop_cf);
|
||||
__m256i ijklmnop_ef = _mm256_unpackhi_epi64(ijkl_cf, mnop_cf);
|
||||
|
||||
// NOTE: no unpacklo/hi instr in this case, so using permute instr.
|
||||
__m256i a_p_0 = _mm256_permute2x128_si256(abcdefgh_01, ijklmnop_01, 0x20);
|
||||
__m256i a_p_1 = _mm256_permute2x128_si256(abcdefgh_01, ijklmnop_01, 0x31);
|
||||
__m256i a_p_2 = _mm256_permute2x128_si256(abcdefgh_23, ijklmnop_23, 0x20);
|
||||
__m256i a_p_3 = _mm256_permute2x128_si256(abcdefgh_23, ijklmnop_23, 0x31);
|
||||
__m256i a_p_4 = _mm256_permute2x128_si256(abcdefgh_45, ijklmnop_45, 0x20);
|
||||
__m256i a_p_5 = _mm256_permute2x128_si256(abcdefgh_45, ijklmnop_45, 0x31);
|
||||
__m256i a_p_6 = _mm256_permute2x128_si256(abcdefgh_67, ijklmnop_67, 0x20);
|
||||
__m256i a_p_7 = _mm256_permute2x128_si256(abcdefgh_67, ijklmnop_67, 0x31);
|
||||
__m256i a_p_8 = _mm256_permute2x128_si256(abcdefgh_89, ijklmnop_89, 0x20);
|
||||
__m256i a_p_9 = _mm256_permute2x128_si256(abcdefgh_89, ijklmnop_89, 0x31);
|
||||
__m256i a_p_a = _mm256_permute2x128_si256(abcdefgh_ab, ijklmnop_ab, 0x20);
|
||||
__m256i a_p_b = _mm256_permute2x128_si256(abcdefgh_ab, ijklmnop_ab, 0x31);
|
||||
__m256i a_p_c = _mm256_permute2x128_si256(abcdefgh_cd, ijklmnop_cd, 0x20);
|
||||
__m256i a_p_d = _mm256_permute2x128_si256(abcdefgh_cd, ijklmnop_cd, 0x31);
|
||||
__m256i a_p_e = _mm256_permute2x128_si256(abcdefgh_ef, ijklmnop_ef, 0x20);
|
||||
__m256i a_p_f = _mm256_permute2x128_si256(abcdefgh_ef, ijklmnop_ef, 0x31);
|
||||
|
||||
kernel.packet[0].x = a_p_0;
|
||||
kernel.packet[1].x = a_p_1;
|
||||
kernel.packet[2].x = a_p_2;
|
||||
kernel.packet[3].x = a_p_3;
|
||||
kernel.packet[4].x = a_p_4;
|
||||
kernel.packet[5].x = a_p_5;
|
||||
kernel.packet[6].x = a_p_6;
|
||||
kernel.packet[7].x = a_p_7;
|
||||
kernel.packet[8].x = a_p_8;
|
||||
kernel.packet[9].x = a_p_9;
|
||||
kernel.packet[10].x = a_p_a;
|
||||
kernel.packet[11].x = a_p_b;
|
||||
kernel.packet[12].x = a_p_c;
|
||||
kernel.packet[13].x = a_p_d;
|
||||
kernel.packet[14].x = a_p_e;
|
||||
kernel.packet[15].x = a_p_f;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void
|
||||
ptranspose(PacketBlock<Packet16h,8>& kernel) {
|
||||
EIGEN_ALIGN64 half in[8][16];
|
||||
pstore<half>(in[0], kernel.packet[0]);
|
||||
pstore<half>(in[1], kernel.packet[1]);
|
||||
pstore<half>(in[2], kernel.packet[2]);
|
||||
pstore<half>(in[3], kernel.packet[3]);
|
||||
pstore<half>(in[4], kernel.packet[4]);
|
||||
pstore<half>(in[5], kernel.packet[5]);
|
||||
pstore<half>(in[6], kernel.packet[6]);
|
||||
pstore<half>(in[7], kernel.packet[7]);
|
||||
|
||||
EIGEN_ALIGN64 half out[8][16];
|
||||
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
for (int j = 0; j < 8; ++j) {
|
||||
out[i][j] = in[j][2*i];
|
||||
}
|
||||
for (int j = 0; j < 8; ++j) {
|
||||
out[i][j+8] = in[j][2*i+1];
|
||||
}
|
||||
}
|
||||
|
||||
kernel.packet[0] = pload<Packet16h>(out[0]);
|
||||
kernel.packet[1] = pload<Packet16h>(out[1]);
|
||||
kernel.packet[2] = pload<Packet16h>(out[2]);
|
||||
kernel.packet[3] = pload<Packet16h>(out[3]);
|
||||
kernel.packet[4] = pload<Packet16h>(out[4]);
|
||||
kernel.packet[5] = pload<Packet16h>(out[5]);
|
||||
kernel.packet[6] = pload<Packet16h>(out[6]);
|
||||
kernel.packet[7] = pload<Packet16h>(out[7]);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void
|
||||
ptranspose(PacketBlock<Packet16h,4>& kernel) {
|
||||
EIGEN_ALIGN64 half in[4][16];
|
||||
pstore<half>(in[0], kernel.packet[0]);
|
||||
pstore<half>(in[1], kernel.packet[1]);
|
||||
pstore<half>(in[2], kernel.packet[2]);
|
||||
pstore<half>(in[3], kernel.packet[3]);
|
||||
|
||||
EIGEN_ALIGN64 half out[4][16];
|
||||
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
out[i][j] = in[j][4*i];
|
||||
}
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
out[i][j+4] = in[j][4*i+1];
|
||||
}
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
out[i][j+8] = in[j][4*i+2];
|
||||
}
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
out[i][j+12] = in[j][4*i+3];
|
||||
}
|
||||
}
|
||||
|
||||
kernel.packet[0] = pload<Packet16h>(out[0]);
|
||||
kernel.packet[1] = pload<Packet16h>(out[1]);
|
||||
kernel.packet[2] = pload<Packet16h>(out[2]);
|
||||
kernel.packet[3] = pload<Packet16h>(out[3]);
|
||||
}
|
||||
|
||||
|
||||
#elif defined EIGEN_VECTORIZE_AVX
|
||||
|
||||
typedef struct {
|
||||
@@ -492,6 +860,30 @@ template<> EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet8h>(Eigen::half*
|
||||
to[stride*7].x = aux[7].x;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Eigen::half predux<Packet8h>(const Packet8h& a) {
|
||||
Packet8f af = half2float(a);
|
||||
float reduced = predux<Packet8f>(af);
|
||||
return Eigen::half(reduced);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Eigen::half predux_max<Packet8h>(const Packet8h& a) {
|
||||
Packet8f af = half2float(a);
|
||||
float reduced = predux_max<Packet8f>(af);
|
||||
return Eigen::half(reduced);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Eigen::half predux_min<Packet8h>(const Packet8h& a) {
|
||||
Packet8f af = half2float(a);
|
||||
float reduced = predux_min<Packet8f>(af);
|
||||
return Eigen::half(reduced);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet8h>(const Packet8h& a) {
|
||||
Packet8f af = half2float(a);
|
||||
float reduced = predux_mul<Packet8f>(af);
|
||||
return Eigen::half(reduced);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void
|
||||
ptranspose(PacketBlock<Packet8h,8>& kernel) {
|
||||
__m128i a = kernel.packet[0].x;
|
||||
|
||||
@@ -100,6 +100,33 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(cons
|
||||
return __floats2half2_rn(a.x, a.y);
|
||||
}
|
||||
|
||||
#elif defined EIGEN_VECTORIZE_AVX512
|
||||
template <>
|
||||
struct type_casting_traits<half, float> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16h, Packet16f>(const Packet16h& a) {
|
||||
return half2float(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<float, half> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet16h pcast<Packet16f, Packet16h>(const Packet16f& a) {
|
||||
return float2half(a);
|
||||
}
|
||||
|
||||
#elif defined EIGEN_VECTORIZE_AVX
|
||||
|
||||
template <>
|
||||
|
||||
@@ -16,8 +16,14 @@ namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
inline uint32x4_t p4ui_CONJ_XOR() {
|
||||
// See bug 1325, clang fails to call vld1q_u64.
|
||||
#if EIGEN_COMP_CLANG
|
||||
uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
||||
return ret;
|
||||
#else
|
||||
static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
||||
return vld1q_u32( conj_XOR_DATA );
|
||||
#endif
|
||||
}
|
||||
|
||||
inline uint32x2_t p2ui_CONJ_XOR() {
|
||||
@@ -282,8 +288,13 @@ ptranspose(PacketBlock<Packet2cf,2>& kernel) {
|
||||
//---------- double ----------
|
||||
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
|
||||
|
||||
const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
|
||||
static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
|
||||
// See bug 1325, clang fails to call vld1q_u64.
|
||||
#if EIGEN_COMP_CLANG
|
||||
static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
|
||||
#else
|
||||
const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
|
||||
static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
|
||||
#endif
|
||||
|
||||
struct Packet1cd
|
||||
{
|
||||
|
||||
@@ -28,11 +28,13 @@ namespace internal {
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#endif
|
||||
|
||||
// FIXME NEON has 16 quad registers, but since the current register allocator
|
||||
// is so bad, it is much better to reduce it to 8
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#if EIGEN_ARCH_ARM64
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
||||
#else
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
|
||||
#endif
|
||||
#endif
|
||||
|
||||
typedef float32x2_t Packet2f;
|
||||
typedef float32x4_t Packet4f;
|
||||
@@ -44,19 +46,22 @@ typedef uint32x4_t Packet4ui;
|
||||
const Packet4f p4f_##NAME = pset1<Packet4f>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
|
||||
const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int>(X))
|
||||
const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int32_t>(X))
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
|
||||
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
|
||||
|
||||
// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function
|
||||
// which available on LLVM and GCC (at least)
|
||||
#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
|
||||
#if EIGEN_ARCH_ARM64
|
||||
// __builtin_prefetch tends to do nothing on ARM64 compilers because the
|
||||
// prefetch instructions there are too detailed for __builtin_prefetch to map
|
||||
// meaningfully to them.
|
||||
#define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__("prfm pldl1keep, [%[addr]]\n" ::[addr] "r"(ADDR) : );
|
||||
#elif EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
|
||||
#define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
|
||||
#elif defined __pld
|
||||
#define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
|
||||
#elif !EIGEN_ARCH_ARM64
|
||||
#define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
|
||||
#elif EIGEN_ARCH_ARM32
|
||||
#define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ("pld [%[addr]]\n" :: [addr] "r" (ADDR) : );
|
||||
#else
|
||||
// by default no explicit prefetching
|
||||
#define EIGEN_ARM_PREFETCH(ADDR)
|
||||
@@ -81,7 +86,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasSqrt = 0
|
||||
};
|
||||
};
|
||||
template<> struct packet_traits<int> : default_packet_traits
|
||||
template<> struct packet_traits<int32_t> : default_packet_traits
|
||||
{
|
||||
typedef Packet4i type;
|
||||
typedef Packet4i half; // Packet2i intrinsics not implemented yet
|
||||
@@ -103,19 +108,19 @@ EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q
|
||||
EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
|
||||
#endif
|
||||
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int32_t type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) { return vdupq_n_s32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)
|
||||
{
|
||||
const float32_t f[] = {0, 1, 2, 3};
|
||||
const float f[] = {0, 1, 2, 3};
|
||||
Packet4f countdown = vld1q_f32(f);
|
||||
return vaddq_f32(pset1<Packet4f>(a), countdown);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a)
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int32_t& a)
|
||||
{
|
||||
const int32_t i[] = {0, 1, 2, 3};
|
||||
Packet4i countdown = vld1q_s32(i);
|
||||
@@ -238,20 +243,20 @@ template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, con
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int32_t* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
|
||||
{
|
||||
float32x2_t lo, hi;
|
||||
lo = vld1_dup_f32(from);
|
||||
hi = vld1_dup_f32(from+1);
|
||||
return vcombine_f32(lo, hi);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int32_t* from)
|
||||
{
|
||||
int32x2_t lo, hi;
|
||||
lo = vld1_dup_s32(from);
|
||||
@@ -259,11 +264,11 @@ template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
|
||||
return vcombine_s32(lo, hi);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<float> (float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float> (float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
|
||||
{
|
||||
@@ -274,7 +279,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const floa
|
||||
res = vsetq_lane_f32(from[3*stride], res, 3);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int32_t, Packet4i>(const int32_t* from, Index stride)
|
||||
{
|
||||
Packet4i res = pset1<Packet4i>(0);
|
||||
res = vsetq_lane_s32(from[0*stride], res, 0);
|
||||
@@ -291,7 +296,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, co
|
||||
to[stride*2] = vgetq_lane_f32(from, 2);
|
||||
to[stride*3] = vgetq_lane_f32(from, 3);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<int32_t, Packet4i>(int32_t* to, const Packet4i& from, Index stride)
|
||||
{
|
||||
to[stride*0] = vgetq_lane_s32(from, 0);
|
||||
to[stride*1] = vgetq_lane_s32(from, 1);
|
||||
@@ -299,12 +304,12 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const
|
||||
to[stride*3] = vgetq_lane_s32(from, 3);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ARM_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ARM_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float> (const float* addr) { EIGEN_ARM_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t* addr) { EIGEN_ARM_PREFETCH(addr); }
|
||||
|
||||
// FIXME only store the 2 first elements ?
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) { int32_t EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
|
||||
float32x2_t a_lo, a_hi;
|
||||
@@ -359,7 +364,7 @@ template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
|
||||
return sum;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
|
||||
template<> EIGEN_STRONG_INLINE int32_t predux<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
int32x2_t a_lo, a_hi, sum;
|
||||
|
||||
@@ -406,7 +411,7 @@ template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
|
||||
|
||||
return vget_lane_f32(prod, 0);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
|
||||
template<> EIGEN_STRONG_INLINE int32_t predux_mul<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
int32x2_t a_lo, a_hi, prod;
|
||||
|
||||
@@ -434,7 +439,7 @@ template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
|
||||
return vget_lane_f32(min, 0);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
|
||||
template<> EIGEN_STRONG_INLINE int32_t predux_min<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
int32x2_t a_lo, a_hi, min;
|
||||
|
||||
@@ -459,7 +464,7 @@ template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
|
||||
return vget_lane_f32(max, 0);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
|
||||
template<> EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
int32x2_t a_lo, a_hi, max;
|
||||
|
||||
|
||||
@@ -476,6 +476,26 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, co
|
||||
return Packet2cf(_mm_castpd_ps(result));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pinsertfirst(const Packet2cf& a, std::complex<float> b)
|
||||
{
|
||||
return Packet2cf(_mm_loadl_pi(a.v, reinterpret_cast<const __m64*>(&b)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pinsertfirst(const Packet1cd&, std::complex<double> b)
|
||||
{
|
||||
return pset1<Packet1cd>(b);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pinsertlast(const Packet2cf& a, std::complex<float> b)
|
||||
{
|
||||
return Packet2cf(_mm_loadh_pi(a.v, reinterpret_cast<const __m64*>(&b)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pinsertlast(const Packet1cd&, std::complex<double> b)
|
||||
{
|
||||
return pset1<Packet1cd>(b);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -32,7 +32,7 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
|
||||
/* the smallest non denormalized float number */
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000);//-1.f/0.f);
|
||||
|
||||
|
||||
/* natural logarithm computed for 4 simultaneous float
|
||||
return NaN for x <= 0
|
||||
*/
|
||||
@@ -444,25 +444,33 @@ Packet4f pcos<Packet4f>(const Packet4f& _x)
|
||||
|
||||
#if EIGEN_FAST_MATH
|
||||
|
||||
// This is based on Quake3's fast inverse square root.
|
||||
// Functions for sqrt.
|
||||
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
|
||||
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
|
||||
// exact solution. It does not handle +inf, or denormalized numbers correctly.
|
||||
// The main advantage of this approach is not just speed, but also the fact that
|
||||
// it can be inlined and pipelined with other computations, further reducing its
|
||||
// effective latency. This is similar to Quake3's fast inverse square root.
|
||||
// For detail see here: http://www.beyond3d.com/content/articles/8/
|
||||
// It lacks 1 (or 2 bits in some rare cases) of precision, and does not handle negative, +inf, or denormalized numbers correctly.
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f psqrt<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
Packet4f half = pmul(_x, pset1<Packet4f>(.5f));
|
||||
Packet4f denormal_mask = _mm_and_ps(
|
||||
_mm_cmpge_ps(_x, _mm_setzero_ps()),
|
||||
_mm_cmplt_ps(_x, pset1<Packet4f>((std::numeric_limits<float>::min)())));
|
||||
|
||||
/* select only the inverse sqrt of non-zero inputs */
|
||||
Packet4f non_zero_mask = _mm_cmpge_ps(_x, pset1<Packet4f>((std::numeric_limits<float>::min)()));
|
||||
Packet4f x = _mm_and_ps(non_zero_mask, _mm_rsqrt_ps(_x));
|
||||
|
||||
// Compute approximate reciprocal sqrt.
|
||||
Packet4f x = _mm_rsqrt_ps(_x);
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, psub(pset1<Packet4f>(1.5f), pmul(half, pmul(x,x))));
|
||||
return pmul(_x,x);
|
||||
// Flush results for denormals to zero.
|
||||
return _mm_andnot_ps(denormal_mask, pmul(_x,x));
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template<>EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
template<>EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
|
||||
|
||||
#endif
|
||||
@@ -491,7 +499,7 @@ Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
|
||||
Packet4f neg_mask = _mm_cmplt_ps(_x, _mm_setzero_ps());
|
||||
Packet4f zero_mask = _mm_andnot_ps(neg_mask, le_zero_mask);
|
||||
Packet4f infs_and_nans = _mm_or_ps(_mm_and_ps(neg_mask, p4f_nan),
|
||||
_mm_and_ps(zero_mask, p4f_inf));
|
||||
_mm_and_ps(zero_mask, p4f_inf));
|
||||
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p4f_one_point_five));
|
||||
|
||||
@@ -28,7 +28,7 @@ namespace internal {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if (defined EIGEN_VECTORIZE_AVX) && EIGEN_COMP_GNUC_STRICT && (__GXX_ABI_VERSION < 1004)
|
||||
#if (defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)
|
||||
// With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
|
||||
// have overloads for both types without linking error.
|
||||
// One solution is to increase ABI version using -fabi-version=4 (or greater).
|
||||
@@ -504,30 +504,13 @@ template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
|
||||
{
|
||||
return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
{
|
||||
return _mm_hadd_pd(vecs[0], vecs[1]);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f tmp0 = _mm_hadd_ps(a,a);
|
||||
return pfirst<Packet4f>(_mm_hadd_ps(tmp0, tmp0));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return pfirst<Packet2d>(_mm_hadd_pd(a, a)); }
|
||||
#else
|
||||
// SSE2 versions
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
|
||||
return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
|
||||
{
|
||||
Packet4f tmp0, tmp1, tmp2;
|
||||
@@ -548,6 +531,29 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
}
|
||||
#endif // SSE3
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
// Disable SSE3 _mm_hadd_pd that is extremely slow on all existing Intel's architectures
|
||||
// (from Nehalem to Haswell)
|
||||
// #ifdef EIGEN_VECTORIZE_SSE3
|
||||
// Packet4f tmp = _mm_add_ps(a, vec4f_swizzle1(a,2,3,2,3));
|
||||
// return pfirst<Packet4f>(_mm_hadd_ps(tmp, tmp));
|
||||
// #else
|
||||
Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
|
||||
return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
// #endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
// Disable SSE3 _mm_hadd_pd that is extremely slow on all existing Intel's architectures
|
||||
// (from Nehalem to Haswell)
|
||||
// #ifdef EIGEN_VECTORIZE_SSE3
|
||||
// return pfirst<Packet2d>(_mm_hadd_pd(a, a));
|
||||
// #else
|
||||
return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
// #endif
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSSE3
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
|
||||
@@ -818,6 +824,44 @@ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, cons
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
return _mm_blend_ps(a,pset1<Packet4f>(b),1);
|
||||
#else
|
||||
return _mm_move_ss(a, _mm_load_ss(&b));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
return _mm_blend_pd(a,pset1<Packet2d>(b),1);
|
||||
#else
|
||||
return _mm_move_sd(a, _mm_load_sd(&b));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
|
||||
#else
|
||||
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
|
||||
return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
|
||||
#else
|
||||
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
|
||||
return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
|
||||
#endif
|
||||
}
|
||||
|
||||
// Scalar path for pmadd with FMA to ensure consistency with vectorized path.
|
||||
#ifdef __FMA__
|
||||
template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
@@ -24,13 +25,48 @@ struct Packet1cd
|
||||
Packet2d v;
|
||||
};
|
||||
|
||||
struct Packet2cf
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
||||
union {
|
||||
Packet4f v;
|
||||
Packet1cd cd[2];
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
{
|
||||
typedef Packet2cf type;
|
||||
typedef Packet2cf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 2,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasBlend = 1,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
{
|
||||
typedef Packet1cd type;
|
||||
typedef Packet1cd half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 0,
|
||||
AlignedOnScalar = 1,
|
||||
size = 1,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
@@ -47,20 +83,68 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
|
||||
|
||||
/* Forward declaration */
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel);
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
|
||||
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.cd[0] = Packet1cd(vec_ld2f((const float *)&from));
|
||||
res.cd[1] = res.cd[0];
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||
af[0] = from[0*stride];
|
||||
af[1] = from[1*stride];
|
||||
return pload<Packet2cf>(af);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride EIGEN_UNUSED)
|
||||
{
|
||||
return pload<Packet1cd>(from);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||
pstore<std::complex<float> >((std::complex<float> *) af, from);
|
||||
to[0*stride] = af[0];
|
||||
to[1*stride] = af[1];
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride EIGEN_UNUSED)
|
||||
{
|
||||
pstore<std::complex<double> >(to, from);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v, b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v, b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(Packet4f(a.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.v.v4f[0] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0]))).v;
|
||||
res.v.v4f[1] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1]))).v;
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
@@ -79,43 +163,90 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
|
||||
|
||||
return Packet1cd(v1 + v2);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
return pset1<Packet1cd>(*from);
|
||||
Packet2cf res;
|
||||
res.v.v4f[0] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[0]))).v;
|
||||
res.v.v4f[1] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[1]))).v;
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v,b.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
std::complex<double> EIGEN_ALIGN16 res[2];
|
||||
pstore<std::complex<double> >(res, a);
|
||||
std::complex<double> EIGEN_ALIGN16 res;
|
||||
pstore<std::complex<double> >(&res, a);
|
||||
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 res[2];
|
||||
pstore<std::complex<float> >(res, a);
|
||||
|
||||
return res[0];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.cd[0] = a.cd[1];
|
||||
res.cd[1] = a.cd[0];
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
return pfirst(a);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
std::complex<float> res;
|
||||
Packet1cd b = padd<Packet1cd>(a.cd[0], a.cd[1]);
|
||||
vec_st2f(b.v, (float*)&res);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
|
||||
{
|
||||
return vecs[0];
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
|
||||
{
|
||||
PacketBlock<Packet2cf,2> transpose;
|
||||
transpose.packet[0] = vecs[0];
|
||||
transpose.packet[1] = vecs[1];
|
||||
ptranspose(transpose);
|
||||
|
||||
return padd<Packet2cf>(transpose.packet[0], transpose.packet[1]);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
return pfirst(a);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
std::complex<float> res;
|
||||
Packet1cd b = pmul<Packet1cd>(a.cd[0], a.cd[1]);
|
||||
vec_st2f(b.v, (float*)&res);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet1cd>
|
||||
@@ -127,6 +258,18 @@ struct palign_impl<Offset,Packet1cd>
|
||||
}
|
||||
};
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2cf>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
|
||||
{
|
||||
if (Offset == 1) {
|
||||
first.cd[0] = first.cd[1];
|
||||
first.cd[1] = second.cd[0];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
@@ -160,6 +303,39 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return internal::pmul(a, pconj(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return internal::pmul(pconj(a), b);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return pconj(internal::pmul(a, b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
// TODO optimize it for AltiVec
|
||||
@@ -168,17 +344,49 @@ template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, con
|
||||
return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
// TODO optimize it for AltiVec
|
||||
Packet2cf res;
|
||||
res.cd[0] = pdiv<Packet1cd>(a.cd[0], b.cd[0]);
|
||||
res.cd[1] = pdiv<Packet1cd>(a.cd[1], b.cd[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
|
||||
{
|
||||
return Packet1cd(preverse(Packet2d(x.v)));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.cd[0] = pcplxflip(x.cd[0]);
|
||||
res.cd[1] = pcplxflip(x.cd[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
|
||||
{
|
||||
Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
|
||||
kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
|
||||
kernel.packet[0].v = tmp;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
|
||||
{
|
||||
Packet1cd tmp = kernel.packet[0].cd[1];
|
||||
kernel.packet[0].cd[1] = kernel.packet[1].cd[0];
|
||||
kernel.packet[1].cd[0] = tmp;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
|
||||
Packet2cf result;
|
||||
const Selector<4> ifPacket4 = { ifPacket.select[0], ifPacket.select[0], ifPacket.select[1], ifPacket.select[1] };
|
||||
result.v = pblend<Packet4f>(ifPacket4, thenPacket.v, elsePacket.v);
|
||||
return result;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
//
|
||||
// Copyright (C) 2007 Julien Pommier
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
@@ -19,32 +20,32 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
{
|
||||
Packet2d x = _x;
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
|
||||
Packet2d tmp, fx;
|
||||
Packet2l emm0;
|
||||
|
||||
@@ -91,18 +92,44 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
isnumber_mask);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f pexp<Packet4f>(const Packet4f& x)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pexp<Packet2d>(x.v4f[0]);
|
||||
res.v4f[1] = pexp<Packet2d>(x.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d psqrt<Packet2d>(const Packet2d& x)
|
||||
{
|
||||
return __builtin_s390_vfsqdb(x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f psqrt<Packet4f>(const Packet4f& x)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = psqrt<Packet2d>(x.v4f[0]);
|
||||
res.v4f[1] = psqrt<Packet2d>(x.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d prsqrt<Packet2d>(const Packet2d& x) {
|
||||
// Unfortunately we can't use the much faster mm_rqsrt_pd since it only provides an approximation.
|
||||
return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f prsqrt<Packet4f>(const Packet4f& x) {
|
||||
Packet4f res;
|
||||
res.v4f[0] = prsqrt<Packet2d>(x.v4f[0]);
|
||||
res.v4f[1] = prsqrt<Packet2d>(x.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -28,9 +28,8 @@ namespace internal {
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#endif
|
||||
|
||||
// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
|
||||
#endif
|
||||
|
||||
typedef __vector int Packet4i;
|
||||
@@ -42,6 +41,10 @@ typedef __vector double Packet2d;
|
||||
typedef __vector unsigned long long Packet2ul;
|
||||
typedef __vector long long Packet2l;
|
||||
|
||||
typedef struct {
|
||||
Packet2d v4f[2];
|
||||
} Packet4f;
|
||||
|
||||
typedef union {
|
||||
int32_t i[4];
|
||||
uint32_t ui[4];
|
||||
@@ -88,6 +91,7 @@ static Packet2d p2d_ONE = { 1.0, 1.0 };
|
||||
static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
|
||||
|
||||
static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
|
||||
static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
|
||||
static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet16uc>(p2d_ZERO), reinterpret_cast<Packet16uc>(p2d_ONE), 8));
|
||||
|
||||
static Packet16uc p16uc_PSET64_HI = { 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
|
||||
@@ -96,7 +100,7 @@ static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
|
||||
// Mask alignment
|
||||
#define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0
|
||||
|
||||
#define _EIGEN_ALIGNED_PTR(x) ((ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
|
||||
#define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
|
||||
|
||||
// Handle endianness properly while loading constants
|
||||
// Define global static constants:
|
||||
@@ -132,13 +136,11 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
typedef Packet4i type;
|
||||
typedef Packet4i half;
|
||||
enum {
|
||||
// FIXME check the Has*
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 4,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
// FIXME check the Has*
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
@@ -147,6 +149,37 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet4f type;
|
||||
typedef Packet4f half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasMin = 1,
|
||||
HasMax = 1,
|
||||
HasAbs = 1,
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1,
|
||||
HasNegate = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
{
|
||||
typedef Packet2d type;
|
||||
@@ -157,7 +190,6 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
size=2,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
// FIXME check the Has*
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
@@ -180,8 +212,12 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
|
||||
|
||||
/* Forward declaration */
|
||||
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f,4>& kernel);
|
||||
|
||||
inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
|
||||
{
|
||||
Packet vt;
|
||||
@@ -222,6 +258,32 @@ inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
|
||||
return s;
|
||||
}
|
||||
|
||||
/* Helper function to simulate a vec_splat_packet4f
|
||||
*/
|
||||
template<int element> EIGEN_STRONG_INLINE Packet4f vec_splat_packet4f(const Packet4f& from)
|
||||
{
|
||||
Packet4f splat;
|
||||
switch (element) {
|
||||
case 0:
|
||||
splat.v4f[0] = vec_splat(from.v4f[0], 0);
|
||||
splat.v4f[1] = splat.v4f[0];
|
||||
break;
|
||||
case 1:
|
||||
splat.v4f[0] = vec_splat(from.v4f[0], 1);
|
||||
splat.v4f[1] = splat.v4f[0];
|
||||
break;
|
||||
case 2:
|
||||
splat.v4f[0] = vec_splat(from.v4f[1], 0);
|
||||
splat.v4f[1] = splat.v4f[0];
|
||||
break;
|
||||
case 3:
|
||||
splat.v4f[0] = vec_splat(from.v4f[1], 1);
|
||||
splat.v4f[1] = splat.v4f[0];
|
||||
break;
|
||||
}
|
||||
return splat;
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4i>
|
||||
{
|
||||
@@ -238,6 +300,31 @@ struct palign_impl<Offset,Packet4i>
|
||||
}
|
||||
};
|
||||
|
||||
/* This is a tricky one, we have to translate float alignment to vector elements of sizeof double
|
||||
*/
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4f>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
|
||||
{
|
||||
switch (Offset % 4) {
|
||||
case 1:
|
||||
first.v4f[0] = vec_sld(first.v4f[0], first.v4f[1], 8);
|
||||
first.v4f[1] = vec_sld(first.v4f[1], second.v4f[0], 8);
|
||||
break;
|
||||
case 2:
|
||||
first.v4f[0] = first.v4f[1];
|
||||
first.v4f[1] = second.v4f[0];
|
||||
break;
|
||||
case 3:
|
||||
first.v4f[0] = vec_sld(first.v4f[1], second.v4f[0], 8);
|
||||
first.v4f[1] = vec_sld(second.v4f[0], second.v4f[1], 8);
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2d>
|
||||
{
|
||||
@@ -257,6 +344,16 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
|
||||
return vfrom->v4i;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_LOAD
|
||||
Packet4f vfrom;
|
||||
vfrom.v4f[0] = vec_ld2f(&from[0]);
|
||||
vfrom.v4f[1] = vec_ld2f(&from[2]);
|
||||
return vfrom;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
@@ -275,6 +372,15 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& f
|
||||
vto->v4i = from;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_STORE
|
||||
vec_st2f(from.v4f[0], &to[0]);
|
||||
vec_st2f(from.v4f[1], &to[2]);
|
||||
}
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
@@ -288,10 +394,16 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from)
|
||||
{
|
||||
return vec_splats(from);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
|
||||
return vec_splats(from);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from)
|
||||
{
|
||||
Packet4f to;
|
||||
to.v4f[0] = pset1<Packet2d>(static_cast<const double&>(from));
|
||||
to.v4f[1] = to.v4f[0];
|
||||
return to;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet4i>(const int *a,
|
||||
@@ -304,6 +416,17 @@ pbroadcast4<Packet4i>(const int *a,
|
||||
a3 = vec_splat(a3, 3);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet4f>(const float *a,
|
||||
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
|
||||
{
|
||||
a3 = pload<Packet4f>(a);
|
||||
a0 = vec_splat_packet4f<0>(a3);
|
||||
a1 = vec_splat_packet4f<1>(a3);
|
||||
a2 = vec_splat_packet4f<2>(a3);
|
||||
a3 = vec_splat_packet4f<3>(a3);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet2d>(const double *a,
|
||||
Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
|
||||
@@ -326,6 +449,16 @@ template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* f
|
||||
return pload<Packet4i>(ai);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
|
||||
{
|
||||
float EIGEN_ALIGN16 ai[4];
|
||||
ai[0] = from[0*stride];
|
||||
ai[1] = from[1*stride];
|
||||
ai[2] = from[2*stride];
|
||||
ai[3] = from[3*stride];
|
||||
return pload<Packet4f>(ai);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
|
||||
{
|
||||
double EIGEN_ALIGN16 af[2];
|
||||
@@ -344,6 +477,16 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const
|
||||
to[3*stride] = ai[3];
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
|
||||
{
|
||||
float EIGEN_ALIGN16 ai[4];
|
||||
pstore<float>((float *)ai, from);
|
||||
to[0*stride] = ai[0];
|
||||
to[1*stride] = ai[1];
|
||||
to[2*stride] = ai[2];
|
||||
to[3*stride] = ai[3];
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
|
||||
{
|
||||
double EIGEN_ALIGN16 af[2];
|
||||
@@ -353,52 +496,160 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to,
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a + b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = a.v4f[0] + b.v4f[0];
|
||||
c.v4f[1] = a.v4f[1] + b.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a + b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a - b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = a.v4f[0] - b.v4f[0];
|
||||
c.v4f[1] = a.v4f[1] - b.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a - b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a * b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = a.v4f[0] * b.v4f[0];
|
||||
c.v4f[1] = a.v4f[1] * b.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a * b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a / b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = a.v4f[0] / b.v4f[0];
|
||||
c.v4f[1] = a.v4f[1] / b.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a / b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return (-a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = -a.v4f[0];
|
||||
c.v4f[1] = -a.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return (-a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd<Packet4i>(pmul<Packet4i>(a, b), c); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = vec_madd(a.v4f[0], b.v4f[0], c.v4f[0]);
|
||||
res.v4f[1] = vec_madd(a.v4f[1], b.v4f[1], c.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return padd<Packet4i>(pset1<Packet4i>(a), p4i_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return padd<Packet4f>(pset1<Packet4f>(a), p4f_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return padd<Packet2d>(pset1<Packet2d>(a), p2d_COUNTDOWN); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pmin(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pmin(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pmax(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pmax(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return pand<Packet4i>(a, vec_nor(b, b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pandnot(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pandnot(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = vec_round(a.v4f[0]);
|
||||
res.v4f[1] = vec_round(a.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = vec_ceil(a.v4f[0]);
|
||||
res.v4f[1] = vec_ceil(a.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = vec_floor(a.v4f[0]);
|
||||
res.v4f[1] = vec_floor(a.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { return pload<Packet4i>(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { return pload<Packet4f>(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { return pload<Packet2d>(from); }
|
||||
|
||||
|
||||
@@ -408,6 +659,14 @@ template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
|
||||
return vec_perm(p, p, p16uc_DUPLICATE32_HI);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
|
||||
{
|
||||
Packet4f p = pload<Packet4f>(from);
|
||||
p.v4f[1] = vec_splat(p.v4f[0], 1);
|
||||
p.v4f[0] = vec_splat(p.v4f[0], 0);
|
||||
return p;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
|
||||
{
|
||||
Packet2d p = pload<Packet2d>(from);
|
||||
@@ -415,12 +674,15 @@ template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { pstore<int>(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { pstore<float>(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { pstore<double>(to, from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[2]; vec_st2f(a.v4f[0], &x[0]); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
|
||||
@@ -433,8 +695,23 @@ template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
|
||||
return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
|
||||
{
|
||||
Packet4f rev;
|
||||
rev.v4f[0] = preverse<Packet2d>(a.v4f[1]);
|
||||
rev.v4f[1] = preverse<Packet2d>(a.v4f[0]);
|
||||
return rev;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pabs<Packet4i>(const Packet4i& a) { return vec_abs(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pabs<Packet2d>(const Packet2d& a) { return vec_abs(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pabs<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pabs(a.v4f[0]);
|
||||
res.v4f[1] = pabs(a.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
@@ -453,6 +730,13 @@ template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
|
||||
sum = padd<Packet2d>(a, b);
|
||||
return pfirst(sum);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet2d sum;
|
||||
sum = padd<Packet2d>(a.v4f[0], a.v4f[1]);
|
||||
double first = predux<Packet2d>(sum);
|
||||
return static_cast<float>(first);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
|
||||
{
|
||||
@@ -493,6 +777,21 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
return sum;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
|
||||
{
|
||||
PacketBlock<Packet4f,4> transpose;
|
||||
transpose.packet[0] = vecs[0];
|
||||
transpose.packet[1] = vecs[1];
|
||||
transpose.packet[2] = vecs[2];
|
||||
transpose.packet[3] = vecs[3];
|
||||
ptranspose(transpose);
|
||||
|
||||
Packet4f sum = padd(transpose.packet[0], transpose.packet[1]);
|
||||
sum = padd(sum, transpose.packet[2]);
|
||||
sum = padd(sum, transpose.packet[3]);
|
||||
return sum;
|
||||
}
|
||||
|
||||
// Other reduction functions:
|
||||
// mul
|
||||
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
|
||||
@@ -507,6 +806,12 @@ template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
|
||||
return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
// Return predux_mul<Packet2d> of the subvectors product
|
||||
return static_cast<float>(pfirst(predux_mul(pmul(a.v4f[0], a.v4f[1]))));
|
||||
}
|
||||
|
||||
// min
|
||||
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
@@ -521,6 +826,14 @@ template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
|
||||
return pfirst(pmin<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet2d b, res;
|
||||
b = pmin<Packet2d>(a.v4f[0], a.v4f[1]);
|
||||
res = pmin<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
|
||||
return static_cast<float>(pfirst(res));
|
||||
}
|
||||
|
||||
// max
|
||||
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
@@ -536,6 +849,14 @@ template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
|
||||
return pfirst(pmax<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet2d b, res;
|
||||
b = pmax<Packet2d>(a.v4f[0], a.v4f[1]);
|
||||
res = pmax<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
|
||||
return static_cast<float>(pfirst(res));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4i,4>& kernel) {
|
||||
Packet4i t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
|
||||
@@ -556,12 +877,61 @@ ptranspose(PacketBlock<Packet2d,2>& kernel) {
|
||||
kernel.packet[1] = t1;
|
||||
}
|
||||
|
||||
/* Split the Packet4f PacketBlock into 4 Packet2d PacketBlocks and transpose each one
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4f,4>& kernel) {
|
||||
PacketBlock<Packet2d,2> t0,t1,t2,t3;
|
||||
// copy top-left 2x2 Packet2d block
|
||||
t0.packet[0] = kernel.packet[0].v4f[0];
|
||||
t0.packet[1] = kernel.packet[1].v4f[0];
|
||||
|
||||
// copy top-right 2x2 Packet2d block
|
||||
t1.packet[0] = kernel.packet[0].v4f[1];
|
||||
t1.packet[1] = kernel.packet[1].v4f[1];
|
||||
|
||||
// copy bottom-left 2x2 Packet2d block
|
||||
t2.packet[0] = kernel.packet[2].v4f[0];
|
||||
t2.packet[1] = kernel.packet[3].v4f[0];
|
||||
|
||||
// copy bottom-right 2x2 Packet2d block
|
||||
t3.packet[0] = kernel.packet[2].v4f[1];
|
||||
t3.packet[1] = kernel.packet[3].v4f[1];
|
||||
|
||||
// Transpose all 2x2 blocks
|
||||
ptranspose(t0);
|
||||
ptranspose(t1);
|
||||
ptranspose(t2);
|
||||
ptranspose(t3);
|
||||
|
||||
// Copy back transposed blocks, but exchange t1 and t2 due to transposition
|
||||
kernel.packet[0].v4f[0] = t0.packet[0];
|
||||
kernel.packet[0].v4f[1] = t2.packet[0];
|
||||
kernel.packet[1].v4f[0] = t0.packet[1];
|
||||
kernel.packet[1].v4f[1] = t2.packet[1];
|
||||
kernel.packet[2].v4f[0] = t1.packet[0];
|
||||
kernel.packet[2].v4f[1] = t3.packet[0];
|
||||
kernel.packet[3].v4f[0] = t1.packet[1];
|
||||
kernel.packet[3].v4f[1] = t3.packet[1];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
|
||||
Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
|
||||
Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE));
|
||||
return vec_sel(elsePacket, thenPacket, mask);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
|
||||
Packet2ul select_hi = { ifPacket.select[0], ifPacket.select[1] };
|
||||
Packet2ul select_lo = { ifPacket.select[2], ifPacket.select[3] };
|
||||
Packet2ul mask_hi = vec_cmpeq(select_hi, reinterpret_cast<Packet2ul>(p2l_ONE));
|
||||
Packet2ul mask_lo = vec_cmpeq(select_lo, reinterpret_cast<Packet2ul>(p2l_ONE));
|
||||
Packet4f result;
|
||||
result.v4f[0] = vec_sel(elsePacket.v4f[0], thenPacket.v4f[0], mask_hi);
|
||||
result.v4f[1] = vec_sel(elsePacket.v4f[1], thenPacket.v4f[1], mask_lo);
|
||||
return result;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
|
||||
Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };
|
||||
Packet2ul mask = vec_cmpeq(select, reinterpret_cast<Packet2ul>(p2l_ONE));
|
||||
|
||||
@@ -28,7 +28,7 @@ template<typename DstScalar,typename SrcScalar> struct assign_op {
|
||||
{ internal::pstoret<DstScalar,Packet,Alignment>(a,b); }
|
||||
};
|
||||
|
||||
// Empty overload for void type (used by PermutationMatrix
|
||||
// Empty overload for void type (used by PermutationMatrix)
|
||||
template<typename DstScalar> struct assign_op<DstScalar,void> {};
|
||||
|
||||
template<typename DstScalar,typename SrcScalar>
|
||||
|
||||
@@ -266,7 +266,7 @@ struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
|
||||
// typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
|
||||
{
|
||||
using std::sqrt;
|
||||
EIGEN_USING_STD_MATH(sqrt)
|
||||
Scalar p, qp;
|
||||
if(_x>_y)
|
||||
{
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
@@ -37,87 +37,77 @@ template<typename Scalar>
|
||||
struct functor_traits<scalar_identity_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
|
||||
|
||||
template <typename Scalar, typename Packet, bool RandomAccess, bool IsInteger> struct linspaced_op_impl;
|
||||
template <typename Scalar, typename Packet, bool IsInteger> struct linspaced_op_impl;
|
||||
|
||||
// linear access for packet ops:
|
||||
// 1) initialization
|
||||
// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0])
|
||||
// 2) each step (where size is 1 for coeff access or PacketSize for packet access)
|
||||
// base += [size*step, ..., size*step]
|
||||
//
|
||||
// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
|
||||
// in order to avoid the padd() in operator() ?
|
||||
template <typename Scalar, typename Packet>
|
||||
struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/false,/*IsInteger*/false>
|
||||
struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
|
||||
{
|
||||
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
|
||||
m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
|
||||
m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*m_step)),
|
||||
m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(m_step),plset<Packet>(-unpacket_traits<Packet>::size)))) {}
|
||||
m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
|
||||
m_flip(numext::abs(high)<numext::abs(low))
|
||||
{}
|
||||
|
||||
template<typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const
|
||||
{
|
||||
m_base = padd(m_base, pset1<Packet>(m_step));
|
||||
return m_low+Scalar(i)*m_step;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const {
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
if(m_flip)
|
||||
return (i==0)? m_low : (m_high - RealScalar(m_size1-i)*m_step);
|
||||
else
|
||||
return (i==m_size1)? m_high : (m_low + RealScalar(i)*m_step);
|
||||
}
|
||||
|
||||
template<typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType) const { return m_base = padd(m_base,m_packetStep); }
|
||||
|
||||
const Scalar m_low;
|
||||
const Scalar m_step;
|
||||
const Packet m_packetStep;
|
||||
mutable Packet m_base;
|
||||
};
|
||||
|
||||
// random access for packet ops:
|
||||
// 1) each step
|
||||
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
|
||||
template <typename Scalar, typename Packet>
|
||||
struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/false>
|
||||
{
|
||||
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
|
||||
m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
|
||||
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {}
|
||||
|
||||
template<typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return m_low+i*m_step; }
|
||||
|
||||
template<typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const
|
||||
{ return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(Scalar(i)),m_interPacket))); }
|
||||
{
|
||||
// Principle:
|
||||
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
|
||||
if(m_flip)
|
||||
{
|
||||
Packet pi = plset<Packet>(Scalar(i-m_size1));
|
||||
Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi));
|
||||
if(i==0)
|
||||
res = pinsertfirst(res, m_low);
|
||||
return res;
|
||||
}
|
||||
else
|
||||
{
|
||||
Packet pi = plset<Packet>(Scalar(i));
|
||||
Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi));
|
||||
if(i==m_size1-unpacket_traits<Packet>::size+1)
|
||||
res = pinsertlast(res, m_high);
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
const Scalar m_low;
|
||||
const Scalar m_high;
|
||||
const Index m_size1;
|
||||
const Scalar m_step;
|
||||
const Packet m_lowPacket;
|
||||
const Packet m_stepPacket;
|
||||
const Packet m_interPacket;
|
||||
const bool m_flip;
|
||||
};
|
||||
|
||||
template <typename Scalar, typename Packet>
|
||||
struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/true>
|
||||
struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/true>
|
||||
{
|
||||
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
|
||||
m_low(low), m_length(high-low), m_divisor(convert_index<Scalar>(num_steps==1?1:num_steps-1)), m_interPacket(plset<Packet>(0))
|
||||
m_low(low),
|
||||
m_multiplier((high-low)/convert_index<Scalar>(num_steps<=1 ? 1 : num_steps-1)),
|
||||
m_divisor(convert_index<Scalar>((high>=low?num_steps:-num_steps)+(high-low))/((numext::abs(high-low)+1)==0?1:(numext::abs(high-low)+1))),
|
||||
m_use_divisor(num_steps>1 && (numext::abs(high-low)+1)<num_steps)
|
||||
{}
|
||||
|
||||
template<typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const Scalar operator() (IndexType i) const {
|
||||
return m_low + (m_length*Scalar(i))/m_divisor;
|
||||
const Scalar operator() (IndexType i) const
|
||||
{
|
||||
if(m_use_divisor) return m_low + convert_index<Scalar>(i)/m_divisor;
|
||||
else return m_low + convert_index<Scalar>(i)*m_multiplier;
|
||||
}
|
||||
|
||||
template<typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const Packet packetOp(IndexType i) const {
|
||||
return internal::padd(pset1<Packet>(m_low), pdiv(pmul(pset1<Packet>(m_length), padd(pset1<Packet>(Scalar(i)),m_interPacket)),
|
||||
pset1<Packet>(m_divisor))); }
|
||||
|
||||
const Scalar m_low;
|
||||
const Scalar m_length;
|
||||
const Scalar m_divisor;
|
||||
const Packet m_interPacket;
|
||||
const Scalar m_multiplier;
|
||||
const Scalar m_divisor;
|
||||
const bool m_use_divisor;
|
||||
};
|
||||
|
||||
// ----- Linspace functor ----------------------------------------------------------------
|
||||
@@ -125,18 +115,18 @@ struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/true>
|
||||
// Forward declaration (we default to random access which does not really give
|
||||
// us a speed gain when using packet access but it allows to use the functor in
|
||||
// nested expressions).
|
||||
template <typename Scalar, typename PacketType, bool RandomAccess = true> struct linspaced_op;
|
||||
template <typename Scalar, typename PacketType, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,PacketType,RandomAccess> >
|
||||
template <typename Scalar, typename PacketType> struct linspaced_op;
|
||||
template <typename Scalar, typename PacketType> struct functor_traits< linspaced_op<Scalar,PacketType> >
|
||||
{
|
||||
enum
|
||||
{
|
||||
Cost = 1,
|
||||
PacketAccess = packet_traits<Scalar>::HasSetLinear
|
||||
&& ((!NumTraits<Scalar>::IsInteger) || packet_traits<Scalar>::HasDiv),
|
||||
PacketAccess = (!NumTraits<Scalar>::IsInteger) && packet_traits<Scalar>::HasSetLinear && packet_traits<Scalar>::HasBlend,
|
||||
/*&& ((!NumTraits<Scalar>::IsInteger) || packet_traits<Scalar>::HasDiv),*/ // <- vectorization for integer is currently disabled
|
||||
IsRepeatable = true
|
||||
};
|
||||
};
|
||||
template <typename Scalar, typename PacketType, bool RandomAccess> struct linspaced_op
|
||||
template <typename Scalar, typename PacketType> struct linspaced_op
|
||||
{
|
||||
linspaced_op(const Scalar& low, const Scalar& high, Index num_steps)
|
||||
: impl((num_steps==1 ? high : low),high,num_steps)
|
||||
@@ -148,20 +138,49 @@ template <typename Scalar, typename PacketType, bool RandomAccess> struct linspa
|
||||
template<typename Packet,typename IndexType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const { return impl.packetOp(i); }
|
||||
|
||||
// This proxy object handles the actual required temporaries, the different
|
||||
// implementations (random vs. sequential access) as well as the
|
||||
// correct piping to size 2/4 packet operations.
|
||||
// As long as we don't have a Bresenham-like implementation for linear-access and integer types,
|
||||
// we have to by-pass RandomAccess for integer types. See bug 698.
|
||||
const linspaced_op_impl<Scalar,PacketType,(NumTraits<Scalar>::IsInteger?true:RandomAccess),NumTraits<Scalar>::IsInteger> impl;
|
||||
// This proxy object handles the actual required temporaries and the different
|
||||
// implementations (integer vs. floating point).
|
||||
const linspaced_op_impl<Scalar,PacketType,NumTraits<Scalar>::IsInteger> impl;
|
||||
};
|
||||
|
||||
// Linear access is automatically determined from the operator() prototypes available for the given functor.
|
||||
// If it exposes an operator()(i,j), then we assume the i and j coefficients are required independently
|
||||
// and linear access is not possible. In all other cases, linear access is enabled.
|
||||
// Users should not have to deal with this struture.
|
||||
// Users should not have to deal with this structure.
|
||||
template<typename Functor> struct functor_has_linear_access { enum { ret = !has_binary_operator<Functor>::value }; };
|
||||
|
||||
// For unreliable compilers, let's specialize the has_*ary_operator
|
||||
// helpers so that at least built-in nullary functors work fine.
|
||||
#if !( (EIGEN_COMP_MSVC>1600) || (EIGEN_GNUC_AT_LEAST(4,8)) || (EIGEN_COMP_ICC>=1600))
|
||||
template<typename Scalar,typename IndexType>
|
||||
struct has_nullary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 1}; };
|
||||
template<typename Scalar,typename IndexType>
|
||||
struct has_unary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };
|
||||
template<typename Scalar,typename IndexType>
|
||||
struct has_binary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };
|
||||
|
||||
template<typename Scalar,typename IndexType>
|
||||
struct has_nullary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };
|
||||
template<typename Scalar,typename IndexType>
|
||||
struct has_unary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };
|
||||
template<typename Scalar,typename IndexType>
|
||||
struct has_binary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 1}; };
|
||||
|
||||
template<typename Scalar, typename PacketType,typename IndexType>
|
||||
struct has_nullary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
|
||||
template<typename Scalar, typename PacketType,typename IndexType>
|
||||
struct has_unary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 1}; };
|
||||
template<typename Scalar, typename PacketType,typename IndexType>
|
||||
struct has_binary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
|
||||
|
||||
template<typename Scalar,typename IndexType>
|
||||
struct has_nullary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 1}; };
|
||||
template<typename Scalar,typename IndexType>
|
||||
struct has_unary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };
|
||||
template<typename Scalar,typename IndexType>
|
||||
struct has_binary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -72,7 +72,7 @@ template<typename T>
|
||||
struct functor_traits<std::not_equal_to<T> >
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
#if(__cplusplus < 201103L)
|
||||
#if (__cplusplus < 201103L) && (EIGEN_COMP_MSVC <= 1900)
|
||||
// std::binder* are deprecated since c++11 and will be removed in c++17
|
||||
template<typename T>
|
||||
struct functor_traits<std::binder2nd<T> >
|
||||
|
||||
@@ -321,7 +321,7 @@ struct functor_traits<scalar_log1p_op<Scalar> > {
|
||||
*/
|
||||
template<typename Scalar> struct scalar_log10_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_log10_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log10; return log10(a); }
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { EIGEN_USING_STD_MATH(log10) return log10(a); }
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog10(a); }
|
||||
};
|
||||
|
||||
@@ -580,7 +580,7 @@ DoublePacket<Packet> padd(const DoublePacket<Packet> &a, const DoublePacket<Pack
|
||||
}
|
||||
|
||||
template<typename Packet>
|
||||
const DoublePacket<Packet>& predux4(const DoublePacket<Packet> &a)
|
||||
const DoublePacket<Packet>& predux_downto4(const DoublePacket<Packet> &a)
|
||||
{
|
||||
return a;
|
||||
}
|
||||
@@ -972,7 +972,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
|
||||
EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
|
||||
internal::prefetch(blA+(3*K+16)*LhsProgress); \
|
||||
if (EIGEN_ARCH_ARM) internal::prefetch(blB+(4*K+16)*RhsProgress); /* Bug 953 */ \
|
||||
if (EIGEN_ARCH_ARM) { internal::prefetch(blB+(4*K+16)*RhsProgress); } /* Bug 953 */ \
|
||||
traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
|
||||
traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
|
||||
traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
|
||||
@@ -1581,10 +1581,10 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
if(SwappedTraits::LhsProgress==8)
|
||||
{
|
||||
// Special case where we have to first reduce the accumulation register C0
|
||||
typedef typename conditional<SwappedTraits::LhsProgress==8,typename unpacket_traits<SResPacket>::half,SResPacket>::type SResPacketHalf;
|
||||
typedef typename conditional<SwappedTraits::LhsProgress==8,typename unpacket_traits<SLhsPacket>::half,SLhsPacket>::type SLhsPacketHalf;
|
||||
typedef typename conditional<SwappedTraits::LhsProgress==8,typename unpacket_traits<SLhsPacket>::half,SRhsPacket>::type SRhsPacketHalf;
|
||||
typedef typename conditional<SwappedTraits::LhsProgress==8,typename unpacket_traits<SAccPacket>::half,SAccPacket>::type SAccPacketHalf;
|
||||
typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SResPacket>::half,SResPacket>::type SResPacketHalf;
|
||||
typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SLhsPacket>::half,SLhsPacket>::type SLhsPacketHalf;
|
||||
typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SLhsPacket>::half,SRhsPacket>::type SRhsPacketHalf;
|
||||
typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SAccPacket>::half,SAccPacket>::type SAccPacketHalf;
|
||||
|
||||
SResPacketHalf R = res.template gatherPacket<SResPacketHalf>(i, j2);
|
||||
SResPacketHalf alphav = pset1<SResPacketHalf>(alpha);
|
||||
@@ -1596,13 +1596,13 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
SRhsPacketHalf b0;
|
||||
straits.loadLhsUnaligned(blB, a0);
|
||||
straits.loadRhs(blA, b0);
|
||||
SAccPacketHalf c0 = predux4(C0);
|
||||
SAccPacketHalf c0 = predux_downto4(C0);
|
||||
straits.madd(a0,b0,c0,b0);
|
||||
straits.acc(c0, alphav, R);
|
||||
}
|
||||
else
|
||||
{
|
||||
straits.acc(predux4(C0), alphav, R);
|
||||
straits.acc(predux_downto4(C0), alphav, R);
|
||||
}
|
||||
res.scatterPacket(i, j2, R);
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
|
||||
#define EIGEN_GENERAL_MATRIX_MATRIX_H
|
||||
|
||||
namespace Eigen {
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
@@ -24,7 +24,7 @@ template<
|
||||
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
|
||||
{
|
||||
typedef gebp_traits<RhsScalar,LhsScalar> Traits;
|
||||
|
||||
|
||||
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
static EIGEN_STRONG_INLINE void run(
|
||||
Index rows, Index cols, Index depth,
|
||||
@@ -54,7 +54,7 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh
|
||||
{
|
||||
|
||||
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
|
||||
|
||||
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
static void run(Index rows, Index cols, Index depth,
|
||||
const LhsScalar* _lhs, Index lhsStride,
|
||||
@@ -83,15 +83,15 @@ static void run(Index rows, Index cols, Index depth,
|
||||
if(info)
|
||||
{
|
||||
// this is the parallel version!
|
||||
Index tid = omp_get_thread_num();
|
||||
Index threads = omp_get_num_threads();
|
||||
|
||||
int tid = omp_get_thread_num();
|
||||
int threads = omp_get_num_threads();
|
||||
|
||||
LhsScalar* blockA = blocking.blockA();
|
||||
eigen_internal_assert(blockA!=0);
|
||||
|
||||
|
||||
std::size_t sizeB = kc*nc;
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);
|
||||
|
||||
|
||||
// For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
|
||||
for(Index k=0; k<depth; k+=kc)
|
||||
{
|
||||
@@ -114,11 +114,11 @@ static void run(Index rows, Index cols, Index depth,
|
||||
|
||||
// Notify the other threads that the part A'_i is ready to go.
|
||||
info[tid].sync = k;
|
||||
|
||||
|
||||
// Computes C_i += A' * B' per A'_i
|
||||
for(Index shift=0; shift<threads; ++shift)
|
||||
for(int shift=0; shift<threads; ++shift)
|
||||
{
|
||||
Index i = (tid+shift)%threads;
|
||||
int i = (tid+shift)%threads;
|
||||
|
||||
// At this point we have to make sure that A'_i has been updated by the thread i,
|
||||
// we use testAndSetOrdered to mimic a volatile access.
|
||||
@@ -161,7 +161,7 @@ static void run(Index rows, Index cols, Index depth,
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
|
||||
|
||||
|
||||
const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols;
|
||||
|
||||
// For each horizontal panel of the rhs, and corresponding panel of the lhs...
|
||||
@@ -172,24 +172,24 @@ static void run(Index rows, Index cols, Index depth,
|
||||
for(Index k2=0; k2<depth; k2+=kc)
|
||||
{
|
||||
const Index actual_kc = (std::min)(k2+kc,depth)-k2;
|
||||
|
||||
|
||||
// OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
|
||||
// => Pack lhs's panel into a sequential chunk of memory (L2/L3 caching)
|
||||
// Note that this panel will be read as many times as the number of blocks in the rhs's
|
||||
// horizontal panel which is, in practice, a very low number.
|
||||
pack_lhs(blockA, lhs.getSubMapper(i2,k2), actual_kc, actual_mc);
|
||||
|
||||
|
||||
// For each kc x nc block of the rhs's horizontal panel...
|
||||
for(Index j2=0; j2<cols; j2+=nc)
|
||||
{
|
||||
const Index actual_nc = (std::min)(j2+nc,cols)-j2;
|
||||
|
||||
|
||||
// We pack the rhs's block into a sequential chunk of memory (L2 caching)
|
||||
// Note that this block will be read a very high number of times, which is equal to the number of
|
||||
// micro horizontal panel of the large rhs's panel (e.g., rows/12 times).
|
||||
if((!pack_rhs_once) || i2==0)
|
||||
pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);
|
||||
|
||||
|
||||
// Everything is packed, we can now call the panel * block kernel:
|
||||
gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
|
||||
}
|
||||
@@ -229,7 +229,7 @@ struct gemm_functor
|
||||
(Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
|
||||
m_actualAlpha, m_blocking, info);
|
||||
}
|
||||
|
||||
|
||||
typedef typename Gemm::Traits Traits;
|
||||
|
||||
protected:
|
||||
@@ -313,7 +313,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
||||
this->m_blockB = reinterpret_cast<RhsScalar*>((internal::UIntPtr(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void initParallel(Index, Index, Index, Index)
|
||||
{}
|
||||
|
||||
@@ -359,14 +359,14 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
||||
m_sizeA = this->m_mc * this->m_kc;
|
||||
m_sizeB = this->m_kc * this->m_nc;
|
||||
}
|
||||
|
||||
|
||||
void initParallel(Index rows, Index cols, Index depth, Index num_threads)
|
||||
{
|
||||
this->m_mc = Transpose ? cols : rows;
|
||||
this->m_nc = Transpose ? rows : cols;
|
||||
this->m_kc = depth;
|
||||
|
||||
eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
|
||||
|
||||
eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
|
||||
Index m = this->m_mc;
|
||||
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
|
||||
m_sizeA = this->m_mc * this->m_kc;
|
||||
@@ -401,7 +401,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
||||
} // end namespace internal
|
||||
|
||||
namespace internal {
|
||||
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
|
||||
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> >
|
||||
@@ -409,21 +409,21 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
|
||||
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
||||
typedef typename Lhs::Scalar LhsScalar;
|
||||
typedef typename Rhs::Scalar RhsScalar;
|
||||
|
||||
|
||||
typedef internal::blas_traits<Lhs> LhsBlasTraits;
|
||||
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
|
||||
typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
|
||||
|
||||
|
||||
typedef internal::blas_traits<Rhs> RhsBlasTraits;
|
||||
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
|
||||
typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
|
||||
|
||||
|
||||
enum {
|
||||
MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
|
||||
};
|
||||
|
||||
|
||||
typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct;
|
||||
|
||||
|
||||
template<typename Dst>
|
||||
static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
@@ -453,7 +453,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
|
||||
else
|
||||
scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
|
||||
}
|
||||
|
||||
|
||||
template<typename Dest>
|
||||
static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
|
||||
{
|
||||
@@ -481,7 +481,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
|
||||
|
||||
BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);
|
||||
internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>
|
||||
(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), Dest::Flags&RowMajorBit);
|
||||
(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), a_lhs.cols(), Dest::Flags&RowMajorBit);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -148,7 +148,7 @@ struct tribb_kernel
|
||||
ResMapper res(_res, resStride);
|
||||
gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
|
||||
|
||||
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer;
|
||||
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer((internal::constructor_without_unaligned_array_assert()));
|
||||
|
||||
// let's process the block per panel of actual_mc x BlockSize,
|
||||
// again, each is split into three parts, etc.
|
||||
@@ -199,7 +199,7 @@ struct general_product_to_triangular_selector;
|
||||
template<typename MatrixType, typename ProductType, int UpLo>
|
||||
struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>
|
||||
{
|
||||
static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
|
||||
static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta)
|
||||
{
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
|
||||
@@ -217,6 +217,9 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>
|
||||
|
||||
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
|
||||
|
||||
if(!beta)
|
||||
mat.template triangularView<UpLo>().setZero();
|
||||
|
||||
enum {
|
||||
StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,
|
||||
UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1,
|
||||
@@ -244,7 +247,7 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>
|
||||
template<typename MatrixType, typename ProductType, int UpLo>
|
||||
struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
|
||||
{
|
||||
static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
|
||||
static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta)
|
||||
{
|
||||
typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
|
||||
typedef internal::blas_traits<Lhs> LhsBlasTraits;
|
||||
@@ -260,13 +263,19 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
|
||||
|
||||
typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
|
||||
|
||||
if(!beta)
|
||||
mat.template triangularView<UpLo>().setZero();
|
||||
|
||||
enum {
|
||||
IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
|
||||
LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0,
|
||||
RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0
|
||||
RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0,
|
||||
SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0
|
||||
};
|
||||
|
||||
Index size = mat.cols();
|
||||
if(SkipDiag)
|
||||
size--;
|
||||
Index depth = actualLhs.cols();
|
||||
|
||||
typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar,
|
||||
@@ -277,20 +286,22 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
|
||||
internal::general_matrix_matrix_triangular_product<Index,
|
||||
typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
|
||||
typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
|
||||
IsRowMajor ? RowMajor : ColMajor, UpLo>
|
||||
IsRowMajor ? RowMajor : ColMajor, UpLo&(Lower|Upper)>
|
||||
::run(size, depth,
|
||||
&actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
|
||||
mat.data(), mat.outerStride(), actualAlpha, blocking);
|
||||
&actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(),
|
||||
&actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(),
|
||||
mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 1 : mat.outerStride() ) : 0), mat.outerStride(), actualAlpha, blocking);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename MatrixType, unsigned int UpLo>
|
||||
template<typename ProductType>
|
||||
TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha)
|
||||
TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED);
|
||||
eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols());
|
||||
|
||||
general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha);
|
||||
general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta);
|
||||
|
||||
return derived();
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H
|
||||
#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H
|
||||
|
||||
namespace Eigen {
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
@@ -52,7 +52,7 @@ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,Con
|
||||
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
|
||||
const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \
|
||||
{ \
|
||||
if (lhs==rhs) { \
|
||||
if ( lhs==rhs && ((UpLo&(Lower|Upper)==UpLo)) ) { \
|
||||
general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
|
||||
::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \
|
||||
} else { \
|
||||
@@ -86,8 +86,8 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
|
||||
/* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \
|
||||
\
|
||||
BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
|
||||
char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \
|
||||
EIGTYPE beta; \
|
||||
char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'T':'N'); \
|
||||
EIGTYPE beta(1); \
|
||||
BLASFUNC(&uplo, &trans, &n, &k, &numext::real_ref(alpha), lhs, &lda, &numext::real_ref(beta), res, &ldc); \
|
||||
} \
|
||||
};
|
||||
@@ -107,7 +107,7 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
|
||||
typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \
|
||||
\
|
||||
BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
|
||||
char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \
|
||||
char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'C':'N'); \
|
||||
RTYPE alpha_, beta_; \
|
||||
const EIGTYPE* a_ptr; \
|
||||
\
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
#ifndef EIGEN_PARALLELIZER_H
|
||||
#define EIGEN_PARALLELIZER_H
|
||||
|
||||
namespace Eigen {
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
@@ -75,7 +75,7 @@ template<typename Index> struct GemmParallelInfo
|
||||
{
|
||||
GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
|
||||
|
||||
int volatile sync;
|
||||
Index volatile sync;
|
||||
int volatile users;
|
||||
|
||||
Index lhs_start;
|
||||
@@ -83,7 +83,7 @@ template<typename Index> struct GemmParallelInfo
|
||||
};
|
||||
|
||||
template<bool Condition, typename Functor, typename Index>
|
||||
void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
|
||||
void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)
|
||||
{
|
||||
// TODO when EIGEN_USE_BLAS is defined,
|
||||
// we should still enable OMP for other scalar types
|
||||
@@ -92,6 +92,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
|
||||
// the matrix product when multithreading is enabled. This is a temporary
|
||||
// fix to support row-major destination matrices. This whole
|
||||
// parallelizer mechanism has to be redisigned anyway.
|
||||
EIGEN_UNUSED_VARIABLE(depth);
|
||||
EIGEN_UNUSED_VARIABLE(transpose);
|
||||
func(0,rows, 0,cols);
|
||||
#else
|
||||
@@ -103,9 +104,16 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
|
||||
// - the sizes are large enough
|
||||
|
||||
// compute the maximal number of threads from the size of the product:
|
||||
// FIXME this has to be fine tuned
|
||||
// This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at once.
|
||||
Index size = transpose ? rows : cols;
|
||||
Index pb_max_threads = std::max<Index>(1,size / 32);
|
||||
Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);
|
||||
|
||||
// compute the maximal number of threads from the total amount of work:
|
||||
double work = static_cast<double>(rows) * static_cast<double>(cols) *
|
||||
static_cast<double>(depth);
|
||||
double kMinTaskSize = 50000; // FIXME improve this heuristic.
|
||||
pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
|
||||
|
||||
// compute the number of threads we are going to use
|
||||
Index threads = std::min<Index>(nbThreads(), pb_max_threads);
|
||||
|
||||
@@ -120,19 +128,19 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
|
||||
|
||||
if(transpose)
|
||||
std::swap(rows,cols);
|
||||
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
|
||||
|
||||
|
||||
#pragma omp parallel num_threads(threads)
|
||||
{
|
||||
Index i = omp_get_thread_num();
|
||||
// Note that the actual number of threads might be lower than the number of request ones.
|
||||
Index actual_threads = omp_get_num_threads();
|
||||
|
||||
|
||||
Index blockCols = (cols / actual_threads) & ~Index(0x3);
|
||||
Index blockRows = (rows / actual_threads);
|
||||
blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
|
||||
|
||||
|
||||
Index r0 = i*blockRows;
|
||||
Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
|
||||
|
||||
|
||||
@@ -83,10 +83,10 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
|
||||
Scalar t3(0);
|
||||
Packet ptmp3 = pset1<Packet>(t3);
|
||||
|
||||
size_t starti = FirstTriangular ? 0 : j+2;
|
||||
size_t endi = FirstTriangular ? j : size;
|
||||
size_t alignedStart = (starti) + internal::first_default_aligned(&res[starti], endi-starti);
|
||||
size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
|
||||
Index starti = FirstTriangular ? 0 : j+2;
|
||||
Index endi = FirstTriangular ? j : size;
|
||||
Index alignedStart = (starti) + internal::first_default_aligned(&res[starti], endi-starti);
|
||||
Index alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
|
||||
|
||||
res[j] += cjd.pmul(numext::real(A0[j]), t0);
|
||||
res[j+1] += cjd.pmul(numext::real(A1[j+1]), t1);
|
||||
@@ -101,7 +101,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
|
||||
t2 += cj1.pmul(A0[j+1], rhs[j+1]);
|
||||
}
|
||||
|
||||
for (size_t i=starti; i<alignedStart; ++i)
|
||||
for (Index i=starti; i<alignedStart; ++i)
|
||||
{
|
||||
res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
|
||||
t2 += cj1.pmul(A0[i], rhs[i]);
|
||||
@@ -113,7 +113,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
|
||||
const Scalar* EIGEN_RESTRICT a1It = A1 + alignedStart;
|
||||
const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart;
|
||||
Scalar* EIGEN_RESTRICT resIt = res + alignedStart;
|
||||
for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize)
|
||||
for (Index i=alignedStart; i<alignedEnd; i+=PacketSize)
|
||||
{
|
||||
Packet A0i = ploadu<Packet>(a0It); a0It += PacketSize;
|
||||
Packet A1i = ploadu<Packet>(a1It); a1It += PacketSize;
|
||||
@@ -125,7 +125,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
|
||||
ptmp3 = pcj1.pmadd(A1i, Bi, ptmp3);
|
||||
pstore(resIt,Xi); resIt += PacketSize;
|
||||
}
|
||||
for (size_t i=alignedEnd; i<endi; i++)
|
||||
for (Index i=alignedEnd; i<endi; i++)
|
||||
{
|
||||
res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
|
||||
t2 += cj1.pmul(A0[i], rhs[i]);
|
||||
|
||||
@@ -137,7 +137,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
||||
|
||||
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
|
||||
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert()));
|
||||
triangularBuffer.setZero();
|
||||
if((Mode&ZeroDiag)==ZeroDiag)
|
||||
triangularBuffer.diagonal().setZero();
|
||||
@@ -284,7 +284,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
||||
|
||||
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
|
||||
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert()));
|
||||
triangularBuffer.setZero();
|
||||
if((Mode&ZeroDiag)==ZeroDiag)
|
||||
triangularBuffer.diagonal().setZero();
|
||||
|
||||
@@ -183,7 +183,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
|
||||
}
|
||||
}
|
||||
|
||||
/* Optimized triangular solver with multiple left hand sides and the trinagular matrix on the right
|
||||
/* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right
|
||||
*/
|
||||
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
|
||||
struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>
|
||||
@@ -202,6 +202,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conj
|
||||
level3_blocking<Scalar,Scalar>& blocking)
|
||||
{
|
||||
Index rows = otherSize;
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
|
||||
typedef blas_data_mapper<Scalar, Index, ColMajor> LhsMapper;
|
||||
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper;
|
||||
@@ -306,9 +307,9 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conj
|
||||
}
|
||||
if((Mode & UnitDiag)==0)
|
||||
{
|
||||
Scalar b = conj(rhs(j,j));
|
||||
Scalar inv_rjj = RealScalar(1)/conj(rhs(j,j));
|
||||
for (Index i=0; i<actual_mc; ++i)
|
||||
r[i] /= b;
|
||||
r[i] *= inv_rjj;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -14,12 +14,13 @@
|
||||
// 4512 - assignment operator could not be generated
|
||||
// 4522 - 'class' : multiple assignment operators specified
|
||||
// 4700 - uninitialized local variable 'xyz' used
|
||||
// 4714 - function marked as __forceinline not inlined
|
||||
// 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow
|
||||
// 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning)
|
||||
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
|
||||
#pragma warning( push )
|
||||
#endif
|
||||
#pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 4800)
|
||||
#pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800)
|
||||
|
||||
#elif defined __INTEL_COMPILER
|
||||
// 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
|
||||
@@ -67,6 +68,8 @@
|
||||
#pragma diag_suppress 2669
|
||||
#pragma diag_suppress 2670
|
||||
#pragma diag_suppress 2671
|
||||
#pragma diag_suppress 2735
|
||||
#pragma diag_suppress 2737
|
||||
#endif
|
||||
|
||||
#endif // not EIGEN_WARNINGS_DISABLED
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
#define EIGEN_MACROS_H
|
||||
|
||||
#define EIGEN_WORLD_VERSION 3
|
||||
#define EIGEN_MAJOR_VERSION 2
|
||||
#define EIGEN_MINOR_VERSION 94
|
||||
#define EIGEN_MAJOR_VERSION 3
|
||||
#define EIGEN_MINOR_VERSION 4
|
||||
|
||||
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
|
||||
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
|
||||
@@ -80,8 +80,8 @@
|
||||
// 2015 14 1900
|
||||
// "15" 15 1900
|
||||
|
||||
/// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC
|
||||
#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC)
|
||||
/// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC or clang-cl
|
||||
#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC || EIGEN_COMP_LLVM || EIGEN_COMP_CLANG)
|
||||
#define EIGEN_COMP_MSVC_STRICT _MSC_VER
|
||||
#else
|
||||
#define EIGEN_COMP_MSVC_STRICT 0
|
||||
@@ -356,6 +356,13 @@
|
||||
#define EIGEN_MAX_CPP_VER 99
|
||||
#endif
|
||||
|
||||
#if EIGEN_MAX_CPP_VER>=11 && (defined(__cplusplus) && (__cplusplus >= 201103L) || EIGEN_COMP_MSVC >= 1900)
|
||||
#define EIGEN_HAS_CXX11 1
|
||||
#else
|
||||
#define EIGEN_HAS_CXX11 0
|
||||
#endif
|
||||
|
||||
|
||||
// Do we support r-value references?
|
||||
#ifndef EIGEN_HAS_RVALUE_REFERENCES
|
||||
#if EIGEN_MAX_CPP_VER>=11 && \
|
||||
@@ -392,8 +399,8 @@
|
||||
// Does the compiler support variadic templates?
|
||||
#ifndef EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
#if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
|
||||
&& ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 )
|
||||
// ^^ Disable the use of variadic templates when compiling with nvcc on ARM devices:
|
||||
&& ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) )
|
||||
// ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices:
|
||||
// this prevents nvcc from crashing when compiling Eigen on Tegra X1
|
||||
#define EIGEN_HAS_VARIADIC_TEMPLATES 1
|
||||
#else
|
||||
@@ -490,10 +497,11 @@
|
||||
// attribute to maximize inlining. This should only be used when really necessary: in particular,
|
||||
// it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times.
|
||||
// FIXME with the always_inline attribute,
|
||||
// gcc 3.4.x reports the following compilation error:
|
||||
// gcc 3.4.x and 4.1 reports the following compilation error:
|
||||
// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
|
||||
// : function body not available
|
||||
#if EIGEN_GNUC_AT_LEAST(4,0)
|
||||
// See also bug 1367
|
||||
#if EIGEN_GNUC_AT_LEAST(4,2)
|
||||
#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
|
||||
#else
|
||||
#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
|
||||
@@ -652,6 +660,9 @@ namespace Eigen {
|
||||
// If the user explicitly disable vectorization, then we also disable alignment
|
||||
#if defined(EIGEN_DONT_VECTORIZE)
|
||||
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
|
||||
#elif defined(EIGEN_VECTORIZE_AVX512)
|
||||
// 64 bytes static alignmeent is preferred only if really required
|
||||
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
|
||||
#elif defined(__AVX__)
|
||||
// 32 bytes static alignmeent is preferred only if really required
|
||||
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
|
||||
@@ -801,7 +812,7 @@ namespace Eigen {
|
||||
// just an empty macro !
|
||||
#define EIGEN_EMPTY
|
||||
|
||||
#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1900 // for older MSVC versions using the base operator is sufficient (cf Bug 1000)
|
||||
#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
|
||||
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
|
||||
using Base::operator =;
|
||||
#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
|
||||
|
||||
@@ -150,7 +150,7 @@ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
|
||||
/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements.
|
||||
* On allocation error, the returned pointer is null, and std::bad_alloc is thrown.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
|
||||
EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
|
||||
{
|
||||
check_that_malloc_is_allowed();
|
||||
|
||||
@@ -185,7 +185,7 @@ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
|
||||
* \brief Reallocates an aligned block of memory.
|
||||
* \throws std::bad_alloc on allocation failure
|
||||
*/
|
||||
inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
|
||||
inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
|
||||
{
|
||||
EIGEN_UNUSED_VARIABLE(old_size);
|
||||
|
||||
@@ -209,12 +209,12 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
|
||||
/** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
|
||||
* On allocation error, the returned pointer is null, and a std::bad_alloc is thrown.
|
||||
*/
|
||||
template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(size_t size)
|
||||
template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size)
|
||||
{
|
||||
return aligned_malloc(size);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(size_t size)
|
||||
template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size)
|
||||
{
|
||||
check_that_malloc_is_allowed();
|
||||
|
||||
@@ -235,12 +235,12 @@ template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *p
|
||||
std::free(ptr);
|
||||
}
|
||||
|
||||
template<bool Align> inline void* conditional_aligned_realloc(void* ptr, size_t new_size, size_t old_size)
|
||||
template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
|
||||
{
|
||||
return aligned_realloc(ptr, new_size, old_size);
|
||||
}
|
||||
|
||||
template<> inline void* conditional_aligned_realloc<false>(void* ptr, size_t new_size, size_t)
|
||||
template<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t)
|
||||
{
|
||||
return std::realloc(ptr, new_size);
|
||||
}
|
||||
@@ -252,7 +252,7 @@ template<> inline void* conditional_aligned_realloc<false>(void* ptr, size_t new
|
||||
/** \internal Destructs the elements of an array.
|
||||
* The \a size parameters tells on how many objects to call the destructor of T.
|
||||
*/
|
||||
template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, size_t size)
|
||||
template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, std::size_t size)
|
||||
{
|
||||
// always destruct an array starting from the end.
|
||||
if(ptr)
|
||||
@@ -262,9 +262,9 @@ template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T
|
||||
/** \internal Constructs the elements of an array.
|
||||
* The \a size parameter tells on how many objects to call the constructor of T.
|
||||
*/
|
||||
template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, size_t size)
|
||||
template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, std::size_t size)
|
||||
{
|
||||
size_t i;
|
||||
std::size_t i;
|
||||
EIGEN_TRY
|
||||
{
|
||||
for (i = 0; i < size; ++i) ::new (ptr + i) T;
|
||||
@@ -283,9 +283,9 @@ template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *
|
||||
*****************************************************************************/
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size)
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size)
|
||||
{
|
||||
if(size > size_t(-1) / sizeof(T))
|
||||
if(size > std::size_t(-1) / sizeof(T))
|
||||
throw_std_bad_alloc();
|
||||
}
|
||||
|
||||
@@ -293,7 +293,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size)
|
||||
* On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown.
|
||||
* The default constructor of T is called.
|
||||
*/
|
||||
template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(size_t size)
|
||||
template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size)
|
||||
{
|
||||
check_size_for_overflow<T>(size);
|
||||
T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));
|
||||
@@ -309,7 +309,7 @@ template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(size_t size)
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(size_t size)
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size)
|
||||
{
|
||||
check_size_for_overflow<T>(size);
|
||||
T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
|
||||
@@ -328,7 +328,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
|
||||
/** \internal Deletes objects constructed with aligned_new
|
||||
* The \a size parameters tells on how many objects to call the destructor of T.
|
||||
*/
|
||||
template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, size_t size)
|
||||
template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size)
|
||||
{
|
||||
destruct_elements_of_array<T>(ptr, size);
|
||||
aligned_free(ptr);
|
||||
@@ -337,13 +337,13 @@ template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, size_t
|
||||
/** \internal Deletes objects constructed with conditional_aligned_new
|
||||
* The \a size parameters tells on how many objects to call the destructor of T.
|
||||
*/
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, size_t size)
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, std::size_t size)
|
||||
{
|
||||
destruct_elements_of_array<T>(ptr, size);
|
||||
conditional_aligned_free<Align>(ptr);
|
||||
}
|
||||
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size)
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size)
|
||||
{
|
||||
check_size_for_overflow<T>(new_size);
|
||||
check_size_for_overflow<T>(old_size);
|
||||
@@ -366,7 +366,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
|
||||
}
|
||||
|
||||
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(size_t size)
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size)
|
||||
{
|
||||
if(size==0)
|
||||
return 0; // short-cut. Also fixes Bug 884
|
||||
@@ -387,7 +387,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, size_t new_size, size_t old_size)
|
||||
template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size)
|
||||
{
|
||||
check_size_for_overflow<T>(new_size);
|
||||
check_size_for_overflow<T>(old_size);
|
||||
@@ -409,7 +409,7 @@ template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, size_t size)
|
||||
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size)
|
||||
{
|
||||
if(NumTraits<T>::RequireInitialization)
|
||||
destruct_elements_of_array<T>(ptr, size);
|
||||
@@ -523,7 +523,7 @@ template<typename T> struct smart_memmove_helper<T,true> {
|
||||
template<typename T> struct smart_memmove_helper<T,false> {
|
||||
static inline void run(const T* start, const T* end, T* target)
|
||||
{
|
||||
if (uintptr_t(target) < uintptr_t(start))
|
||||
if (UIntPtr(target) < UIntPtr(start))
|
||||
{
|
||||
std::copy(start, end, target);
|
||||
}
|
||||
@@ -561,7 +561,7 @@ template<typename T> class aligned_stack_memory_handler : noncopyable
|
||||
* In this case, the buffer elements will also be destructed when this handler will be destructed.
|
||||
* Finally, if \a dealloc is true, then the pointer \a ptr is freed.
|
||||
**/
|
||||
aligned_stack_memory_handler(T* ptr, size_t size, bool dealloc)
|
||||
aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
|
||||
: m_ptr(ptr), m_size(size), m_deallocate(dealloc)
|
||||
{
|
||||
if(NumTraits<T>::RequireInitialization && m_ptr)
|
||||
@@ -576,7 +576,7 @@ template<typename T> class aligned_stack_memory_handler : noncopyable
|
||||
}
|
||||
protected:
|
||||
T* m_ptr;
|
||||
size_t m_size;
|
||||
std::size_t m_size;
|
||||
bool m_deallocate;
|
||||
};
|
||||
|
||||
@@ -655,15 +655,15 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
|
||||
|
||||
#if EIGEN_MAX_ALIGN_BYTES!=0
|
||||
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
|
||||
void* operator new(size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
|
||||
void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
|
||||
EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
|
||||
EIGEN_CATCH (...) { return 0; } \
|
||||
}
|
||||
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
|
||||
void *operator new(size_t size) { \
|
||||
void *operator new(std::size_t size) { \
|
||||
return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
|
||||
} \
|
||||
void *operator new[](size_t size) { \
|
||||
void *operator new[](std::size_t size) { \
|
||||
return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
|
||||
} \
|
||||
void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
|
||||
@@ -673,8 +673,8 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
|
||||
/* in-place new and delete. since (at least afaik) there is no actual */ \
|
||||
/* memory allocated we can safely let the default implementation handle */ \
|
||||
/* this particular case. */ \
|
||||
static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \
|
||||
static void *operator new[](size_t size, void* ptr) { return ::operator new[](size,ptr); } \
|
||||
static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
|
||||
static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
|
||||
void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
|
||||
void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
|
||||
/* nothrow-new (returns zero instead of std::bad_alloc) */ \
|
||||
@@ -713,7 +713,7 @@ template<class T>
|
||||
class aligned_allocator : public std::allocator<T>
|
||||
{
|
||||
public:
|
||||
typedef size_t size_type;
|
||||
typedef std::size_t size_type;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
typedef T* pointer;
|
||||
typedef const T* const_pointer;
|
||||
|
||||
@@ -381,12 +381,12 @@ struct has_ReturnType
|
||||
enum { value = sizeof(testFunctor<T>(0)) == sizeof(meta_yes) };
|
||||
};
|
||||
|
||||
template<typename T> const T& return_ref();
|
||||
template<typename T> const T* return_ptr();
|
||||
|
||||
template <typename T, typename IndexType=Index>
|
||||
struct has_nullary_operator
|
||||
{
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()())>0)>::type * = 0);
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()())>0)>::type * = 0);
|
||||
static meta_no testFunctor(...);
|
||||
|
||||
enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
|
||||
@@ -395,7 +395,7 @@ struct has_nullary_operator
|
||||
template <typename T, typename IndexType=Index>
|
||||
struct has_unary_operator
|
||||
{
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()(IndexType(0)))>0)>::type * = 0);
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0)))>0)>::type * = 0);
|
||||
static meta_no testFunctor(...);
|
||||
|
||||
enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
|
||||
@@ -404,7 +404,7 @@ struct has_unary_operator
|
||||
template <typename T, typename IndexType=Index>
|
||||
struct has_binary_operator
|
||||
{
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()(IndexType(0),IndexType(0)))>0)>::type * = 0);
|
||||
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0),IndexType(0)))>0)>::type * = 0);
|
||||
static meta_no testFunctor(...);
|
||||
|
||||
enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
|
||||
|
||||
@@ -100,7 +100,8 @@
|
||||
MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY,
|
||||
THIS_TYPE_IS_NOT_SUPPORTED,
|
||||
STORAGE_KIND_MUST_MATCH,
|
||||
STORAGE_INDEX_MUST_MATCH
|
||||
STORAGE_INDEX_MUST_MATCH,
|
||||
CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@@ -191,7 +191,7 @@ struct find_best_packet
|
||||
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
|
||||
template<int ArrayBytes, int AlignmentBytes,
|
||||
bool Match = bool((ArrayBytes%AlignmentBytes)==0),
|
||||
bool TryHalf = bool(AlignmentBytes>EIGEN_MIN_ALIGN_BYTES) >
|
||||
bool TryHalf = bool(EIGEN_MIN_ALIGN_BYTES<AlignmentBytes) >
|
||||
struct compute_default_alignment_helper
|
||||
{
|
||||
enum { value = 0 };
|
||||
@@ -445,15 +445,11 @@ template<typename T, int n, typename PlainObject = typename plain_object_eval<T>
|
||||
// Another solution could be to count the number of temps?
|
||||
NAsInteger = n == Dynamic ? HugeCost : n,
|
||||
CostEval = (NAsInteger+1) * ScalarReadCost + CoeffReadCost,
|
||||
CostNoEval = NAsInteger * CoeffReadCost
|
||||
CostNoEval = NAsInteger * CoeffReadCost,
|
||||
Evaluate = (int(evaluator<T>::Flags) & EvalBeforeNestingBit) || (int(CostEval) < int(CostNoEval))
|
||||
};
|
||||
|
||||
typedef typename conditional<
|
||||
( (int(evaluator<T>::Flags) & EvalBeforeNestingBit) ||
|
||||
(int(CostEval) < int(CostNoEval)) ),
|
||||
PlainObject,
|
||||
typename ref_selector<T>::type
|
||||
>::type type;
|
||||
typedef typename conditional<Evaluate, PlainObject, typename ref_selector<T>::type>::type type;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
@@ -536,6 +532,15 @@ template <typename B, typename Functor> struct cwise_promote_s
|
||||
template <typename Functor> struct cwise_promote_storage_type<Sparse,Dense,Functor> { typedef Sparse ret; };
|
||||
template <typename Functor> struct cwise_promote_storage_type<Dense,Sparse,Functor> { typedef Sparse ret; };
|
||||
|
||||
template <typename LhsKind, typename RhsKind, int LhsOrder, int RhsOrder> struct cwise_promote_storage_order {
|
||||
enum { value = LhsOrder };
|
||||
};
|
||||
|
||||
template <typename LhsKind, int LhsOrder, int RhsOrder> struct cwise_promote_storage_order<LhsKind,Sparse,LhsOrder,RhsOrder> { enum { value = RhsOrder }; };
|
||||
template <typename RhsKind, int LhsOrder, int RhsOrder> struct cwise_promote_storage_order<Sparse,RhsKind,LhsOrder,RhsOrder> { enum { value = LhsOrder }; };
|
||||
template <int Order> struct cwise_promote_storage_order<Sparse,Sparse,Order,Order> { enum { value = Order }; };
|
||||
|
||||
|
||||
/** \internal Specify the "storage kind" of multiplying an expression of kind A with kind B.
|
||||
* The template parameter ProductTag permits to specialize the resulting storage kind wrt to
|
||||
* some compile-time properties of the product: GemmProduct, GemvProduct, OuterProduct, InnerProduct.
|
||||
@@ -633,7 +638,7 @@ struct plain_constant_type
|
||||
template<typename ExpressionType>
|
||||
struct is_lvalue
|
||||
{
|
||||
enum { value = !bool(is_const<ExpressionType>::value) &&
|
||||
enum { value = (!bool(is_const<ExpressionType>::value)) &&
|
||||
bool(traits<ExpressionType>::Flags & LvalueBit) };
|
||||
};
|
||||
|
||||
|
||||
@@ -250,7 +250,7 @@ template<typename _MatrixType> class ComplexEigenSolver
|
||||
EigenvectorType m_matX;
|
||||
|
||||
private:
|
||||
void doComputeEigenvectors(const RealScalar& matrixnorm);
|
||||
void doComputeEigenvectors(RealScalar matrixnorm);
|
||||
void sortEigenvalues(bool computeEigenvectors);
|
||||
};
|
||||
|
||||
@@ -284,10 +284,12 @@ ComplexEigenSolver<MatrixType>::compute(const EigenBase<InputType>& matrix, bool
|
||||
|
||||
|
||||
template<typename MatrixType>
|
||||
void ComplexEigenSolver<MatrixType>::doComputeEigenvectors(const RealScalar& matrixnorm)
|
||||
void ComplexEigenSolver<MatrixType>::doComputeEigenvectors(RealScalar matrixnorm)
|
||||
{
|
||||
const Index n = m_eivalues.size();
|
||||
|
||||
matrixnorm = numext::maxi(matrixnorm,(std::numeric_limits<RealScalar>::min)());
|
||||
|
||||
// Compute X such that T = X D X^(-1), where D is the diagonal of T.
|
||||
// The matrix X is unit triangular.
|
||||
m_matX = EigenvectorType::Zero(n, n);
|
||||
|
||||
@@ -248,12 +248,24 @@ template<typename MatrixType>
|
||||
template<typename InputType>
|
||||
RealSchur<MatrixType>& RealSchur<MatrixType>::compute(const EigenBase<InputType>& matrix, bool computeU)
|
||||
{
|
||||
const Scalar considerAsZero = (std::numeric_limits<Scalar>::min)();
|
||||
|
||||
eigen_assert(matrix.cols() == matrix.rows());
|
||||
Index maxIters = m_maxIters;
|
||||
if (maxIters == -1)
|
||||
maxIters = m_maxIterationsPerRow * matrix.rows();
|
||||
|
||||
Scalar scale = matrix.derived().cwiseAbs().maxCoeff();
|
||||
if(scale<considerAsZero)
|
||||
{
|
||||
m_matT.setZero(matrix.rows(),matrix.cols());
|
||||
if(computeU)
|
||||
m_matU.setIdentity(matrix.rows(),matrix.cols());
|
||||
m_info = Success;
|
||||
m_isInitialized = true;
|
||||
m_matUisUptodate = computeU;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Step 1. Reduce to Hessenberg form
|
||||
m_hess.compute(matrix.derived()/scale);
|
||||
|
||||
@@ -414,7 +414,8 @@ SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
|
||||
|
||||
if(n==1)
|
||||
{
|
||||
m_eivalues.coeffRef(0,0) = numext::real(matrix.diagonal()[0]);
|
||||
m_eivec = matrix;
|
||||
m_eivalues.coeffRef(0,0) = numext::real(m_eivec.coeff(0,0));
|
||||
if(computeEigenvectors)
|
||||
m_eivec.setOnes(n,n);
|
||||
m_info = Success;
|
||||
|
||||
@@ -62,57 +62,57 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
|
||||
|
||||
|
||||
/** Default constructor initializing a null box. */
|
||||
inline AlignedBox()
|
||||
EIGEN_DEVICE_FUNC inline AlignedBox()
|
||||
{ if (AmbientDimAtCompileTime!=Dynamic) setEmpty(); }
|
||||
|
||||
/** Constructs a null box with \a _dim the dimension of the ambient space. */
|
||||
inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim)
|
||||
EIGEN_DEVICE_FUNC inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim)
|
||||
{ setEmpty(); }
|
||||
|
||||
/** Constructs a box with extremities \a _min and \a _max.
|
||||
* \warning If either component of \a _min is larger than the same component of \a _max, the constructed box is empty. */
|
||||
template<typename OtherVectorType1, typename OtherVectorType2>
|
||||
inline AlignedBox(const OtherVectorType1& _min, const OtherVectorType2& _max) : m_min(_min), m_max(_max) {}
|
||||
EIGEN_DEVICE_FUNC inline AlignedBox(const OtherVectorType1& _min, const OtherVectorType2& _max) : m_min(_min), m_max(_max) {}
|
||||
|
||||
/** Constructs a box containing a single point \a p. */
|
||||
template<typename Derived>
|
||||
inline explicit AlignedBox(const MatrixBase<Derived>& p) : m_min(p), m_max(m_min)
|
||||
EIGEN_DEVICE_FUNC inline explicit AlignedBox(const MatrixBase<Derived>& p) : m_min(p), m_max(m_min)
|
||||
{ }
|
||||
|
||||
~AlignedBox() {}
|
||||
EIGEN_DEVICE_FUNC ~AlignedBox() {}
|
||||
|
||||
/** \returns the dimension in which the box holds */
|
||||
inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_min.size() : Index(AmbientDimAtCompileTime); }
|
||||
EIGEN_DEVICE_FUNC inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_min.size() : Index(AmbientDimAtCompileTime); }
|
||||
|
||||
/** \deprecated use isEmpty() */
|
||||
inline bool isNull() const { return isEmpty(); }
|
||||
EIGEN_DEVICE_FUNC inline bool isNull() const { return isEmpty(); }
|
||||
|
||||
/** \deprecated use setEmpty() */
|
||||
inline void setNull() { setEmpty(); }
|
||||
EIGEN_DEVICE_FUNC inline void setNull() { setEmpty(); }
|
||||
|
||||
/** \returns true if the box is empty.
|
||||
* \sa setEmpty */
|
||||
inline bool isEmpty() const { return (m_min.array() > m_max.array()).any(); }
|
||||
EIGEN_DEVICE_FUNC inline bool isEmpty() const { return (m_min.array() > m_max.array()).any(); }
|
||||
|
||||
/** Makes \c *this an empty box.
|
||||
* \sa isEmpty */
|
||||
inline void setEmpty()
|
||||
EIGEN_DEVICE_FUNC inline void setEmpty()
|
||||
{
|
||||
m_min.setConstant( ScalarTraits::highest() );
|
||||
m_max.setConstant( ScalarTraits::lowest() );
|
||||
}
|
||||
|
||||
/** \returns the minimal corner */
|
||||
inline const VectorType& (min)() const { return m_min; }
|
||||
EIGEN_DEVICE_FUNC inline const VectorType& (min)() const { return m_min; }
|
||||
/** \returns a non const reference to the minimal corner */
|
||||
inline VectorType& (min)() { return m_min; }
|
||||
EIGEN_DEVICE_FUNC inline VectorType& (min)() { return m_min; }
|
||||
/** \returns the maximal corner */
|
||||
inline const VectorType& (max)() const { return m_max; }
|
||||
EIGEN_DEVICE_FUNC inline const VectorType& (max)() const { return m_max; }
|
||||
/** \returns a non const reference to the maximal corner */
|
||||
inline VectorType& (max)() { return m_max; }
|
||||
EIGEN_DEVICE_FUNC inline VectorType& (max)() { return m_max; }
|
||||
|
||||
/** \returns the center of the box */
|
||||
inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(VectorTypeSum, RealScalar, quotient)
|
||||
EIGEN_DEVICE_FUNC inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(VectorTypeSum, RealScalar, quotient)
|
||||
center() const
|
||||
{ return (m_min+m_max)/RealScalar(2); }
|
||||
|
||||
@@ -120,18 +120,18 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
|
||||
* Note that this function does not get the same
|
||||
* result for integral or floating scalar types: see
|
||||
*/
|
||||
inline const CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> sizes() const
|
||||
EIGEN_DEVICE_FUNC inline const CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> sizes() const
|
||||
{ return m_max - m_min; }
|
||||
|
||||
/** \returns the volume of the bounding box */
|
||||
inline Scalar volume() const
|
||||
EIGEN_DEVICE_FUNC inline Scalar volume() const
|
||||
{ return sizes().prod(); }
|
||||
|
||||
/** \returns an expression for the bounding box diagonal vector
|
||||
* if the length of the diagonal is needed: diagonal().norm()
|
||||
* will provide it.
|
||||
*/
|
||||
inline CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> diagonal() const
|
||||
EIGEN_DEVICE_FUNC inline CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> diagonal() const
|
||||
{ return sizes(); }
|
||||
|
||||
/** \returns the vertex of the bounding box at the corner defined by
|
||||
@@ -143,7 +143,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
|
||||
* For 3D bounding boxes, the following names are added:
|
||||
* BottomLeftCeil, BottomRightCeil, TopLeftCeil, TopRightCeil.
|
||||
*/
|
||||
inline VectorType corner(CornerType corner) const
|
||||
EIGEN_DEVICE_FUNC inline VectorType corner(CornerType corner) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(_AmbientDim <= 3, THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE);
|
||||
|
||||
@@ -161,7 +161,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
|
||||
|
||||
/** \returns a random point inside the bounding box sampled with
|
||||
* a uniform distribution */
|
||||
inline VectorType sample() const
|
||||
EIGEN_DEVICE_FUNC inline VectorType sample() const
|
||||
{
|
||||
VectorType r(dim());
|
||||
for(Index d=0; d<dim(); ++d)
|
||||
@@ -179,25 +179,25 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
|
||||
|
||||
/** \returns true if the point \a p is inside the box \c *this. */
|
||||
template<typename Derived>
|
||||
inline bool contains(const MatrixBase<Derived>& p) const
|
||||
EIGEN_DEVICE_FUNC inline bool contains(const MatrixBase<Derived>& p) const
|
||||
{
|
||||
typename internal::nested_eval<Derived,2>::type p_n(p.derived());
|
||||
return (m_min.array()<=p_n.array()).all() && (p_n.array()<=m_max.array()).all();
|
||||
}
|
||||
|
||||
/** \returns true if the box \a b is entirely inside the box \c *this. */
|
||||
inline bool contains(const AlignedBox& b) const
|
||||
EIGEN_DEVICE_FUNC inline bool contains(const AlignedBox& b) const
|
||||
{ return (m_min.array()<=(b.min)().array()).all() && ((b.max)().array()<=m_max.array()).all(); }
|
||||
|
||||
/** \returns true if the box \a b is intersecting the box \c *this.
|
||||
* \sa intersection, clamp */
|
||||
inline bool intersects(const AlignedBox& b) const
|
||||
EIGEN_DEVICE_FUNC inline bool intersects(const AlignedBox& b) const
|
||||
{ return (m_min.array()<=(b.max)().array()).all() && ((b.min)().array()<=m_max.array()).all(); }
|
||||
|
||||
/** Extends \c *this such that it contains the point \a p and returns a reference to \c *this.
|
||||
* \sa extend(const AlignedBox&) */
|
||||
template<typename Derived>
|
||||
inline AlignedBox& extend(const MatrixBase<Derived>& p)
|
||||
EIGEN_DEVICE_FUNC inline AlignedBox& extend(const MatrixBase<Derived>& p)
|
||||
{
|
||||
typename internal::nested_eval<Derived,2>::type p_n(p.derived());
|
||||
m_min = m_min.cwiseMin(p_n);
|
||||
@@ -207,7 +207,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
|
||||
|
||||
/** Extends \c *this such that it contains the box \a b and returns a reference to \c *this.
|
||||
* \sa merged, extend(const MatrixBase&) */
|
||||
inline AlignedBox& extend(const AlignedBox& b)
|
||||
EIGEN_DEVICE_FUNC inline AlignedBox& extend(const AlignedBox& b)
|
||||
{
|
||||
m_min = m_min.cwiseMin(b.m_min);
|
||||
m_max = m_max.cwiseMax(b.m_max);
|
||||
@@ -217,7 +217,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
|
||||
/** Clamps \c *this by the box \a b and returns a reference to \c *this.
|
||||
* \note If the boxes don't intersect, the resulting box is empty.
|
||||
* \sa intersection(), intersects() */
|
||||
inline AlignedBox& clamp(const AlignedBox& b)
|
||||
EIGEN_DEVICE_FUNC inline AlignedBox& clamp(const AlignedBox& b)
|
||||
{
|
||||
m_min = m_min.cwiseMax(b.m_min);
|
||||
m_max = m_max.cwiseMin(b.m_max);
|
||||
@@ -227,18 +227,18 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
|
||||
/** Returns an AlignedBox that is the intersection of \a b and \c *this
|
||||
* \note If the boxes don't intersect, the resulting box is empty.
|
||||
* \sa intersects(), clamp, contains() */
|
||||
inline AlignedBox intersection(const AlignedBox& b) const
|
||||
EIGEN_DEVICE_FUNC inline AlignedBox intersection(const AlignedBox& b) const
|
||||
{return AlignedBox(m_min.cwiseMax(b.m_min), m_max.cwiseMin(b.m_max)); }
|
||||
|
||||
/** Returns an AlignedBox that is the union of \a b and \c *this.
|
||||
* \note Merging with an empty box may result in a box bigger than \c *this.
|
||||
* \sa extend(const AlignedBox&) */
|
||||
inline AlignedBox merged(const AlignedBox& b) const
|
||||
EIGEN_DEVICE_FUNC inline AlignedBox merged(const AlignedBox& b) const
|
||||
{ return AlignedBox(m_min.cwiseMin(b.m_min), m_max.cwiseMax(b.m_max)); }
|
||||
|
||||
/** Translate \c *this by the vector \a t and returns a reference to \c *this. */
|
||||
template<typename Derived>
|
||||
inline AlignedBox& translate(const MatrixBase<Derived>& a_t)
|
||||
EIGEN_DEVICE_FUNC inline AlignedBox& translate(const MatrixBase<Derived>& a_t)
|
||||
{
|
||||
const typename internal::nested_eval<Derived,2>::type t(a_t.derived());
|
||||
m_min += t;
|
||||
@@ -251,28 +251,28 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
|
||||
* \sa exteriorDistance(const MatrixBase&), squaredExteriorDistance(const AlignedBox&)
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline Scalar squaredExteriorDistance(const MatrixBase<Derived>& p) const;
|
||||
EIGEN_DEVICE_FUNC inline Scalar squaredExteriorDistance(const MatrixBase<Derived>& p) const;
|
||||
|
||||
/** \returns the squared distance between the boxes \a b and \c *this,
|
||||
* and zero if the boxes intersect.
|
||||
* \sa exteriorDistance(const AlignedBox&), squaredExteriorDistance(const MatrixBase&)
|
||||
*/
|
||||
inline Scalar squaredExteriorDistance(const AlignedBox& b) const;
|
||||
EIGEN_DEVICE_FUNC inline Scalar squaredExteriorDistance(const AlignedBox& b) const;
|
||||
|
||||
/** \returns the distance between the point \a p and the box \c *this,
|
||||
* and zero if \a p is inside the box.
|
||||
* \sa squaredExteriorDistance(const MatrixBase&), exteriorDistance(const AlignedBox&)
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline NonInteger exteriorDistance(const MatrixBase<Derived>& p) const
|
||||
{ using std::sqrt; return sqrt(NonInteger(squaredExteriorDistance(p))); }
|
||||
EIGEN_DEVICE_FUNC inline NonInteger exteriorDistance(const MatrixBase<Derived>& p) const
|
||||
{ EIGEN_USING_STD_MATH(sqrt) return sqrt(NonInteger(squaredExteriorDistance(p))); }
|
||||
|
||||
/** \returns the distance between the boxes \a b and \c *this,
|
||||
* and zero if the boxes intersect.
|
||||
* \sa squaredExteriorDistance(const AlignedBox&), exteriorDistance(const MatrixBase&)
|
||||
*/
|
||||
inline NonInteger exteriorDistance(const AlignedBox& b) const
|
||||
{ using std::sqrt; return sqrt(NonInteger(squaredExteriorDistance(b))); }
|
||||
EIGEN_DEVICE_FUNC inline NonInteger exteriorDistance(const AlignedBox& b) const
|
||||
{ EIGEN_USING_STD_MATH(sqrt) return sqrt(NonInteger(squaredExteriorDistance(b))); }
|
||||
|
||||
/** \returns \c *this with scalar type casted to \a NewScalarType
|
||||
*
|
||||
@@ -280,7 +280,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
|
||||
* then this function smartly returns a const reference to \c *this.
|
||||
*/
|
||||
template<typename NewScalarType>
|
||||
inline typename internal::cast_return_type<AlignedBox,
|
||||
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<AlignedBox,
|
||||
AlignedBox<NewScalarType,AmbientDimAtCompileTime> >::type cast() const
|
||||
{
|
||||
return typename internal::cast_return_type<AlignedBox,
|
||||
@@ -289,7 +289,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
|
||||
|
||||
/** Copy constructor with scalar type conversion */
|
||||
template<typename OtherScalarType>
|
||||
inline explicit AlignedBox(const AlignedBox<OtherScalarType,AmbientDimAtCompileTime>& other)
|
||||
EIGEN_DEVICE_FUNC inline explicit AlignedBox(const AlignedBox<OtherScalarType,AmbientDimAtCompileTime>& other)
|
||||
{
|
||||
m_min = (other.min)().template cast<Scalar>();
|
||||
m_max = (other.max)().template cast<Scalar>();
|
||||
@@ -299,7 +299,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
|
||||
* determined by \a prec.
|
||||
*
|
||||
* \sa MatrixBase::isApprox() */
|
||||
bool isApprox(const AlignedBox& other, const RealScalar& prec = ScalarTraits::dummy_precision()) const
|
||||
EIGEN_DEVICE_FUNC bool isApprox(const AlignedBox& other, const RealScalar& prec = ScalarTraits::dummy_precision()) const
|
||||
{ return m_min.isApprox(other.m_min, prec) && m_max.isApprox(other.m_max, prec); }
|
||||
|
||||
protected:
|
||||
@@ -311,7 +311,7 @@ protected:
|
||||
|
||||
template<typename Scalar,int AmbientDim>
|
||||
template<typename Derived>
|
||||
inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const MatrixBase<Derived>& a_p) const
|
||||
EIGEN_DEVICE_FUNC inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const MatrixBase<Derived>& a_p) const
|
||||
{
|
||||
typename internal::nested_eval<Derived,2*AmbientDim>::type p(a_p.derived());
|
||||
Scalar dist2(0);
|
||||
@@ -333,7 +333,7 @@ inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const Matri
|
||||
}
|
||||
|
||||
template<typename Scalar,int AmbientDim>
|
||||
inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const AlignedBox& b) const
|
||||
EIGEN_DEVICE_FUNC inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const AlignedBox& b) const
|
||||
{
|
||||
Scalar dist2(0);
|
||||
Scalar aux;
|
||||
|
||||
@@ -69,59 +69,61 @@ protected:
|
||||
public:
|
||||
|
||||
/** Default constructor without initialization. */
|
||||
AngleAxis() {}
|
||||
EIGEN_DEVICE_FUNC AngleAxis() {}
|
||||
/** Constructs and initialize the angle-axis rotation from an \a angle in radian
|
||||
* and an \a axis which \b must \b be \b normalized.
|
||||
*
|
||||
* \warning If the \a axis vector is not normalized, then the angle-axis object
|
||||
* represents an invalid rotation. */
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline AngleAxis(const Scalar& angle, const MatrixBase<Derived>& axis) : m_axis(axis), m_angle(angle) {}
|
||||
/** Constructs and initialize the angle-axis rotation from a quaternion \a q.
|
||||
* This function implicitly normalizes the quaternion \a q.
|
||||
*/
|
||||
template<typename QuatDerived> inline explicit AngleAxis(const QuaternionBase<QuatDerived>& q) { *this = q; }
|
||||
template<typename QuatDerived>
|
||||
EIGEN_DEVICE_FUNC inline explicit AngleAxis(const QuaternionBase<QuatDerived>& q) { *this = q; }
|
||||
/** Constructs and initialize the angle-axis rotation from a 3x3 rotation matrix. */
|
||||
template<typename Derived>
|
||||
inline explicit AngleAxis(const MatrixBase<Derived>& m) { *this = m; }
|
||||
EIGEN_DEVICE_FUNC inline explicit AngleAxis(const MatrixBase<Derived>& m) { *this = m; }
|
||||
|
||||
/** \returns the value of the rotation angle in radian */
|
||||
Scalar angle() const { return m_angle; }
|
||||
EIGEN_DEVICE_FUNC Scalar angle() const { return m_angle; }
|
||||
/** \returns a read-write reference to the stored angle in radian */
|
||||
Scalar& angle() { return m_angle; }
|
||||
EIGEN_DEVICE_FUNC Scalar& angle() { return m_angle; }
|
||||
|
||||
/** \returns the rotation axis */
|
||||
const Vector3& axis() const { return m_axis; }
|
||||
EIGEN_DEVICE_FUNC const Vector3& axis() const { return m_axis; }
|
||||
/** \returns a read-write reference to the stored rotation axis.
|
||||
*
|
||||
* \warning The rotation axis must remain a \b unit vector.
|
||||
*/
|
||||
Vector3& axis() { return m_axis; }
|
||||
EIGEN_DEVICE_FUNC Vector3& axis() { return m_axis; }
|
||||
|
||||
/** Concatenates two rotations */
|
||||
inline QuaternionType operator* (const AngleAxis& other) const
|
||||
EIGEN_DEVICE_FUNC inline QuaternionType operator* (const AngleAxis& other) const
|
||||
{ return QuaternionType(*this) * QuaternionType(other); }
|
||||
|
||||
/** Concatenates two rotations */
|
||||
inline QuaternionType operator* (const QuaternionType& other) const
|
||||
EIGEN_DEVICE_FUNC inline QuaternionType operator* (const QuaternionType& other) const
|
||||
{ return QuaternionType(*this) * other; }
|
||||
|
||||
/** Concatenates two rotations */
|
||||
friend inline QuaternionType operator* (const QuaternionType& a, const AngleAxis& b)
|
||||
friend EIGEN_DEVICE_FUNC inline QuaternionType operator* (const QuaternionType& a, const AngleAxis& b)
|
||||
{ return a * QuaternionType(b); }
|
||||
|
||||
/** \returns the inverse rotation, i.e., an angle-axis with opposite rotation angle */
|
||||
AngleAxis inverse() const
|
||||
EIGEN_DEVICE_FUNC AngleAxis inverse() const
|
||||
{ return AngleAxis(-m_angle, m_axis); }
|
||||
|
||||
template<class QuatDerived>
|
||||
AngleAxis& operator=(const QuaternionBase<QuatDerived>& q);
|
||||
EIGEN_DEVICE_FUNC AngleAxis& operator=(const QuaternionBase<QuatDerived>& q);
|
||||
template<typename Derived>
|
||||
AngleAxis& operator=(const MatrixBase<Derived>& m);
|
||||
EIGEN_DEVICE_FUNC AngleAxis& operator=(const MatrixBase<Derived>& m);
|
||||
|
||||
template<typename Derived>
|
||||
AngleAxis& fromRotationMatrix(const MatrixBase<Derived>& m);
|
||||
Matrix3 toRotationMatrix(void) const;
|
||||
EIGEN_DEVICE_FUNC AngleAxis& fromRotationMatrix(const MatrixBase<Derived>& m);
|
||||
EIGEN_DEVICE_FUNC Matrix3 toRotationMatrix(void) const;
|
||||
|
||||
/** \returns \c *this with scalar type casted to \a NewScalarType
|
||||
*
|
||||
@@ -129,24 +131,24 @@ public:
|
||||
* then this function smartly returns a const reference to \c *this.
|
||||
*/
|
||||
template<typename NewScalarType>
|
||||
inline typename internal::cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type cast() const
|
||||
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type cast() const
|
||||
{ return typename internal::cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type(*this); }
|
||||
|
||||
/** Copy constructor with scalar type conversion */
|
||||
template<typename OtherScalarType>
|
||||
inline explicit AngleAxis(const AngleAxis<OtherScalarType>& other)
|
||||
EIGEN_DEVICE_FUNC inline explicit AngleAxis(const AngleAxis<OtherScalarType>& other)
|
||||
{
|
||||
m_axis = other.axis().template cast<Scalar>();
|
||||
m_angle = Scalar(other.angle());
|
||||
}
|
||||
|
||||
static inline const AngleAxis Identity() { return AngleAxis(Scalar(0), Vector3::UnitX()); }
|
||||
EIGEN_DEVICE_FUNC static inline const AngleAxis Identity() { return AngleAxis(Scalar(0), Vector3::UnitX()); }
|
||||
|
||||
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
|
||||
* determined by \a prec.
|
||||
*
|
||||
* \sa MatrixBase::isApprox() */
|
||||
bool isApprox(const AngleAxis& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
|
||||
EIGEN_DEVICE_FUNC bool isApprox(const AngleAxis& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
|
||||
{ return m_axis.isApprox(other.m_axis, prec) && internal::isApprox(m_angle,other.m_angle, prec); }
|
||||
};
|
||||
|
||||
@@ -158,21 +160,26 @@ typedef AngleAxis<float> AngleAxisf;
|
||||
typedef AngleAxis<double> AngleAxisd;
|
||||
|
||||
/** Set \c *this from a \b unit quaternion.
|
||||
* The resulting axis is normalized.
|
||||
*
|
||||
* The resulting axis is normalized, and the computed angle is in the [0,pi] range.
|
||||
*
|
||||
* This function implicitly normalizes the quaternion \a q.
|
||||
*/
|
||||
template<typename Scalar>
|
||||
template<typename QuatDerived>
|
||||
AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived>& q)
|
||||
EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived>& q)
|
||||
{
|
||||
using std::atan2;
|
||||
EIGEN_USING_STD_MATH(atan2)
|
||||
EIGEN_USING_STD_MATH(abs)
|
||||
Scalar n = q.vec().norm();
|
||||
if(n<NumTraits<Scalar>::epsilon())
|
||||
n = q.vec().stableNorm();
|
||||
if (n > Scalar(0))
|
||||
|
||||
if (n != Scalar(0))
|
||||
{
|
||||
m_angle = Scalar(2)*atan2(n, q.w());
|
||||
m_angle = Scalar(2)*atan2(n, abs(q.w()));
|
||||
if(q.w() < 0)
|
||||
n = -n;
|
||||
m_axis = q.vec() / n;
|
||||
}
|
||||
else
|
||||
@@ -187,7 +194,7 @@ AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived
|
||||
*/
|
||||
template<typename Scalar>
|
||||
template<typename Derived>
|
||||
AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const MatrixBase<Derived>& mat)
|
||||
EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const MatrixBase<Derived>& mat)
|
||||
{
|
||||
// Since a direct conversion would not be really faster,
|
||||
// let's use the robust Quaternion implementation:
|
||||
@@ -199,7 +206,7 @@ AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const MatrixBase<Derived>& mat)
|
||||
**/
|
||||
template<typename Scalar>
|
||||
template<typename Derived>
|
||||
AngleAxis<Scalar>& AngleAxis<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
|
||||
EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
|
||||
{
|
||||
return *this = QuaternionType(mat);
|
||||
}
|
||||
@@ -208,10 +215,10 @@ AngleAxis<Scalar>& AngleAxis<Scalar>::fromRotationMatrix(const MatrixBase<Derive
|
||||
*/
|
||||
template<typename Scalar>
|
||||
typename AngleAxis<Scalar>::Matrix3
|
||||
AngleAxis<Scalar>::toRotationMatrix(void) const
|
||||
EIGEN_DEVICE_FUNC AngleAxis<Scalar>::toRotationMatrix(void) const
|
||||
{
|
||||
using std::sin;
|
||||
using std::cos;
|
||||
EIGEN_USING_STD_MATH(sin)
|
||||
EIGEN_USING_STD_MATH(cos)
|
||||
Matrix3 res;
|
||||
Vector3 sin_axis = sin(m_angle) * m_axis;
|
||||
Scalar c = cos(m_angle);
|
||||
|
||||
@@ -33,12 +33,12 @@ namespace Eigen {
|
||||
* \sa class AngleAxis
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline Matrix<typename MatrixBase<Derived>::Scalar,3,1>
|
||||
EIGEN_DEVICE_FUNC inline Matrix<typename MatrixBase<Derived>::Scalar,3,1>
|
||||
MatrixBase<Derived>::eulerAngles(Index a0, Index a1, Index a2) const
|
||||
{
|
||||
using std::atan2;
|
||||
using std::sin;
|
||||
using std::cos;
|
||||
EIGEN_USING_STD_MATH(atan2)
|
||||
EIGEN_USING_STD_MATH(sin)
|
||||
EIGEN_USING_STD_MATH(cos)
|
||||
/* Implemented from Graphics Gems IV */
|
||||
EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Derived,3,3)
|
||||
|
||||
|
||||
@@ -68,17 +68,17 @@ template<typename MatrixType,int _Direction> class Homogeneous
|
||||
typedef MatrixBase<Homogeneous> Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Homogeneous)
|
||||
|
||||
explicit inline Homogeneous(const MatrixType& matrix)
|
||||
EIGEN_DEVICE_FUNC explicit inline Homogeneous(const MatrixType& matrix)
|
||||
: m_matrix(matrix)
|
||||
{}
|
||||
|
||||
inline Index rows() const { return m_matrix.rows() + (int(Direction)==Vertical ? 1 : 0); }
|
||||
inline Index cols() const { return m_matrix.cols() + (int(Direction)==Horizontal ? 1 : 0); }
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows() + (int(Direction)==Vertical ? 1 : 0); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols() + (int(Direction)==Horizontal ? 1 : 0); }
|
||||
|
||||
const NestedExpression& nestedExpression() const { return m_matrix; }
|
||||
EIGEN_DEVICE_FUNC const NestedExpression& nestedExpression() const { return m_matrix; }
|
||||
|
||||
template<typename Rhs>
|
||||
inline const Product<Homogeneous,Rhs>
|
||||
EIGEN_DEVICE_FUNC inline const Product<Homogeneous,Rhs>
|
||||
operator* (const MatrixBase<Rhs>& rhs) const
|
||||
{
|
||||
eigen_assert(int(Direction)==Horizontal);
|
||||
@@ -86,7 +86,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
|
||||
}
|
||||
|
||||
template<typename Lhs> friend
|
||||
inline const Product<Lhs,Homogeneous>
|
||||
EIGEN_DEVICE_FUNC inline const Product<Lhs,Homogeneous>
|
||||
operator* (const MatrixBase<Lhs>& lhs, const Homogeneous& rhs)
|
||||
{
|
||||
eigen_assert(int(Direction)==Vertical);
|
||||
@@ -94,7 +94,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
|
||||
}
|
||||
|
||||
template<typename Scalar, int Dim, int Mode, int Options> friend
|
||||
inline const Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous >
|
||||
EIGEN_DEVICE_FUNC inline const Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous >
|
||||
operator* (const Transform<Scalar,Dim,Mode,Options>& lhs, const Homogeneous& rhs)
|
||||
{
|
||||
eigen_assert(int(Direction)==Vertical);
|
||||
@@ -102,7 +102,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
|
||||
}
|
||||
|
||||
template<typename Func>
|
||||
EIGEN_STRONG_INLINE typename internal::result_of<Func(Scalar,Scalar)>::type
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::result_of<Func(Scalar,Scalar)>::type
|
||||
redux(const Func& func) const
|
||||
{
|
||||
return func(m_matrix.redux(func), Scalar(1));
|
||||
@@ -114,7 +114,9 @@ template<typename MatrixType,int _Direction> class Homogeneous
|
||||
|
||||
/** \geometry_module \ingroup Geometry_Module
|
||||
*
|
||||
* \return an expression of the equivalent homogeneous vector
|
||||
* \returns a vector expression that is one longer than the vector argument, with the value 1 symbolically appended as the last coefficient.
|
||||
*
|
||||
* This can be used to convert affine coordinates to homogeneous coordinates.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
@@ -124,7 +126,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
|
||||
* \sa VectorwiseOp::homogeneous(), class Homogeneous
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename MatrixBase<Derived>::HomogeneousReturnType
|
||||
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::HomogeneousReturnType
|
||||
MatrixBase<Derived>::homogeneous() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
|
||||
@@ -133,14 +135,16 @@ MatrixBase<Derived>::homogeneous() const
|
||||
|
||||
/** \geometry_module \ingroup Geometry_Module
|
||||
*
|
||||
* \returns a matrix expression of homogeneous column (or row) vectors
|
||||
* \returns an expression where the value 1 is symbolically appended as the final coefficient to each column (or row) of the matrix.
|
||||
*
|
||||
* This can be used to convert affine coordinates to homogeneous coordinates.
|
||||
*
|
||||
* Example: \include VectorwiseOp_homogeneous.cpp
|
||||
* Output: \verbinclude VectorwiseOp_homogeneous.out
|
||||
*
|
||||
* \sa MatrixBase::homogeneous(), class Homogeneous */
|
||||
template<typename ExpressionType, int Direction>
|
||||
inline Homogeneous<ExpressionType,Direction>
|
||||
EIGEN_DEVICE_FUNC inline Homogeneous<ExpressionType,Direction>
|
||||
VectorwiseOp<ExpressionType,Direction>::homogeneous() const
|
||||
{
|
||||
return HomogeneousReturnType(_expression());
|
||||
@@ -148,14 +152,23 @@ VectorwiseOp<ExpressionType,Direction>::homogeneous() const
|
||||
|
||||
/** \geometry_module \ingroup Geometry_Module
|
||||
*
|
||||
* \returns an expression of the homogeneous normalized vector of \c *this
|
||||
* \brief homogeneous normalization
|
||||
*
|
||||
* \returns a vector expression of the N-1 first coefficients of \c *this divided by that last coefficient.
|
||||
*
|
||||
* This can be used to convert homogeneous coordinates to affine coordinates.
|
||||
*
|
||||
* It is essentially a shortcut for:
|
||||
* \code
|
||||
this->head(this->size()-1)/this->coeff(this->size()-1);
|
||||
\endcode
|
||||
*
|
||||
* Example: \include MatrixBase_hnormalized.cpp
|
||||
* Output: \verbinclude MatrixBase_hnormalized.out
|
||||
*
|
||||
* \sa VectorwiseOp::hnormalized() */
|
||||
template<typename Derived>
|
||||
inline const typename MatrixBase<Derived>::HNormalizedReturnType
|
||||
EIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::HNormalizedReturnType
|
||||
MatrixBase<Derived>::hnormalized() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
|
||||
@@ -166,14 +179,20 @@ MatrixBase<Derived>::hnormalized() const
|
||||
|
||||
/** \geometry_module \ingroup Geometry_Module
|
||||
*
|
||||
* \returns an expression of the homogeneous normalized vector of \c *this
|
||||
* \brief column or row-wise homogeneous normalization
|
||||
*
|
||||
* \returns an expression of the first N-1 coefficients of each column (or row) of \c *this divided by the last coefficient of each column (or row).
|
||||
*
|
||||
* This can be used to convert homogeneous coordinates to affine coordinates.
|
||||
*
|
||||
* It is conceptually equivalent to calling MatrixBase::hnormalized() to each column (or row) of \c *this.
|
||||
*
|
||||
* Example: \include DirectionWise_hnormalized.cpp
|
||||
* Output: \verbinclude DirectionWise_hnormalized.out
|
||||
*
|
||||
* \sa MatrixBase::hnormalized() */
|
||||
template<typename ExpressionType, int Direction>
|
||||
inline const typename VectorwiseOp<ExpressionType,Direction>::HNormalizedReturnType
|
||||
EIGEN_DEVICE_FUNC inline const typename VectorwiseOp<ExpressionType,Direction>::HNormalizedReturnType
|
||||
VectorwiseOp<ExpressionType,Direction>::hnormalized() const
|
||||
{
|
||||
return HNormalized_Block(_expression(),0,0,
|
||||
@@ -197,7 +216,7 @@ template<typename MatrixOrTransformType>
|
||||
struct take_matrix_for_product
|
||||
{
|
||||
typedef MatrixOrTransformType type;
|
||||
static const type& run(const type &x) { return x; }
|
||||
EIGEN_DEVICE_FUNC static const type& run(const type &x) { return x; }
|
||||
};
|
||||
|
||||
template<typename Scalar, int Dim, int Mode,int Options>
|
||||
@@ -205,7 +224,7 @@ struct take_matrix_for_product<Transform<Scalar, Dim, Mode, Options> >
|
||||
{
|
||||
typedef Transform<Scalar, Dim, Mode, Options> TransformType;
|
||||
typedef typename internal::add_const<typename TransformType::ConstAffinePart>::type type;
|
||||
static type run (const TransformType& x) { return x.affine(); }
|
||||
EIGEN_DEVICE_FUNC static type run (const TransformType& x) { return x.affine(); }
|
||||
};
|
||||
|
||||
template<typename Scalar, int Dim, int Options>
|
||||
@@ -213,7 +232,7 @@ struct take_matrix_for_product<Transform<Scalar, Dim, Projective, Options> >
|
||||
{
|
||||
typedef Transform<Scalar, Dim, Projective, Options> TransformType;
|
||||
typedef typename TransformType::MatrixType type;
|
||||
static const type& run (const TransformType& x) { return x.matrix(); }
|
||||
EIGEN_DEVICE_FUNC static const type& run (const TransformType& x) { return x.matrix(); }
|
||||
};
|
||||
|
||||
template<typename MatrixType,typename Lhs>
|
||||
@@ -238,15 +257,15 @@ struct homogeneous_left_product_impl<Homogeneous<MatrixType,Vertical>,Lhs>
|
||||
typedef typename traits<homogeneous_left_product_impl>::LhsMatrixType LhsMatrixType;
|
||||
typedef typename remove_all<LhsMatrixType>::type LhsMatrixTypeCleaned;
|
||||
typedef typename remove_all<typename LhsMatrixTypeCleaned::Nested>::type LhsMatrixTypeNested;
|
||||
homogeneous_left_product_impl(const Lhs& lhs, const MatrixType& rhs)
|
||||
EIGEN_DEVICE_FUNC homogeneous_left_product_impl(const Lhs& lhs, const MatrixType& rhs)
|
||||
: m_lhs(take_matrix_for_product<Lhs>::run(lhs)),
|
||||
m_rhs(rhs)
|
||||
{}
|
||||
|
||||
inline Index rows() const { return m_lhs.rows(); }
|
||||
inline Index cols() const { return m_rhs.cols(); }
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
|
||||
|
||||
template<typename Dest> void evalTo(Dest& dst) const
|
||||
template<typename Dest> EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const
|
||||
{
|
||||
// FIXME investigate how to allow lazy evaluation of this product when possible
|
||||
dst = Block<const LhsMatrixTypeNested,
|
||||
@@ -277,14 +296,14 @@ struct homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs>
|
||||
: public ReturnByValue<homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs> >
|
||||
{
|
||||
typedef typename remove_all<typename Rhs::Nested>::type RhsNested;
|
||||
homogeneous_right_product_impl(const MatrixType& lhs, const Rhs& rhs)
|
||||
EIGEN_DEVICE_FUNC homogeneous_right_product_impl(const MatrixType& lhs, const Rhs& rhs)
|
||||
: m_lhs(lhs), m_rhs(rhs)
|
||||
{}
|
||||
|
||||
inline Index rows() const { return m_lhs.rows(); }
|
||||
inline Index cols() const { return m_rhs.cols(); }
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
|
||||
|
||||
template<typename Dest> void evalTo(Dest& dst) const
|
||||
template<typename Dest> EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const
|
||||
{
|
||||
// FIXME investigate how to allow lazy evaluation of this product when possible
|
||||
dst = m_lhs * Block<const RhsNested,
|
||||
@@ -317,7 +336,7 @@ struct unary_evaluator<Homogeneous<ArgType,Direction>, IndexBased>
|
||||
typedef typename XprType::PlainObject PlainObject;
|
||||
typedef evaluator<PlainObject> Base;
|
||||
|
||||
explicit unary_evaluator(const XprType& op)
|
||||
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
|
||||
: Base(), m_temp(op)
|
||||
{
|
||||
::new (static_cast<Base*>(this)) Base(m_temp);
|
||||
@@ -332,8 +351,13 @@ template< typename DstXprType, typename ArgType, typename Scalar>
|
||||
struct Assignment<DstXprType, Homogeneous<ArgType,Vertical>, internal::assign_op<Scalar,typename ArgType::Scalar>, Dense2Dense>
|
||||
{
|
||||
typedef Homogeneous<ArgType,Vertical> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
|
||||
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
dst.template topRows<ArgType::RowsAtCompileTime>(src.nestedExpression().rows()) = src.nestedExpression();
|
||||
dst.row(dst.rows()-1).setOnes();
|
||||
}
|
||||
@@ -344,8 +368,13 @@ template< typename DstXprType, typename ArgType, typename Scalar>
|
||||
struct Assignment<DstXprType, Homogeneous<ArgType,Horizontal>, internal::assign_op<Scalar,typename ArgType::Scalar>, Dense2Dense>
|
||||
{
|
||||
typedef Homogeneous<ArgType,Horizontal> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
|
||||
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
dst.template leftCols<ArgType::ColsAtCompileTime>(src.nestedExpression().cols()) = src.nestedExpression();
|
||||
dst.col(dst.cols()-1).setOnes();
|
||||
}
|
||||
@@ -355,7 +384,7 @@ template<typename LhsArg, typename Rhs, int ProductTag>
|
||||
struct generic_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs, HomogeneousShape, DenseShape, ProductTag>
|
||||
{
|
||||
template<typename Dest>
|
||||
static void evalTo(Dest& dst, const Homogeneous<LhsArg,Horizontal>& lhs, const Rhs& rhs)
|
||||
EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const Homogeneous<LhsArg,Horizontal>& lhs, const Rhs& rhs)
|
||||
{
|
||||
homogeneous_right_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs>(lhs.nestedExpression(), rhs).evalTo(dst);
|
||||
}
|
||||
@@ -396,12 +425,24 @@ template<typename Lhs, typename RhsArg, int ProductTag>
|
||||
struct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, DenseShape, HomogeneousShape, ProductTag>
|
||||
{
|
||||
template<typename Dest>
|
||||
static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
|
||||
EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
|
||||
{
|
||||
homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, Lhs>(lhs, rhs.nestedExpression()).evalTo(dst);
|
||||
}
|
||||
};
|
||||
|
||||
// TODO: the following specialization is to address a regression from 3.2 to 3.3
|
||||
// In the future, this path should be optimized.
|
||||
template<typename Lhs, typename RhsArg, int ProductTag>
|
||||
struct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, TriangularShape, HomogeneousShape, ProductTag>
|
||||
{
|
||||
template<typename Dest>
|
||||
static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
|
||||
{
|
||||
dst.noalias() = lhs * rhs.eval();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs,typename Rhs>
|
||||
struct homogeneous_left_product_refactoring_helper
|
||||
{
|
||||
@@ -438,7 +479,7 @@ struct generic_product_impl<Transform<Scalar,Dim,Mode,Options>, Homogeneous<RhsA
|
||||
{
|
||||
typedef Transform<Scalar,Dim,Mode,Options> TransformType;
|
||||
template<typename Dest>
|
||||
static void evalTo(Dest& dst, const TransformType& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
|
||||
EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const TransformType& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
|
||||
{
|
||||
homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, TransformType>(lhs, rhs.nestedExpression()).evalTo(dst);
|
||||
}
|
||||
|
||||
@@ -50,21 +50,21 @@ public:
|
||||
typedef const Block<const Coefficients,AmbientDimAtCompileTime,1> ConstNormalReturnType;
|
||||
|
||||
/** Default constructor without initialization */
|
||||
inline Hyperplane() {}
|
||||
EIGEN_DEVICE_FUNC inline Hyperplane() {}
|
||||
|
||||
template<int OtherOptions>
|
||||
Hyperplane(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
|
||||
EIGEN_DEVICE_FUNC Hyperplane(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
|
||||
: m_coeffs(other.coeffs())
|
||||
{}
|
||||
|
||||
/** Constructs a dynamic-size hyperplane with \a _dim the dimension
|
||||
* of the ambient space */
|
||||
inline explicit Hyperplane(Index _dim) : m_coeffs(_dim+1) {}
|
||||
EIGEN_DEVICE_FUNC inline explicit Hyperplane(Index _dim) : m_coeffs(_dim+1) {}
|
||||
|
||||
/** Construct a plane from its normal \a n and a point \a e onto the plane.
|
||||
* \warning the vector normal is assumed to be normalized.
|
||||
*/
|
||||
inline Hyperplane(const VectorType& n, const VectorType& e)
|
||||
EIGEN_DEVICE_FUNC inline Hyperplane(const VectorType& n, const VectorType& e)
|
||||
: m_coeffs(n.size()+1)
|
||||
{
|
||||
normal() = n;
|
||||
@@ -75,7 +75,7 @@ public:
|
||||
* such that the algebraic equation of the plane is \f$ n \cdot x + d = 0 \f$.
|
||||
* \warning the vector normal is assumed to be normalized.
|
||||
*/
|
||||
inline Hyperplane(const VectorType& n, const Scalar& d)
|
||||
EIGEN_DEVICE_FUNC inline Hyperplane(const VectorType& n, const Scalar& d)
|
||||
: m_coeffs(n.size()+1)
|
||||
{
|
||||
normal() = n;
|
||||
@@ -85,7 +85,7 @@ public:
|
||||
/** Constructs a hyperplane passing through the two points. If the dimension of the ambient space
|
||||
* is greater than 2, then there isn't uniqueness, so an arbitrary choice is made.
|
||||
*/
|
||||
static inline Hyperplane Through(const VectorType& p0, const VectorType& p1)
|
||||
EIGEN_DEVICE_FUNC static inline Hyperplane Through(const VectorType& p0, const VectorType& p1)
|
||||
{
|
||||
Hyperplane result(p0.size());
|
||||
result.normal() = (p1 - p0).unitOrthogonal();
|
||||
@@ -96,7 +96,7 @@ public:
|
||||
/** Constructs a hyperplane passing through the three points. The dimension of the ambient space
|
||||
* is required to be exactly 3.
|
||||
*/
|
||||
static inline Hyperplane Through(const VectorType& p0, const VectorType& p1, const VectorType& p2)
|
||||
EIGEN_DEVICE_FUNC static inline Hyperplane Through(const VectorType& p0, const VectorType& p1, const VectorType& p2)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 3)
|
||||
Hyperplane result(p0.size());
|
||||
@@ -120,19 +120,19 @@ public:
|
||||
* so an arbitrary choice is made.
|
||||
*/
|
||||
// FIXME to be consitent with the rest this could be implemented as a static Through function ??
|
||||
explicit Hyperplane(const ParametrizedLine<Scalar, AmbientDimAtCompileTime>& parametrized)
|
||||
EIGEN_DEVICE_FUNC explicit Hyperplane(const ParametrizedLine<Scalar, AmbientDimAtCompileTime>& parametrized)
|
||||
{
|
||||
normal() = parametrized.direction().unitOrthogonal();
|
||||
offset() = -parametrized.origin().dot(normal());
|
||||
}
|
||||
|
||||
~Hyperplane() {}
|
||||
EIGEN_DEVICE_FUNC ~Hyperplane() {}
|
||||
|
||||
/** \returns the dimension in which the plane holds */
|
||||
inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_coeffs.size()-1 : Index(AmbientDimAtCompileTime); }
|
||||
EIGEN_DEVICE_FUNC inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_coeffs.size()-1 : Index(AmbientDimAtCompileTime); }
|
||||
|
||||
/** normalizes \c *this */
|
||||
void normalize(void)
|
||||
EIGEN_DEVICE_FUNC void normalize(void)
|
||||
{
|
||||
m_coeffs /= normal().norm();
|
||||
}
|
||||
@@ -140,45 +140,45 @@ public:
|
||||
/** \returns the signed distance between the plane \c *this and a point \a p.
|
||||
* \sa absDistance()
|
||||
*/
|
||||
inline Scalar signedDistance(const VectorType& p) const { return normal().dot(p) + offset(); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar signedDistance(const VectorType& p) const { return normal().dot(p) + offset(); }
|
||||
|
||||
/** \returns the absolute distance between the plane \c *this and a point \a p.
|
||||
* \sa signedDistance()
|
||||
*/
|
||||
inline Scalar absDistance(const VectorType& p) const { using std::abs; return abs(signedDistance(p)); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar absDistance(const VectorType& p) const { return numext::abs(signedDistance(p)); }
|
||||
|
||||
/** \returns the projection of a point \a p onto the plane \c *this.
|
||||
*/
|
||||
inline VectorType projection(const VectorType& p) const { return p - signedDistance(p) * normal(); }
|
||||
EIGEN_DEVICE_FUNC inline VectorType projection(const VectorType& p) const { return p - signedDistance(p) * normal(); }
|
||||
|
||||
/** \returns a constant reference to the unit normal vector of the plane, which corresponds
|
||||
* to the linear part of the implicit equation.
|
||||
*/
|
||||
inline ConstNormalReturnType normal() const { return ConstNormalReturnType(m_coeffs,0,0,dim(),1); }
|
||||
EIGEN_DEVICE_FUNC inline ConstNormalReturnType normal() const { return ConstNormalReturnType(m_coeffs,0,0,dim(),1); }
|
||||
|
||||
/** \returns a non-constant reference to the unit normal vector of the plane, which corresponds
|
||||
* to the linear part of the implicit equation.
|
||||
*/
|
||||
inline NormalReturnType normal() { return NormalReturnType(m_coeffs,0,0,dim(),1); }
|
||||
EIGEN_DEVICE_FUNC inline NormalReturnType normal() { return NormalReturnType(m_coeffs,0,0,dim(),1); }
|
||||
|
||||
/** \returns the distance to the origin, which is also the "constant term" of the implicit equation
|
||||
* \warning the vector normal is assumed to be normalized.
|
||||
*/
|
||||
inline const Scalar& offset() const { return m_coeffs.coeff(dim()); }
|
||||
EIGEN_DEVICE_FUNC inline const Scalar& offset() const { return m_coeffs.coeff(dim()); }
|
||||
|
||||
/** \returns a non-constant reference to the distance to the origin, which is also the constant part
|
||||
* of the implicit equation */
|
||||
inline Scalar& offset() { return m_coeffs(dim()); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar& offset() { return m_coeffs(dim()); }
|
||||
|
||||
/** \returns a constant reference to the coefficients c_i of the plane equation:
|
||||
* \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$
|
||||
*/
|
||||
inline const Coefficients& coeffs() const { return m_coeffs; }
|
||||
EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; }
|
||||
|
||||
/** \returns a non-constant reference to the coefficients c_i of the plane equation:
|
||||
* \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$
|
||||
*/
|
||||
inline Coefficients& coeffs() { return m_coeffs; }
|
||||
EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; }
|
||||
|
||||
/** \returns the intersection of *this with \a other.
|
||||
*
|
||||
@@ -186,16 +186,15 @@ public:
|
||||
*
|
||||
* \note If \a other is approximately parallel to *this, this method will return any point on *this.
|
||||
*/
|
||||
VectorType intersection(const Hyperplane& other) const
|
||||
EIGEN_DEVICE_FUNC VectorType intersection(const Hyperplane& other) const
|
||||
{
|
||||
using std::abs;
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2)
|
||||
Scalar det = coeffs().coeff(0) * other.coeffs().coeff(1) - coeffs().coeff(1) * other.coeffs().coeff(0);
|
||||
// since the line equations ax+by=c are normalized with a^2+b^2=1, the following tests
|
||||
// whether the two lines are approximately parallel.
|
||||
if(internal::isMuchSmallerThan(det, Scalar(1)))
|
||||
{ // special case where the two lines are approximately parallel. Pick any point on the first line.
|
||||
if(abs(coeffs().coeff(1))>abs(coeffs().coeff(0)))
|
||||
if(numext::abs(coeffs().coeff(1))>numext::abs(coeffs().coeff(0)))
|
||||
return VectorType(coeffs().coeff(1), -coeffs().coeff(2)/coeffs().coeff(1)-coeffs().coeff(0));
|
||||
else
|
||||
return VectorType(-coeffs().coeff(2)/coeffs().coeff(0)-coeffs().coeff(1), coeffs().coeff(0));
|
||||
@@ -215,10 +214,13 @@ public:
|
||||
* or a more generic #Affine transformation. The default is #Affine.
|
||||
*/
|
||||
template<typename XprType>
|
||||
inline Hyperplane& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine)
|
||||
EIGEN_DEVICE_FUNC inline Hyperplane& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine)
|
||||
{
|
||||
if (traits==Affine)
|
||||
{
|
||||
normal() = mat.inverse().transpose() * normal();
|
||||
m_coeffs /= normal().norm();
|
||||
}
|
||||
else if (traits==Isometry)
|
||||
normal() = mat * normal();
|
||||
else
|
||||
@@ -236,7 +238,7 @@ public:
|
||||
* Other kind of transformations are not supported.
|
||||
*/
|
||||
template<int TrOptions>
|
||||
inline Hyperplane& transform(const Transform<Scalar,AmbientDimAtCompileTime,Affine,TrOptions>& t,
|
||||
EIGEN_DEVICE_FUNC inline Hyperplane& transform(const Transform<Scalar,AmbientDimAtCompileTime,Affine,TrOptions>& t,
|
||||
TransformTraits traits = Affine)
|
||||
{
|
||||
transform(t.linear(), traits);
|
||||
@@ -250,7 +252,7 @@ public:
|
||||
* then this function smartly returns a const reference to \c *this.
|
||||
*/
|
||||
template<typename NewScalarType>
|
||||
inline typename internal::cast_return_type<Hyperplane,
|
||||
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Hyperplane,
|
||||
Hyperplane<NewScalarType,AmbientDimAtCompileTime,Options> >::type cast() const
|
||||
{
|
||||
return typename internal::cast_return_type<Hyperplane,
|
||||
@@ -259,7 +261,7 @@ public:
|
||||
|
||||
/** Copy constructor with scalar type conversion */
|
||||
template<typename OtherScalarType,int OtherOptions>
|
||||
inline explicit Hyperplane(const Hyperplane<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
|
||||
EIGEN_DEVICE_FUNC inline explicit Hyperplane(const Hyperplane<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
|
||||
{ m_coeffs = other.coeffs().template cast<Scalar>(); }
|
||||
|
||||
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
|
||||
@@ -267,7 +269,7 @@ public:
|
||||
*
|
||||
* \sa MatrixBase::isApprox() */
|
||||
template<int OtherOptions>
|
||||
bool isApprox(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
|
||||
EIGEN_DEVICE_FUNC bool isApprox(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
|
||||
{ return m_coeffs.isApprox(other.m_coeffs, prec); }
|
||||
|
||||
protected:
|
||||
|
||||
@@ -27,7 +27,7 @@ namespace Eigen {
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
inline typename MatrixBase<Derived>::template cross_product_return_type<OtherDerived>::type
|
||||
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::template cross_product_return_type<OtherDerived>::type
|
||||
#else
|
||||
inline typename MatrixBase<Derived>::PlainObject
|
||||
#endif
|
||||
@@ -53,7 +53,7 @@ template< int Arch,typename VectorLhs,typename VectorRhs,
|
||||
typename Scalar = typename VectorLhs::Scalar,
|
||||
bool Vectorizable = bool((VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit)>
|
||||
struct cross3_impl {
|
||||
static inline typename internal::plain_matrix_type<VectorLhs>::type
|
||||
EIGEN_DEVICE_FUNC static inline typename internal::plain_matrix_type<VectorLhs>::type
|
||||
run(const VectorLhs& lhs, const VectorRhs& rhs)
|
||||
{
|
||||
return typename internal::plain_matrix_type<VectorLhs>::type(
|
||||
@@ -78,7 +78,7 @@ struct cross3_impl {
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline typename MatrixBase<Derived>::PlainObject
|
||||
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::PlainObject
|
||||
MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,4)
|
||||
@@ -105,6 +105,7 @@ MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
|
||||
* \sa MatrixBase::cross() */
|
||||
template<typename ExpressionType, int Direction>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename VectorwiseOp<ExpressionType,Direction>::CrossReturnType
|
||||
VectorwiseOp<ExpressionType,Direction>::cross(const MatrixBase<OtherDerived>& other) const
|
||||
{
|
||||
@@ -221,7 +222,7 @@ struct unitOrthogonal_selector<Derived,2>
|
||||
* \sa cross()
|
||||
*/
|
||||
template<typename Derived>
|
||||
typename MatrixBase<Derived>::PlainObject
|
||||
EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::PlainObject
|
||||
MatrixBase<Derived>::unitOrthogonal() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
|
||||
@@ -41,45 +41,45 @@ public:
|
||||
typedef Matrix<Scalar,AmbientDimAtCompileTime,1,Options> VectorType;
|
||||
|
||||
/** Default constructor without initialization */
|
||||
inline ParametrizedLine() {}
|
||||
EIGEN_DEVICE_FUNC inline ParametrizedLine() {}
|
||||
|
||||
template<int OtherOptions>
|
||||
ParametrizedLine(const ParametrizedLine<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
|
||||
EIGEN_DEVICE_FUNC ParametrizedLine(const ParametrizedLine<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
|
||||
: m_origin(other.origin()), m_direction(other.direction())
|
||||
{}
|
||||
|
||||
/** Constructs a dynamic-size line with \a _dim the dimension
|
||||
* of the ambient space */
|
||||
inline explicit ParametrizedLine(Index _dim) : m_origin(_dim), m_direction(_dim) {}
|
||||
EIGEN_DEVICE_FUNC inline explicit ParametrizedLine(Index _dim) : m_origin(_dim), m_direction(_dim) {}
|
||||
|
||||
/** Initializes a parametrized line of direction \a direction and origin \a origin.
|
||||
* \warning the vector direction is assumed to be normalized.
|
||||
*/
|
||||
ParametrizedLine(const VectorType& origin, const VectorType& direction)
|
||||
EIGEN_DEVICE_FUNC ParametrizedLine(const VectorType& origin, const VectorType& direction)
|
||||
: m_origin(origin), m_direction(direction) {}
|
||||
|
||||
template <int OtherOptions>
|
||||
explicit ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane);
|
||||
EIGEN_DEVICE_FUNC explicit ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane);
|
||||
|
||||
/** Constructs a parametrized line going from \a p0 to \a p1. */
|
||||
static inline ParametrizedLine Through(const VectorType& p0, const VectorType& p1)
|
||||
EIGEN_DEVICE_FUNC static inline ParametrizedLine Through(const VectorType& p0, const VectorType& p1)
|
||||
{ return ParametrizedLine(p0, (p1-p0).normalized()); }
|
||||
|
||||
~ParametrizedLine() {}
|
||||
EIGEN_DEVICE_FUNC ~ParametrizedLine() {}
|
||||
|
||||
/** \returns the dimension in which the line holds */
|
||||
inline Index dim() const { return m_direction.size(); }
|
||||
EIGEN_DEVICE_FUNC inline Index dim() const { return m_direction.size(); }
|
||||
|
||||
const VectorType& origin() const { return m_origin; }
|
||||
VectorType& origin() { return m_origin; }
|
||||
EIGEN_DEVICE_FUNC const VectorType& origin() const { return m_origin; }
|
||||
EIGEN_DEVICE_FUNC VectorType& origin() { return m_origin; }
|
||||
|
||||
const VectorType& direction() const { return m_direction; }
|
||||
VectorType& direction() { return m_direction; }
|
||||
EIGEN_DEVICE_FUNC const VectorType& direction() const { return m_direction; }
|
||||
EIGEN_DEVICE_FUNC VectorType& direction() { return m_direction; }
|
||||
|
||||
/** \returns the squared distance of a point \a p to its projection onto the line \c *this.
|
||||
* \sa distance()
|
||||
*/
|
||||
RealScalar squaredDistance(const VectorType& p) const
|
||||
EIGEN_DEVICE_FUNC RealScalar squaredDistance(const VectorType& p) const
|
||||
{
|
||||
VectorType diff = p - origin();
|
||||
return (diff - direction().dot(diff) * direction()).squaredNorm();
|
||||
@@ -87,22 +87,22 @@ public:
|
||||
/** \returns the distance of a point \a p to its projection onto the line \c *this.
|
||||
* \sa squaredDistance()
|
||||
*/
|
||||
RealScalar distance(const VectorType& p) const { using std::sqrt; return sqrt(squaredDistance(p)); }
|
||||
EIGEN_DEVICE_FUNC RealScalar distance(const VectorType& p) const { EIGEN_USING_STD_MATH(sqrt) return sqrt(squaredDistance(p)); }
|
||||
|
||||
/** \returns the projection of a point \a p onto the line \c *this. */
|
||||
VectorType projection(const VectorType& p) const
|
||||
EIGEN_DEVICE_FUNC VectorType projection(const VectorType& p) const
|
||||
{ return origin() + direction().dot(p-origin()) * direction(); }
|
||||
|
||||
VectorType pointAt(const Scalar& t) const;
|
||||
EIGEN_DEVICE_FUNC VectorType pointAt(const Scalar& t) const;
|
||||
|
||||
template <int OtherOptions>
|
||||
Scalar intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
|
||||
EIGEN_DEVICE_FUNC Scalar intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
|
||||
|
||||
template <int OtherOptions>
|
||||
Scalar intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
|
||||
EIGEN_DEVICE_FUNC Scalar intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
|
||||
|
||||
template <int OtherOptions>
|
||||
VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
|
||||
EIGEN_DEVICE_FUNC VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
|
||||
|
||||
/** \returns \c *this with scalar type casted to \a NewScalarType
|
||||
*
|
||||
@@ -110,7 +110,7 @@ public:
|
||||
* then this function smartly returns a const reference to \c *this.
|
||||
*/
|
||||
template<typename NewScalarType>
|
||||
inline typename internal::cast_return_type<ParametrizedLine,
|
||||
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<ParametrizedLine,
|
||||
ParametrizedLine<NewScalarType,AmbientDimAtCompileTime,Options> >::type cast() const
|
||||
{
|
||||
return typename internal::cast_return_type<ParametrizedLine,
|
||||
@@ -119,7 +119,7 @@ public:
|
||||
|
||||
/** Copy constructor with scalar type conversion */
|
||||
template<typename OtherScalarType,int OtherOptions>
|
||||
inline explicit ParametrizedLine(const ParametrizedLine<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
|
||||
EIGEN_DEVICE_FUNC inline explicit ParametrizedLine(const ParametrizedLine<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
|
||||
{
|
||||
m_origin = other.origin().template cast<Scalar>();
|
||||
m_direction = other.direction().template cast<Scalar>();
|
||||
@@ -129,7 +129,7 @@ public:
|
||||
* determined by \a prec.
|
||||
*
|
||||
* \sa MatrixBase::isApprox() */
|
||||
bool isApprox(const ParametrizedLine& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
|
||||
EIGEN_DEVICE_FUNC bool isApprox(const ParametrizedLine& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
|
||||
{ return m_origin.isApprox(other.m_origin, prec) && m_direction.isApprox(other.m_direction, prec); }
|
||||
|
||||
protected:
|
||||
@@ -143,7 +143,7 @@ protected:
|
||||
*/
|
||||
template <typename _Scalar, int _AmbientDim, int _Options>
|
||||
template <int OtherOptions>
|
||||
inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim,OtherOptions>& hyperplane)
|
||||
EIGEN_DEVICE_FUNC inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim,OtherOptions>& hyperplane)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2)
|
||||
direction() = hyperplane.normal().unitOrthogonal();
|
||||
@@ -153,7 +153,7 @@ inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const H
|
||||
/** \returns the point at \a t along this line
|
||||
*/
|
||||
template <typename _Scalar, int _AmbientDim, int _Options>
|
||||
inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
|
||||
EIGEN_DEVICE_FUNC inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
|
||||
ParametrizedLine<_Scalar, _AmbientDim,_Options>::pointAt(const _Scalar& t) const
|
||||
{
|
||||
return origin() + (direction()*t);
|
||||
@@ -163,7 +163,7 @@ ParametrizedLine<_Scalar, _AmbientDim,_Options>::pointAt(const _Scalar& t) const
|
||||
*/
|
||||
template <typename _Scalar, int _AmbientDim, int _Options>
|
||||
template <int OtherOptions>
|
||||
inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
|
||||
EIGEN_DEVICE_FUNC inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
|
||||
{
|
||||
return -(hyperplane.offset()+hyperplane.normal().dot(origin()))
|
||||
/ hyperplane.normal().dot(direction());
|
||||
@@ -175,7 +175,7 @@ inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionPara
|
||||
*/
|
||||
template <typename _Scalar, int _AmbientDim, int _Options>
|
||||
template <int OtherOptions>
|
||||
inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
|
||||
EIGEN_DEVICE_FUNC inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
|
||||
{
|
||||
return intersectionParameter(hyperplane);
|
||||
}
|
||||
@@ -184,7 +184,7 @@ inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(con
|
||||
*/
|
||||
template <typename _Scalar, int _AmbientDim, int _Options>
|
||||
template <int OtherOptions>
|
||||
inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
|
||||
EIGEN_DEVICE_FUNC inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
|
||||
ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
|
||||
{
|
||||
return pointAt(intersectionParameter(hyperplane));
|
||||
|
||||
@@ -58,37 +58,37 @@ class QuaternionBase : public RotationBase<Derived, 3>
|
||||
|
||||
|
||||
/** \returns the \c x coefficient */
|
||||
inline Scalar x() const { return this->derived().coeffs().coeff(0); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar x() const { return this->derived().coeffs().coeff(0); }
|
||||
/** \returns the \c y coefficient */
|
||||
inline Scalar y() const { return this->derived().coeffs().coeff(1); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar y() const { return this->derived().coeffs().coeff(1); }
|
||||
/** \returns the \c z coefficient */
|
||||
inline Scalar z() const { return this->derived().coeffs().coeff(2); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar z() const { return this->derived().coeffs().coeff(2); }
|
||||
/** \returns the \c w coefficient */
|
||||
inline Scalar w() const { return this->derived().coeffs().coeff(3); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar w() const { return this->derived().coeffs().coeff(3); }
|
||||
|
||||
/** \returns a reference to the \c x coefficient */
|
||||
inline Scalar& x() { return this->derived().coeffs().coeffRef(0); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar& x() { return this->derived().coeffs().coeffRef(0); }
|
||||
/** \returns a reference to the \c y coefficient */
|
||||
inline Scalar& y() { return this->derived().coeffs().coeffRef(1); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar& y() { return this->derived().coeffs().coeffRef(1); }
|
||||
/** \returns a reference to the \c z coefficient */
|
||||
inline Scalar& z() { return this->derived().coeffs().coeffRef(2); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar& z() { return this->derived().coeffs().coeffRef(2); }
|
||||
/** \returns a reference to the \c w coefficient */
|
||||
inline Scalar& w() { return this->derived().coeffs().coeffRef(3); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar& w() { return this->derived().coeffs().coeffRef(3); }
|
||||
|
||||
/** \returns a read-only vector expression of the imaginary part (x,y,z) */
|
||||
inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); }
|
||||
EIGEN_DEVICE_FUNC inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); }
|
||||
|
||||
/** \returns a vector expression of the imaginary part (x,y,z) */
|
||||
inline VectorBlock<Coefficients,3> vec() { return coeffs().template head<3>(); }
|
||||
EIGEN_DEVICE_FUNC inline VectorBlock<Coefficients,3> vec() { return coeffs().template head<3>(); }
|
||||
|
||||
/** \returns a read-only vector expression of the coefficients (x,y,z,w) */
|
||||
inline const typename internal::traits<Derived>::Coefficients& coeffs() const { return derived().coeffs(); }
|
||||
EIGEN_DEVICE_FUNC inline const typename internal::traits<Derived>::Coefficients& coeffs() const { return derived().coeffs(); }
|
||||
|
||||
/** \returns a vector expression of the coefficients (x,y,z,w) */
|
||||
inline typename internal::traits<Derived>::Coefficients& coeffs() { return derived().coeffs(); }
|
||||
EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Coefficients& coeffs() { return derived().coeffs(); }
|
||||
|
||||
EIGEN_STRONG_INLINE QuaternionBase<Derived>& operator=(const QuaternionBase<Derived>& other);
|
||||
template<class OtherDerived> EIGEN_STRONG_INLINE Derived& operator=(const QuaternionBase<OtherDerived>& other);
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE QuaternionBase<Derived>& operator=(const QuaternionBase<Derived>& other);
|
||||
template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const QuaternionBase<OtherDerived>& other);
|
||||
|
||||
// disabled this copy operator as it is giving very strange compilation errors when compiling
|
||||
// test_stdvector with GCC 4.4.2. This looks like a GCC bug though, so feel free to re-enable it if it's
|
||||
@@ -97,72 +97,72 @@ class QuaternionBase : public RotationBase<Derived, 3>
|
||||
// Derived& operator=(const QuaternionBase& other)
|
||||
// { return operator=<Derived>(other); }
|
||||
|
||||
Derived& operator=(const AngleAxisType& aa);
|
||||
template<class OtherDerived> Derived& operator=(const MatrixBase<OtherDerived>& m);
|
||||
EIGEN_DEVICE_FUNC Derived& operator=(const AngleAxisType& aa);
|
||||
template<class OtherDerived> EIGEN_DEVICE_FUNC Derived& operator=(const MatrixBase<OtherDerived>& m);
|
||||
|
||||
/** \returns a quaternion representing an identity rotation
|
||||
* \sa MatrixBase::Identity()
|
||||
*/
|
||||
static inline Quaternion<Scalar> Identity() { return Quaternion<Scalar>(Scalar(1), Scalar(0), Scalar(0), Scalar(0)); }
|
||||
EIGEN_DEVICE_FUNC static inline Quaternion<Scalar> Identity() { return Quaternion<Scalar>(Scalar(1), Scalar(0), Scalar(0), Scalar(0)); }
|
||||
|
||||
/** \sa QuaternionBase::Identity(), MatrixBase::setIdentity()
|
||||
*/
|
||||
inline QuaternionBase& setIdentity() { coeffs() << Scalar(0), Scalar(0), Scalar(0), Scalar(1); return *this; }
|
||||
EIGEN_DEVICE_FUNC inline QuaternionBase& setIdentity() { coeffs() << Scalar(0), Scalar(0), Scalar(0), Scalar(1); return *this; }
|
||||
|
||||
/** \returns the squared norm of the quaternion's coefficients
|
||||
* \sa QuaternionBase::norm(), MatrixBase::squaredNorm()
|
||||
*/
|
||||
inline Scalar squaredNorm() const { return coeffs().squaredNorm(); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar squaredNorm() const { return coeffs().squaredNorm(); }
|
||||
|
||||
/** \returns the norm of the quaternion's coefficients
|
||||
* \sa QuaternionBase::squaredNorm(), MatrixBase::norm()
|
||||
*/
|
||||
inline Scalar norm() const { return coeffs().norm(); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar norm() const { return coeffs().norm(); }
|
||||
|
||||
/** Normalizes the quaternion \c *this
|
||||
* \sa normalized(), MatrixBase::normalize() */
|
||||
inline void normalize() { coeffs().normalize(); }
|
||||
EIGEN_DEVICE_FUNC inline void normalize() { coeffs().normalize(); }
|
||||
/** \returns a normalized copy of \c *this
|
||||
* \sa normalize(), MatrixBase::normalized() */
|
||||
inline Quaternion<Scalar> normalized() const { return Quaternion<Scalar>(coeffs().normalized()); }
|
||||
EIGEN_DEVICE_FUNC inline Quaternion<Scalar> normalized() const { return Quaternion<Scalar>(coeffs().normalized()); }
|
||||
|
||||
/** \returns the dot product of \c *this and \a other
|
||||
* Geometrically speaking, the dot product of two unit quaternions
|
||||
* corresponds to the cosine of half the angle between the two rotations.
|
||||
* \sa angularDistance()
|
||||
*/
|
||||
template<class OtherDerived> inline Scalar dot(const QuaternionBase<OtherDerived>& other) const { return coeffs().dot(other.coeffs()); }
|
||||
template<class OtherDerived> EIGEN_DEVICE_FUNC inline Scalar dot(const QuaternionBase<OtherDerived>& other) const { return coeffs().dot(other.coeffs()); }
|
||||
|
||||
template<class OtherDerived> Scalar angularDistance(const QuaternionBase<OtherDerived>& other) const;
|
||||
template<class OtherDerived> EIGEN_DEVICE_FUNC Scalar angularDistance(const QuaternionBase<OtherDerived>& other) const;
|
||||
|
||||
/** \returns an equivalent 3x3 rotation matrix */
|
||||
Matrix3 toRotationMatrix() const;
|
||||
EIGEN_DEVICE_FUNC Matrix3 toRotationMatrix() const;
|
||||
|
||||
/** \returns the quaternion which transform \a a into \a b through a rotation */
|
||||
template<typename Derived1, typename Derived2>
|
||||
Derived& setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
|
||||
EIGEN_DEVICE_FUNC Derived& setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
|
||||
|
||||
template<class OtherDerived> EIGEN_STRONG_INLINE Quaternion<Scalar> operator* (const QuaternionBase<OtherDerived>& q) const;
|
||||
template<class OtherDerived> EIGEN_STRONG_INLINE Derived& operator*= (const QuaternionBase<OtherDerived>& q);
|
||||
template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion<Scalar> operator* (const QuaternionBase<OtherDerived>& q) const;
|
||||
template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*= (const QuaternionBase<OtherDerived>& q);
|
||||
|
||||
/** \returns the quaternion describing the inverse rotation */
|
||||
Quaternion<Scalar> inverse() const;
|
||||
EIGEN_DEVICE_FUNC Quaternion<Scalar> inverse() const;
|
||||
|
||||
/** \returns the conjugated quaternion */
|
||||
Quaternion<Scalar> conjugate() const;
|
||||
EIGEN_DEVICE_FUNC Quaternion<Scalar> conjugate() const;
|
||||
|
||||
template<class OtherDerived> Quaternion<Scalar> slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const;
|
||||
template<class OtherDerived> EIGEN_DEVICE_FUNC Quaternion<Scalar> slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const;
|
||||
|
||||
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
|
||||
* determined by \a prec.
|
||||
*
|
||||
* \sa MatrixBase::isApprox() */
|
||||
template<class OtherDerived>
|
||||
bool isApprox(const QuaternionBase<OtherDerived>& other, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const
|
||||
EIGEN_DEVICE_FUNC bool isApprox(const QuaternionBase<OtherDerived>& other, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const
|
||||
{ return coeffs().isApprox(other.coeffs(), prec); }
|
||||
|
||||
/** return the result vector of \a v through the rotation*/
|
||||
EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const;
|
||||
|
||||
/** \returns \c *this with scalar type casted to \a NewScalarType
|
||||
*
|
||||
@@ -170,7 +170,7 @@ class QuaternionBase : public RotationBase<Derived, 3>
|
||||
* then this function smartly returns a const reference to \c *this.
|
||||
*/
|
||||
template<typename NewScalarType>
|
||||
inline typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type cast() const
|
||||
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type cast() const
|
||||
{
|
||||
return typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type(derived());
|
||||
}
|
||||
@@ -239,7 +239,7 @@ public:
|
||||
typedef typename Base::AngleAxisType AngleAxisType;
|
||||
|
||||
/** Default constructor leaving the quaternion uninitialized. */
|
||||
inline Quaternion() {}
|
||||
EIGEN_DEVICE_FUNC inline Quaternion() {}
|
||||
|
||||
/** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from
|
||||
* its four coefficients \a w, \a x, \a y and \a z.
|
||||
@@ -248,36 +248,36 @@ public:
|
||||
* while internally the coefficients are stored in the following order:
|
||||
* [\c x, \c y, \c z, \c w]
|
||||
*/
|
||||
inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){}
|
||||
EIGEN_DEVICE_FUNC inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){}
|
||||
|
||||
/** Constructs and initialize a quaternion from the array data */
|
||||
explicit inline Quaternion(const Scalar* data) : m_coeffs(data) {}
|
||||
EIGEN_DEVICE_FUNC explicit inline Quaternion(const Scalar* data) : m_coeffs(data) {}
|
||||
|
||||
/** Copy constructor */
|
||||
template<class Derived> EIGEN_STRONG_INLINE Quaternion(const QuaternionBase<Derived>& other) { this->Base::operator=(other); }
|
||||
template<class Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion(const QuaternionBase<Derived>& other) { this->Base::operator=(other); }
|
||||
|
||||
/** Constructs and initializes a quaternion from the angle-axis \a aa */
|
||||
explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; }
|
||||
EIGEN_DEVICE_FUNC explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; }
|
||||
|
||||
/** Constructs and initializes a quaternion from either:
|
||||
* - a rotation matrix expression,
|
||||
* - a 4D vector expression representing quaternion coefficients.
|
||||
*/
|
||||
template<typename Derived>
|
||||
explicit inline Quaternion(const MatrixBase<Derived>& other) { *this = other; }
|
||||
EIGEN_DEVICE_FUNC explicit inline Quaternion(const MatrixBase<Derived>& other) { *this = other; }
|
||||
|
||||
/** Explicit copy constructor with scalar conversion */
|
||||
template<typename OtherScalar, int OtherOptions>
|
||||
explicit inline Quaternion(const Quaternion<OtherScalar, OtherOptions>& other)
|
||||
EIGEN_DEVICE_FUNC explicit inline Quaternion(const Quaternion<OtherScalar, OtherOptions>& other)
|
||||
{ m_coeffs = other.coeffs().template cast<Scalar>(); }
|
||||
|
||||
static Quaternion UnitRandom();
|
||||
EIGEN_DEVICE_FUNC static Quaternion UnitRandom();
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
static Quaternion FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
|
||||
EIGEN_DEVICE_FUNC static Quaternion FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
|
||||
|
||||
inline Coefficients& coeffs() { return m_coeffs;}
|
||||
inline const Coefficients& coeffs() const { return m_coeffs;}
|
||||
EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs;}
|
||||
EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;}
|
||||
|
||||
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(NeedsAlignment))
|
||||
|
||||
@@ -357,9 +357,9 @@ class Map<const Quaternion<_Scalar>, _Options >
|
||||
* \code *coeffs == {x, y, z, w} \endcode
|
||||
*
|
||||
* If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
|
||||
explicit EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {}
|
||||
EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {}
|
||||
|
||||
inline const Coefficients& coeffs() const { return m_coeffs;}
|
||||
EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;}
|
||||
|
||||
protected:
|
||||
const Coefficients m_coeffs;
|
||||
@@ -394,10 +394,10 @@ class Map<Quaternion<_Scalar>, _Options >
|
||||
* \code *coeffs == {x, y, z, w} \endcode
|
||||
*
|
||||
* If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
|
||||
explicit EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {}
|
||||
EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {}
|
||||
|
||||
inline Coefficients& coeffs() { return m_coeffs; }
|
||||
inline const Coefficients& coeffs() const { return m_coeffs; }
|
||||
EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; }
|
||||
EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; }
|
||||
|
||||
protected:
|
||||
Coefficients m_coeffs;
|
||||
@@ -423,9 +423,9 @@ typedef Map<Quaternion<double>, Aligned> QuaternionMapAlignedd;
|
||||
// Generic Quaternion * Quaternion product
|
||||
// This product can be specialized for a given architecture via the Arch template argument.
|
||||
namespace internal {
|
||||
template<int Arch, class Derived1, class Derived2, typename Scalar, int _Options> struct quat_product
|
||||
template<int Arch, class Derived1, class Derived2, typename Scalar> struct quat_product
|
||||
{
|
||||
static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){
|
||||
return Quaternion<Scalar>
|
||||
(
|
||||
a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(),
|
||||
@@ -440,20 +440,19 @@ template<int Arch, class Derived1, class Derived2, typename Scalar, int _Options
|
||||
/** \returns the concatenation of two rotations as a quaternion-quaternion product */
|
||||
template <class Derived>
|
||||
template <class OtherDerived>
|
||||
EIGEN_STRONG_INLINE Quaternion<typename internal::traits<Derived>::Scalar>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion<typename internal::traits<Derived>::Scalar>
|
||||
QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename OtherDerived::Scalar>::value),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
return internal::quat_product<Architecture::Target, Derived, OtherDerived,
|
||||
typename internal::traits<Derived>::Scalar,
|
||||
EIGEN_PLAIN_ENUM_MIN(internal::traits<Derived>::Alignment, internal::traits<OtherDerived>::Alignment)>::run(*this, other);
|
||||
typename internal::traits<Derived>::Scalar>::run(*this, other);
|
||||
}
|
||||
|
||||
/** \sa operator*(Quaternion) */
|
||||
template <class Derived>
|
||||
template <class OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const QuaternionBase<OtherDerived>& other)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const QuaternionBase<OtherDerived>& other)
|
||||
{
|
||||
derived() = derived() * other.derived();
|
||||
return derived();
|
||||
@@ -467,7 +466,7 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const Quaterni
|
||||
* - Via a Matrix3: 24 + 15n
|
||||
*/
|
||||
template <class Derived>
|
||||
EIGEN_STRONG_INLINE typename QuaternionBase<Derived>::Vector3
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename QuaternionBase<Derived>::Vector3
|
||||
QuaternionBase<Derived>::_transformVector(const Vector3& v) const
|
||||
{
|
||||
// Note that this algorithm comes from the optimization by hand
|
||||
@@ -481,7 +480,7 @@ QuaternionBase<Derived>::_transformVector(const Vector3& v) const
|
||||
}
|
||||
|
||||
template<class Derived>
|
||||
EIGEN_STRONG_INLINE QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(const QuaternionBase<Derived>& other)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(const QuaternionBase<Derived>& other)
|
||||
{
|
||||
coeffs() = other.coeffs();
|
||||
return derived();
|
||||
@@ -489,7 +488,7 @@ EIGEN_STRONG_INLINE QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(
|
||||
|
||||
template<class Derived>
|
||||
template<class OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const QuaternionBase<OtherDerived>& other)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const QuaternionBase<OtherDerived>& other)
|
||||
{
|
||||
coeffs() = other.coeffs();
|
||||
return derived();
|
||||
@@ -498,10 +497,10 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const Quaternion
|
||||
/** Set \c *this from an angle-axis \a aa and returns a reference to \c *this
|
||||
*/
|
||||
template<class Derived>
|
||||
EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const AngleAxisType& aa)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const AngleAxisType& aa)
|
||||
{
|
||||
using std::cos;
|
||||
using std::sin;
|
||||
EIGEN_USING_STD_MATH(cos)
|
||||
EIGEN_USING_STD_MATH(sin)
|
||||
Scalar ha = Scalar(0.5)*aa.angle(); // Scalar(0.5) to suppress precision loss warnings
|
||||
this->w() = cos(ha);
|
||||
this->vec() = sin(ha) * aa.axis();
|
||||
@@ -516,7 +515,7 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const AngleAxisT
|
||||
|
||||
template<class Derived>
|
||||
template<class MatrixDerived>
|
||||
inline Derived& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerived>& xpr)
|
||||
EIGEN_DEVICE_FUNC inline Derived& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerived>& xpr)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename MatrixDerived::Scalar>::value),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
@@ -528,7 +527,7 @@ inline Derived& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerive
|
||||
* be normalized, otherwise the result is undefined.
|
||||
*/
|
||||
template<class Derived>
|
||||
inline typename QuaternionBase<Derived>::Matrix3
|
||||
EIGEN_DEVICE_FUNC inline typename QuaternionBase<Derived>::Matrix3
|
||||
QuaternionBase<Derived>::toRotationMatrix(void) const
|
||||
{
|
||||
// NOTE if inlined, then gcc 4.2 and 4.4 get rid of the temporary (not gcc 4.3 !!)
|
||||
@@ -575,9 +574,9 @@ QuaternionBase<Derived>::toRotationMatrix(void) const
|
||||
*/
|
||||
template<class Derived>
|
||||
template<typename Derived1, typename Derived2>
|
||||
inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
|
||||
EIGEN_DEVICE_FUNC inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
|
||||
{
|
||||
using std::sqrt;
|
||||
EIGEN_USING_STD_MATH(sqrt)
|
||||
Vector3 v0 = a.normalized();
|
||||
Vector3 v1 = b.normalized();
|
||||
Scalar c = v1.dot(v0);
|
||||
@@ -616,11 +615,11 @@ inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Deri
|
||||
* \note The implementation is based on http://planning.cs.uiuc.edu/node198.html
|
||||
*/
|
||||
template<typename Scalar, int Options>
|
||||
Quaternion<Scalar,Options> Quaternion<Scalar,Options>::UnitRandom()
|
||||
EIGEN_DEVICE_FUNC Quaternion<Scalar,Options> Quaternion<Scalar,Options>::UnitRandom()
|
||||
{
|
||||
using std::sqrt;
|
||||
using std::sin;
|
||||
using std::cos;
|
||||
EIGEN_USING_STD_MATH(sqrt)
|
||||
EIGEN_USING_STD_MATH(sin)
|
||||
EIGEN_USING_STD_MATH(cos)
|
||||
const Scalar u1 = internal::random<Scalar>(0, 1),
|
||||
u2 = internal::random<Scalar>(0, 2*EIGEN_PI),
|
||||
u3 = internal::random<Scalar>(0, 2*EIGEN_PI);
|
||||
@@ -642,7 +641,7 @@ Quaternion<Scalar,Options> Quaternion<Scalar,Options>::UnitRandom()
|
||||
*/
|
||||
template<typename Scalar, int Options>
|
||||
template<typename Derived1, typename Derived2>
|
||||
Quaternion<Scalar,Options> Quaternion<Scalar,Options>::FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
|
||||
EIGEN_DEVICE_FUNC Quaternion<Scalar,Options> Quaternion<Scalar,Options>::FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
|
||||
{
|
||||
Quaternion quat;
|
||||
quat.setFromTwoVectors(a, b);
|
||||
@@ -657,7 +656,7 @@ Quaternion<Scalar,Options> Quaternion<Scalar,Options>::FromTwoVectors(const Matr
|
||||
* \sa QuaternionBase::conjugate()
|
||||
*/
|
||||
template <class Derived>
|
||||
inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::inverse() const
|
||||
EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::inverse() const
|
||||
{
|
||||
// FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite() ??
|
||||
Scalar n2 = this->squaredNorm();
|
||||
@@ -672,9 +671,9 @@ inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Der
|
||||
|
||||
// Generic conjugate of a Quaternion
|
||||
namespace internal {
|
||||
template<int Arch, class Derived, typename Scalar, int _Options> struct quat_conj
|
||||
template<int Arch, class Derived, typename Scalar> struct quat_conj
|
||||
{
|
||||
static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){
|
||||
return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z());
|
||||
}
|
||||
};
|
||||
@@ -687,12 +686,11 @@ template<int Arch, class Derived, typename Scalar, int _Options> struct quat_con
|
||||
* \sa Quaternion2::inverse()
|
||||
*/
|
||||
template <class Derived>
|
||||
inline Quaternion<typename internal::traits<Derived>::Scalar>
|
||||
EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar>
|
||||
QuaternionBase<Derived>::conjugate() const
|
||||
{
|
||||
return internal::quat_conj<Architecture::Target, Derived,
|
||||
typename internal::traits<Derived>::Scalar,
|
||||
internal::traits<Derived>::Alignment>::run(*this);
|
||||
typename internal::traits<Derived>::Scalar>::run(*this);
|
||||
|
||||
}
|
||||
|
||||
@@ -701,13 +699,12 @@ QuaternionBase<Derived>::conjugate() const
|
||||
*/
|
||||
template <class Derived>
|
||||
template <class OtherDerived>
|
||||
inline typename internal::traits<Derived>::Scalar
|
||||
EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Scalar
|
||||
QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& other) const
|
||||
{
|
||||
using std::atan2;
|
||||
using std::abs;
|
||||
EIGEN_USING_STD_MATH(atan2)
|
||||
Quaternion<Scalar> d = (*this) * other.conjugate();
|
||||
return Scalar(2) * atan2( d.vec().norm(), abs(d.w()) );
|
||||
return Scalar(2) * atan2( d.vec().norm(), numext::abs(d.w()) );
|
||||
}
|
||||
|
||||
|
||||
@@ -720,15 +717,14 @@ QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& oth
|
||||
*/
|
||||
template <class Derived>
|
||||
template <class OtherDerived>
|
||||
Quaternion<typename internal::traits<Derived>::Scalar>
|
||||
EIGEN_DEVICE_FUNC Quaternion<typename internal::traits<Derived>::Scalar>
|
||||
QuaternionBase<Derived>::slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const
|
||||
{
|
||||
using std::acos;
|
||||
using std::sin;
|
||||
using std::abs;
|
||||
EIGEN_USING_STD_MATH(acos)
|
||||
EIGEN_USING_STD_MATH(sin)
|
||||
const Scalar one = Scalar(1) - NumTraits<Scalar>::epsilon();
|
||||
Scalar d = this->dot(other);
|
||||
Scalar absD = abs(d);
|
||||
Scalar absD = numext::abs(d);
|
||||
|
||||
Scalar scale0;
|
||||
Scalar scale1;
|
||||
@@ -759,10 +755,10 @@ template<typename Other>
|
||||
struct quaternionbase_assign_impl<Other,3,3>
|
||||
{
|
||||
typedef typename Other::Scalar Scalar;
|
||||
template<class Derived> static inline void run(QuaternionBase<Derived>& q, const Other& a_mat)
|
||||
template<class Derived> EIGEN_DEVICE_FUNC static inline void run(QuaternionBase<Derived>& q, const Other& a_mat)
|
||||
{
|
||||
const typename internal::nested_eval<Other,2>::type mat(a_mat);
|
||||
using std::sqrt;
|
||||
EIGEN_USING_STD_MATH(sqrt)
|
||||
// This algorithm comes from "Quaternion Calculus and Fast Animation",
|
||||
// Ken Shoemake, 1987 SIGGRAPH course notes
|
||||
Scalar t = mat.trace();
|
||||
@@ -800,7 +796,7 @@ template<typename Other>
|
||||
struct quaternionbase_assign_impl<Other,4,1>
|
||||
{
|
||||
typedef typename Other::Scalar Scalar;
|
||||
template<class Derived> static inline void run(QuaternionBase<Derived>& q, const Other& vec)
|
||||
template<class Derived> EIGEN_DEVICE_FUNC static inline void run(QuaternionBase<Derived>& q, const Other& vec)
|
||||
{
|
||||
q.coeffs() = vec;
|
||||
}
|
||||
|
||||
@@ -59,35 +59,35 @@ protected:
|
||||
public:
|
||||
|
||||
/** Construct a 2D counter clock wise rotation from the angle \a a in radian. */
|
||||
explicit inline Rotation2D(const Scalar& a) : m_angle(a) {}
|
||||
EIGEN_DEVICE_FUNC explicit inline Rotation2D(const Scalar& a) : m_angle(a) {}
|
||||
|
||||
/** Default constructor wihtout initialization. The represented rotation is undefined. */
|
||||
Rotation2D() {}
|
||||
EIGEN_DEVICE_FUNC Rotation2D() {}
|
||||
|
||||
/** Construct a 2D rotation from a 2x2 rotation matrix \a mat.
|
||||
*
|
||||
* \sa fromRotationMatrix()
|
||||
*/
|
||||
template<typename Derived>
|
||||
explicit Rotation2D(const MatrixBase<Derived>& m)
|
||||
EIGEN_DEVICE_FUNC explicit Rotation2D(const MatrixBase<Derived>& m)
|
||||
{
|
||||
fromRotationMatrix(m.derived());
|
||||
}
|
||||
|
||||
/** \returns the rotation angle */
|
||||
inline Scalar angle() const { return m_angle; }
|
||||
EIGEN_DEVICE_FUNC inline Scalar angle() const { return m_angle; }
|
||||
|
||||
/** \returns a read-write reference to the rotation angle */
|
||||
inline Scalar& angle() { return m_angle; }
|
||||
EIGEN_DEVICE_FUNC inline Scalar& angle() { return m_angle; }
|
||||
|
||||
/** \returns the rotation angle in [0,2pi] */
|
||||
inline Scalar smallestPositiveAngle() const {
|
||||
EIGEN_DEVICE_FUNC inline Scalar smallestPositiveAngle() const {
|
||||
Scalar tmp = numext::fmod(m_angle,Scalar(2*EIGEN_PI));
|
||||
return tmp<Scalar(0) ? tmp + Scalar(2*EIGEN_PI) : tmp;
|
||||
}
|
||||
|
||||
/** \returns the rotation angle in [-pi,pi] */
|
||||
inline Scalar smallestAngle() const {
|
||||
EIGEN_DEVICE_FUNC inline Scalar smallestAngle() const {
|
||||
Scalar tmp = numext::fmod(m_angle,Scalar(2*EIGEN_PI));
|
||||
if(tmp>Scalar(EIGEN_PI)) tmp -= Scalar(2*EIGEN_PI);
|
||||
else if(tmp<-Scalar(EIGEN_PI)) tmp += Scalar(2*EIGEN_PI);
|
||||
@@ -95,23 +95,23 @@ public:
|
||||
}
|
||||
|
||||
/** \returns the inverse rotation */
|
||||
inline Rotation2D inverse() const { return Rotation2D(-m_angle); }
|
||||
EIGEN_DEVICE_FUNC inline Rotation2D inverse() const { return Rotation2D(-m_angle); }
|
||||
|
||||
/** Concatenates two rotations */
|
||||
inline Rotation2D operator*(const Rotation2D& other) const
|
||||
EIGEN_DEVICE_FUNC inline Rotation2D operator*(const Rotation2D& other) const
|
||||
{ return Rotation2D(m_angle + other.m_angle); }
|
||||
|
||||
/** Concatenates two rotations */
|
||||
inline Rotation2D& operator*=(const Rotation2D& other)
|
||||
EIGEN_DEVICE_FUNC inline Rotation2D& operator*=(const Rotation2D& other)
|
||||
{ m_angle += other.m_angle; return *this; }
|
||||
|
||||
/** Applies the rotation to a 2D vector */
|
||||
Vector2 operator* (const Vector2& vec) const
|
||||
EIGEN_DEVICE_FUNC Vector2 operator* (const Vector2& vec) const
|
||||
{ return toRotationMatrix() * vec; }
|
||||
|
||||
template<typename Derived>
|
||||
Rotation2D& fromRotationMatrix(const MatrixBase<Derived>& m);
|
||||
Matrix2 toRotationMatrix() const;
|
||||
EIGEN_DEVICE_FUNC Rotation2D& fromRotationMatrix(const MatrixBase<Derived>& m);
|
||||
EIGEN_DEVICE_FUNC Matrix2 toRotationMatrix() const;
|
||||
|
||||
/** Set \c *this from a 2x2 rotation matrix \a mat.
|
||||
* In other words, this function extract the rotation angle from the rotation matrix.
|
||||
@@ -121,13 +121,13 @@ public:
|
||||
* \sa fromRotationMatrix()
|
||||
*/
|
||||
template<typename Derived>
|
||||
Rotation2D& operator=(const MatrixBase<Derived>& m)
|
||||
EIGEN_DEVICE_FUNC Rotation2D& operator=(const MatrixBase<Derived>& m)
|
||||
{ return fromRotationMatrix(m.derived()); }
|
||||
|
||||
/** \returns the spherical interpolation between \c *this and \a other using
|
||||
* parameter \a t. It is in fact equivalent to a linear interpolation.
|
||||
*/
|
||||
inline Rotation2D slerp(const Scalar& t, const Rotation2D& other) const
|
||||
EIGEN_DEVICE_FUNC inline Rotation2D slerp(const Scalar& t, const Rotation2D& other) const
|
||||
{
|
||||
Scalar dist = Rotation2D(other.m_angle-m_angle).smallestAngle();
|
||||
return Rotation2D(m_angle + dist*t);
|
||||
@@ -139,23 +139,23 @@ public:
|
||||
* then this function smartly returns a const reference to \c *this.
|
||||
*/
|
||||
template<typename NewScalarType>
|
||||
inline typename internal::cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type cast() const
|
||||
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type cast() const
|
||||
{ return typename internal::cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type(*this); }
|
||||
|
||||
/** Copy constructor with scalar type conversion */
|
||||
template<typename OtherScalarType>
|
||||
inline explicit Rotation2D(const Rotation2D<OtherScalarType>& other)
|
||||
EIGEN_DEVICE_FUNC inline explicit Rotation2D(const Rotation2D<OtherScalarType>& other)
|
||||
{
|
||||
m_angle = Scalar(other.angle());
|
||||
}
|
||||
|
||||
static inline Rotation2D Identity() { return Rotation2D(0); }
|
||||
EIGEN_DEVICE_FUNC static inline Rotation2D Identity() { return Rotation2D(0); }
|
||||
|
||||
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
|
||||
* determined by \a prec.
|
||||
*
|
||||
* \sa MatrixBase::isApprox() */
|
||||
bool isApprox(const Rotation2D& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
|
||||
EIGEN_DEVICE_FUNC bool isApprox(const Rotation2D& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
|
||||
{ return internal::isApprox(m_angle,other.m_angle, prec); }
|
||||
|
||||
};
|
||||
@@ -173,9 +173,9 @@ typedef Rotation2D<double> Rotation2Dd;
|
||||
*/
|
||||
template<typename Scalar>
|
||||
template<typename Derived>
|
||||
Rotation2D<Scalar>& Rotation2D<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
|
||||
EIGEN_DEVICE_FUNC Rotation2D<Scalar>& Rotation2D<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
|
||||
{
|
||||
using std::atan2;
|
||||
EIGEN_USING_STD_MATH(atan2)
|
||||
EIGEN_STATIC_ASSERT(Derived::RowsAtCompileTime==2 && Derived::ColsAtCompileTime==2,YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
m_angle = atan2(mat.coeff(1,0), mat.coeff(0,0));
|
||||
return *this;
|
||||
@@ -185,10 +185,10 @@ Rotation2D<Scalar>& Rotation2D<Scalar>::fromRotationMatrix(const MatrixBase<Deri
|
||||
*/
|
||||
template<typename Scalar>
|
||||
typename Rotation2D<Scalar>::Matrix2
|
||||
Rotation2D<Scalar>::toRotationMatrix(void) const
|
||||
EIGEN_DEVICE_FUNC Rotation2D<Scalar>::toRotationMatrix(void) const
|
||||
{
|
||||
using std::sin;
|
||||
using std::cos;
|
||||
EIGEN_USING_STD_MATH(sin)
|
||||
EIGEN_USING_STD_MATH(cos)
|
||||
Scalar sinA = sin(m_angle);
|
||||
Scalar cosA = cos(m_angle);
|
||||
return (Matrix2() << cosA, -sinA, sinA, cosA).finished();
|
||||
|
||||
@@ -38,26 +38,26 @@ class RotationBase
|
||||
typedef Matrix<Scalar,Dim,1> VectorType;
|
||||
|
||||
public:
|
||||
inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||
inline Derived& derived() { return *static_cast<Derived*>(this); }
|
||||
EIGEN_DEVICE_FUNC inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||
EIGEN_DEVICE_FUNC inline Derived& derived() { return *static_cast<Derived*>(this); }
|
||||
|
||||
/** \returns an equivalent rotation matrix */
|
||||
inline RotationMatrixType toRotationMatrix() const { return derived().toRotationMatrix(); }
|
||||
EIGEN_DEVICE_FUNC inline RotationMatrixType toRotationMatrix() const { return derived().toRotationMatrix(); }
|
||||
|
||||
/** \returns an equivalent rotation matrix
|
||||
* This function is added to be conform with the Transform class' naming scheme.
|
||||
*/
|
||||
inline RotationMatrixType matrix() const { return derived().toRotationMatrix(); }
|
||||
EIGEN_DEVICE_FUNC inline RotationMatrixType matrix() const { return derived().toRotationMatrix(); }
|
||||
|
||||
/** \returns the inverse rotation */
|
||||
inline Derived inverse() const { return derived().inverse(); }
|
||||
EIGEN_DEVICE_FUNC inline Derived inverse() const { return derived().inverse(); }
|
||||
|
||||
/** \returns the concatenation of the rotation \c *this with a translation \a t */
|
||||
inline Transform<Scalar,Dim,Isometry> operator*(const Translation<Scalar,Dim>& t) const
|
||||
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Isometry> operator*(const Translation<Scalar,Dim>& t) const
|
||||
{ return Transform<Scalar,Dim,Isometry>(*this) * t; }
|
||||
|
||||
/** \returns the concatenation of the rotation \c *this with a uniform scaling \a s */
|
||||
inline RotationMatrixType operator*(const UniformScaling<Scalar>& s) const
|
||||
EIGEN_DEVICE_FUNC inline RotationMatrixType operator*(const UniformScaling<Scalar>& s) const
|
||||
{ return toRotationMatrix() * s.factor(); }
|
||||
|
||||
/** \returns the concatenation of the rotation \c *this with a generic expression \a e
|
||||
@@ -67,17 +67,17 @@ class RotationBase
|
||||
* - a vector of size Dim
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE typename internal::rotation_base_generic_product_selector<Derived,OtherDerived,OtherDerived::IsVectorAtCompileTime>::ReturnType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::rotation_base_generic_product_selector<Derived,OtherDerived,OtherDerived::IsVectorAtCompileTime>::ReturnType
|
||||
operator*(const EigenBase<OtherDerived>& e) const
|
||||
{ return internal::rotation_base_generic_product_selector<Derived,OtherDerived>::run(derived(), e.derived()); }
|
||||
|
||||
/** \returns the concatenation of a linear transformation \a l with the rotation \a r */
|
||||
template<typename OtherDerived> friend
|
||||
inline RotationMatrixType operator*(const EigenBase<OtherDerived>& l, const Derived& r)
|
||||
EIGEN_DEVICE_FUNC inline RotationMatrixType operator*(const EigenBase<OtherDerived>& l, const Derived& r)
|
||||
{ return l.derived() * r.toRotationMatrix(); }
|
||||
|
||||
/** \returns the concatenation of a scaling \a l with the rotation \a r */
|
||||
friend inline Transform<Scalar,Dim,Affine> operator*(const DiagonalMatrix<Scalar,Dim>& l, const Derived& r)
|
||||
EIGEN_DEVICE_FUNC friend inline Transform<Scalar,Dim,Affine> operator*(const DiagonalMatrix<Scalar,Dim>& l, const Derived& r)
|
||||
{
|
||||
Transform<Scalar,Dim,Affine> res(r);
|
||||
res.linear().applyOnTheLeft(l);
|
||||
@@ -86,11 +86,11 @@ class RotationBase
|
||||
|
||||
/** \returns the concatenation of the rotation \c *this with a transformation \a t */
|
||||
template<int Mode, int Options>
|
||||
inline Transform<Scalar,Dim,Mode> operator*(const Transform<Scalar,Dim,Mode,Options>& t) const
|
||||
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode> operator*(const Transform<Scalar,Dim,Mode,Options>& t) const
|
||||
{ return toRotationMatrix() * t; }
|
||||
|
||||
template<typename OtherVectorType>
|
||||
inline VectorType _transformVector(const OtherVectorType& v) const
|
||||
EIGEN_DEVICE_FUNC inline VectorType _transformVector(const OtherVectorType& v) const
|
||||
{ return toRotationMatrix() * v; }
|
||||
};
|
||||
|
||||
@@ -102,7 +102,7 @@ struct rotation_base_generic_product_selector<RotationDerived,MatrixType,false>
|
||||
{
|
||||
enum { Dim = RotationDerived::Dim };
|
||||
typedef Matrix<typename RotationDerived::Scalar,Dim,Dim> ReturnType;
|
||||
static inline ReturnType run(const RotationDerived& r, const MatrixType& m)
|
||||
EIGEN_DEVICE_FUNC static inline ReturnType run(const RotationDerived& r, const MatrixType& m)
|
||||
{ return r.toRotationMatrix() * m; }
|
||||
};
|
||||
|
||||
@@ -110,7 +110,7 @@ template<typename RotationDerived, typename Scalar, int Dim, int MaxDim>
|
||||
struct rotation_base_generic_product_selector< RotationDerived, DiagonalMatrix<Scalar,Dim,MaxDim>, false >
|
||||
{
|
||||
typedef Transform<Scalar,Dim,Affine> ReturnType;
|
||||
static inline ReturnType run(const RotationDerived& r, const DiagonalMatrix<Scalar,Dim,MaxDim>& m)
|
||||
EIGEN_DEVICE_FUNC static inline ReturnType run(const RotationDerived& r, const DiagonalMatrix<Scalar,Dim,MaxDim>& m)
|
||||
{
|
||||
ReturnType res(r);
|
||||
res.linear() *= m;
|
||||
@@ -123,7 +123,7 @@ struct rotation_base_generic_product_selector<RotationDerived,OtherVectorType,tr
|
||||
{
|
||||
enum { Dim = RotationDerived::Dim };
|
||||
typedef Matrix<typename RotationDerived::Scalar,Dim,1> ReturnType;
|
||||
static EIGEN_STRONG_INLINE ReturnType run(const RotationDerived& r, const OtherVectorType& v)
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE ReturnType run(const RotationDerived& r, const OtherVectorType& v)
|
||||
{
|
||||
return r._transformVector(v);
|
||||
}
|
||||
@@ -137,7 +137,7 @@ struct rotation_base_generic_product_selector<RotationDerived,OtherVectorType,tr
|
||||
*/
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Storage, int _MaxRows, int _MaxCols>
|
||||
template<typename OtherDerived>
|
||||
Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
|
||||
EIGEN_DEVICE_FUNC Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
|
||||
::Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim))
|
||||
@@ -150,7 +150,7 @@ Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
|
||||
*/
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Storage, int _MaxRows, int _MaxCols>
|
||||
template<typename OtherDerived>
|
||||
Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>&
|
||||
EIGEN_DEVICE_FUNC Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>&
|
||||
Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
|
||||
::operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
|
||||
{
|
||||
@@ -179,20 +179,20 @@ namespace internal {
|
||||
* \sa class Transform, class Rotation2D, class Quaternion, class AngleAxis
|
||||
*/
|
||||
template<typename Scalar, int Dim>
|
||||
static inline Matrix<Scalar,2,2> toRotationMatrix(const Scalar& s)
|
||||
EIGEN_DEVICE_FUNC static inline Matrix<Scalar,2,2> toRotationMatrix(const Scalar& s)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Dim==2,YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
return Rotation2D<Scalar>(s).toRotationMatrix();
|
||||
}
|
||||
|
||||
template<typename Scalar, int Dim, typename OtherDerived>
|
||||
static inline Matrix<Scalar,Dim,Dim> toRotationMatrix(const RotationBase<OtherDerived,Dim>& r)
|
||||
EIGEN_DEVICE_FUNC static inline Matrix<Scalar,Dim,Dim> toRotationMatrix(const RotationBase<OtherDerived,Dim>& r)
|
||||
{
|
||||
return r.toRotationMatrix();
|
||||
}
|
||||
|
||||
template<typename Scalar, int Dim, typename OtherDerived>
|
||||
static inline const MatrixBase<OtherDerived>& toRotationMatrix(const MatrixBase<OtherDerived>& mat)
|
||||
EIGEN_DEVICE_FUNC static inline const MatrixBase<OtherDerived>& toRotationMatrix(const MatrixBase<OtherDerived>& mat)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(OtherDerived::RowsAtCompileTime==Dim && OtherDerived::ColsAtCompileTime==Dim,
|
||||
YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
|
||||
12
Eigen/src/Geometry/Scaling.h
Normal file → Executable file
12
Eigen/src/Geometry/Scaling.h
Normal file → Executable file
@@ -118,28 +118,28 @@ operator*(const MatrixBase<Derived>& matrix, const UniformScaling<Scalar>& s)
|
||||
{ return matrix.derived() * s.factor(); }
|
||||
|
||||
/** Constructs a uniform scaling from scale factor \a s */
|
||||
static inline UniformScaling<float> Scaling(float s) { return UniformScaling<float>(s); }
|
||||
inline UniformScaling<float> Scaling(float s) { return UniformScaling<float>(s); }
|
||||
/** Constructs a uniform scaling from scale factor \a s */
|
||||
static inline UniformScaling<double> Scaling(double s) { return UniformScaling<double>(s); }
|
||||
inline UniformScaling<double> Scaling(double s) { return UniformScaling<double>(s); }
|
||||
/** Constructs a uniform scaling from scale factor \a s */
|
||||
template<typename RealScalar>
|
||||
static inline UniformScaling<std::complex<RealScalar> > Scaling(const std::complex<RealScalar>& s)
|
||||
inline UniformScaling<std::complex<RealScalar> > Scaling(const std::complex<RealScalar>& s)
|
||||
{ return UniformScaling<std::complex<RealScalar> >(s); }
|
||||
|
||||
/** Constructs a 2D axis aligned scaling */
|
||||
template<typename Scalar>
|
||||
static inline DiagonalMatrix<Scalar,2> Scaling(const Scalar& sx, const Scalar& sy)
|
||||
inline DiagonalMatrix<Scalar,2> Scaling(const Scalar& sx, const Scalar& sy)
|
||||
{ return DiagonalMatrix<Scalar,2>(sx, sy); }
|
||||
/** Constructs a 3D axis aligned scaling */
|
||||
template<typename Scalar>
|
||||
static inline DiagonalMatrix<Scalar,3> Scaling(const Scalar& sx, const Scalar& sy, const Scalar& sz)
|
||||
inline DiagonalMatrix<Scalar,3> Scaling(const Scalar& sx, const Scalar& sy, const Scalar& sz)
|
||||
{ return DiagonalMatrix<Scalar,3>(sx, sy, sz); }
|
||||
|
||||
/** Constructs an axis aligned scaling expression from vector expression \a coeffs
|
||||
* This is an alias for coeffs.asDiagonal()
|
||||
*/
|
||||
template<typename Derived>
|
||||
static inline const DiagonalWrapper<const Derived> Scaling(const MatrixBase<Derived>& coeffs)
|
||||
inline const DiagonalWrapper<const Derived> Scaling(const MatrixBase<Derived>& coeffs)
|
||||
{ return coeffs.asDiagonal(); }
|
||||
|
||||
/** \deprecated */
|
||||
|
||||
@@ -253,43 +253,43 @@ public:
|
||||
|
||||
/** Default constructor without initialization of the meaningful coefficients.
|
||||
* If Mode==Affine, then the last row is set to [0 ... 0 1] */
|
||||
inline Transform()
|
||||
EIGEN_DEVICE_FUNC inline Transform()
|
||||
{
|
||||
check_template_params();
|
||||
internal::transform_make_affine<(int(Mode)==Affine) ? Affine : AffineCompact>::run(m_matrix);
|
||||
}
|
||||
|
||||
inline Transform(const Transform& other)
|
||||
EIGEN_DEVICE_FUNC inline Transform(const Transform& other)
|
||||
{
|
||||
check_template_params();
|
||||
m_matrix = other.m_matrix;
|
||||
}
|
||||
|
||||
inline explicit Transform(const TranslationType& t)
|
||||
EIGEN_DEVICE_FUNC inline explicit Transform(const TranslationType& t)
|
||||
{
|
||||
check_template_params();
|
||||
*this = t;
|
||||
}
|
||||
inline explicit Transform(const UniformScaling<Scalar>& s)
|
||||
EIGEN_DEVICE_FUNC inline explicit Transform(const UniformScaling<Scalar>& s)
|
||||
{
|
||||
check_template_params();
|
||||
*this = s;
|
||||
}
|
||||
template<typename Derived>
|
||||
inline explicit Transform(const RotationBase<Derived, Dim>& r)
|
||||
EIGEN_DEVICE_FUNC inline explicit Transform(const RotationBase<Derived, Dim>& r)
|
||||
{
|
||||
check_template_params();
|
||||
*this = r;
|
||||
}
|
||||
|
||||
inline Transform& operator=(const Transform& other)
|
||||
EIGEN_DEVICE_FUNC inline Transform& operator=(const Transform& other)
|
||||
{ m_matrix = other.m_matrix; return *this; }
|
||||
|
||||
typedef internal::transform_take_affine_part<Transform> take_affine_part;
|
||||
|
||||
/** Constructs and initializes a transformation from a Dim^2 or a (Dim+1)^2 matrix. */
|
||||
template<typename OtherDerived>
|
||||
inline explicit Transform(const EigenBase<OtherDerived>& other)
|
||||
EIGEN_DEVICE_FUNC inline explicit Transform(const EigenBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Scalar,typename OtherDerived::Scalar>::value),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY);
|
||||
@@ -300,7 +300,7 @@ public:
|
||||
|
||||
/** Set \c *this from a Dim^2 or (Dim+1)^2 matrix. */
|
||||
template<typename OtherDerived>
|
||||
inline Transform& operator=(const EigenBase<OtherDerived>& other)
|
||||
EIGEN_DEVICE_FUNC inline Transform& operator=(const EigenBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Scalar,typename OtherDerived::Scalar>::value),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY);
|
||||
@@ -310,7 +310,7 @@ public:
|
||||
}
|
||||
|
||||
template<int OtherOptions>
|
||||
inline Transform(const Transform<Scalar,Dim,Mode,OtherOptions>& other)
|
||||
EIGEN_DEVICE_FUNC inline Transform(const Transform<Scalar,Dim,Mode,OtherOptions>& other)
|
||||
{
|
||||
check_template_params();
|
||||
// only the options change, we can directly copy the matrices
|
||||
@@ -318,7 +318,7 @@ public:
|
||||
}
|
||||
|
||||
template<int OtherMode,int OtherOptions>
|
||||
inline Transform(const Transform<Scalar,Dim,OtherMode,OtherOptions>& other)
|
||||
EIGEN_DEVICE_FUNC inline Transform(const Transform<Scalar,Dim,OtherMode,OtherOptions>& other)
|
||||
{
|
||||
check_template_params();
|
||||
// prevent conversions as:
|
||||
@@ -359,14 +359,14 @@ public:
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
Transform(const ReturnByValue<OtherDerived>& other)
|
||||
EIGEN_DEVICE_FUNC Transform(const ReturnByValue<OtherDerived>& other)
|
||||
{
|
||||
check_template_params();
|
||||
other.evalTo(*this);
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
Transform& operator=(const ReturnByValue<OtherDerived>& other)
|
||||
EIGEN_DEVICE_FUNC Transform& operator=(const ReturnByValue<OtherDerived>& other)
|
||||
{
|
||||
other.evalTo(*this);
|
||||
return *this;
|
||||
@@ -381,35 +381,35 @@ public:
|
||||
inline QTransform toQTransform(void) const;
|
||||
#endif
|
||||
|
||||
Index rows() const { return int(Mode)==int(Projective) ? m_matrix.cols() : (m_matrix.cols()-1); }
|
||||
Index cols() const { return m_matrix.cols(); }
|
||||
EIGEN_DEVICE_FUNC Index rows() const { return int(Mode)==int(Projective) ? m_matrix.cols() : (m_matrix.cols()-1); }
|
||||
EIGEN_DEVICE_FUNC Index cols() const { return m_matrix.cols(); }
|
||||
|
||||
/** shortcut for m_matrix(row,col);
|
||||
* \sa MatrixBase::operator(Index,Index) const */
|
||||
inline Scalar operator() (Index row, Index col) const { return m_matrix(row,col); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar operator() (Index row, Index col) const { return m_matrix(row,col); }
|
||||
/** shortcut for m_matrix(row,col);
|
||||
* \sa MatrixBase::operator(Index,Index) */
|
||||
inline Scalar& operator() (Index row, Index col) { return m_matrix(row,col); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar& operator() (Index row, Index col) { return m_matrix(row,col); }
|
||||
|
||||
/** \returns a read-only expression of the transformation matrix */
|
||||
inline const MatrixType& matrix() const { return m_matrix; }
|
||||
EIGEN_DEVICE_FUNC inline const MatrixType& matrix() const { return m_matrix; }
|
||||
/** \returns a writable expression of the transformation matrix */
|
||||
inline MatrixType& matrix() { return m_matrix; }
|
||||
EIGEN_DEVICE_FUNC inline MatrixType& matrix() { return m_matrix; }
|
||||
|
||||
/** \returns a read-only expression of the linear part of the transformation */
|
||||
inline ConstLinearPart linear() const { return ConstLinearPart(m_matrix,0,0); }
|
||||
EIGEN_DEVICE_FUNC inline ConstLinearPart linear() const { return ConstLinearPart(m_matrix,0,0); }
|
||||
/** \returns a writable expression of the linear part of the transformation */
|
||||
inline LinearPart linear() { return LinearPart(m_matrix,0,0); }
|
||||
EIGEN_DEVICE_FUNC inline LinearPart linear() { return LinearPart(m_matrix,0,0); }
|
||||
|
||||
/** \returns a read-only expression of the Dim x HDim affine part of the transformation */
|
||||
inline ConstAffinePart affine() const { return take_affine_part::run(m_matrix); }
|
||||
EIGEN_DEVICE_FUNC inline ConstAffinePart affine() const { return take_affine_part::run(m_matrix); }
|
||||
/** \returns a writable expression of the Dim x HDim affine part of the transformation */
|
||||
inline AffinePart affine() { return take_affine_part::run(m_matrix); }
|
||||
EIGEN_DEVICE_FUNC inline AffinePart affine() { return take_affine_part::run(m_matrix); }
|
||||
|
||||
/** \returns a read-only expression of the translation vector of the transformation */
|
||||
inline ConstTranslationPart translation() const { return ConstTranslationPart(m_matrix,0,Dim); }
|
||||
EIGEN_DEVICE_FUNC inline ConstTranslationPart translation() const { return ConstTranslationPart(m_matrix,0,Dim); }
|
||||
/** \returns a writable expression of the translation vector of the transformation */
|
||||
inline TranslationPart translation() { return TranslationPart(m_matrix,0,Dim); }
|
||||
EIGEN_DEVICE_FUNC inline TranslationPart translation() { return TranslationPart(m_matrix,0,Dim); }
|
||||
|
||||
/** \returns an expression of the product between the transform \c *this and a matrix expression \a other.
|
||||
*
|
||||
@@ -437,7 +437,7 @@ public:
|
||||
*/
|
||||
// note: this function is defined here because some compilers cannot find the respective declaration
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE const typename internal::transform_right_product_impl<Transform, OtherDerived>::ResultType
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename internal::transform_right_product_impl<Transform, OtherDerived>::ResultType
|
||||
operator * (const EigenBase<OtherDerived> &other) const
|
||||
{ return internal::transform_right_product_impl<Transform, OtherDerived>::run(*this,other.derived()); }
|
||||
|
||||
@@ -449,7 +449,7 @@ public:
|
||||
* \li a general transformation matrix of size Dim+1 x Dim+1.
|
||||
*/
|
||||
template<typename OtherDerived> friend
|
||||
inline const typename internal::transform_left_product_impl<OtherDerived,Mode,Options,_Dim,_Dim+1>::ResultType
|
||||
EIGEN_DEVICE_FUNC inline const typename internal::transform_left_product_impl<OtherDerived,Mode,Options,_Dim,_Dim+1>::ResultType
|
||||
operator * (const EigenBase<OtherDerived> &a, const Transform &b)
|
||||
{ return internal::transform_left_product_impl<OtherDerived,Mode,Options,Dim,HDim>::run(a.derived(),b); }
|
||||
|
||||
@@ -460,11 +460,11 @@ public:
|
||||
* mode is no isometry. In that case, the returned transform is an affinity.
|
||||
*/
|
||||
template<typename DiagonalDerived>
|
||||
inline const TransformTimeDiagonalReturnType
|
||||
EIGEN_DEVICE_FUNC inline const TransformTimeDiagonalReturnType
|
||||
operator * (const DiagonalBase<DiagonalDerived> &b) const
|
||||
{
|
||||
TransformTimeDiagonalReturnType res(*this);
|
||||
res.linear() *= b;
|
||||
res.linearExt() *= b;
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -475,7 +475,7 @@ public:
|
||||
* mode is no isometry. In that case, the returned transform is an affinity.
|
||||
*/
|
||||
template<typename DiagonalDerived>
|
||||
friend inline TransformTimeDiagonalReturnType
|
||||
EIGEN_DEVICE_FUNC friend inline TransformTimeDiagonalReturnType
|
||||
operator * (const DiagonalBase<DiagonalDerived> &a, const Transform &b)
|
||||
{
|
||||
TransformTimeDiagonalReturnType res;
|
||||
@@ -487,10 +487,10 @@ public:
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
inline Transform& operator*=(const EigenBase<OtherDerived>& other) { return *this = *this * other; }
|
||||
EIGEN_DEVICE_FUNC inline Transform& operator*=(const EigenBase<OtherDerived>& other) { return *this = *this * other; }
|
||||
|
||||
/** Concatenates two transformations */
|
||||
inline const Transform operator * (const Transform& other) const
|
||||
EIGEN_DEVICE_FUNC inline const Transform operator * (const Transform& other) const
|
||||
{
|
||||
return internal::transform_transform_product_impl<Transform,Transform>::run(*this,other);
|
||||
}
|
||||
@@ -522,7 +522,7 @@ public:
|
||||
#else
|
||||
/** Concatenates two different transformations */
|
||||
template<int OtherMode,int OtherOptions>
|
||||
inline typename internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::ResultType
|
||||
EIGEN_DEVICE_FUNC inline typename internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::ResultType
|
||||
operator * (const Transform<Scalar,Dim,OtherMode,OtherOptions>& other) const
|
||||
{
|
||||
return internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::run(*this,other);
|
||||
@@ -530,47 +530,61 @@ public:
|
||||
#endif
|
||||
|
||||
/** \sa MatrixBase::setIdentity() */
|
||||
void setIdentity() { m_matrix.setIdentity(); }
|
||||
EIGEN_DEVICE_FUNC void setIdentity() { m_matrix.setIdentity(); }
|
||||
|
||||
/**
|
||||
* \brief Returns an identity transformation.
|
||||
* \todo In the future this function should be returning a Transform expression.
|
||||
*/
|
||||
static const Transform Identity()
|
||||
EIGEN_DEVICE_FUNC static const Transform Identity()
|
||||
{
|
||||
return Transform(MatrixType::Identity());
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Transform& scale(const MatrixBase<OtherDerived> &other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Transform& prescale(const MatrixBase<OtherDerived> &other);
|
||||
|
||||
inline Transform& scale(const Scalar& s);
|
||||
inline Transform& prescale(const Scalar& s);
|
||||
EIGEN_DEVICE_FUNC inline Transform& scale(const Scalar& s);
|
||||
EIGEN_DEVICE_FUNC inline Transform& prescale(const Scalar& s);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Transform& translate(const MatrixBase<OtherDerived> &other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Transform& pretranslate(const MatrixBase<OtherDerived> &other);
|
||||
|
||||
template<typename RotationType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Transform& rotate(const RotationType& rotation);
|
||||
|
||||
template<typename RotationType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Transform& prerotate(const RotationType& rotation);
|
||||
|
||||
Transform& shear(const Scalar& sx, const Scalar& sy);
|
||||
Transform& preshear(const Scalar& sx, const Scalar& sy);
|
||||
EIGEN_DEVICE_FUNC Transform& shear(const Scalar& sx, const Scalar& sy);
|
||||
EIGEN_DEVICE_FUNC Transform& preshear(const Scalar& sx, const Scalar& sy);
|
||||
|
||||
inline Transform& operator=(const TranslationType& t);
|
||||
EIGEN_DEVICE_FUNC inline Transform& operator=(const TranslationType& t);
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Transform& operator*=(const TranslationType& t) { return translate(t.vector()); }
|
||||
inline Transform operator*(const TranslationType& t) const;
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Transform operator*(const TranslationType& t) const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Transform& operator=(const UniformScaling<Scalar>& t);
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Transform& operator*=(const UniformScaling<Scalar>& s) { return scale(s.factor()); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline TransformTimeDiagonalReturnType operator*(const UniformScaling<Scalar>& s) const
|
||||
{
|
||||
TransformTimeDiagonalReturnType res = *this;
|
||||
@@ -578,31 +592,36 @@ public:
|
||||
return res;
|
||||
}
|
||||
|
||||
inline Transform& operator*=(const DiagonalMatrix<Scalar,Dim>& s) { linear() *= s; return *this; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Transform& operator*=(const DiagonalMatrix<Scalar,Dim>& s) { linearExt() *= s; return *this; }
|
||||
|
||||
template<typename Derived>
|
||||
inline Transform& operator=(const RotationBase<Derived,Dim>& r);
|
||||
EIGEN_DEVICE_FUNC inline Transform& operator=(const RotationBase<Derived,Dim>& r);
|
||||
template<typename Derived>
|
||||
inline Transform& operator*=(const RotationBase<Derived,Dim>& r) { return rotate(r.toRotationMatrix()); }
|
||||
EIGEN_DEVICE_FUNC inline Transform& operator*=(const RotationBase<Derived,Dim>& r) { return rotate(r.toRotationMatrix()); }
|
||||
template<typename Derived>
|
||||
inline Transform operator*(const RotationBase<Derived,Dim>& r) const;
|
||||
EIGEN_DEVICE_FUNC inline Transform operator*(const RotationBase<Derived,Dim>& r) const;
|
||||
|
||||
const LinearMatrixType rotation() const;
|
||||
EIGEN_DEVICE_FUNC const LinearMatrixType rotation() const;
|
||||
template<typename RotationMatrixType, typename ScalingMatrixType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const;
|
||||
template<typename ScalingMatrixType, typename RotationMatrixType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const;
|
||||
|
||||
template<typename PositionDerived, typename OrientationType, typename ScaleDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Transform& fromPositionOrientationScale(const MatrixBase<PositionDerived> &position,
|
||||
const OrientationType& orientation, const MatrixBase<ScaleDerived> &scale);
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Transform inverse(TransformTraits traits = (TransformTraits)Mode) const;
|
||||
|
||||
/** \returns a const pointer to the column major internal matrix */
|
||||
const Scalar* data() const { return m_matrix.data(); }
|
||||
EIGEN_DEVICE_FUNC const Scalar* data() const { return m_matrix.data(); }
|
||||
/** \returns a non-const pointer to the column major internal matrix */
|
||||
Scalar* data() { return m_matrix.data(); }
|
||||
EIGEN_DEVICE_FUNC Scalar* data() { return m_matrix.data(); }
|
||||
|
||||
/** \returns \c *this with scalar type casted to \a NewScalarType
|
||||
*
|
||||
@@ -610,12 +629,12 @@ public:
|
||||
* then this function smartly returns a const reference to \c *this.
|
||||
*/
|
||||
template<typename NewScalarType>
|
||||
inline typename internal::cast_return_type<Transform,Transform<NewScalarType,Dim,Mode,Options> >::type cast() const
|
||||
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Transform,Transform<NewScalarType,Dim,Mode,Options> >::type cast() const
|
||||
{ return typename internal::cast_return_type<Transform,Transform<NewScalarType,Dim,Mode,Options> >::type(*this); }
|
||||
|
||||
/** Copy constructor with scalar type conversion */
|
||||
template<typename OtherScalarType>
|
||||
inline explicit Transform(const Transform<OtherScalarType,Dim,Mode,Options>& other)
|
||||
EIGEN_DEVICE_FUNC inline explicit Transform(const Transform<OtherScalarType,Dim,Mode,Options>& other)
|
||||
{
|
||||
check_template_params();
|
||||
m_matrix = other.matrix().template cast<Scalar>();
|
||||
@@ -625,12 +644,12 @@ public:
|
||||
* determined by \a prec.
|
||||
*
|
||||
* \sa MatrixBase::isApprox() */
|
||||
bool isApprox(const Transform& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
|
||||
EIGEN_DEVICE_FUNC bool isApprox(const Transform& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
|
||||
{ return m_matrix.isApprox(other.m_matrix, prec); }
|
||||
|
||||
/** Sets the last row to [0 ... 0 1]
|
||||
*/
|
||||
void makeAffine()
|
||||
EIGEN_DEVICE_FUNC void makeAffine()
|
||||
{
|
||||
internal::transform_make_affine<int(Mode)>::run(m_matrix);
|
||||
}
|
||||
@@ -639,26 +658,26 @@ public:
|
||||
* \returns the Dim x Dim linear part if the transformation is affine,
|
||||
* and the HDim x Dim part for projective transformations.
|
||||
*/
|
||||
inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt()
|
||||
EIGEN_DEVICE_FUNC inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt()
|
||||
{ return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,Dim>(0,0); }
|
||||
/** \internal
|
||||
* \returns the Dim x Dim linear part if the transformation is affine,
|
||||
* and the HDim x Dim part for projective transformations.
|
||||
*/
|
||||
inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt() const
|
||||
EIGEN_DEVICE_FUNC inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt() const
|
||||
{ return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,Dim>(0,0); }
|
||||
|
||||
/** \internal
|
||||
* \returns the translation part if the transformation is affine,
|
||||
* and the last column for projective transformations.
|
||||
*/
|
||||
inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt()
|
||||
EIGEN_DEVICE_FUNC inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt()
|
||||
{ return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,1>(0,Dim); }
|
||||
/** \internal
|
||||
* \returns the translation part if the transformation is affine,
|
||||
* and the last column for projective transformations.
|
||||
*/
|
||||
inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt() const
|
||||
EIGEN_DEVICE_FUNC inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt() const
|
||||
{ return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,1>(0,Dim); }
|
||||
|
||||
|
||||
@@ -668,7 +687,7 @@ public:
|
||||
|
||||
protected:
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
static EIGEN_STRONG_INLINE void check_template_params()
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void check_template_params()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((Options & (DontAlign|RowMajor)) == Options, INVALID_MATRIX_TEMPLATE_PARAMETERS)
|
||||
}
|
||||
@@ -821,7 +840,7 @@ QTransform Transform<Scalar,Dim,Mode,Options>::toQTransform(void) const
|
||||
*/
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
template<typename OtherDerived>
|
||||
Transform<Scalar,Dim,Mode,Options>&
|
||||
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
|
||||
Transform<Scalar,Dim,Mode,Options>::scale(const MatrixBase<OtherDerived> &other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
|
||||
@@ -835,7 +854,7 @@ Transform<Scalar,Dim,Mode,Options>::scale(const MatrixBase<OtherDerived> &other)
|
||||
* \sa prescale(Scalar)
|
||||
*/
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::scale(const Scalar& s)
|
||||
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::scale(const Scalar& s)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
|
||||
linearExt() *= s;
|
||||
@@ -848,12 +867,12 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::s
|
||||
*/
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
template<typename OtherDerived>
|
||||
Transform<Scalar,Dim,Mode,Options>&
|
||||
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
|
||||
Transform<Scalar,Dim,Mode,Options>::prescale(const MatrixBase<OtherDerived> &other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
|
||||
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
|
||||
m_matrix.template block<Dim,HDim>(0,0).noalias() = (other.asDiagonal() * m_matrix.template block<Dim,HDim>(0,0));
|
||||
affine().noalias() = (other.asDiagonal() * affine());
|
||||
return *this;
|
||||
}
|
||||
|
||||
@@ -862,7 +881,7 @@ Transform<Scalar,Dim,Mode,Options>::prescale(const MatrixBase<OtherDerived> &oth
|
||||
* \sa scale(Scalar)
|
||||
*/
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::prescale(const Scalar& s)
|
||||
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::prescale(const Scalar& s)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
|
||||
m_matrix.template topRows<Dim>() *= s;
|
||||
@@ -875,7 +894,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::p
|
||||
*/
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
template<typename OtherDerived>
|
||||
Transform<Scalar,Dim,Mode,Options>&
|
||||
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
|
||||
Transform<Scalar,Dim,Mode,Options>::translate(const MatrixBase<OtherDerived> &other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
|
||||
@@ -889,7 +908,7 @@ Transform<Scalar,Dim,Mode,Options>::translate(const MatrixBase<OtherDerived> &ot
|
||||
*/
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
template<typename OtherDerived>
|
||||
Transform<Scalar,Dim,Mode,Options>&
|
||||
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
|
||||
Transform<Scalar,Dim,Mode,Options>::pretranslate(const MatrixBase<OtherDerived> &other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
|
||||
@@ -919,7 +938,7 @@ Transform<Scalar,Dim,Mode,Options>::pretranslate(const MatrixBase<OtherDerived>
|
||||
*/
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
template<typename RotationType>
|
||||
Transform<Scalar,Dim,Mode,Options>&
|
||||
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
|
||||
Transform<Scalar,Dim,Mode,Options>::rotate(const RotationType& rotation)
|
||||
{
|
||||
linearExt() *= internal::toRotationMatrix<Scalar,Dim>(rotation);
|
||||
@@ -935,7 +954,7 @@ Transform<Scalar,Dim,Mode,Options>::rotate(const RotationType& rotation)
|
||||
*/
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
template<typename RotationType>
|
||||
Transform<Scalar,Dim,Mode,Options>&
|
||||
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
|
||||
Transform<Scalar,Dim,Mode,Options>::prerotate(const RotationType& rotation)
|
||||
{
|
||||
m_matrix.template block<Dim,HDim>(0,0) = internal::toRotationMatrix<Scalar,Dim>(rotation)
|
||||
@@ -949,7 +968,7 @@ Transform<Scalar,Dim,Mode,Options>::prerotate(const RotationType& rotation)
|
||||
* \sa preshear()
|
||||
*/
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
Transform<Scalar,Dim,Mode,Options>&
|
||||
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
|
||||
Transform<Scalar,Dim,Mode,Options>::shear(const Scalar& sx, const Scalar& sy)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
@@ -965,7 +984,7 @@ Transform<Scalar,Dim,Mode,Options>::shear(const Scalar& sx, const Scalar& sy)
|
||||
* \sa shear()
|
||||
*/
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
Transform<Scalar,Dim,Mode,Options>&
|
||||
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
|
||||
Transform<Scalar,Dim,Mode,Options>::preshear(const Scalar& sx, const Scalar& sy)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
@@ -979,7 +998,7 @@ Transform<Scalar,Dim,Mode,Options>::preshear(const Scalar& sx, const Scalar& sy)
|
||||
******************************************************/
|
||||
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const TranslationType& t)
|
||||
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const TranslationType& t)
|
||||
{
|
||||
linear().setIdentity();
|
||||
translation() = t.vector();
|
||||
@@ -988,7 +1007,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::o
|
||||
}
|
||||
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const TranslationType& t) const
|
||||
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const TranslationType& t) const
|
||||
{
|
||||
Transform res = *this;
|
||||
res.translate(t.vector());
|
||||
@@ -996,7 +1015,7 @@ inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::op
|
||||
}
|
||||
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const UniformScaling<Scalar>& s)
|
||||
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const UniformScaling<Scalar>& s)
|
||||
{
|
||||
m_matrix.setZero();
|
||||
linear().diagonal().fill(s.factor());
|
||||
@@ -1006,7 +1025,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::o
|
||||
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
template<typename Derived>
|
||||
inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const RotationBase<Derived,Dim>& r)
|
||||
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const RotationBase<Derived,Dim>& r)
|
||||
{
|
||||
linear() = internal::toRotationMatrix<Scalar,Dim>(r);
|
||||
translation().setZero();
|
||||
@@ -1016,7 +1035,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::o
|
||||
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
template<typename Derived>
|
||||
inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const RotationBase<Derived,Dim>& r) const
|
||||
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const RotationBase<Derived,Dim>& r) const
|
||||
{
|
||||
Transform res = *this;
|
||||
res.rotate(r.derived());
|
||||
@@ -1035,7 +1054,7 @@ inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::op
|
||||
* \sa computeRotationScaling(), computeScalingRotation(), class SVD
|
||||
*/
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
const typename Transform<Scalar,Dim,Mode,Options>::LinearMatrixType
|
||||
EIGEN_DEVICE_FUNC const typename Transform<Scalar,Dim,Mode,Options>::LinearMatrixType
|
||||
Transform<Scalar,Dim,Mode,Options>::rotation() const
|
||||
{
|
||||
LinearMatrixType result;
|
||||
@@ -1057,7 +1076,7 @@ Transform<Scalar,Dim,Mode,Options>::rotation() const
|
||||
*/
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
template<typename RotationMatrixType, typename ScalingMatrixType>
|
||||
void Transform<Scalar,Dim,Mode,Options>::computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const
|
||||
EIGEN_DEVICE_FUNC void Transform<Scalar,Dim,Mode,Options>::computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const
|
||||
{
|
||||
JacobiSVD<LinearMatrixType> svd(linear(), ComputeFullU | ComputeFullV);
|
||||
|
||||
@@ -1086,7 +1105,7 @@ void Transform<Scalar,Dim,Mode,Options>::computeRotationScaling(RotationMatrixTy
|
||||
*/
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
template<typename ScalingMatrixType, typename RotationMatrixType>
|
||||
void Transform<Scalar,Dim,Mode,Options>::computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const
|
||||
EIGEN_DEVICE_FUNC void Transform<Scalar,Dim,Mode,Options>::computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const
|
||||
{
|
||||
JacobiSVD<LinearMatrixType> svd(linear(), ComputeFullU | ComputeFullV);
|
||||
|
||||
@@ -1107,7 +1126,7 @@ void Transform<Scalar,Dim,Mode,Options>::computeScalingRotation(ScalingMatrixTyp
|
||||
*/
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
template<typename PositionDerived, typename OrientationType, typename ScaleDerived>
|
||||
Transform<Scalar,Dim,Mode,Options>&
|
||||
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
|
||||
Transform<Scalar,Dim,Mode,Options>::fromPositionOrientationScale(const MatrixBase<PositionDerived> &position,
|
||||
const OrientationType& orientation, const MatrixBase<ScaleDerived> &scale)
|
||||
{
|
||||
@@ -1124,7 +1143,7 @@ template<int Mode>
|
||||
struct transform_make_affine
|
||||
{
|
||||
template<typename MatrixType>
|
||||
static void run(MatrixType &mat)
|
||||
EIGEN_DEVICE_FUNC static void run(MatrixType &mat)
|
||||
{
|
||||
static const int Dim = MatrixType::ColsAtCompileTime-1;
|
||||
mat.template block<1,Dim>(Dim,0).setZero();
|
||||
@@ -1135,21 +1154,21 @@ struct transform_make_affine
|
||||
template<>
|
||||
struct transform_make_affine<AffineCompact>
|
||||
{
|
||||
template<typename MatrixType> static void run(MatrixType &) { }
|
||||
template<typename MatrixType> EIGEN_DEVICE_FUNC static void run(MatrixType &) { }
|
||||
};
|
||||
|
||||
// selector needed to avoid taking the inverse of a 3x4 matrix
|
||||
template<typename TransformType, int Mode=TransformType::Mode>
|
||||
struct projective_transform_inverse
|
||||
{
|
||||
static inline void run(const TransformType&, TransformType&)
|
||||
EIGEN_DEVICE_FUNC static inline void run(const TransformType&, TransformType&)
|
||||
{}
|
||||
};
|
||||
|
||||
template<typename TransformType>
|
||||
struct projective_transform_inverse<TransformType, Projective>
|
||||
{
|
||||
static inline void run(const TransformType& m, TransformType& res)
|
||||
EIGEN_DEVICE_FUNC static inline void run(const TransformType& m, TransformType& res)
|
||||
{
|
||||
res.matrix() = m.matrix().inverse();
|
||||
}
|
||||
@@ -1179,7 +1198,7 @@ struct projective_transform_inverse<TransformType, Projective>
|
||||
* \sa MatrixBase::inverse()
|
||||
*/
|
||||
template<typename Scalar, int Dim, int Mode, int Options>
|
||||
Transform<Scalar,Dim,Mode,Options>
|
||||
EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>
|
||||
Transform<Scalar,Dim,Mode,Options>::inverse(TransformTraits hint) const
|
||||
{
|
||||
Transform res;
|
||||
|
||||
@@ -51,16 +51,16 @@ protected:
|
||||
public:
|
||||
|
||||
/** Default constructor without initialization. */
|
||||
Translation() {}
|
||||
EIGEN_DEVICE_FUNC Translation() {}
|
||||
/** */
|
||||
inline Translation(const Scalar& sx, const Scalar& sy)
|
||||
EIGEN_DEVICE_FUNC inline Translation(const Scalar& sx, const Scalar& sy)
|
||||
{
|
||||
eigen_assert(Dim==2);
|
||||
m_coeffs.x() = sx;
|
||||
m_coeffs.y() = sy;
|
||||
}
|
||||
/** */
|
||||
inline Translation(const Scalar& sx, const Scalar& sy, const Scalar& sz)
|
||||
EIGEN_DEVICE_FUNC inline Translation(const Scalar& sx, const Scalar& sy, const Scalar& sz)
|
||||
{
|
||||
eigen_assert(Dim==3);
|
||||
m_coeffs.x() = sx;
|
||||
@@ -68,48 +68,48 @@ public:
|
||||
m_coeffs.z() = sz;
|
||||
}
|
||||
/** Constructs and initialize the translation transformation from a vector of translation coefficients */
|
||||
explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {}
|
||||
EIGEN_DEVICE_FUNC explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {}
|
||||
|
||||
/** \brief Retruns the x-translation by value. **/
|
||||
inline Scalar x() const { return m_coeffs.x(); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar x() const { return m_coeffs.x(); }
|
||||
/** \brief Retruns the y-translation by value. **/
|
||||
inline Scalar y() const { return m_coeffs.y(); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar y() const { return m_coeffs.y(); }
|
||||
/** \brief Retruns the z-translation by value. **/
|
||||
inline Scalar z() const { return m_coeffs.z(); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar z() const { return m_coeffs.z(); }
|
||||
|
||||
/** \brief Retruns the x-translation as a reference. **/
|
||||
inline Scalar& x() { return m_coeffs.x(); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar& x() { return m_coeffs.x(); }
|
||||
/** \brief Retruns the y-translation as a reference. **/
|
||||
inline Scalar& y() { return m_coeffs.y(); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar& y() { return m_coeffs.y(); }
|
||||
/** \brief Retruns the z-translation as a reference. **/
|
||||
inline Scalar& z() { return m_coeffs.z(); }
|
||||
EIGEN_DEVICE_FUNC inline Scalar& z() { return m_coeffs.z(); }
|
||||
|
||||
const VectorType& vector() const { return m_coeffs; }
|
||||
VectorType& vector() { return m_coeffs; }
|
||||
EIGEN_DEVICE_FUNC const VectorType& vector() const { return m_coeffs; }
|
||||
EIGEN_DEVICE_FUNC VectorType& vector() { return m_coeffs; }
|
||||
|
||||
const VectorType& translation() const { return m_coeffs; }
|
||||
VectorType& translation() { return m_coeffs; }
|
||||
EIGEN_DEVICE_FUNC const VectorType& translation() const { return m_coeffs; }
|
||||
EIGEN_DEVICE_FUNC VectorType& translation() { return m_coeffs; }
|
||||
|
||||
/** Concatenates two translation */
|
||||
inline Translation operator* (const Translation& other) const
|
||||
EIGEN_DEVICE_FUNC inline Translation operator* (const Translation& other) const
|
||||
{ return Translation(m_coeffs + other.m_coeffs); }
|
||||
|
||||
/** Concatenates a translation and a uniform scaling */
|
||||
inline AffineTransformType operator* (const UniformScaling<Scalar>& other) const;
|
||||
EIGEN_DEVICE_FUNC inline AffineTransformType operator* (const UniformScaling<Scalar>& other) const;
|
||||
|
||||
/** Concatenates a translation and a linear transformation */
|
||||
template<typename OtherDerived>
|
||||
inline AffineTransformType operator* (const EigenBase<OtherDerived>& linear) const;
|
||||
EIGEN_DEVICE_FUNC inline AffineTransformType operator* (const EigenBase<OtherDerived>& linear) const;
|
||||
|
||||
/** Concatenates a translation and a rotation */
|
||||
template<typename Derived>
|
||||
inline IsometryTransformType operator*(const RotationBase<Derived,Dim>& r) const
|
||||
EIGEN_DEVICE_FUNC inline IsometryTransformType operator*(const RotationBase<Derived,Dim>& r) const
|
||||
{ return *this * IsometryTransformType(r); }
|
||||
|
||||
/** \returns the concatenation of a linear transformation \a l with the translation \a t */
|
||||
// its a nightmare to define a templated friend function outside its declaration
|
||||
template<typename OtherDerived> friend
|
||||
inline AffineTransformType operator*(const EigenBase<OtherDerived>& linear, const Translation& t)
|
||||
EIGEN_DEVICE_FUNC inline AffineTransformType operator*(const EigenBase<OtherDerived>& linear, const Translation& t)
|
||||
{
|
||||
AffineTransformType res;
|
||||
res.matrix().setZero();
|
||||
@@ -122,7 +122,7 @@ public:
|
||||
|
||||
/** Concatenates a translation and a transformation */
|
||||
template<int Mode, int Options>
|
||||
inline Transform<Scalar,Dim,Mode> operator* (const Transform<Scalar,Dim,Mode,Options>& t) const
|
||||
EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode> operator* (const Transform<Scalar,Dim,Mode,Options>& t) const
|
||||
{
|
||||
Transform<Scalar,Dim,Mode> res = t;
|
||||
res.pretranslate(m_coeffs);
|
||||
@@ -130,8 +130,10 @@ public:
|
||||
}
|
||||
|
||||
/** Applies translation to vector */
|
||||
inline VectorType operator* (const VectorType& other) const
|
||||
{ return m_coeffs + other; }
|
||||
template<typename Derived>
|
||||
inline typename internal::enable_if<Derived::IsVectorAtCompileTime,VectorType>::type
|
||||
operator* (const MatrixBase<Derived>& vec) const
|
||||
{ return m_coeffs + vec.derived(); }
|
||||
|
||||
/** \returns the inverse translation (opposite) */
|
||||
Translation inverse() const { return Translation(-m_coeffs); }
|
||||
@@ -150,19 +152,19 @@ public:
|
||||
* then this function smartly returns a const reference to \c *this.
|
||||
*/
|
||||
template<typename NewScalarType>
|
||||
inline typename internal::cast_return_type<Translation,Translation<NewScalarType,Dim> >::type cast() const
|
||||
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Translation,Translation<NewScalarType,Dim> >::type cast() const
|
||||
{ return typename internal::cast_return_type<Translation,Translation<NewScalarType,Dim> >::type(*this); }
|
||||
|
||||
/** Copy constructor with scalar type conversion */
|
||||
template<typename OtherScalarType>
|
||||
inline explicit Translation(const Translation<OtherScalarType,Dim>& other)
|
||||
EIGEN_DEVICE_FUNC inline explicit Translation(const Translation<OtherScalarType,Dim>& other)
|
||||
{ m_coeffs = other.vector().template cast<Scalar>(); }
|
||||
|
||||
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
|
||||
* determined by \a prec.
|
||||
*
|
||||
* \sa MatrixBase::isApprox() */
|
||||
bool isApprox(const Translation& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
|
||||
EIGEN_DEVICE_FUNC bool isApprox(const Translation& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
|
||||
{ return m_coeffs.isApprox(other.m_coeffs, prec); }
|
||||
|
||||
};
|
||||
@@ -176,7 +178,7 @@ typedef Translation<double,3> Translation3d;
|
||||
//@}
|
||||
|
||||
template<typename Scalar, int Dim>
|
||||
inline typename Translation<Scalar,Dim>::AffineTransformType
|
||||
EIGEN_DEVICE_FUNC inline typename Translation<Scalar,Dim>::AffineTransformType
|
||||
Translation<Scalar,Dim>::operator* (const UniformScaling<Scalar>& other) const
|
||||
{
|
||||
AffineTransformType res;
|
||||
@@ -189,7 +191,7 @@ Translation<Scalar,Dim>::operator* (const UniformScaling<Scalar>& other) const
|
||||
|
||||
template<typename Scalar, int Dim>
|
||||
template<typename OtherDerived>
|
||||
inline typename Translation<Scalar,Dim>::AffineTransformType
|
||||
EIGEN_DEVICE_FUNC inline typename Translation<Scalar,Dim>::AffineTransformType
|
||||
Translation<Scalar,Dim>::operator* (const EigenBase<OtherDerived>& linear) const
|
||||
{
|
||||
AffineTransformType res;
|
||||
|
||||
@@ -16,17 +16,23 @@ namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
template<class Derived, class OtherDerived>
|
||||
struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16>
|
||||
struct quat_product<Architecture::SSE, Derived, OtherDerived, float>
|
||||
{
|
||||
enum {
|
||||
AAlignment = traits<Derived>::Alignment,
|
||||
BAlignment = traits<OtherDerived>::Alignment,
|
||||
ResAlignment = traits<Quaternion<float> >::Alignment
|
||||
};
|
||||
static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
|
||||
{
|
||||
Quaternion<float> res;
|
||||
const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f);
|
||||
__m128 a = _a.coeffs().template packet<Aligned16>(0);
|
||||
__m128 b = _b.coeffs().template packet<Aligned16>(0);
|
||||
__m128 a = _a.coeffs().template packet<AAlignment>(0);
|
||||
__m128 b = _b.coeffs().template packet<BAlignment>(0);
|
||||
__m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2));
|
||||
__m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1));
|
||||
pstore(&res.x(),
|
||||
pstoret<float,Packet4f,ResAlignment>(
|
||||
&res.x(),
|
||||
_mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)),
|
||||
_mm_mul_ps(vec4f_swizzle1(a,2,0,1,0),
|
||||
vec4f_swizzle1(b,1,2,0,0))),
|
||||
@@ -36,14 +42,17 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16>
|
||||
}
|
||||
};
|
||||
|
||||
template<class Derived, int Alignment>
|
||||
struct quat_conj<Architecture::SSE, Derived, float, Alignment>
|
||||
template<class Derived>
|
||||
struct quat_conj<Architecture::SSE, Derived, float>
|
||||
{
|
||||
enum {
|
||||
ResAlignment = traits<Quaternion<float> >::Alignment
|
||||
};
|
||||
static inline Quaternion<float> run(const QuaternionBase<Derived>& q)
|
||||
{
|
||||
Quaternion<float> res;
|
||||
const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f);
|
||||
pstore(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<Alignment>(0)));
|
||||
pstoret<float,Packet4f,ResAlignment>(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
|
||||
return res;
|
||||
}
|
||||
};
|
||||
@@ -52,6 +61,9 @@ struct quat_conj<Architecture::SSE, Derived, float, Alignment>
|
||||
template<typename VectorLhs,typename VectorRhs>
|
||||
struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
|
||||
{
|
||||
enum {
|
||||
ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment
|
||||
};
|
||||
static inline typename plain_matrix_type<VectorLhs>::type
|
||||
run(const VectorLhs& lhs, const VectorRhs& rhs)
|
||||
{
|
||||
@@ -60,7 +72,7 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
|
||||
__m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3));
|
||||
__m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3));
|
||||
typename plain_matrix_type<VectorLhs>::type res;
|
||||
pstore(&res.x(),_mm_sub_ps(mul1,mul2));
|
||||
pstoret<float,Packet4f,ResAlignment>(&res.x(),_mm_sub_ps(mul1,mul2));
|
||||
return res;
|
||||
}
|
||||
};
|
||||
@@ -68,9 +80,14 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
|
||||
|
||||
|
||||
|
||||
template<class Derived, class OtherDerived, int Alignment>
|
||||
struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
|
||||
template<class Derived, class OtherDerived>
|
||||
struct quat_product<Architecture::SSE, Derived, OtherDerived, double>
|
||||
{
|
||||
enum {
|
||||
BAlignment = traits<OtherDerived>::Alignment,
|
||||
ResAlignment = traits<Quaternion<double> >::Alignment
|
||||
};
|
||||
|
||||
static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
|
||||
{
|
||||
const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
|
||||
@@ -78,8 +95,8 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
|
||||
Quaternion<double> res;
|
||||
|
||||
const double* a = _a.coeffs().data();
|
||||
Packet2d b_xy = _b.coeffs().template packet<Alignment>(0);
|
||||
Packet2d b_zw = _b.coeffs().template packet<Alignment>(2);
|
||||
Packet2d b_xy = _b.coeffs().template packet<BAlignment>(0);
|
||||
Packet2d b_zw = _b.coeffs().template packet<BAlignment>(2);
|
||||
Packet2d a_xx = pset1<Packet2d>(a[0]);
|
||||
Packet2d a_yy = pset1<Packet2d>(a[1]);
|
||||
Packet2d a_zz = pset1<Packet2d>(a[2]);
|
||||
@@ -97,9 +114,9 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
|
||||
t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw));
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
EIGEN_UNUSED_VARIABLE(mask)
|
||||
pstore(&res.x(), _mm_addsub_pd(t1, preverse(t2)));
|
||||
pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_addsub_pd(t1, preverse(t2)));
|
||||
#else
|
||||
pstore(&res.x(), padd(t1, pxor(mask,preverse(t2))));
|
||||
pstoret<double,Packet2d,ResAlignment>(&res.x(), padd(t1, pxor(mask,preverse(t2))));
|
||||
#endif
|
||||
|
||||
/*
|
||||
@@ -111,25 +128,28 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
|
||||
t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy));
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
EIGEN_UNUSED_VARIABLE(mask)
|
||||
pstore(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2)));
|
||||
pstoret<double,Packet2d,ResAlignment>(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2)));
|
||||
#else
|
||||
pstore(&res.z(), psub(t1, pxor(mask,preverse(t2))));
|
||||
pstoret<double,Packet2d,ResAlignment>(&res.z(), psub(t1, pxor(mask,preverse(t2))));
|
||||
#endif
|
||||
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Derived, int Alignment>
|
||||
struct quat_conj<Architecture::SSE, Derived, double, Alignment>
|
||||
template<class Derived>
|
||||
struct quat_conj<Architecture::SSE, Derived, double>
|
||||
{
|
||||
enum {
|
||||
ResAlignment = traits<Quaternion<double> >::Alignment
|
||||
};
|
||||
static inline Quaternion<double> run(const QuaternionBase<Derived>& q)
|
||||
{
|
||||
Quaternion<double> res;
|
||||
const __m128d mask0 = _mm_setr_pd(-0.,-0.);
|
||||
const __m128d mask2 = _mm_setr_pd(-0.,0.);
|
||||
pstore(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<Alignment>(0)));
|
||||
pstore(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<Alignment>(2)));
|
||||
pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
|
||||
pstoret<double,Packet2d,ResAlignment>(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<traits<Derived>::Alignment>(2)));
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -87,7 +87,8 @@ void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vec
|
||||
const TriangularView<const VectorsType, UnitLower> V(vectors);
|
||||
|
||||
// A -= V T V^* A
|
||||
Matrix<typename MatrixType::Scalar,VectorsType::ColsAtCompileTime,MatrixType::ColsAtCompileTime,0,
|
||||
Matrix<typename MatrixType::Scalar,VectorsType::ColsAtCompileTime,MatrixType::ColsAtCompileTime,
|
||||
(VectorsType::MaxColsAtCompileTime==1 && MatrixType::MaxColsAtCompileTime!=1)?RowMajor:ColMajor,
|
||||
VectorsType::MaxColsAtCompileTime,MatrixType::MaxColsAtCompileTime> tmp = V.adjoint() * mat;
|
||||
// FIXME add .noalias() once the triangular product can work inplace
|
||||
if(forward) tmp = T.template triangularView<Upper>() * tmp;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user