Compare commits

...

53 Commits
3.3.8 ... 3.3

Author SHA1 Message Date
Antonio Sanchez
5e8edd2186 Fix undefined behavior in PPC load.
VSX vec_xl is Causing a bunch of test failures and failing `-fsanitize=undefined` with g++.
Removing the instruction allows tests to pass and eliminates the warning.
2025-03-14 07:58:36 -07:00
Antonio Sanchez
0ac1fc52dd Fix CUDA clang again with new C++11 usages 2025-03-14 07:58:29 -07:00
Antonio Sánchez
6aa0143851 Judge unitary-ness relative to scaling.
(cherry picked from commit c1d637433e)
2025-03-13 15:01:33 -07:00
Antonio Sanchez
c7f6f8315f Update CUDA testing infra to match master branch. 2025-03-13 21:49:24 +00:00
Antonio Sanchez
b0448fc6e0 Fix cxx03 testing job configuration 2025-03-13 11:13:35 -07:00
Antonio Sanchez
3b8644da50 Better rand to fix MSVC random tests 2025-03-13 08:50:26 -07:00
Antonio Sanchez
414c42bfcf Fix cuda clang builds 2025-03-12 21:47:26 -07:00
Antonio Sanchez
952eda443b Fix GPU build failures. 2025-03-09 17:04:41 -07:00
Chip Kerchner
6a4a0b66bd Fix epsilon and dummy_precision values in long double for double doubles. Prevented some algorithms from converging on PPC.
(cherry picked from commit 54459214a1)
2025-03-07 21:19:41 -08:00
Antonio Sanchez
079de53fa5 Adjust tolerance of matrix_power test for MSVC.
(cherry picked from commit 1c2690ed24)
2025-03-07 21:02:39 -08:00
Antonio Sanchez
ce950ca2db Patch PPC PacketMath from 3.4.
This is to fix failing tests for PPC due to UB on loads.
2025-03-07 20:59:30 -08:00
Antonio Sanchez
49bd503308 Fix merge conflict error 2025-03-03 15:21:02 -08:00
Charles Schlosser
5b20d9f326 Fix arm32 float division and related bugs
(cherry picked from commit 81b48065ea)
(cherry picked from commit 72f77ccb3e)
2025-03-03 13:11:37 -08:00
Antonio Sánchez
5f8f69020b Remove poor non-convergence checks in NonLinearOptimization.
(cherry picked from commit d819a33bf6)
(cherry picked from commit b30a2a527e)
2025-03-03 07:56:31 -08:00
Antonio Sánchez
dc9325848a Fix arm32 issues.
(cherry picked from commit a73970a864)
(cherry picked from commit c23abcf25c)
2025-03-03 07:38:59 -08:00
Antonio Sanchez
9df4c76bb8 Fix emulated builds cmake configuration 2025-03-03 07:26:26 -08:00
Antonio Sánchez
0071c2e8a8 Fix more hard-coded magic bounds.
(cherry picked from commit ae5280aa8d)
2025-03-03 07:26:02 -08:00
Antonio Sánchez
03727bdf55 Slightly adjust error bound for nonlinear tests.
(cherry picked from commit 42aa3d17cd)
2025-03-03 07:24:35 -08:00
Antonio Sánchez
5e39ba6642 Fix emulated tests.
(cherry picked from commit 9589cc4e7f)
2025-03-02 16:38:13 -08:00
Antonio Sanchez
d2ce4faa5a Fix cuda 9+ builds
Fix removed `shfl_` intrinsics, disable warnings, update CUDA header inclusion.
2025-03-02 16:21:48 -08:00
Antonio Sanchez
43b7aa2412 Don't check for build type 2025-02-28 22:13:46 -08:00
Antonio Sanchez
23b1682723 Fix cuda device warnings 2025-02-28 22:09:30 -08:00
Antonio Sanchez
c53002f5fb Fix failing tests on arm/ppc 2025-02-28 13:15:33 -08:00
Antonio Sanchez
ea37d9e73e Remove private access of std::deque::_M_impl.
This no longer works on gcc or clang, so we should just remove the hack.
The default should compile to similar code anyways.

(cherry picked from commit 82c0c18a83)
2025-02-28 11:21:13 -08:00
Antonio Sánchez
ece7cec604 Fix parsing of command-line arguments when already specified as a cmake list.
(cherry picked from commit 555cec17ed)
2025-02-26 07:49:23 -08:00
Antonio Sanchez
2e708d48ca Merge CI from 3.4 2025-02-25 21:23:42 -08:00
C. Antonio Sanchez
109935bfce Fix Tensor docs
(cherry picked from commit 42d9cc0b1d)
2025-02-25 21:21:49 -08:00
Antonio Sanchez
339d7188ed Fix up all doxygen warnings. 2025-02-25 21:05:40 -08:00
Jean-Christophe Fillion-Robin
02f420012a [PATCH] cmake: Support source include with add_subdirectory and
find_package use
This commit allows the sources of the project to be included in a parent
project CMakeLists.txt and support use of "find_package(Eigen3 CONFIG REQUIRED)"

Here is an example allowing to test the changes. It is not particularly
useful in itself. This change will allow to support one of the scenario
allowing to create custom 3D Slicer application bundling associated plugins.

/tmp/eigen-git-mirror  # Eigen sources

/tmp/test/CMakeLists.txt:

  cmake_minimum_required(VERSION 3.12)
  project(test)
  add_subdirectory("/tmp/eigen-git-mirror" "eigen-git-mirror")
  find_package(Eigen3 CONFIG REQUIRED)

and configuring it using:

  mkdir /tmp/test-build && cd $_
  cmake \
    -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY:BOOL=1 \
    -DEigen3_DIR:PATH=/tmp/test-build/eigen-git-mirror \
    /tmp/test

Co-authored-by: Pablo Hernandez <pablo.hernandez@kitware.com>
---
 CMakeLists.txt              | 1 +
 cmake/Eigen3Config.cmake.in | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)


(cherry picked from commit 2cbd9dd498)
2022-02-13 21:22:14 +00:00
Antonio Sánchez
d45ac54008 Correct use of EIGEN_CUDACC to respect EIGEN_NO_CUDA. 2022-02-04 22:24:31 +00:00
Jakob Struye
d9585478d9 Clearer doc for squaredNorm
(cherry picked from commit 53a29c7e35)
2021-08-18 15:13:11 +00:00
Benoit Steiner
01421e31a2 Added missing EIGEN_DEVICE_FUNC qualifiers
(cherry picked from commit c36bc2d445)
2021-07-20 21:21:53 +00:00
René Wagner
2f81b6363f BooleanRedux.h: Add more EIGEN_DEVICE_FUNC qualifiers.
This enables operator== on Eigen matrices in device code.


(cherry picked from commit 0aebe19aca)
2021-07-20 21:20:54 +00:00
Adam Shapiro
53a7864c48 Fixed sparse conservativeResize() when both num cols and rows decreased.
The previous implementation caused a buffer overflow trying to calculate non-
zero counts for columns that no longer exist.


(cherry picked from commit 2ac0b78739)

(cherry picked from commit f4b67691c42952b44ce7dae62f5c18ed93b53521)
2021-02-23 21:35:46 +00:00
Gael Guennebaud
9fc3d9f3ca Fix some implicit literal to Scalar conversions in SparseCore
(cherry picked from commit afa8d13532)
2021-02-19 18:54:23 +00:00
Antonio Sanchez
84911f9c05 Include <cstdint> in one place, remove custom typedefs
Originating from
[this SO issue](https://stackoverflow.com/questions/65901014/how-to-solve-this-all-error-2-in-this-case),
some win32 compilers define `__int32` as a `long`, but MinGW defines
`std::int32_t` as an `int`, leading to a type conflict.

To avoid this, we remove the custom `typedef` definitions for win32.  The
Tensor module requires C++11 anyways, so we are guaranteed to have
included `<cstdint>` already in `Eigen/Core`.

Also re-arranged the headers to only include `<cstdint>` in one place to
avoid this type of error again.
2021-01-28 11:10:13 -08:00
Rasmus Munk Larsen
77dc6dbb44 Fix bugs in log1p and expm1 where repeated using statements would clobber each other.
Add specializations for complex types since std::log1p and std::exp1m do not support complex.

(cherry picked from commit d55d392e7b)
2021-01-21 11:22:36 +01:00
David Tellenbach
a36d19c4fc Fix a typo in SparseMatrix documentation.
This fixes issue #2091.

(cherry picked from commit 2e8f850c78)
2020-12-09 14:53:09 +01:00
David Tellenbach
0fd6b4f71d Bump to 3.3.9 2020-12-04 22:53:41 +01:00
Florian Maurin
52207cf6f9 Fix typo in doc
(cherry picked from commit c5985c46f5)
2020-11-30 12:09:43 +00:00
Jim Lersch
0c26611d2d Workaround for doxygen class template titles in which the template
part of the class signature is lost due to a problem with forward
declarations.  The problem is probably caused by doxygen bug #7689.
It is confirmed to be fixed in doxygen >= 1.8.19.

(cherry picked from commit 68f69414f7)
2020-11-28 16:21:12 +01:00
Christoph Hertzberg
2a4fcb2c31 Fix doxygen class block that was wrongly named.
Manually cherry-picked from a7170f2aca
2020-11-27 19:41:19 +01:00
Christoph Hertzberg
54930b6b55 Remove unused variable 2020-11-25 17:59:18 +01:00
Martin Vonheim Larsen
4e5385c905 Enable MathJax in Doxygen.in
Note that HTTPS must be used against the MathJax CDN when hosted on `eigen.tuxfamily.org` (which uses HTTPS) in order to avoid `Mixed Content`-errors from browsers. Using HTTPS for MathJax also works if the Eigen docs are hosted on plain HTTP.

(cherry picked from commit 280f4f2407)
2020-11-17 15:39:44 +01:00
Christoph Hertzberg
ac632f663e bug #1746: Removed implementation of standard copy-constructor and standard copy-assign-operator from PermutationMatrix and Transpositions to allow malloc-less std::move. Added unit-test to rvalue_types
(cherry picked from commit efd9867ff0)
2020-11-12 11:54:51 +01:00
Christoph Hertzberg
3620371c5c Bug #2036 make sure find_standard_math_library_test_program actually compiles (and is guaranteed to call math functions)
(cherry picked from commit ecb7bc9514)
2020-11-04 13:38:17 +01:00
David Tellenbach
5dda502f84 Rename test/array.cpp to test/array_cwise.cpp
Having a test named "array" can clash with the standard library header
"array".

Fixes issue #2046
2020-11-04 13:01:17 +01:00
Gael Guennebaud
590aec8fab check two ctors
(cherry picked from commit 572d62697d)
2020-10-28 09:53:23 +01:00
David Tellenbach
75f8b06e50 Mention problems when using potentially throwing scalars and OpenMP
(cherry picked from commit 9022f5aa8a)
2020-10-09 17:41:41 +02:00
Karl Ljungkvist
e91e5d8c87 Fix typo in Tutorial_BlockOperations_block_assignment.cpp
(cherry picked from commit d199c17b14)
2020-10-09 14:19:23 +02:00
Luke Peterson
ef3cc72cb6 Remove error counting in OpenMP parallelize_gemm
This resolves a compilation error associated with
Eigen::eigen_assert_exception. It also eliminates the counting of
exceptions that may occur in the OpenMP parallel section. If an
unhandled exception occurs in this section, the behavior is non-conforming
according to the OpenMP specification.
2020-10-08 18:50:33 -07:00
David Tellenbach
7a0a2a5001 Define coeff-wise binary array operators for base class
This fixes #2012.
2020-10-09 00:53:34 +02:00
szczepaniak bartek
bfdd4a9903 Fix Paradiso.
EIGEN_USING_STD -> EIGEN_USING_STD_MATH
2020-10-08 19:38:35 +00:00
190 changed files with 7167 additions and 4330 deletions

19
.clang-format Normal file
View File

@@ -0,0 +1,19 @@
---
BasedOnStyle: Google
ColumnLimit: 120
---
Language: Cpp
BasedOnStyle: Google
ColumnLimit: 120
StatementMacros:
- EIGEN_STATIC_ASSERT
- EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
- EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
SortIncludes: false
AttributeMacros:
- EIGEN_STRONG_INLINE
- EIGEN_ALWAYS_INLINE
- EIGEN_DEVICE_FUNC
- EIGEN_DONT_INLINE
- EIGEN_DEPRECATED
- EIGEN_UNUSED

34
.gitlab-ci.yml Normal file
View File

@@ -0,0 +1,34 @@
# This file is part of Eigen, a lightweight C++ template library
# for linear algebra.
#
# Copyright (C) 2023, The Eigen Authors
#
# This Source Code Form is subject to the terms of the Mozilla
# Public License v. 2.0. If a copy of the MPL was not distributed
# with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
stages:
- checkformat
- build
- test
- deploy
variables:
# CMake build directory.
EIGEN_CI_BUILDDIR: .build
# Specify the CMake build target.
EIGEN_CI_BUILD_TARGET: ""
# If a test regex is specified, that will be selected.
# Otherwise, we will try a label if specified.
EIGEN_CI_CTEST_REGEX: ""
EIGEN_CI_CTEST_LABEL: ""
EIGEN_CI_CTEST_ARGS: ""
include:
- "/ci/checkformat.gitlab-ci.yml"
- "/ci/common.gitlab-ci.yml"
- "/ci/build.linux.gitlab-ci.yml"
- "/ci/build.windows.gitlab-ci.yml"
- "/ci/test.linux.gitlab-ci.yml"
- "/ci/test.windows.gitlab-ci.yml"
- "/ci/deploy.gitlab-ci.yml"

View File

@@ -1,7 +1,7 @@
project(Eigen3)
cmake_minimum_required(VERSION 2.8.5)
project(Eigen3)
# guard against in-source builds
if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
@@ -19,14 +19,6 @@ if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release")
endif()
string(TOLOWER "${CMAKE_BUILD_TYPE}" cmake_build_type_tolower)
if( NOT cmake_build_type_tolower STREQUAL "debug"
AND NOT cmake_build_type_tolower STREQUAL "release"
AND NOT cmake_build_type_tolower STREQUAL "relwithdebinfo")
message(FATAL_ERROR "Unknown build type \"${CMAKE_BUILD_TYPE}\". Allowed values are Debug, Release, RelWithDebInfo (case-insensitive).")
endif()
#############################################################################
# retrieve version infomation #
#############################################################################
@@ -94,6 +86,20 @@ else()
ei_add_cxx_compiler_flag("-std=c++03")
endif()
function(ei_maybe_separate_arguments variable mode args)
# Use separate_arguments if the input is a single string containing a space.
# Otherwise, if it is already a list or doesn't have a space, just propagate
# the original value. This is to better support multi-argument lists.
list(LENGTH args list_length)
if (${list_length} EQUAL 1)
string(FIND "${args}" " " has_space)
if (${has_space} GREATER -1)
separate_arguments(args ${mode} "${args}")
endif()
endif()
set(${variable} ${args} PARENT_SCOPE)
endfunction(ei_maybe_separate_arguments)
#############################################################################
# find how to link to the standard libraries #
#############################################################################
@@ -104,6 +110,10 @@ find_package(StandardMathLibrary)
set(EIGEN_TEST_CUSTOM_LINKER_FLAGS "" CACHE STRING "Additional linker flags when linking unit tests.")
set(EIGEN_TEST_CUSTOM_CXX_FLAGS "" CACHE STRING "Additional compiler flags when compiling unit tests.")
# Convert space-separated arguments into CMake lists for downstream consumption.
ei_maybe_separate_arguments(EIGEN_TEST_CUSTOM_LINKER_FLAGS NATIVE_COMMAND "${EIGEN_TEST_CUSTOM_LINKER_FLAGS}")
ei_maybe_separate_arguments(EIGEN_TEST_CUSTOM_CXX_FLAGS NATIVE_COMMAND "${EIGEN_TEST_CUSTOM_CXX_FLAGS}")
set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "")
if(NOT STANDARD_MATH_LIBRARY_FOUND)
@@ -158,7 +168,7 @@ if(NOT MSVC)
ei_add_cxx_compiler_flag("-Wall")
ei_add_cxx_compiler_flag("-Wextra")
#ei_add_cxx_compiler_flag("-Weverything") # clang
ei_add_cxx_compiler_flag("-Wundef")
ei_add_cxx_compiler_flag("-Wcast-align")
ei_add_cxx_compiler_flag("-Wchar-subscripts")
@@ -173,29 +183,29 @@ if(NOT MSVC)
ei_add_cxx_compiler_flag("-Wc++11-extensions")
ei_add_cxx_compiler_flag("-Wdouble-promotion")
# ei_add_cxx_compiler_flag("-Wconversion")
# -Wshadow is insanely too strict with gcc, hopefully it will become usable with gcc 6
# if(NOT CMAKE_COMPILER_IS_GNUCXX OR (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "5.0.0"))
if(NOT CMAKE_COMPILER_IS_GNUCXX)
ei_add_cxx_compiler_flag("-Wshadow")
endif()
ei_add_cxx_compiler_flag("-Wno-psabi")
ei_add_cxx_compiler_flag("-Wno-variadic-macros")
ei_add_cxx_compiler_flag("-Wno-long-long")
ei_add_cxx_compiler_flag("-fno-check-new")
ei_add_cxx_compiler_flag("-fno-common")
ei_add_cxx_compiler_flag("-fstrict-aliasing")
ei_add_cxx_compiler_flag("-wd981") # disable ICC's "operands are evaluated in unspecified order" remark
ei_add_cxx_compiler_flag("-wd2304") # disable ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor
# The -ansi flag must be added last, otherwise it is also used as a linker flag by check_cxx_compiler_flag making it fails
# Moreover we should not set both -strict-ansi and -ansi
check_cxx_compiler_flag("-strict-ansi" COMPILER_SUPPORT_STRICTANSI)
ei_add_cxx_compiler_flag("-Qunused-arguments") # disable clang warning: argument unused during compilation: '-ansi'
if(COMPILER_SUPPORT_STRICTANSI)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -strict-ansi")
else()
@@ -206,7 +216,7 @@ if(NOT MSVC)
ei_add_cxx_compiler_flag("-pie")
ei_add_cxx_compiler_flag("-fPIE")
endif()
set(CMAKE_REQUIRED_FLAGS "")
option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
@@ -380,6 +390,7 @@ if(EIGEN_TEST_NO_EXCEPTIONS)
message(STATUS "Disabling exceptions in tests/examples")
endif()
set(EIGEN_CUDA_CXX_FLAGS "" CACHE STRING "Additional flags to pass to the cuda compiler.")
set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture level to target when compiling CUDA code")
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
@@ -451,13 +462,15 @@ if(BUILD_TESTING)
endif()
endif()
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
add_subdirectory(blas)
add_subdirectory(lapack)
else()
add_subdirectory(blas EXCLUDE_FROM_ALL)
add_subdirectory(lapack EXCLUDE_FROM_ALL)
endif()
if (NOT CMAKE_CROSSCOMPILING)
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
add_subdirectory(blas)
add_subdirectory(lapack)
else()
add_subdirectory(blas EXCLUDE_FROM_ALL)
add_subdirectory(lapack EXCLUDE_FROM_ALL)
endif()
endif(NOT CMAKE_CROSSCOMPILING)
# add SYCL
option(EIGEN_TEST_SYCL "Add Sycl support." OFF)
@@ -540,6 +553,7 @@ if (NOT CMAKE_VERSION VERSION_LESS 3.0)
# Imported target support
add_library (eigen INTERFACE)
add_library (Eigen3::Eigen ALIAS eigen)
target_compile_definitions (eigen INTERFACE ${EIGEN_DEFINITIONS})
target_include_directories (eigen INTERFACE
@@ -581,11 +595,11 @@ if (NOT CMAKE_VERSION VERSION_LESS 3.0)
else (NOT CMAKE_VERSION VERSION_LESS 3.0)
# Fallback to legacy Eigen3Config.cmake without the imported target
# If CMakePackageConfigHelpers module is available (CMake >= 2.8.8)
# create a relocatable Config file, otherwise leave the hardcoded paths
# create a relocatable Config file, otherwise leave the hardcoded paths
include(CMakePackageConfigHelpers OPTIONAL RESULT_VARIABLE CPCH_PATH)
if(CPCH_PATH)
configure_package_config_file (
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigLegacy.cmake.in
@@ -594,7 +608,7 @@ else (NOT CMAKE_VERSION VERSION_LESS 3.0)
INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components
)
else()
else()
# The PACKAGE_* variables are defined by the configure_package_config_file
# but without it we define them manually to the hardcoded paths
set(PACKAGE_INIT "")

View File

@@ -31,7 +31,7 @@
#endif
// Handle NVCC/CUDA/SYCL
#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
#if defined(EIGEN_CUDACC) || defined(__SYCL_DEVICE_ONLY__)
// Do not try asserts on CUDA and SYCL!
#ifndef EIGEN_NO_DEBUG
#define EIGEN_NO_DEBUG
@@ -46,7 +46,7 @@
#endif
// All functions callable from CUDA code must be qualified with __device__
#ifdef __CUDACC__
#ifdef EIGEN_CUDACC
// Do not try to vectorize on CUDA and SYCL!
#ifndef EIGEN_DONT_VECTORIZE
#define EIGEN_DONT_VECTORIZE
@@ -62,9 +62,16 @@
#else
#define EIGEN_DEVICE_FUNC
#endif
#if defined(EIGEN_CUDACC)
#include <cuda.h>
#define EIGEN_CUDA_SDK_VER (CUDA_VERSION * 10)
#else
#define EIGEN_CUDA_SDK_VER 0
#endif
// When compiling CUDA device code with NVCC, pull in math functions from the
// global namespace. In host mode, and when device doee with clang, use the
// std versions.
@@ -123,7 +130,7 @@
#endif
#endif
#ifndef EIGEN_DONT_VECTORIZE
#if !defined(EIGEN_DONT_VECTORIZE) && !defined(EIGEN_CUDACC)
#if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
@@ -213,6 +220,7 @@
} // end extern "C"
#elif defined __VSX__
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_FMA
#define EIGEN_VECTORIZE_VSX
#include <altivec.h>
// We need to #undef all these ugly tokens defined in <altivec.h>
@@ -222,6 +230,7 @@
#undef pixel
#elif defined __ALTIVEC__
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_FMA
#define EIGEN_VECTORIZE_ALTIVEC
#include <altivec.h>
// We need to #undef all these ugly tokens defined in <altivec.h>
@@ -233,6 +242,10 @@
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_NEON
#include <arm_neon.h>
// Enable FMA for ARM.
#if defined(__ARM_FEATURE_FMA)
#define EIGEN_VECTORIZE_FMA
#endif
#elif (defined __s390x__ && defined __VEC__)
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_ZVECTOR
@@ -245,16 +258,16 @@
#define EIGEN_HAS_FP16_C
#endif
#if defined __CUDACC__
#if defined EIGEN_CUDACC
#define EIGEN_VECTORIZE_CUDA
#include <vector_types.h>
#if EIGEN_CUDACC_VER >= 70500
#if EIGEN_CUDA_SDK_VER >= 70500
#define EIGEN_HAS_CUDA_FP16
#endif
#endif
#if defined EIGEN_HAS_CUDA_FP16
#include <host_defines.h>
#include <cuda_runtime_api.h>
#include <cuda_fp16.h>
#endif

View File

@@ -44,7 +44,7 @@ namespace internal {
* decomposition to determine whether a system of equations has a solution.
*
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
*
*
* \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT
*/
template<typename _MatrixType, int _UpLo> class LDLT
@@ -558,7 +558,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Deri
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename _MatrixType, int _UpLo>
template<typename RhsType, typename DstType>
void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
EIGEN_DEVICE_FUNC void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
{
eigen_assert(rhs.rows() == rows());
// dst = P b

View File

@@ -475,7 +475,7 @@ LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, c
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename _MatrixType,int _UpLo>
template<typename RhsType, typename DstType>
void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
EIGEN_DEVICE_FUNC void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
{
dst = rhs;
solveInPlace(dst);

View File

@@ -16,7 +16,7 @@ namespace Eigen {
template<typename Derived>
template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
::lazyAssign(const DenseBase<OtherDerived>& other)
{
enum{
@@ -29,7 +29,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
eigen_assert(rows() == other.rows() && cols() == other.cols());
internal::call_assignment_no_alias(derived(),other.derived());
return derived();
}

View File

@@ -17,7 +17,7 @@ namespace Eigen {
// This implementation is based on Assign.h
namespace internal {
/***************************************************************************
* Part 1 : the logic deciding a strategy for traversal and unrolling *
***************************************************************************/
@@ -29,12 +29,12 @@ struct copy_using_evaluator_traits
{
typedef typename DstEvaluator::XprType Dst;
typedef typename Dst::Scalar DstScalar;
enum {
DstFlags = DstEvaluator::Flags,
SrcFlags = SrcEvaluator::Flags
};
public:
enum {
DstAlignment = DstEvaluator::Alignment,
@@ -135,7 +135,7 @@ public:
? int(CompleteUnrolling)
: int(NoUnrolling) )
: int(Traversal) == int(LinearTraversal)
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(NoUnrolling) )
#if EIGEN_UNALIGNED_VECTORIZE
: int(Traversal) == int(SliceVectorizedTraversal)
@@ -195,7 +195,7 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
// FIXME: this is not very clean, perhaps this information should be provided by the kernel?
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
typedef typename DstEvaluatorType::XprType DstXprType;
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime
@@ -261,7 +261,7 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
typedef typename DstEvaluatorType::XprType DstXprType;
typedef typename Kernel::PacketType PacketType;
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime,
@@ -426,7 +426,7 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
typedef typename Kernel::PacketType PacketType;
enum { size = DstXprType::SizeAtCompileTime,
packetSize =unpacket_traits<PacketType>::size,
alignedSize = (size/packetSize)*packetSize };
@@ -599,14 +599,14 @@ protected:
typedef typename DstEvaluatorTypeT::XprType DstXprType;
typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
public:
typedef DstEvaluatorTypeT DstEvaluatorType;
typedef SrcEvaluatorTypeT SrcEvaluatorType;
typedef typename DstEvaluatorType::Scalar Scalar;
typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
typedef typename AssignmentTraits::PacketType PacketType;
EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
: m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
{
@@ -614,58 +614,58 @@ public:
AssignmentTraits::debug();
#endif
}
EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); }
EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); }
EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); }
EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); }
EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); }
EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
/// Assign src(row,col) to dst(row,col) through the assignment functor.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
{
m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
}
/// \sa assignCoeff(Index,Index)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
{
m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
}
/// \sa assignCoeff(Index,Index)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
{
Index row = rowIndexByOuterInner(outer, inner);
Index col = colIndexByOuterInner(outer, inner);
Index row = rowIndexByOuterInner(outer, inner);
Index col = colIndexByOuterInner(outer, inner);
assignCoeff(row, col);
}
template<int StoreMode, int LoadMode, typename PacketType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
{
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
}
template<int StoreMode, int LoadMode, typename PacketType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
{
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
}
template<int StoreMode, int LoadMode, typename PacketType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
{
Index row = rowIndexByOuterInner(outer, inner);
Index row = rowIndexByOuterInner(outer, inner);
Index col = colIndexByOuterInner(outer, inner);
assignPacket<StoreMode,LoadMode,PacketType>(row, col);
}
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
{
typedef typename DstEvaluatorType::ExpressionTraits Traits;
@@ -688,7 +688,7 @@ public:
{
return m_dstExpr.data();
}
protected:
DstEvaluatorType& m_dst;
const SrcEvaluatorType& m_src;
@@ -734,7 +734,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType
resize_if_allowed(dst, src, func);
DstEvaluatorType dstEvaluator(dst);
typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
@@ -762,7 +762,7 @@ struct EigenBase2EigenBase {};
template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
// This is the main assignment class
template< typename DstXprType, typename SrcXprType, typename Functor,
typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
@@ -787,7 +787,7 @@ void call_assignment(const Dst& dst, const Src& src)
{
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
// Deal with "assume-aliasing"
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -827,12 +827,12 @@ void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
ActualDstType actualDst(dst);
// TODO check whether this is the right place to perform these checks:
EIGEN_STATIC_ASSERT_LVALUE(Dst)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
}
template<typename Dst, typename Src>
@@ -869,13 +869,12 @@ template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, con
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
{
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{
#ifndef EIGEN_NO_DEBUG
internal::check_for_aliasing(dst, src);
#endif
call_dense_assignment_loop(dst, src, func);
}
};
@@ -887,8 +886,7 @@ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
{
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
Index dstRows = src.rows();
Index dstCols = src.cols();

View File

@@ -23,7 +23,7 @@ struct all_unroller
row = (UnrollCount-1) % Traits::RowsAtCompileTime
};
static inline bool run(const Derived &mat)
EIGEN_DEVICE_FUNC static inline bool run(const Derived &mat)
{
return all_unroller<Derived, UnrollCount-1>::run(mat) && mat.coeff(row, col);
}
@@ -32,13 +32,13 @@ struct all_unroller
template<typename Derived>
struct all_unroller<Derived, 0>
{
static inline bool run(const Derived &/*mat*/) { return true; }
EIGEN_DEVICE_FUNC static inline bool run(const Derived &/*mat*/) { return true; }
};
template<typename Derived>
struct all_unroller<Derived, Dynamic>
{
static inline bool run(const Derived &) { return false; }
EIGEN_DEVICE_FUNC static inline bool run(const Derived &) { return false; }
};
template<typename Derived, int UnrollCount>
@@ -50,7 +50,7 @@ struct any_unroller
row = (UnrollCount-1) % Traits::RowsAtCompileTime
};
static inline bool run(const Derived &mat)
EIGEN_DEVICE_FUNC static inline bool run(const Derived &mat)
{
return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col);
}
@@ -59,13 +59,13 @@ struct any_unroller
template<typename Derived>
struct any_unroller<Derived, 0>
{
static inline bool run(const Derived & /*mat*/) { return false; }
EIGEN_DEVICE_FUNC static inline bool run(const Derived & /*mat*/) { return false; }
};
template<typename Derived>
struct any_unroller<Derived, Dynamic>
{
static inline bool run(const Derived &) { return false; }
EIGEN_DEVICE_FUNC static inline bool run(const Derived &) { return false; }
};
} // end namespace internal
@@ -78,7 +78,7 @@ struct any_unroller<Derived, Dynamic>
* \sa any(), Cwise::operator<()
*/
template<typename Derived>
inline bool DenseBase<Derived>::all() const
EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::all() const
{
typedef internal::evaluator<Derived> Evaluator;
enum {
@@ -102,7 +102,7 @@ inline bool DenseBase<Derived>::all() const
* \sa all()
*/
template<typename Derived>
inline bool DenseBase<Derived>::any() const
EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::any() const
{
typedef internal::evaluator<Derived> Evaluator;
enum {
@@ -126,7 +126,7 @@ inline bool DenseBase<Derived>::any() const
* \sa all(), any()
*/
template<typename Derived>
inline Eigen::Index DenseBase<Derived>::count() const
EIGEN_DEVICE_FUNC inline Eigen::Index DenseBase<Derived>::count() const
{
return derived().template cast<bool>().template cast<Index>().sum();
}

View File

@@ -11,7 +11,7 @@
#ifndef EIGEN_COMMAINITIALIZER_H
#define EIGEN_COMMAINITIALIZER_H
namespace Eigen {
namespace Eigen {
/** \class CommaInitializer
* \ingroup Core_Module
@@ -44,7 +44,7 @@ struct CommaInitializer
m_xpr.block(0, 0, other.rows(), other.cols()) = other;
}
/* Copy/Move constructor which transfers ownership. This is crucial in
/* Copy/Move constructor which transfers ownership. This is crucial in
* absence of return value optimization to avoid assertions during destruction. */
// FIXME in C++11 mode this could be replaced by a proper RValue constructor
EIGEN_DEVICE_FUNC
@@ -135,13 +135,13 @@ struct CommaInitializer
*
* Example: \include MatrixBase_set.cpp
* Output: \verbinclude MatrixBase_set.out
*
*
* \note According the c++ standard, the argument expressions of this comma initializer are evaluated in arbitrary order.
*
* \sa CommaInitializer::finished(), class CommaInitializer
*/
template<typename Derived>
inline CommaInitializer<Derived> DenseBase<Derived>::operator<< (const Scalar& s)
EIGEN_DEVICE_FUNC inline CommaInitializer<Derived> DenseBase<Derived>::operator<< (const Scalar& s)
{
return CommaInitializer<Derived>(*static_cast<Derived*>(this), s);
}
@@ -149,7 +149,7 @@ inline CommaInitializer<Derived> DenseBase<Derived>::operator<< (const Scalar& s
/** \sa operator<<(const Scalar&) */
template<typename Derived>
template<typename OtherDerived>
inline CommaInitializer<Derived>
EIGEN_DEVICE_FUNC inline CommaInitializer<Derived>
DenseBase<Derived>::operator<<(const DenseBase<OtherDerived>& other)
{
return CommaInitializer<Derived>(*static_cast<Derived *>(this), other);

View File

@@ -74,7 +74,7 @@ class CwiseBinaryOpImpl;
* \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
*/
template<typename BinaryOp, typename LhsType, typename RhsType>
class CwiseBinaryOp :
class CwiseBinaryOp :
public CwiseBinaryOpImpl<
BinaryOp, LhsType, RhsType,
typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
@@ -83,7 +83,7 @@ class CwiseBinaryOp :
internal::no_assignment_operator
{
public:
typedef typename internal::remove_all<BinaryOp>::type Functor;
typedef typename internal::remove_all<LhsType>::type Lhs;
typedef typename internal::remove_all<RhsType>::type Rhs;
@@ -158,7 +158,7 @@ public:
*/
template<typename Derived>
template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
{
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
@@ -171,7 +171,7 @@ MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
*/
template<typename Derived>
template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
{
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());

View File

@@ -126,12 +126,12 @@ DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f
*
* Here is an example with C++11 random generators: \include random_cpp11.cpp
* Output: \verbinclude random_cpp11.out
*
*
* \sa class CwiseNullaryOp
*/
template<typename Derived>
template<typename CustomNullaryOp>
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -170,7 +170,7 @@ DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
* \sa class CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
{
return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_constant_op<Scalar>(value));
@@ -272,7 +272,7 @@ DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
}
/**
* \copydoc DenseBase::LinSpaced(Index, const Scalar&, const Scalar&)
* \copydoc DenseBase::LinSpaced(Index, const DenseBase::Scalar&, const DenseBase::Scalar&)
* Special version for fixed size types which does not require the size parameter.
*/
template<typename Derived>

View File

@@ -11,7 +11,7 @@
#ifndef EIGEN_DIAGONAL_H
#define EIGEN_DIAGONAL_H
namespace Eigen {
namespace Eigen {
/** \class Diagonal
* \ingroup Core_Module
@@ -149,8 +149,8 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
}
EIGEN_DEVICE_FUNC
inline const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const
inline const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const
{
return m_matrix;
}
@@ -187,7 +187,7 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
*
* \sa class Diagonal */
template<typename Derived>
inline typename MatrixBase<Derived>::DiagonalReturnType
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::DiagonalReturnType
MatrixBase<Derived>::diagonal()
{
return DiagonalReturnType(derived());
@@ -195,7 +195,7 @@ MatrixBase<Derived>::diagonal()
/** This is the const version of diagonal(). */
template<typename Derived>
inline typename MatrixBase<Derived>::ConstDiagonalReturnType
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::ConstDiagonalReturnType
MatrixBase<Derived>::diagonal() const
{
return ConstDiagonalReturnType(derived());
@@ -213,7 +213,7 @@ MatrixBase<Derived>::diagonal() const
*
* \sa MatrixBase::diagonal(), class Diagonal */
template<typename Derived>
inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType
MatrixBase<Derived>::diagonal(Index index)
{
return DiagonalDynamicIndexReturnType(derived(), index);
@@ -221,7 +221,7 @@ MatrixBase<Derived>::diagonal(Index index)
/** This is the const version of diagonal(Index). */
template<typename Derived>
inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType
MatrixBase<Derived>::diagonal(Index index) const
{
return ConstDiagonalDynamicIndexReturnType(derived(), index);
@@ -240,7 +240,7 @@ MatrixBase<Derived>::diagonal(Index index) const
* \sa MatrixBase::diagonal(), class Diagonal */
template<typename Derived>
template<int Index_>
inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index_>::Type
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index_>::Type
MatrixBase<Derived>::diagonal()
{
return typename DiagonalIndexReturnType<Index_>::Type(derived());
@@ -249,7 +249,7 @@ MatrixBase<Derived>::diagonal()
/** This is the const version of diagonal<int>(). */
template<typename Derived>
template<int Index_>
inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index_>::Type
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index_>::Type
MatrixBase<Derived>::diagonal() const
{
return typename ConstDiagonalIndexReturnType<Index_>::Type(derived());

View File

@@ -11,7 +11,7 @@
#ifndef EIGEN_DIAGONALMATRIX_H
#define EIGEN_DIAGONALMATRIX_H
namespace Eigen {
namespace Eigen {
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename Derived>
@@ -44,7 +44,7 @@ class DiagonalBase : public EigenBase<Derived>
EIGEN_DEVICE_FUNC
DenseMatrixType toDenseMatrix() const { return derived(); }
EIGEN_DEVICE_FUNC
inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); }
EIGEN_DEVICE_FUNC
@@ -70,7 +70,7 @@ class DiagonalBase : public EigenBase<Derived>
{
return InverseReturnType(diagonal().cwiseInverse());
}
EIGEN_DEVICE_FUNC
inline const DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >
operator*(const Scalar& scalar) const
@@ -273,7 +273,7 @@ class DiagonalWrapper
* \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal()
**/
template<typename Derived>
inline const DiagonalWrapper<const Derived>
EIGEN_DEVICE_FUNC inline const DiagonalWrapper<const Derived>
MatrixBase<Derived>::asDiagonal() const
{
return DiagonalWrapper<const Derived>(derived());
@@ -318,20 +318,20 @@ template<> struct AssignmentKind<DenseShape,DiagonalShape> { typedef Diagonal2De
template< typename DstXprType, typename SrcXprType, typename Functor>
struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense>
{
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
dst.setZero();
dst.diagonal() = src.diagonal();
}
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{ dst.diagonal() += src.diagonal(); }
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{ dst.diagonal() -= src.diagonal(); }
};

View File

@@ -17,7 +17,7 @@ namespace Eigen {
*/
template<typename Derived>
template<typename DiagonalDerived>
inline const Product<Derived, DiagonalDerived, LazyProduct>
EIGEN_DEVICE_FUNC inline const Product<Derived, DiagonalDerived, LazyProduct>
MatrixBase<Derived>::operator*(const DiagonalBase<DiagonalDerived> &a_diagonal) const
{
return Product<Derived, DiagonalDerived, LazyProduct>(derived(),a_diagonal.derived());

View File

@@ -10,7 +10,7 @@
#ifndef EIGEN_DOT_H
#define EIGEN_DOT_H
namespace Eigen {
namespace Eigen {
namespace internal {
@@ -78,7 +78,7 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
#endif
eigen_assert(size() == other.size());
return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);
@@ -86,14 +86,14 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
//---------- implementation of L2 norm and related functions ----------
/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm.
/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the squared Frobenius norm.
* In both cases, it consists in the sum of the square of all the matrix entries.
* For vectors, this is also equals to the dot product of \c *this with itself.
*
* \sa dot(), norm(), lpNorm()
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
{
return numext::real((*this).cwiseAbs2().sum());
}
@@ -105,7 +105,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
* \sa lpNorm(), dot(), squaredNorm()
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
{
return numext::sqrt(squaredNorm());
}
@@ -120,7 +120,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
* \sa norm(), normalize()
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::normalized() const
{
typedef typename internal::nested_eval<Derived,2>::type _Nested;
@@ -142,7 +142,7 @@ MatrixBase<Derived>::normalized() const
* \sa norm(), normalized()
*/
template<typename Derived>
EIGEN_STRONG_INLINE void MatrixBase<Derived>::normalize()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void MatrixBase<Derived>::normalize()
{
RealScalar z = squaredNorm();
// NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
@@ -163,7 +163,7 @@ EIGEN_STRONG_INLINE void MatrixBase<Derived>::normalize()
* \sa stableNorm(), stableNormalize(), normalized()
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::stableNormalized() const
{
typedef typename internal::nested_eval<Derived,3>::type _Nested;
@@ -188,7 +188,7 @@ MatrixBase<Derived>::stableNormalized() const
* \sa stableNorm(), stableNormalized(), normalize()
*/
template<typename Derived>
EIGEN_STRONG_INLINE void MatrixBase<Derived>::stableNormalize()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void MatrixBase<Derived>::stableNormalize()
{
RealScalar w = cwiseAbs().maxCoeff();
RealScalar z = (derived()/w).squaredNorm();
@@ -260,9 +260,9 @@ struct lpNorm_selector<Derived, Infinity>
template<typename Derived>
template<int p>
#ifndef EIGEN_PARSED_BY_DOXYGEN
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
EIGEN_DEVICE_FUNC inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
#else
MatrixBase<Derived>::RealScalar
EIGEN_DEVICE_FUNC MatrixBase<Derived>::RealScalar
#endif
MatrixBase<Derived>::lpNorm() const
{

View File

@@ -11,7 +11,7 @@
#ifndef EIGEN_FUZZY_H
#define EIGEN_FUZZY_H
namespace Eigen {
namespace Eigen {
namespace internal
{
@@ -100,7 +100,7 @@ struct isMuchSmallerThan_scalar_selector<Derived, true>
*/
template<typename Derived>
template<typename OtherDerived>
bool DenseBase<Derived>::isApprox(
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApprox(
const DenseBase<OtherDerived>& other,
const RealScalar& prec
) const
@@ -122,7 +122,7 @@ bool DenseBase<Derived>::isApprox(
* \sa isApprox(), isMuchSmallerThan(const DenseBase<OtherDerived>&, RealScalar) const
*/
template<typename Derived>
bool DenseBase<Derived>::isMuchSmallerThan(
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isMuchSmallerThan(
const typename NumTraits<Scalar>::Real& other,
const RealScalar& prec
) const
@@ -142,7 +142,7 @@ bool DenseBase<Derived>::isMuchSmallerThan(
*/
template<typename Derived>
template<typename OtherDerived>
bool DenseBase<Derived>::isMuchSmallerThan(
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isMuchSmallerThan(
const DenseBase<OtherDerived>& other,
const RealScalar& prec
) const

View File

@@ -207,12 +207,12 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
typedef typename Rhs::Scalar RhsScalar;
typedef typename Dest::Scalar ResScalar;
typedef typename Dest::RealScalar RealScalar;
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
@@ -300,7 +300,7 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef typename Dest::Scalar ResScalar;
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
@@ -386,7 +386,7 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
*/
template<typename Derived>
template<typename OtherDerived>
inline const Product<Derived, OtherDerived>
EIGEN_DEVICE_FUNC inline const Product<Derived, OtherDerived>
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
{
// A note regarding the function declaration: In MSVC, this function will sometimes
@@ -428,7 +428,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
*/
template<typename Derived>
template<typename OtherDerived>
const Product<Derived,OtherDerived,LazyProduct>
EIGEN_DEVICE_FUNC const Product<Derived,OtherDerived,LazyProduct>
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
{
enum {

View File

@@ -237,7 +237,7 @@ ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
* For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
* replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
* Currently, this function is only used in matrix products.
* For packet-size smaller or equal to 4, this function is equivalent to pload1
* For packet-size smaller or equal to 4, this function is equivalent to pload1
*/
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
ploadquad(const typename unpacket_traits<Packet>::type* from)
@@ -299,7 +299,7 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
{
#ifdef __CUDA_ARCH__
#if defined(__LP64__)
#if defined(__LP64__) || EIGEN_OS_WIN64
// 64-bit pointer operand constraint for inlined asm
asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
#else
@@ -359,77 +359,77 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet
***************************/
/** \internal \returns the sine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet psin(const Packet& a) { using std::sin; return sin(a); }
/** \internal \returns the cosine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pcos(const Packet& a) { using std::cos; return cos(a); }
/** \internal \returns the tan of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ptan(const Packet& a) { using std::tan; return tan(a); }
/** \internal \returns the arc sine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pasin(const Packet& a) { using std::asin; return asin(a); }
/** \internal \returns the arc cosine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pacos(const Packet& a) { using std::acos; return acos(a); }
/** \internal \returns the arc tangent of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet patan(const Packet& a) { using std::atan; return atan(a); }
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
/** \internal \returns the exp of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pexp(const Packet& a) { using std::exp; return exp(a); }
/** \internal \returns the log of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet plog(const Packet& a) { using std::log; return log(a); }
/** \internal \returns the log1p of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet plog1p(const Packet& a) { return numext::log1p(a); }
/** \internal \returns the log10 of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet plog10(const Packet& a) { using std::log10; return log10(a); }
/** \internal \returns the square-root of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet prsqrt(const Packet& a) {
return pdiv(pset1<Packet>(1), psqrt(a));
}
/** \internal \returns the rounded value of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pround(const Packet& a) { using numext::round; return round(a); }
/** \internal \returns the floor of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
/** \internal \returns the ceil of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
/***************************************************************************
@@ -494,14 +494,14 @@ struct palign_impl
/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
* of \a first and \a Offset first elements of \a second.
*
*
* This function is currently only used to optimize matrix-vector products on unligned matrices.
* It takes 2 packets that represent a contiguous memory array, and returns a packet starting
* at the position \a Offset. For instance, for packets of 4 elements, we have:
* Input:
* - first = {f0,f1,f2,f3}
* - second = {s0,s1,s2,s3}
* Output:
* Output:
* - if Offset==0 then {f0,f1,f2,f3}
* - if Offset==1 then {f1,f2,f3,s0}
* - if Offset==2 then {f2,f3,s0,s1}
@@ -518,7 +518,7 @@ inline void palign(PacketType& first, const PacketType& second)
***************************************************************************/
// Eigen+CUDA does not support complexes.
#ifndef __CUDACC__
#ifndef EIGEN_CUDACC
template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
{ return std::complex<float>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }

View File

@@ -461,6 +461,65 @@ struct arg_retval
typedef typename NumTraits<Scalar>::Real type;
};
/****************************************************************************
* Implementation of expm1 *
****************************************************************************/
// This implementation is based on GSL Math's expm1.
namespace std_fallback {
// fallback expm1 implementation in case there is no expm1(Scalar) function in namespace of Scalar,
// or that there is no suitable std::expm1 function available. Implementation
// attributed to Kahan. See: http://www.plunk.org/~hatch/rightway.php.
template<typename Scalar>
EIGEN_DEVICE_FUNC inline Scalar expm1(const Scalar& x) {
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_USING_STD_MATH(exp);
Scalar u = exp(x);
if (numext::equal_strict(u, Scalar(1))) {
return x;
}
Scalar um1 = u - RealScalar(1);
if (numext::equal_strict(um1, Scalar(-1))) {
return RealScalar(-1);
}
EIGEN_USING_STD_MATH(log);
return (u - RealScalar(1)) * x / log(u);
}
}
template<typename Scalar>
struct expm1_impl {
EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x)
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
#if EIGEN_HAS_CXX11_MATH
using std::expm1;
#else
using std_fallback::expm1;
#endif
return expm1(x);
}
};
// Specialization for complex types that are not supported by std::expm1.
template <typename RealScalar>
struct expm1_impl<std::complex<RealScalar> > {
EIGEN_DEVICE_FUNC static inline std::complex<RealScalar> run(
const std::complex<RealScalar>& x) {
EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)
return std_fallback::expm1(x);
}
};
template<typename Scalar>
struct expm1_retval
{
typedef Scalar type;
};
/****************************************************************************
* Implementation of log1p *
****************************************************************************/
@@ -480,17 +539,27 @@ namespace std_fallback {
template<typename Scalar>
struct log1p_impl {
static inline Scalar run(const Scalar& x)
static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x)
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
#if EIGEN_HAS_CXX11_MATH
using std::log1p;
#endif
#else
using std_fallback::log1p;
#endif
return log1p(x);
}
};
// Specialization for complex types that are not supported by std::log1p.
template <typename RealScalar>
struct log1p_impl<std::complex<RealScalar> > {
EIGEN_DEVICE_FUNC static inline std::complex<RealScalar> run(
const std::complex<RealScalar>& x) {
EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)
return std_fallback::log1p(x);
}
};
template<typename Scalar>
struct log1p_retval
@@ -555,19 +624,6 @@ struct random_retval
template<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y);
template<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random();
template<typename Scalar>
struct random_default_impl<Scalar, false, false>
{
static inline Scalar run(const Scalar& x, const Scalar& y)
{
return x + (y-x) * Scalar(std::rand()) / Scalar(RAND_MAX);
}
static inline Scalar run()
{
return run(Scalar(NumTraits<Scalar>::IsSigned ? -1 : 0), Scalar(1));
}
};
enum {
meta_floor_log2_terminate,
meta_floor_log2_move_up,
@@ -615,6 +671,38 @@ struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
// no value, error at compile time
};
#define EIGEN_RAND_MAX INT_MAX
// Fill a signed positive int with random bits.
// This is to overcome issues in MSVC which limits RAND_MAX to 32767.
inline int random_int() {
#if RAND_MAX == INT_MAX
return std::rand();
#else
enum {
rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
int_bits = meta_floor_log2<(unsigned int)(INT_MAX)+1>::value,
};
unsigned int out = std::rand();
for (int bit = rand_bits; bit < int(int_bits); bit += rand_bits) {
out = (out << rand_bits) ^ std::rand();
}
return static_cast<int>(out & INT_MAX);
#endif
}
template<typename Scalar>
struct random_default_impl<Scalar, false, false>
{
static inline Scalar run(const Scalar& x, const Scalar& y)
{
return x + (y-x) * Scalar(random_int()) / Scalar(EIGEN_RAND_MAX);
}
static inline Scalar run()
{
return run(Scalar(NumTraits<Scalar>::IsSigned ? -1 : 0), Scalar(1));
}
};
template<typename Scalar>
struct random_default_impl<Scalar, false, true>
{
@@ -635,12 +723,12 @@ struct random_default_impl<Scalar, false, true>
ScalarX offset = 0;
ScalarX divisor = 1;
ScalarX multiplier = 1;
const unsigned rand_max = RAND_MAX;
const unsigned rand_max = EIGEN_RAND_MAX;
if (range <= rand_max) divisor = (rand_max + 1) / (range + 1);
else multiplier = 1 + range / (rand_max + 1);
// Rejection sampling.
do {
offset = (unsigned(std::rand()) * multiplier) / divisor;
offset = (unsigned(random_int()) * multiplier) / divisor;
} while (offset > range);
return Scalar(ScalarX(x) + offset);
}
@@ -650,12 +738,12 @@ struct random_default_impl<Scalar, false, true>
#ifdef EIGEN_MAKING_DOCS
return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
#else
enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
enum { rand_bits = meta_floor_log2<(unsigned int)(EIGEN_RAND_MAX)+1>::value,
scalar_bits = sizeof(Scalar) * CHAR_BIT,
shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),
offset = NumTraits<Scalar>::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0
};
return Scalar((std::rand() >> shift) - offset);
return Scalar((random_int() >> shift) - offset);
#endif
}
};
@@ -943,7 +1031,7 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x)
return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x);
}
#ifdef __CUDACC__
#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float log1p(const float &x) { return ::log1pf(x); }
@@ -977,7 +1065,7 @@ T (floor)(const T& x)
return floor(x);
}
#ifdef __CUDACC__
#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float floor(const float &x) { return ::floorf(x); }
@@ -993,7 +1081,7 @@ T (ceil)(const T& x)
return ceil(x);
}
#ifdef __CUDACC__
#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float ceil(const float &x) { return ::ceilf(x); }
@@ -1041,7 +1129,7 @@ T log(const T &x) {
return log(x);
}
#ifdef __CUDACC__
#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float log(const float &x) { return ::logf(x); }
@@ -1069,7 +1157,7 @@ EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); }
EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
#ifdef __CUDACC__
#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float abs(const float &x) { return ::fabsf(x); }
@@ -1094,7 +1182,7 @@ T exp(const T &x) {
return exp(x);
}
#ifdef __CUDACC__
#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float exp(const float &x) { return ::expf(x); }
@@ -1109,7 +1197,7 @@ T cos(const T &x) {
return cos(x);
}
#ifdef __CUDACC__
#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float cos(const float &x) { return ::cosf(x); }
@@ -1124,7 +1212,7 @@ T sin(const T &x) {
return sin(x);
}
#ifdef __CUDACC__
#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float sin(const float &x) { return ::sinf(x); }
@@ -1139,7 +1227,7 @@ T tan(const T &x) {
return tan(x);
}
#ifdef __CUDACC__
#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float tan(const float &x) { return ::tanf(x); }
@@ -1154,7 +1242,7 @@ T acos(const T &x) {
return acos(x);
}
#ifdef __CUDACC__
#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float acos(const float &x) { return ::acosf(x); }
@@ -1169,7 +1257,7 @@ T asin(const T &x) {
return asin(x);
}
#ifdef __CUDACC__
#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float asin(const float &x) { return ::asinf(x); }
@@ -1184,7 +1272,7 @@ T atan(const T &x) {
return atan(x);
}
#ifdef __CUDACC__
#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float atan(const float &x) { return ::atanf(x); }
@@ -1200,7 +1288,7 @@ T cosh(const T &x) {
return cosh(x);
}
#ifdef __CUDACC__
#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float cosh(const float &x) { return ::coshf(x); }
@@ -1215,7 +1303,7 @@ T sinh(const T &x) {
return sinh(x);
}
#ifdef __CUDACC__
#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float sinh(const float &x) { return ::sinhf(x); }
@@ -1230,12 +1318,12 @@ T tanh(const T &x) {
return tanh(x);
}
#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH
#if (!defined(EIGEN_CUDACC)) && EIGEN_FAST_MATH
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float tanh(float x) { return internal::generic_fast_tanh_float(x); }
#endif
#ifdef __CUDACC__
#ifdef EIGEN_CUDACC
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float tanh(const float &x) { return ::tanhf(x); }
@@ -1250,7 +1338,7 @@ T fmod(const T& a, const T& b) {
return fmod(a, b);
}
#ifdef __CUDACC__
#ifdef EIGEN_CUDACC
template <>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float fmod(const float& a, const float& b) {

View File

@@ -99,7 +99,7 @@ template<typename ExpressionType> class NestByValue
/** \returns an expression of the temporary version of *this.
*/
template<typename Derived>
inline const NestByValue<Derived>
EIGEN_DEVICE_FUNC inline const NestByValue<Derived>
DenseBase<Derived>::nestByValue() const
{
return NestByValue<Derived>(derived());

View File

@@ -54,34 +54,34 @@ struct default_digits10_impl<T,false,true> // Integer
*
* The provided data consists of:
* \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real,
* then \c Real is just a typedef to \a T. If \a T is \c std::complex<U> then \c Real
* then \c Real is just a typedef to \a T. If \a T is `std::complex<U>` then \c Real
* is a typedef to \a U.
* \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values,
* such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives
* \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to
* take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is
* only intended as a helper for code that needs to explicitly promote types.
* \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for \c std::complex<U>, Literal is defined as \c U.
* \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for `std::complex<U>`, Literal is defined as \c U.
* Of course, this type must be fully compatible with \a T. In doubt, just use \a T here.
* \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what
* \li A typedef \c Nested giving the type to use to nest a value inside of the expression tree. If you don't know what
* this means, just use \a T here.
* \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex
* \li An enum value \c IsComplex. It is equal to 1 if \a T is a `std::complex`
* type, and to 0 otherwise.
* \li An enum value \a IsInteger. It is equal to \c 1 if \a T is an integer type such as \c int,
* \li An enum value \c IsInteger. It is equal to \c 1 if \a T is an integer type such as \c int,
* and to \c 0 otherwise.
* \li Enum values ReadCost, AddCost and MulCost representing a rough estimate of the number of CPU cycles needed
* \li Enum values \c ReadCost, \c AddCost and \c MulCost representing a rough estimate of the number of CPU cycles needed
* to by move / add / mul instructions respectively, assuming the data is already stored in CPU registers.
* Stay vague here. No need to do architecture-specific stuff.
* \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned.
* \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must
* \li An enum value \c IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned.
* \li An enum value \c RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must
* be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise.
* \li An epsilon() function which, unlike <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/epsilon">std::numeric_limits::epsilon()</a>,
* \li An `epsilon()` function which, unlike <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/epsilon">`std::numeric_limits::epsilon()`</a>,
* it returns a \a Real instead of a \a T.
* \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default
* \li A `dummy_precision()` function returning a weak epsilon value. It is mainly used as a default
* value by the fuzzy comparison operators.
* \li highest() and lowest() functions returning the highest and lowest possible values respectively.
* \li digits10() function returning the number of decimal digits that can be represented without change. This is
* the analogue of <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/digits10">std::numeric_limits<T>::digits10</a>
* \li `highest()` and `lowest()` functions returning the highest and lowest possible values respectively.
* \li `digits10()` function returning the number of decimal digits that can be represented without change. This is
* the analogue of <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/digits10">`std::numeric_limits<T>::digits10`</a>
* which is used as the default implementation if specialized.
*/
@@ -166,7 +166,16 @@ template<> struct NumTraits<double> : GenericNumTraits<double>
template<> struct NumTraits<long double>
: GenericNumTraits<long double>
{
static inline long double dummy_precision() { return 1e-15l; }
static inline long double dummy_precision() { return static_cast<long double>(1e-15l); }
#if defined(EIGEN_ARCH_PPC) && (__LDBL_MANT_DIG__ == 106)
// PowerPC double double causes issues with some values
static inline long double epsilon()
{
// 2^(-(__LDBL_MANT_DIG__)+1)
return static_cast<long double>(2.4651903288156618919116517665087e-32l);
}
#endif
};
template<typename _Real> struct NumTraits<std::complex<_Real> >

View File

@@ -87,17 +87,6 @@ class PermutationBase : public EigenBase<Derived>
return derived();
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
Derived& operator=(const PermutationBase& other)
{
indices() = other.indices();
return derived();
}
#endif
/** \returns the number of rows */
inline Index rows() const { return Index(indices().size()); }
@@ -333,12 +322,6 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
inline PermutationMatrix(const PermutationBase<OtherDerived>& other)
: m_indices(other.indices()) {}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** Standard copy constructor. Defined only to prevent a default copy constructor
* from hiding the other templated constructor */
inline PermutationMatrix(const PermutationMatrix& other) : m_indices(other.indices()) {}
#endif
/** Generic constructor from expression of the indices. The indices
* array has the meaning that the permutations sends each integer i to indices[i].
*
@@ -373,17 +356,6 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
return Base::operator=(tr.derived());
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
PermutationMatrix& operator=(const PermutationMatrix& other)
{
m_indices = other.m_indices;
return *this;
}
#endif
/** const version of indices(). */
const IndicesType& indices() const { return m_indices; }
/** \returns a reference to the stored array representing the permutation. */

View File

@@ -14,7 +14,7 @@
#define EIGEN_PRODUCTEVALUATORS_H
namespace Eigen {
namespace internal {
/** \internal
@@ -22,19 +22,19 @@ namespace internal {
* Since products require special treatments to handle all possible cases,
* we simply deffer the evaluation logic to a product_evaluator class
* which offers more partial specialization possibilities.
*
*
* \sa class product_evaluator
*/
template<typename Lhs, typename Rhs, int Options>
struct evaluator<Product<Lhs, Rhs, Options> >
struct evaluator<Product<Lhs, Rhs, Options> >
: public product_evaluator<Product<Lhs, Rhs, Options> >
{
typedef Product<Lhs, Rhs, Options> XprType;
typedef product_evaluator<XprType> Base;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
};
// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
// TODO we should apply that rule only if that's really helpful
template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
@@ -62,12 +62,12 @@ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
template<typename Lhs, typename Rhs, int DiagIndex>
struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
: public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
{
typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
: Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
@@ -108,23 +108,23 @@ struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsSh
: m_result(xpr.rows(), xpr.cols())
{
::new (static_cast<Base*>(this)) Base(m_result);
// FIXME shall we handle nested_eval here?,
// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
// typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
// typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
// typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
// typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
//
//
// const LhsNested lhs(xpr.lhs());
// const RhsNested rhs(xpr.rhs());
//
//
// generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
}
protected:
protected:
PlainObject m_result;
};
@@ -137,7 +137,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
{
typedef Product<Lhs,Rhs,Options> SrcXprType;
static EIGEN_STRONG_INLINE
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
Index dstRows = src.rows();
@@ -155,7 +155,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
{
typedef Product<Lhs,Rhs,Options> SrcXprType;
static EIGEN_STRONG_INLINE
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -170,7 +170,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
{
typedef Product<Lhs,Rhs,Options> SrcXprType;
static EIGEN_STRONG_INLINE
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -190,7 +190,7 @@ struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBi
typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
static EIGEN_STRONG_INLINE
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
{
call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
@@ -250,13 +250,13 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
{
dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
}
template<typename Dst>
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
}
template<typename Dst>
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
@@ -298,7 +298,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
{
template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
// TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
@@ -310,31 +310,31 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
dst.const_cast_derived() += m_scale * src;
}
};
template<typename Dst>
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
}
template<typename Dst>
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
}
template<typename Dst>
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
}
template<typename Dst>
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{
internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
}
};
@@ -343,7 +343,7 @@ template<typename Lhs, typename Rhs, typename Derived>
struct generic_product_impl_base
{
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
template<typename Dst>
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
@@ -355,7 +355,7 @@ struct generic_product_impl_base
template<typename Dst>
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
template<typename Dst>
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{ Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
@@ -385,12 +385,12 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
};
template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
{
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
template<typename Dst>
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
// Same as: dst.noalias() = lhs.lazyProduct(rhs);
// but easier on the compiler side
@@ -403,7 +403,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
// dst.noalias() += lhs.lazyProduct(rhs);
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
}
template<typename Dst>
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
@@ -435,8 +435,8 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
{
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
}
// template<typename Dst>
// static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
// { dst.noalias() += alpha * lhs.lazyProduct(rhs); }
@@ -497,7 +497,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
@@ -516,7 +516,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
enum {
LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
@@ -525,10 +525,10 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
LhsFlags = LhsEtorType::Flags,
RhsFlags = RhsEtorType::Flags,
LhsRowMajor = LhsFlags & RowMajorBit,
RhsRowMajor = RhsFlags & RowMajorBit,
@@ -538,7 +538,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
// Here, we don't care about alignment larger than the usable packet size.
LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
@@ -553,7 +553,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
// TODO enable vectorization for mixed types
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
| (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
@@ -572,7 +572,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
&& (InnerSize % packet_traits<Scalar>::size == 0)
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
{
return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
@@ -611,7 +611,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
protected:
typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
LhsEtorType m_lhsImpl;
RhsEtorType m_rhsImpl;
@@ -730,7 +730,7 @@ struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
{
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
template<typename Dest>
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{
@@ -744,7 +744,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
{
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
template<typename Dest>
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{
@@ -765,7 +765,7 @@ struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
{
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
template<typename Dest>
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{
@@ -778,7 +778,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
{
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
template<typename Dest>
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{
@@ -790,7 +790,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
/***************************************************************************
* Diagonal products
***************************************************************************/
template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
struct diagonal_product_evaluator_base
: evaluator_base<Derived>
@@ -799,7 +799,7 @@ struct diagonal_product_evaluator_base
public:
enum {
CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
MatrixFlags = evaluator<MatrixType>::Flags,
DiagFlags = evaluator<DiagonalType>::Flags,
_StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
@@ -817,14 +817,14 @@ public:
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
};
diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
: m_diagImpl(diag), m_matImpl(mat)
{
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
{
if(AsScalarProduct)
@@ -832,7 +832,7 @@ public:
else
return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
}
protected:
template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
@@ -840,7 +840,7 @@ protected:
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
internal::pset1<PacketType>(m_diagImpl.coeff(id)));
}
template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
{
@@ -851,7 +851,7 @@ protected:
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
}
evaluator<DiagonalType> m_diagImpl;
evaluator<MatrixType> m_matImpl;
};
@@ -866,10 +866,10 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
using Base::m_matImpl;
using Base::coeff;
typedef typename Base::Scalar Scalar;
typedef Product<Lhs, Rhs, ProductKind> XprType;
typedef typename XprType::PlainObject PlainObject;
enum {
StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor
};
@@ -878,12 +878,12 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
: Base(xpr.rhs(), xpr.lhs().diagonal())
{
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
{
return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
}
#ifndef __CUDACC__
template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
@@ -893,7 +893,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
return this->template packet_impl<LoadMode,PacketType>(row,col, row,
typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
}
template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet(Index idx) const
{
@@ -912,22 +912,22 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
using Base::m_matImpl;
using Base::coeff;
typedef typename Base::Scalar Scalar;
typedef Product<Lhs, Rhs, ProductKind> XprType;
typedef typename XprType::PlainObject PlainObject;
enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor };
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
: Base(xpr.lhs(), xpr.rhs().diagonal())
{
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
{
return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
}
#ifndef __CUDACC__
template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
@@ -935,7 +935,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
return this->template packet_impl<LoadMode,PacketType>(row,col, col,
typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
}
template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet(Index idx) const
{
@@ -1017,7 +1017,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
static EIGEN_DEVICE_FUNC void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
{
permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
}
@@ -1027,7 +1027,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
static EIGEN_DEVICE_FUNC void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
{
permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
}
@@ -1037,7 +1037,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
static EIGEN_DEVICE_FUNC void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
{
permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
}
@@ -1047,7 +1047,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
static EIGEN_DEVICE_FUNC void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
{
permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
}
@@ -1069,7 +1069,7 @@ struct transposition_matrix_product
{
typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
template<typename Dest, typename TranspositionType>
static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
{
@@ -1094,7 +1094,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
static EIGEN_DEVICE_FUNC void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
{
transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
}
@@ -1104,7 +1104,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
static EIGEN_DEVICE_FUNC void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
{
transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
}
@@ -1115,7 +1115,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
static EIGEN_DEVICE_FUNC void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
{
transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
}
@@ -1125,7 +1125,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
static EIGEN_DEVICE_FUNC void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
{
transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
}

View File

@@ -10,7 +10,7 @@
#ifndef EIGEN_RANDOM_H
#define EIGEN_RANDOM_H
namespace Eigen {
namespace Eigen {
namespace internal {
@@ -29,16 +29,16 @@ struct functor_traits<scalar_random_op<Scalar> >
*
* Numbers are uniformly spread through their whole definition range for integer types,
* and in the [-1:1] range for floating point scalar types.
*
*
* The parameters \a rows and \a cols are the number of rows and of columns of
* the returned matrix. Must be compatible with this MatrixBase type.
*
* \not_reentrant
*
*
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
* it is redundant to pass \a rows and \a cols as arguments, so Random() should be used
* instead.
*
*
*
* Example: \include MatrixBase_random_int_int.cpp
* Output: \verbinclude MatrixBase_random_int_int.out
@@ -46,7 +46,7 @@ struct functor_traits<scalar_random_op<Scalar> >
* This expression has the "evaluate before nesting" flag so that it will be evaluated into
* a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
* behavior with expressions involving random matrices.
*
*
* See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators.
*
* \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random()
@@ -93,7 +93,7 @@ DenseBase<Derived>::Random(Index size)
*
* Numbers are uniformly spread through their whole definition range for integer types,
* and in the [-1:1] range for floating point scalar types.
*
*
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
* need to use the variants taking size arguments.
*
@@ -103,7 +103,7 @@ DenseBase<Derived>::Random(Index size)
* This expression has the "evaluate before nesting" flag so that it will be evaluated into
* a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
* behavior with expressions involving random matrices.
*
*
* \not_reentrant
*
* \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index)
@@ -119,16 +119,16 @@ DenseBase<Derived>::Random()
*
* Numbers are uniformly spread through their whole definition range for integer types,
* and in the [-1:1] range for floating point scalar types.
*
*
* \not_reentrant
*
*
* Example: \include MatrixBase_setRandom.cpp
* Output: \verbinclude MatrixBase_setRandom.out
*
* \sa class CwiseNullaryOp, setRandom(Index), setRandom(Index,Index)
*/
template<typename Derived>
inline Derived& DenseBase<Derived>::setRandom()
EIGEN_DEVICE_FUNC inline Derived& DenseBase<Derived>::setRandom()
{
return *this = Random(rows(), cols());
}
@@ -137,7 +137,7 @@ inline Derived& DenseBase<Derived>::setRandom()
*
* Numbers are uniformly spread through their whole definition range for integer types,
* and in the [-1:1] range for floating point scalar types.
*
*
* \only_for_vectors
* \not_reentrant
*
@@ -160,7 +160,7 @@ PlainObjectBase<Derived>::setRandom(Index newSize)
* and in the [-1:1] range for floating point scalar types.
*
* \not_reentrant
*
*
* \param rows the new number of rows
* \param cols the new number of columns
*

View File

@@ -11,7 +11,7 @@
#ifndef EIGEN_REDUX_H
#define EIGEN_REDUX_H
namespace Eigen {
namespace Eigen {
namespace internal {
@@ -60,7 +60,7 @@ public:
enum {
Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
};
#ifdef EIGEN_DEBUG_ASSIGN
static void debug()
{
@@ -128,7 +128,7 @@ template<typename Func, typename Derived, int Start>
struct redux_novec_unroller<Func, Derived, Start, 0>
{
typedef typename Derived::Scalar Scalar;
EIGEN_DEVICE_FUNC
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
};
@@ -215,7 +215,7 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
static Scalar run(const Derived &mat, const Func& func)
{
const Index size = mat.size();
const Index packetSize = redux_traits<Func, Derived>::PacketSize;
const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
enum {
@@ -336,12 +336,12 @@ class redux_evaluator
public:
typedef _XprType XprType;
EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketScalar PacketScalar;
typedef typename XprType::PacketReturnType PacketReturnType;
enum {
MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
@@ -353,7 +353,7 @@ public:
CoeffReadCost = evaluator<XprType>::CoeffReadCost,
Alignment = evaluator<XprType>::Alignment
};
EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
@@ -375,17 +375,17 @@ public:
template<int LoadMode, typename PacketType>
PacketType packet(Index index) const
{ return m_evaluator.template packet<LoadMode,PacketType>(index); }
EIGEN_DEVICE_FUNC
CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
{ return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
template<int LoadMode, typename PacketType>
PacketType packetByOuterInner(Index outer, Index inner) const
{ return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
const XprType & nestedExpression() const { return m_xpr; }
protected:
internal::evaluator<XprType> m_evaluator;
const XprType &m_xpr;
@@ -407,14 +407,14 @@ protected:
*/
template<typename Derived>
template<typename Func>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::redux(const Func& func) const
{
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
ThisEvaluator thisEval(derived());
return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
}
@@ -422,7 +422,7 @@ DenseBase<Derived>::redux(const Func& func) const
* \warning the result is undefined if \c *this contains NaN.
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff() const
{
return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
@@ -432,7 +432,7 @@ DenseBase<Derived>::minCoeff() const
* \warning the result is undefined if \c *this contains NaN.
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff() const
{
return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
@@ -445,7 +445,7 @@ DenseBase<Derived>::maxCoeff() const
* \sa trace(), prod(), mean()
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::sum() const
{
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
@@ -458,7 +458,7 @@ DenseBase<Derived>::sum() const
* \sa trace(), prod(), sum()
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::mean() const
{
#ifdef __INTEL_COMPILER
@@ -479,7 +479,7 @@ DenseBase<Derived>::mean() const
* \sa sum(), mean(), trace()
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::prod() const
{
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
@@ -494,7 +494,7 @@ DenseBase<Derived>::prod() const
* \sa diagonal(), sum()
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
MatrixBase<Derived>::trace() const
{
return derived().diagonal().sum();

View File

@@ -10,7 +10,7 @@
#ifndef EIGEN_REPLICATE_H
#define EIGEN_REPLICATE_H
namespace Eigen {
namespace Eigen {
namespace internal {
template<typename MatrixType,int RowFactor,int ColFactor>
@@ -35,7 +35,7 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1
: MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0
: (MatrixType::Flags & RowMajorBit) ? 1 : 0,
// FIXME enable DirectAccess with negative strides?
Flags = IsRowMajor ? RowMajorBit : 0
};
@@ -95,8 +95,8 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
EIGEN_DEVICE_FUNC
const _MatrixTypeNested& nestedExpression() const
{
return m_matrix;
{
return m_matrix;
}
protected:
@@ -115,7 +115,7 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
*/
template<typename Derived>
template<int RowFactor, int ColFactor>
const Replicate<Derived,RowFactor,ColFactor>
EIGEN_DEVICE_FUNC const Replicate<Derived,RowFactor,ColFactor>
DenseBase<Derived>::replicate() const
{
return Replicate<Derived,RowFactor,ColFactor>(derived());
@@ -130,7 +130,7 @@ DenseBase<Derived>::replicate() const
* \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate
*/
template<typename ExpressionType, int Direction>
const typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType
EIGEN_DEVICE_FUNC const typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType
VectorwiseOp<ExpressionType,Direction>::replicate(Index factor) const
{
return typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType

View File

@@ -79,7 +79,7 @@ template<typename Derived> class ReturnByValue
template<typename Derived>
template<typename OtherDerived>
Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
EIGEN_DEVICE_FUNC Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
{
other.evalTo(derived());
return derived();
@@ -90,7 +90,7 @@ namespace internal {
// Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that
// when a ReturnByValue expression is assigned, the evaluator is not constructed.
// TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world
template<typename Derived>
struct evaluator<ReturnByValue<Derived> >
: public evaluator<typename internal::traits<Derived>::ReturnType>
@@ -98,7 +98,7 @@ struct evaluator<ReturnByValue<Derived> >
typedef ReturnByValue<Derived> XprType;
typedef typename internal::traits<Derived>::ReturnType PlainObject;
typedef evaluator<PlainObject> Base;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
: m_result(xpr.rows(), xpr.cols())
{

View File

@@ -12,7 +12,7 @@
#ifndef EIGEN_REVERSE_H
#define EIGEN_REVERSE_H
namespace Eigen {
namespace Eigen {
namespace internal {
@@ -44,7 +44,7 @@ template<typename PacketType> struct reverse_packet_cond<PacketType,false>
static inline PacketType run(const PacketType& x) { return x; }
};
} // end namespace internal
} // end namespace internal
/** \class Reverse
* \ingroup Core_Module
@@ -98,7 +98,7 @@ template<typename MatrixType, int Direction> class Reverse
}
EIGEN_DEVICE_FUNC const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const
nestedExpression() const
{
return m_matrix;
}
@@ -114,7 +114,7 @@ template<typename MatrixType, int Direction> class Reverse
*
*/
template<typename Derived>
inline typename DenseBase<Derived>::ReverseReturnType
EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::ReverseReturnType
DenseBase<Derived>::reverse()
{
return ReverseReturnType(derived());
@@ -136,7 +136,7 @@ DenseBase<Derived>::reverse()
*
* \sa VectorwiseOp::reverseInPlace(), reverse() */
template<typename Derived>
inline void DenseBase<Derived>::reverseInPlace()
EIGEN_DEVICE_FUNC inline void DenseBase<Derived>::reverseInPlace()
{
if(cols()>rows())
{
@@ -161,7 +161,7 @@ inline void DenseBase<Derived>::reverseInPlace()
}
namespace internal {
template<int Direction>
struct vectorwise_reverse_inplace_impl;
@@ -201,7 +201,7 @@ struct vectorwise_reverse_inplace_impl<Horizontal>
*
* \sa DenseBase::reverseInPlace(), reverse() */
template<typename ExpressionType, int Direction>
void VectorwiseOp<ExpressionType,Direction>::reverseInPlace()
EIGEN_DEVICE_FUNC void VectorwiseOp<ExpressionType,Direction>::reverseInPlace()
{
internal::vectorwise_reverse_inplace_impl<Direction>::run(_expression().const_cast_derived());
}

View File

@@ -10,7 +10,7 @@
#ifndef EIGEN_SELFADJOINTMATRIX_H
#define EIGEN_SELFADJOINTMATRIX_H
namespace Eigen {
namespace Eigen {
/** \class SelfAdjointView
* \ingroup Core_Module
@@ -58,7 +58,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
typedef MatrixTypeNestedCleaned NestedExpression;
/** \brief The type of coefficients in this matrix */
typedef typename internal::traits<SelfAdjointView>::Scalar Scalar;
typedef typename internal::traits<SelfAdjointView>::Scalar Scalar;
typedef typename MatrixType::StorageIndex StorageIndex;
typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
@@ -131,7 +131,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
{
return Product<OtherDerived,SelfAdjointView>(lhs.derived(),rhs);
}
friend EIGEN_DEVICE_FUNC
const SelfAdjointView<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,MatrixType,product),UpLo>
operator*(const Scalar& s, const SelfAdjointView& mat)
@@ -287,17 +287,17 @@ protected:
using Base::m_src;
using Base::m_functor;
public:
typedef typename Base::DstEvaluatorType DstEvaluatorType;
typedef typename Base::SrcEvaluatorType SrcEvaluatorType;
typedef typename Base::Scalar Scalar;
typedef typename Base::AssignmentTraits AssignmentTraits;
EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
: Base(dst, src, func, dstExpr)
{}
EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)
{
eigen_internal_assert(row!=col);
@@ -305,12 +305,12 @@ public:
m_functor.assignCoeff(m_dst.coeffRef(row,col), tmp);
m_functor.assignCoeff(m_dst.coeffRef(col,row), numext::conj(tmp));
}
EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id)
{
Base::assignCoeff(id,id);
}
EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index, Index)
{ eigen_internal_assert(false && "should never be called"); }
};
@@ -324,7 +324,7 @@ public:
/** This is the const version of MatrixBase::selfadjointView() */
template<typename Derived>
template<unsigned int UpLo>
typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type
EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type
MatrixBase<Derived>::selfadjointView() const
{
return typename ConstSelfAdjointViewReturnType<UpLo>::Type(derived());
@@ -341,7 +341,7 @@ MatrixBase<Derived>::selfadjointView() const
*/
template<typename Derived>
template<unsigned int UpLo>
typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type
EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type
MatrixBase<Derived>::selfadjointView()
{
return typename SelfAdjointViewReturnType<UpLo>::Type(derived());

View File

@@ -13,7 +13,7 @@
namespace Eigen {
template<typename Decomposition, typename RhsType, typename StorageKind> class SolveImpl;
/** \class Solve
* \ingroup Core_Module
*
@@ -64,11 +64,11 @@ class Solve : public SolveImpl<Decomposition,RhsType,typename internal::traits<R
public:
typedef typename internal::traits<Solve>::PlainObject PlainObject;
typedef typename internal::traits<Solve>::StorageIndex StorageIndex;
Solve(const Decomposition &dec, const RhsType &rhs)
: m_dec(dec), m_rhs(rhs)
{}
EIGEN_DEVICE_FUNC Index rows() const { return m_dec.cols(); }
EIGEN_DEVICE_FUNC Index cols() const { return m_rhs.cols(); }
@@ -87,14 +87,14 @@ class SolveImpl<Decomposition,RhsType,Dense>
: public MatrixBase<Solve<Decomposition,RhsType> >
{
typedef Solve<Decomposition,RhsType> Derived;
public:
typedef MatrixBase<Solve<Decomposition,RhsType> > Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
private:
Scalar coeff(Index row, Index col) const;
Scalar coeff(Index i) const;
};
@@ -119,15 +119,15 @@ struct evaluator<Solve<Decomposition,RhsType> >
typedef evaluator<PlainObject> Base;
enum { Flags = Base::Flags | EvalBeforeNestingBit };
EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve)
: m_result(solve.rows(), solve.cols())
{
::new (static_cast<Base*>(this)) Base(m_result);
solve.dec()._solve_impl(solve.rhs(), m_result);
}
protected:
protected:
PlainObject m_result;
};
@@ -137,7 +137,7 @@ template<typename DstXprType, typename DecType, typename RhsType, typename Scala
struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>
{
typedef Solve<DecType,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
@@ -153,7 +153,7 @@ template<typename DstXprType, typename DecType, typename RhsType, typename Scala
struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>
{
typedef Solve<Transpose<const DecType>,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
@@ -170,13 +170,13 @@ struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<t
internal::assign_op<Scalar,Scalar>, Dense2Dense>
{
typedef Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed<true>(src.rhs(), dst);
}
};

View File

@@ -10,7 +10,7 @@
#ifndef EIGEN_SOLVETRIANGULAR_H
#define EIGEN_SOLVETRIANGULAR_H
namespace Eigen {
namespace Eigen {
namespace internal {
@@ -64,7 +64,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhs,rhs.size(),
(useRhsDirectly ? rhs.data() : 0));
if(!useRhsDirectly)
MappedRhs(actualRhs,rhs.size()) = rhs;
@@ -148,7 +148,7 @@ struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
{
Transpose<const Lhs> trLhs(lhs);
Transpose<Rhs> trRhs(rhs);
triangular_solver_unroller<Transpose<const Lhs>,Transpose<Rhs>,
((Mode&Upper)==Upper ? Lower : Upper) | (Mode&UnitDiag),
0,Rhs::SizeAtCompileTime>::run(trLhs,trRhs);
@@ -164,7 +164,7 @@ struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename MatrixType, unsigned int Mode>
template<int Side, typename OtherDerived>
void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
{
OtherDerived& other = _other.const_cast_derived();
eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );
@@ -187,7 +187,7 @@ void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<Ot
template<typename Derived, unsigned int Mode>
template<int Side, typename Other>
const internal::triangular_solve_retval<Side,TriangularView<Derived,Mode>,Other>
EIGEN_DEVICE_FUNC const internal::triangular_solve_retval<Side,TriangularView<Derived,Mode>,Other>
TriangularViewImpl<Derived,Mode,Dense>::solve(const MatrixBase<Other>& other) const
{
return internal::triangular_solve_retval<Side,TriangularViewType,Other>(derived(), other.derived());

View File

@@ -11,7 +11,7 @@
#ifndef EIGEN_TRANSPOSE_H
#define EIGEN_TRANSPOSE_H
namespace Eigen {
namespace Eigen {
namespace internal {
template<typename MatrixType>
@@ -170,7 +170,7 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
*
* \sa transposeInPlace(), adjoint() */
template<typename Derived>
inline Transpose<Derived>
EIGEN_DEVICE_FUNC inline Transpose<Derived>
DenseBase<Derived>::transpose()
{
return TransposeReturnType(derived());
@@ -182,7 +182,7 @@ DenseBase<Derived>::transpose()
*
* \sa transposeInPlace(), adjoint() */
template<typename Derived>
inline typename DenseBase<Derived>::ConstTransposeReturnType
EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::ConstTransposeReturnType
DenseBase<Derived>::transpose() const
{
return ConstTransposeReturnType(derived());
@@ -208,7 +208,7 @@ DenseBase<Derived>::transpose() const
*
* \sa adjointInPlace(), transpose(), conjugate(), class Transpose, class internal::scalar_conjugate_op */
template<typename Derived>
inline const typename MatrixBase<Derived>::AdjointReturnType
EIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::AdjointReturnType
MatrixBase<Derived>::adjoint() const
{
return AdjointReturnType(this->transpose());
@@ -278,12 +278,12 @@ struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non squ
* Notice however that this method is only useful if you want to replace a matrix by its own transpose.
* If you just need the transpose of a matrix, use transpose().
*
* \note if the matrix is not square, then \c *this must be a resizable matrix.
* \note if the matrix is not square, then \c *this must be a resizable matrix.
* This excludes (non-square) fixed-size matrices, block-expressions and maps.
*
* \sa transpose(), adjoint(), adjointInPlace() */
template<typename Derived>
inline void DenseBase<Derived>::transposeInPlace()
EIGEN_DEVICE_FUNC inline void DenseBase<Derived>::transposeInPlace()
{
eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic))
&& "transposeInPlace() called on a non-square non-resizable matrix");
@@ -314,7 +314,7 @@ inline void DenseBase<Derived>::transposeInPlace()
*
* \sa transpose(), adjoint(), transposeInPlace() */
template<typename Derived>
inline void MatrixBase<Derived>::adjointInPlace()
EIGEN_DEVICE_FUNC inline void MatrixBase<Derived>::adjointInPlace()
{
derived() = adjoint().eval();
}

View File

@@ -33,17 +33,6 @@ class TranspositionsBase
indices() = other.indices();
return derived();
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
Derived& operator=(const TranspositionsBase& other)
{
indices() = other.indices();
return derived();
}
#endif
/** \returns the number of transpositions */
Index size() const { return indices().size(); }
@@ -171,12 +160,6 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim
inline Transpositions(const TranspositionsBase<OtherDerived>& other)
: m_indices(other.indices()) {}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** Standard copy constructor. Defined only to prevent a default copy constructor
* from hiding the other templated constructor */
inline Transpositions(const Transpositions& other) : m_indices(other.indices()) {}
#endif
/** Generic constructor from expression of the transposition indices. */
template<typename Other>
explicit inline Transpositions(const MatrixBase<Other>& indices) : m_indices(indices)
@@ -189,17 +172,6 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim
return Base::operator=(other);
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
Transpositions& operator=(const Transpositions& other)
{
m_indices = other.m_indices;
return *this;
}
#endif
/** Constructs an uninitialized permutation matrix of given size.
*/
inline Transpositions(Index size) : m_indices(size)
@@ -306,17 +278,6 @@ class TranspositionsWrapper
return Base::operator=(other);
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
TranspositionsWrapper& operator=(const TranspositionsWrapper& other)
{
m_indices = other.m_indices;
return *this;
}
#endif
/** const version of indices(). */
const IndicesType& indices() const { return m_indices; }

View File

@@ -11,12 +11,12 @@
#ifndef EIGEN_TRIANGULARMATRIX_H
#define EIGEN_TRIANGULARMATRIX_H
namespace Eigen {
namespace Eigen {
namespace internal {
template<int Side, typename TriangularType, typename Rhs> struct triangular_solve_retval;
}
/** \class TriangularBase
@@ -34,16 +34,16 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
internal::traits<Derived>::ColsAtCompileTime>::ret),
/**< This is equal to the number of coefficients, i.e. the number of
* rows times the number of columns, or to \a Dynamic if this is not
* known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
internal::traits<Derived>::MaxColsAtCompileTime>::ret)
};
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::traits<Derived>::StorageKind StorageKind;
@@ -63,7 +63,7 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
inline Index outerStride() const { return derived().outerStride(); }
EIGEN_DEVICE_FUNC
inline Index innerStride() const { return derived().innerStride(); }
// dummy resize function
void resize(Index rows, Index cols)
{
@@ -155,7 +155,7 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
* \param MatrixType the type of the object in which we are taking the triangular part
* \param Mode the kind of triangular matrix expression to construct. Can be #Upper,
* #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower.
* This is in fact a bit field; it must have either #Upper or #Lower,
* This is in fact a bit field; it must have either #Upper or #Lower,
* and additionally it may have #UnitDiag or #ZeroDiag or neither.
*
* This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular
@@ -197,7 +197,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
typedef typename internal::traits<TriangularView>::MatrixTypeNestedNonRef MatrixTypeNestedNonRef;
typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
public:
typedef typename internal::traits<TriangularView>::StorageKind StorageKind;
@@ -216,7 +216,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
EIGEN_DEVICE_FUNC
explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix)
{}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TriangularView)
/** \copydoc EigenBase::rows() */
@@ -233,7 +233,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
/** \returns a reference to the nested expression */
EIGEN_DEVICE_FUNC
NestedExpression& nestedExpression() { return m_matrix; }
typedef TriangularView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
/** \sa MatrixBase::conjugate() const */
EIGEN_DEVICE_FUNC
@@ -255,7 +255,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
typename MatrixType::TransposeReturnType tmp(m_matrix);
return TransposeReturnType(tmp);
}
typedef TriangularView<const typename MatrixType::ConstTransposeReturnType,TransposeMode> ConstTransposeReturnType;
/** \sa MatrixBase::transpose() const */
EIGEN_DEVICE_FUNC
@@ -266,10 +266,10 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
template<typename Other>
EIGEN_DEVICE_FUNC
inline const Solve<TriangularView, Other>
inline const Solve<TriangularView, Other>
solve(const MatrixBase<Other>& other) const
{ return Solve<TriangularView, Other>(*this, other.derived()); }
// workaround MSVC ICE
#if EIGEN_COMP_MSVC
template<int Side, typename Other>
@@ -313,7 +313,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
else
return m_matrix.diagonal().prod();
}
protected:
MatrixTypeNested m_matrix;
@@ -375,7 +375,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar,typename Other::Scalar>());
return derived();
}
/** \sa MatrixBase::operator*=() */
EIGEN_DEVICE_FUNC
TriangularViewType& operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = derived().nestedExpression() * other; }
@@ -556,7 +556,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
// FIXME should we keep that possibility
template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived>
inline TriangularView<MatrixType, Mode>&
EIGEN_DEVICE_FUNC inline TriangularView<MatrixType, Mode>&
TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDerived>& other)
{
internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
@@ -566,7 +566,7 @@ TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDer
// FIXME should we keep that possibility
template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived>
void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other)
EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other)
{
internal::call_assignment_no_alias(derived(), other.template triangularView<Mode>());
}
@@ -575,7 +575,7 @@ void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<Ot
template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived>
inline TriangularView<MatrixType, Mode>&
EIGEN_DEVICE_FUNC inline TriangularView<MatrixType, Mode>&
TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const TriangularBase<OtherDerived>& other)
{
eigen_assert(Mode == int(OtherDerived::Mode));
@@ -585,7 +585,7 @@ TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const TriangularBase<Othe
template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived>
void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBase<OtherDerived>& other)
EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBase<OtherDerived>& other)
{
eigen_assert(Mode == int(OtherDerived::Mode));
internal::call_assignment_no_alias(derived(), other.derived());
@@ -600,7 +600,7 @@ void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBas
* If the matrix is triangular, the opposite part is set to zero. */
template<typename Derived>
template<typename DenseDerived>
void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
EIGEN_DEVICE_FUNC void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
{
evalToLazy(other.derived());
}
@@ -626,7 +626,7 @@ void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
*/
template<typename Derived>
template<unsigned int Mode>
typename MatrixBase<Derived>::template TriangularViewReturnType<Mode>::Type
EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template TriangularViewReturnType<Mode>::Type
MatrixBase<Derived>::triangularView()
{
return typename TriangularViewReturnType<Mode>::Type(derived());
@@ -635,7 +635,7 @@ MatrixBase<Derived>::triangularView()
/** This is the const version of MatrixBase::triangularView() */
template<typename Derived>
template<unsigned int Mode>
typename MatrixBase<Derived>::template ConstTriangularViewReturnType<Mode>::Type
EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template ConstTriangularViewReturnType<Mode>::Type
MatrixBase<Derived>::triangularView() const
{
return typename ConstTriangularViewReturnType<Mode>::Type(derived());
@@ -700,7 +700,7 @@ bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const
namespace internal {
// TODO currently a triangular expression has the form TriangularView<.,.>
// in the future triangular-ness should be defined by the expression traits
// such that Transpose<TriangularView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
@@ -728,7 +728,7 @@ struct Dense2Triangular {};
template<typename Kernel, unsigned int Mode, int UnrollCount, bool ClearOpposite> struct triangular_assignment_loop;
/** \internal Specialization of the dense assignment kernel for triangular matrices.
* The main difference is that the triangular, diagonal, and opposite parts are processed through three different functions.
* \tparam UpLo must be either Lower or Upper
@@ -745,17 +745,17 @@ protected:
using Base::m_src;
using Base::m_functor;
public:
typedef typename Base::DstEvaluatorType DstEvaluatorType;
typedef typename Base::SrcEvaluatorType SrcEvaluatorType;
typedef typename Base::Scalar Scalar;
typedef typename Base::AssignmentTraits AssignmentTraits;
EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
: Base(dst, src, func, dstExpr)
{}
#ifdef EIGEN_INTERNAL_DEBUGGING
EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)
{
@@ -765,16 +765,16 @@ public:
#else
using Base::assignCoeff;
#endif
EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id)
{
if(Mode==UnitDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(1));
else if(Mode==ZeroDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(0));
else if(Mode==0) Base::assignCoeff(id,id);
}
EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index row, Index col)
{
{
eigen_internal_assert(row!=col);
if(SetOpposite)
m_functor.assignCoeff(m_dst.coeffRef(row,col), Scalar(0));
@@ -795,17 +795,17 @@ void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src, con
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
DstEvaluatorType dstEvaluator(dst);
typedef triangular_dense_assignment_kernel< Mode&(Lower|Upper),Mode&(UnitDiag|ZeroDiag|SelfAdjoint),SetOpposite,
DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
enum {
unroll = DstXprType::SizeAtCompileTime != Dynamic
&& SrcEvaluatorType::CoeffReadCost < HugeCost
&& DstXprType::SizeAtCompileTime * (DstEvaluatorType::CoeffReadCost+SrcEvaluatorType::CoeffReadCost) / 2 <= EIGEN_UNROLLING_LIMIT
};
triangular_assignment_loop<Kernel, Mode, unroll ? int(DstXprType::SizeAtCompileTime) : Dynamic, SetOpposite>::run(kernel);
}
@@ -827,8 +827,8 @@ struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Triangular>
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{
eigen_assert(int(DstXprType::Mode) == int(SrcXprType::Mode));
call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func);
call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func);
}
};
@@ -837,7 +837,7 @@ struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense>
{
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{
call_triangular_assignment_loop<SrcXprType::Mode, (SrcXprType::Mode&SelfAdjoint)==0>(dst, src, func);
call_triangular_assignment_loop<SrcXprType::Mode, (SrcXprType::Mode&SelfAdjoint)==0>(dst, src, func);
}
};
@@ -846,7 +846,7 @@ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Triangular>
{
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{
call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func);
call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func);
}
};
@@ -857,19 +857,19 @@ struct triangular_assignment_loop
// FIXME: this is not very clean, perhaps this information should be provided by the kernel?
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
typedef typename DstEvaluatorType::XprType DstXprType;
enum {
col = (UnrollCount-1) / DstXprType::RowsAtCompileTime,
row = (UnrollCount-1) % DstXprType::RowsAtCompileTime
};
typedef typename Kernel::Scalar Scalar;
EIGEN_DEVICE_FUNC
static inline void run(Kernel &kernel)
{
triangular_assignment_loop<Kernel, Mode, UnrollCount-1, SetOpposite>::run(kernel);
if(row==col)
kernel.assignDiagonalCoeff(row);
else if( ((Mode&Lower) && row>col) || ((Mode&Upper) && row<col) )
@@ -912,10 +912,10 @@ struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>
}
else
i = maxi;
if(i<kernel.rows()) // then i==j
kernel.assignDiagonalCoeff(i++);
if (((Mode&Upper) && SetOpposite) || (Mode&Lower))
{
for(; i < kernel.rows(); ++i)
@@ -932,20 +932,20 @@ struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>
* If the matrix is triangular, the opposite part is set to zero. */
template<typename Derived>
template<typename DenseDerived>
void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
EIGEN_DEVICE_FUNC void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
{
other.derived().resize(this->rows(), this->cols());
internal::call_triangular_assignment_loop<Derived::Mode,(Derived::Mode&SelfAdjoint)==0 /* SetOpposite */>(other.derived(), derived().nestedExpression());
}
namespace internal {
// Triangular = Product
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
{
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename SrcXprType::Scalar> &)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename SrcXprType::Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
@@ -961,7 +961,7 @@ template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
{
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,typename SrcXprType::Scalar> &)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,typename SrcXprType::Scalar> &)
{
dst._assignProduct(src, 1, 1);
}
@@ -972,7 +972,7 @@ template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
{
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,typename SrcXprType::Scalar> &)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,typename SrcXprType::Scalar> &)
{
dst._assignProduct(src, -1, 1);
}

View File

@@ -670,7 +670,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
*/
template<typename Derived>
inline typename DenseBase<Derived>::ColwiseReturnType
EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::ColwiseReturnType
DenseBase<Derived>::colwise()
{
return ColwiseReturnType(derived());
@@ -684,7 +684,7 @@ DenseBase<Derived>::colwise()
* \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
*/
template<typename Derived>
inline typename DenseBase<Derived>::RowwiseReturnType
EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::RowwiseReturnType
DenseBase<Derived>::rowwise()
{
return RowwiseReturnType(derived());

View File

@@ -15,7 +15,9 @@ namespace Eigen {
namespace internal {
static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
inline Packet4ui p4ui_CONJ_XOR() {
return vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
}
#ifdef __VSX__
#if defined(_BIG_ENDIAN)
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
@@ -29,8 +31,54 @@ static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (P
//---------- float ----------
struct Packet2cf
{
EIGEN_STRONG_INLINE explicit Packet2cf() : v(p4f_ZERO) {}
EIGEN_STRONG_INLINE explicit Packet2cf() {}
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b)
{
Packet4f v1, v2;
// Permute and multiply the real parts of a and b
v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
// Get the imaginary parts of a
v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
// multiply a_re * b
v1 = vec_madd(v1, b.v, p4f_ZERO);
// multiply a_im * b and get the conjugate result
v2 = vec_madd(v2, b.v, p4f_ZERO);
v2 = reinterpret_cast<Packet4f>(pxor(v2, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR())));
// permute back to a proper order
v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
return Packet2cf(padd<Packet4f>(v1, v2));
}
EIGEN_STRONG_INLINE Packet2cf& operator*=(const Packet2cf& b) {
v = pmul(Packet2cf(*this), b).v;
return *this;
}
EIGEN_STRONG_INLINE Packet2cf operator*(const Packet2cf& b) const {
return Packet2cf(*this) *= b;
}
EIGEN_STRONG_INLINE Packet2cf& operator+=(const Packet2cf& b) {
v = padd(v, b.v);
return *this;
}
EIGEN_STRONG_INLINE Packet2cf operator+(const Packet2cf& b) const {
return Packet2cf(*this) += b;
}
EIGEN_STRONG_INLINE Packet2cf& operator-=(const Packet2cf& b) {
v = psub(v, b.v);
return *this;
}
EIGEN_STRONG_INLINE Packet2cf operator-(const Packet2cf& b) const {
return Packet2cf(*this) -= b;
}
EIGEN_STRONG_INLINE Packet2cf operator-(void) const {
return Packet2cf(-v);
}
Packet4f v;
};
@@ -82,14 +130,14 @@ template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<f
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
{
std::complex<float> EIGEN_ALIGN16 af[2];
EIGEN_ALIGN16 std::complex<float> af[2];
af[0] = from[0*stride];
af[1] = from[1*stride];
return pload<Packet2cf>(af);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
{
std::complex<float> EIGEN_ALIGN16 af[2];
EIGEN_ALIGN16 std::complex<float> af[2];
pstore<std::complex<float> >((std::complex<float> *) af, from);
to[0*stride] = af[0];
to[1*stride] = af[1];
@@ -98,26 +146,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v + b.v); }
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v - b.v); }
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); }
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
Packet4f v1, v2;
// Permute and multiply the real parts of a and b
v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
// Get the imaginary parts of a
v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
// multiply a_re * b
v1 = vec_madd(v1, b.v, p4f_ZERO);
// multiply a_im * b and get the conjugate result
v2 = vec_madd(v2, b.v, p4f_ZERO);
v2 = reinterpret_cast<Packet4f>(pxor(v2, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR)));
// permute back to a proper order
v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
return Packet2cf(padd<Packet4f>(v1, v2));
}
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR()))); }
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v, b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v, b.v)); }
@@ -128,7 +157,7 @@ template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::co
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
std::complex<float> EIGEN_ALIGN16 res[2];
EIGEN_ALIGN16 std::complex<float> res[2];
pstore((float *)&res, a.v);
return res[0];
@@ -152,7 +181,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packe
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
{
Packet4f b1, b2;
#ifdef _BIG_ENDIAN
#ifdef _BIG_ENDIAN
b1 = vec_sld(vecs[0].v, vecs[1].v, 8);
b2 = vec_sld(vecs[1].v, vecs[0].v, 8);
#else
@@ -260,6 +289,51 @@ struct Packet1cd
{
EIGEN_STRONG_INLINE Packet1cd() {}
EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b)
{
Packet2d a_re, a_im, v1, v2;
// Permute and multiply the real parts of a and b
a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
// Get the imaginary parts of a
a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
// multiply a_re * b
v1 = vec_madd(a_re, b.v, p2d_ZERO);
// multiply a_im * b and get the conjugate result
v2 = vec_madd(a_im, b.v, p2d_ZERO);
v2 = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v2), reinterpret_cast<Packet4ui>(v2), 8));
v2 = pxor(v2, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR1));
return Packet1cd(padd<Packet2d>(v1, v2));
}
EIGEN_STRONG_INLINE Packet1cd& operator*=(const Packet1cd& b) {
v = pmul(Packet1cd(*this), b).v;
return *this;
}
EIGEN_STRONG_INLINE Packet1cd operator*(const Packet1cd& b) const {
return Packet1cd(*this) *= b;
}
EIGEN_STRONG_INLINE Packet1cd& operator+=(const Packet1cd& b) {
v = padd(v, b.v);
return *this;
}
EIGEN_STRONG_INLINE Packet1cd operator+(const Packet1cd& b) const {
return Packet1cd(*this) += b;
}
EIGEN_STRONG_INLINE Packet1cd& operator-=(const Packet1cd& b) {
v = psub(v, b.v);
return *this;
}
EIGEN_STRONG_INLINE Packet1cd operator-(const Packet1cd& b) const {
return Packet1cd(*this) -= b;
}
EIGEN_STRONG_INLINE Packet1cd operator-(void) const {
return Packet1cd(-v);
}
Packet2d v;
};
@@ -296,19 +370,13 @@ template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index)
{
std::complex<double> EIGEN_ALIGN16 af[2];
af[0] = from[0*stride];
af[1] = from[1*stride];
return pload<Packet1cd>(af);
return pload<Packet1cd>(from);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index)
{
std::complex<double> EIGEN_ALIGN16 af[2];
pstore<std::complex<double> >(af, from);
to[0*stride] = af[0];
to[1*stride] = af[1];
pstore<std::complex<double> >(to, from);
}
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
@@ -316,24 +384,6 @@ template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, con
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR2))); }
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
Packet2d a_re, a_im, v1, v2;
// Permute and multiply the real parts of a and b
a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
// Get the imaginary parts of a
a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
// multiply a_re * b
v1 = vec_madd(a_re, b.v, p2d_ZERO);
// multiply a_im * b and get the conjugate result
v2 = vec_madd(a_im, b.v, p2d_ZERO);
v2 = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v2), reinterpret_cast<Packet4ui>(v2), 8));
v2 = pxor(v2, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR1));
return Packet1cd(padd<Packet2d>(v1, v2));
}
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(por(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pxor(a.v,b.v)); }
@@ -345,7 +395,7 @@ template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::c
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
{
std::complex<double> EIGEN_ALIGN16 res[2];
EIGEN_ALIGN16 std::complex<double> res[2];
pstore<std::complex<double> >(res, a);
return res[0];

View File

@@ -22,10 +22,6 @@ namespace internal {
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#endif
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#endif
// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
@@ -40,9 +36,8 @@ typedef __vector unsigned char Packet16uc;
// We don't want to write the same code all the time, but we need to reuse the constants
// and it doesn't really work to declare them global, so we define macros instead
#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(vec_splat_s32(X))
Packet4f p4f_##NAME = {X, X, X, X}
#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
Packet4i p4i_##NAME = vec_splat_s32(X)
@@ -64,7 +59,7 @@ typedef __vector unsigned char Packet16uc;
#define DST_CHAN 1
#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
#define __UNPACK_TYPE__(PACKETNAME) typename unpacket_traits<PACKETNAME>::type
// These constants are endian-agnostic
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}
@@ -77,21 +72,15 @@ static Packet4f p4f_MZERO = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}
#endif
static Packet4ui p4ui_SIGN = {0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u};
static Packet4ui p4ui_PREV0DOT5 = {0x3EFFFFFFu, 0x3EFFFFFFu, 0x3EFFFFFFu, 0x3EFFFFFFu};
static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 };
static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
// Mask alignment
#ifdef __PPC64__
#define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0
#else
#define _EIGEN_MASK_ALIGNMENT 0xfffffff0
#endif
#define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
// Handle endianness properly while loading constants
// Define global static constants:
#ifdef _BIG_ENDIAN
@@ -235,112 +224,127 @@ inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
return s;
}
template <typename Packet, typename Scalar>
EIGEN_STRONG_INLINE Packet pload_common(const Scalar* from)
{
// some versions of GCC throw "unused-but-set-parameter".
// ignoring these warnings for now.
EIGEN_UNUSED_VARIABLE(from);
EIGEN_DEBUG_ALIGNED_LOAD
return vec_ld(0, from);
}
// Need to define them first or we get specialization after instantiation errors
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
{
EIGEN_DEBUG_ALIGNED_LOAD
#ifdef __VSX__
return vec_vsx_ld(0, from);
#else
return vec_ld(0, from);
#endif
return pload_common<Packet4f, float>(from);
}
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
{
EIGEN_DEBUG_ALIGNED_LOAD
return pload_common<Packet4i, int>(from);
}
template <typename Packet>
EIGEN_STRONG_INLINE void pstore_common(__UNPACK_TYPE__(Packet)* to, const Packet& from){
// some versions of GCC throw "unused-but-set-parameter" (float *to).
// ignoring these warnings for now.
EIGEN_UNUSED_VARIABLE(to);
EIGEN_DEBUG_ALIGNED_STORE
#ifdef __VSX__
return vec_vsx_ld(0, from);
vec_xst(from, 0, to);
#else
return vec_ld(0, from);
vec_st(from, 0, to);
#endif
}
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
{
EIGEN_DEBUG_ALIGNED_STORE
#ifdef __VSX__
vec_vsx_st(from, 0, to);
#else
vec_st(from, 0, to);
#endif
pstore_common<Packet4f>(to, from);
}
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
{
EIGEN_DEBUG_ALIGNED_STORE
#ifdef __VSX__
vec_vsx_st(from, 0, to);
#else
vec_st(from, 0, to);
#endif
pstore_common<Packet4i>(to, from);
}
template<typename Packet>
EIGEN_STRONG_INLINE Packet pset1_size4(const __UNPACK_TYPE__(Packet)& from)
{
Packet v = {from, from, from, from};
return v;
}
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
Packet4f v = {from, from, from, from};
return v;
return pset1_size4<Packet4f>(from);
}
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
Packet4i v = {from, from, from, from};
return v;
return pset1_size4<Packet4i>(from);
}
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet4f>(const float *a,
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
template<typename Packet> EIGEN_STRONG_INLINE void
pbroadcast4_common(const __UNPACK_TYPE__(Packet) *a,
Packet& a0, Packet& a1, Packet& a2, Packet& a3)
{
a3 = pload<Packet4f>(a);
a3 = pload<Packet>(a);
a0 = vec_splat(a3, 0);
a1 = vec_splat(a3, 1);
a2 = vec_splat(a3, 2);
a3 = vec_splat(a3, 3);
}
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet4f>(const float *a,
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
{
pbroadcast4_common<Packet4f>(a, a0, a1, a2, a3);
}
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet4i>(const int *a,
Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
{
a3 = pload<Packet4i>(a);
a0 = vec_splat(a3, 0);
a1 = vec_splat(a3, 1);
a2 = vec_splat(a3, 2);
a3 = vec_splat(a3, 3);
pbroadcast4_common<Packet4i>(a, a0, a1, a2, a3);
}
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather_common(const __UNPACK_TYPE__(Packet)* from, Index stride)
{
EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[4];
a[0] = from[0*stride];
a[1] = from[1*stride];
a[2] = from[2*stride];
a[3] = from[3*stride];
return pload<Packet>(a);
}
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
{
float EIGEN_ALIGN16 af[4];
af[0] = from[0*stride];
af[1] = from[1*stride];
af[2] = from[2*stride];
af[3] = from[3*stride];
return pload<Packet4f>(af);
return pgather_common<Packet4f>(from, stride);
}
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
{
int EIGEN_ALIGN16 ai[4];
ai[0] = from[0*stride];
ai[1] = from[1*stride];
ai[2] = from[2*stride];
ai[3] = from[3*stride];
return pload<Packet4i>(ai);
return pgather_common<Packet4i>(from, stride);
}
template<typename Packet> EIGEN_DEVICE_FUNC inline void pscatter_size4(__UNPACK_TYPE__(Packet)* to, const Packet& from, Index stride)
{
EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[4];
pstore<__UNPACK_TYPE__(Packet)>(a, from);
to[0*stride] = a[0];
to[1*stride] = a[1];
to[2*stride] = a[2];
to[3*stride] = a[3];
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
{
float EIGEN_ALIGN16 af[4];
pstore<float>(af, from);
to[0*stride] = af[0];
to[1*stride] = af[1];
to[2*stride] = af[2];
to[3*stride] = af[3];
pscatter_size4<Packet4f>(to, from, stride);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
{
int EIGEN_ALIGN16 ai[4];
pstore<int>((int *)ai, from);
to[0*stride] = ai[0];
to[1*stride] = ai[1];
to[2*stride] = ai[2];
to[3*stride] = ai[3];
pscatter_size4<Packet4i>(to, from, stride);
}
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return pset1<Packet4f>(a) + p4f_COUNTDOWN; }
@@ -424,66 +428,67 @@ template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return vec_round(a); }
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
{
Packet4f t = vec_add(reinterpret_cast<Packet4f>(vec_or(vec_and(reinterpret_cast<Packet4ui>(a), p4ui_SIGN), p4ui_PREV0DOT5)), a);
Packet4f res;
#ifdef __VSX__
__asm__("xvrspiz %x0, %x1\n\t"
: "=&wa" (res)
: "wa" (t));
#else
__asm__("vrfiz %0, %1\n\t"
: "=v" (res)
: "v" (t));
#endif
return res;
}
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return vec_ceil(a); }
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); }
#ifdef _BIG_ENDIAN
template<typename Packet> EIGEN_STRONG_INLINE Packet ploadu_common(const __UNPACK_TYPE__(Packet)* from)
{
EIGEN_DEBUG_UNALIGNED_LOAD
Packet16uc mask = vec_lvsl(0, from); // create the permute mask
Packet16uc MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword
Packet16uc LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword
//TODO: Add static_cast here
return (Packet) vec_perm(MSQ, LSQ, mask); // align the data
}
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
{
EIGEN_DEBUG_ALIGNED_LOAD
Packet16uc MSQ, LSQ;
Packet16uc mask;
MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword
LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword
mask = vec_lvsl(0, from); // create the permute mask
return (Packet4f) vec_perm(MSQ, LSQ, mask); // align the data
return ploadu_common<Packet4f>(from);
}
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
{
EIGEN_DEBUG_ALIGNED_LOAD
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
Packet16uc MSQ, LSQ;
Packet16uc mask;
MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword
LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword
mask = vec_lvsl(0, from); // create the permute mask
return (Packet4i) vec_perm(MSQ, LSQ, mask); // align the data
return ploadu_common<Packet4i>(from);
}
#else
// We also need ot redefine little endian loading of Packet4i/Packet4f using VSX
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
{
EIGEN_DEBUG_UNALIGNED_LOAD
return (Packet4i) vec_vsx_ld((long)from & 15, (const int*) _EIGEN_ALIGNED_PTR(from));
}
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
{
EIGEN_DEBUG_UNALIGNED_LOAD
return (Packet4f) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from));
}
#endif
template<typename Packet> EIGEN_STRONG_INLINE Packet ploaddup_common(const __UNPACK_TYPE__(Packet)* from)
{
Packet p;
if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet>(from);
else p = ploadu<Packet>(from);
return vec_perm(p, p, p16uc_DUPLICATE32_HI);
}
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
{
Packet4f p;
if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from);
else p = ploadu<Packet4f>(from);
return vec_perm(p, p, p16uc_DUPLICATE32_HI);
return ploaddup_common<Packet4f>(from);
}
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
{
Packet4i p;
if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from);
else p = ploadu<Packet4i>(from);
return vec_perm(p, p, p16uc_DUPLICATE32_HI);
return ploaddup_common<Packet4i>(from);
}
#ifdef _BIG_ENDIAN
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
template<typename Packet> EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE__(Packet)* to, const Packet& from)
{
EIGEN_DEBUG_UNALIGNED_STORE
#ifdef __VSX__
vec_xst(from, 0, to);
#else
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
// Warning: not thread safe!
Packet16uc MSQ, LSQ, edges;
@@ -497,45 +502,23 @@ template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& f
MSQ = vec_perm(edges,(Packet16uc)from,align); // misalign the data (MSQ)
LSQ = vec_perm((Packet16uc)from,edges,align); // misalign the data (LSQ)
vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first
vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part
vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part second
#endif
}
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
{
pstoreu_common<Packet4f>(to, from);
}
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
{
EIGEN_DEBUG_UNALIGNED_STORE
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
// Warning: not thread safe!
Packet16uc MSQ, LSQ, edges;
Packet16uc edgeAlign, align;
MSQ = vec_ld(0, (unsigned char *)to); // most significant quadword
LSQ = vec_ld(15, (unsigned char *)to); // least significant quadword
edgeAlign = vec_lvsl(0, to); // permute map to extract edges
edges=vec_perm(LSQ, MSQ, edgeAlign); // extract the edges
align = vec_lvsr( 0, to ); // permute map to misalign data
MSQ = vec_perm(edges, (Packet16uc) from, align); // misalign the data (MSQ)
LSQ = vec_perm((Packet16uc) from, edges, align); // misalign the data (LSQ)
vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first
vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part
pstoreu_common<Packet4i>(to, from);
}
#else
// We also need ot redefine little endian loading of Packet4i/Packet4f using VSX
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
{
EIGEN_DEBUG_ALIGNED_STORE
vec_vsx_st(from, (long)to & 15, (int*) _EIGEN_ALIGNED_PTR(to));
}
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
{
EIGEN_DEBUG_ALIGNED_STORE
vec_vsx_st(from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to));
}
#endif
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_PPC_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_PPC_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; }
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; }
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { EIGEN_ALIGN16 float x; vec_ste(a, 0, &x); return x; }
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { EIGEN_ALIGN16 int x; vec_ste(a, 0, &x); return x; }
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
{
@@ -643,37 +626,42 @@ template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
}
// min
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
template<typename Packet> EIGEN_STRONG_INLINE
__UNPACK_TYPE__(Packet) predux_min4(const Packet& a)
{
Packet4f b, res;
Packet b, res;
b = vec_min(a, vec_sld(a, a, 8));
res = vec_min(b, vec_sld(b, b, 4));
return pfirst(res);
}
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
{
return predux_min4<Packet4f>(a);
}
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
{
Packet4i b, res;
b = vec_min(a, vec_sld(a, a, 8));
res = vec_min(b, vec_sld(b, b, 4));
return pfirst(res);
return predux_min4<Packet4i>(a);
}
// max
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
template<typename Packet> EIGEN_STRONG_INLINE __UNPACK_TYPE__(Packet) predux_max4(const Packet& a)
{
Packet4f b, res;
Packet b, res;
b = vec_max(a, vec_sld(a, a, 8));
res = vec_max(b, vec_sld(b, b, 4));
return pfirst(res);
}
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
{
return predux_max4<Packet4f>(a);
}
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
{
Packet4i b, res;
b = vec_max(a, vec_sld(a, a, 8));
res = vec_max(b, vec_sld(b, b, 4));
return pfirst(res);
return predux_max4<Packet4i>(a);
}
template<int Offset>
@@ -730,9 +718,9 @@ struct palign_impl<Offset,Packet4i>
}
};
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4f,4>& kernel) {
Packet4f t0, t1, t2, t3;
template <typename T>
EIGEN_DEVICE_FUNC inline void ptranpose_common(PacketBlock<T, 4>& kernel) {
T t0, t1, t2, t3;
t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
@@ -743,29 +731,23 @@ ptranspose(PacketBlock<Packet4f,4>& kernel) {
kernel.packet[3] = vec_mergel(t1, t3);
}
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4i,4>& kernel) {
Packet4i t0, t1, t2, t3;
t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
kernel.packet[0] = vec_mergeh(t0, t2);
kernel.packet[1] = vec_mergel(t0, t2);
kernel.packet[2] = vec_mergeh(t1, t3);
kernel.packet[3] = vec_mergel(t1, t3);
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f, 4>& kernel) { ptranpose_common<Packet4f>(kernel); }
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4i, 4>& kernel) { ptranpose_common<Packet4i>(kernel); }
template<typename Packet> EIGEN_STRONG_INLINE
Packet pblend4(const Selector<4>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE)));
return vec_sel(elsePacket, thenPacket, mask);
}
template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE)));
return vec_sel(elsePacket, thenPacket, mask);
return pblend4<Packet4i>(ifPacket, thenPacket, elsePacket);
}
template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE)));
return vec_sel(elsePacket, thenPacket, mask);
return pblend4<Packet4f>(ifPacket, thenPacket, elsePacket);
}
@@ -785,6 +767,8 @@ static Packet2l p2l_ZERO = reinterpret_cast<Packet2l>(p4i_ZERO);
static Packet2d p2d_ONE = { 1.0, 1.0 };
static Packet2d p2d_ZERO = reinterpret_cast<Packet2d>(p4f_ZERO);
static Packet2d p2d_MZERO = { -0.0, -0.0 };
static Packet2ul p2ul_SIGN = {0x8000000000000000ull, 0x8000000000000000ull};
static Packet2ul p2ul_PREV0DOT5 = {0x3FDFFFFFFFFFFFFFull, 0x3FDFFFFFFFFFFFFFull};
#ifdef _BIG_ENDIAN
static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ZERO), reinterpret_cast<Packet4f>(p2d_ONE), 8));
@@ -792,16 +776,9 @@ static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_c
static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ONE), reinterpret_cast<Packet4f>(p2d_ZERO), 8));
#endif
template<int index> Packet2d vec_splat_dbl(Packet2d& a);
template<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<0>(Packet2d& a)
template<int index> Packet2d vec_splat_dbl(Packet2d& a)
{
return reinterpret_cast<Packet2d>(vec_perm(a, a, p16uc_PSET64_HI));
}
template<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<1>(Packet2d& a)
{
return reinterpret_cast<Packet2d>(vec_perm(a, a, p16uc_PSET64_LO));
return vec_splat(a, index);
}
template<> struct packet_traits<double> : default_packet_traits
@@ -826,7 +803,11 @@ template<> struct packet_traits<double> : default_packet_traits
HasLog = 0,
HasExp = 1,
HasSqrt = 1,
#if !EIGEN_COMP_CLANG
HasRsqrt = 1,
#else
HasRsqrt = 0,
#endif
HasRound = 1,
HasFloor = 1,
HasCeil = 1,
@@ -863,21 +844,13 @@ inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
{
EIGEN_DEBUG_ALIGNED_LOAD
#ifdef __VSX__
return vec_vsx_ld(0, from);
#else
return vec_ld(0, from);
#endif
return vec_xl(0, const_cast<double *>(from)); // cast needed by Clang
}
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
{
EIGEN_DEBUG_ALIGNED_STORE
#ifdef __VSX__
vec_vsx_st(from, 0, to);
#else
vec_st(from, 0, to);
#endif
vec_xst(from, 0, to);
}
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
@@ -889,24 +862,23 @@ template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet2d>(const double *a,
Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
{
a1 = pload<Packet2d>(a);
a0 = vec_splat_dbl<0>(a1);
a1 = vec_splat_dbl<1>(a1);
a3 = pload<Packet2d>(a+2);
a2 = vec_splat_dbl<0>(a3);
a3 = vec_splat_dbl<1>(a3);
//This way is faster than vec_splat (at least for doubles in Power 9)
a0 = pset1<Packet2d>(a[0]);
a1 = pset1<Packet2d>(a[1]);
a2 = pset1<Packet2d>(a[2]);
a3 = pset1<Packet2d>(a[3]);
}
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
{
double EIGEN_ALIGN16 af[2];
EIGEN_ALIGN16 double af[2];
af[0] = from[0*stride];
af[1] = from[1*stride];
return pload<Packet2d>(af);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
{
double EIGEN_ALIGN16 af[2];
EIGEN_ALIGN16 double af[2];
pstore<double>(af, from);
to[0*stride] = af[0];
to[1*stride] = af[1];
@@ -918,7 +890,14 @@ template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return a - b; }
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return p2d_ZERO - a; }
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
{
#ifdef __POWER8_VECTOR__
return vec_neg(a);
#else
return vec_xor(a, p2d_MZERO);
#endif
}
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
@@ -950,14 +929,24 @@ template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const
template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a)
{
Packet2d t = vec_add(reinterpret_cast<Packet2d>(vec_or(vec_and(reinterpret_cast<Packet2ul>(a), p2ul_SIGN), p2ul_PREV0DOT5)), a);
Packet2d res;
__asm__("xvrdpiz %x0, %x1\n\t"
: "=&wa" (res)
: "wa" (t));
return res;
}
template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); }
template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
{
EIGEN_DEBUG_ALIGNED_LOAD
return (Packet2d) vec_vsx_ld((long)from & 15, (const double*) _EIGEN_ALIGNED_PTR(from));
EIGEN_DEBUG_UNALIGNED_LOAD
return vec_xl(0, const_cast<double*>(from));
}
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
@@ -970,13 +959,13 @@ template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from)
{
EIGEN_DEBUG_ALIGNED_STORE
vec_vsx_st((Packet4f)from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to));
EIGEN_DEBUG_UNALIGNED_STORE
vec_xst(from, 0, to);
}
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_PPC_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore<double>(x, a); return x[0]; }
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { EIGEN_ALIGN16 double x[2]; pstore<double>(x, a); return x[0]; }
template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
{

View File

@@ -16,7 +16,7 @@ namespace Eigen {
namespace internal {
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
// Many std::complex methods such as operator+, operator-, operator* and
// operator/ are not constexpr. Due to this, clang does not treat them as device
@@ -55,7 +55,7 @@ template<typename T> struct scalar_difference_op<std::complex<T>, std::complex<T
// Product
template<typename T> struct scalar_product_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
enum {
Vectorizable = packet_traits<std::complex<T>>::HasMul
Vectorizable = packet_traits<std::complex<T> >::HasMul
};
typedef typename std::complex<T> result_type;
@@ -76,7 +76,7 @@ template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T> >
// Quotient
template<typename T> struct scalar_quotient_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
enum {
Vectorizable = packet_traits<std::complex<T>>::HasDiv
Vectorizable = packet_traits<std::complex<T> >::HasDiv
};
typedef typename std::complex<T> result_type;

View File

@@ -57,7 +57,7 @@ struct __half_raw {
explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
unsigned short x;
};
#elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
#elif EIGEN_CUDA_SDK_VER < 90000
// In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw
typedef __half __half_raw;
#endif
@@ -70,7 +70,7 @@ struct half_base : public __half_raw {
EIGEN_DEVICE_FUNC half_base() {}
EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half_raw(h) {}
EIGEN_DEVICE_FUNC half_base(const __half_raw& h) : __half_raw(h) {}
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDA_SDK_VER >= 90000
EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {}
#endif
};
@@ -79,7 +79,7 @@ struct half_base : public __half_raw {
// Class definition.
struct half : public half_impl::half_base {
#if !defined(EIGEN_HAS_CUDA_FP16) || (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000)
#if !defined(EIGEN_HAS_CUDA_FP16) || (EIGEN_CUDA_SDK_VER < 90000)
typedef half_impl::__half_raw __half_raw;
#endif
@@ -87,7 +87,7 @@ struct half : public half_impl::half_base {
EIGEN_DEVICE_FUNC half(const __half_raw& h) : half_impl::half_base(h) {}
EIGEN_DEVICE_FUNC half(const half& h) : half_impl::half_base(h) {}
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDA_SDK_VER >= 90000
EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
#endif
@@ -209,56 +209,56 @@ namespace half_impl {
// versions to get the ALU speed increased), but you do save the
// conversion steps back and forth.
EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) {
return __hadd(a, b);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {
return __hadd(static_cast<__half>(a), static_cast<__half>(b));
}
EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) {
return __hmul(a, b);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) {
return __hmul(static_cast<__half>(a), static_cast<__half>(b));
}
EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) {
return __hsub(a, b);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) {
return __hsub(static_cast<__half>(a), static_cast<__half>(b));
}
EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) {
float num = __half2float(a);
float denom = __half2float(b);
return __float2half(num / denom);
}
EIGEN_STRONG_INLINE __device__ half operator - (const half& a) {
return __hneg(a);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) {
return __hneg(static_cast<__half>(a));
}
EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) {
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) {
a = a + b;
return a;
}
EIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) {
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a, const half& b) {
a = a * b;
return a;
}
EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) {
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) {
a = a - b;
return a;
}
EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) {
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) {
a = a / b;
return a;
}
EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) {
return __heq(a, b);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
return __heq(static_cast<__half>(a), static_cast<__half>(b));
}
EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) {
return __hne(a, b);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
return __hne(static_cast<__half>(a), static_cast<__half>(b));
}
EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) {
return __hlt(a, b);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
return __hlt(static_cast<__half>(a), static_cast<__half>(b));
}
EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) {
return __hle(a, b);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) {
return __hle(static_cast<__half>(a), static_cast<__half>(b));
}
EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) {
return __hgt(a, b);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) {
return __hgt(static_cast<__half>(a), static_cast<__half>(b));
}
EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) {
return __hge(a, b);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) {
return __hge(static_cast<__half>(a), static_cast<__half>(b));
}
#else // Emulate support for half floats
@@ -449,14 +449,14 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
return result;
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
#if EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
return half(hexp(a));
#else
return half(::expf(float(a)));
#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDA_SDK_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
return half(::hlog(a));
#else
return half(::logf(float(a)));
@@ -469,7 +469,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
return half(::log10f(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
#if EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
return half(hsqrt(a));
#else
return half(::sqrtf(float(a)));
@@ -491,14 +491,14 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
return half(::tanhf(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
#if EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
return half(hfloor(a));
#else
return half(::floorf(float(a)));
#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
#if EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
return half(hceil(a));
#else
return half(::ceilf(float(a)));
@@ -541,7 +541,7 @@ struct random_default_impl<half, false, false>
{
static inline half run(const half& x, const half& y)
{
return x + (y-x) * half(float(std::rand()) / float(RAND_MAX));
return x + (y-x) * half(random<float>());
}
static inline half run()
{
@@ -593,7 +593,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
return Eigen::half(::expf(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
#if EIGEN_CUDA_SDK_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
return Eigen::half(::hlog(a));
#else
return Eigen::half(::logf(float(a)));
@@ -626,25 +626,71 @@ struct hash<Eigen::half> {
} // end namespace std
// Add the missing shfl_xor intrinsic
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
#if EIGEN_CUDACC_VER < 90000
return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
#else
return static_cast<Eigen::half>(__shfl_xor_sync(0xFFFFFFFF, static_cast<float>(var), laneMask, width));
#endif
// Add the missing shfl* intrinsics.
// The __shfl* functions are only valid on HIP or _CUDA_ARCH_ >= 300.
// CUDA defines them for (__CUDA_ARCH__ >= 300 || !defined(__CUDA_ARCH__))
//
// HIP and CUDA prior to SDK 9.0 define
// __shfl, __shfl_up, __shfl_down, __shfl_xor for int and float
// CUDA since 9.0 deprecates those and instead defines
// __shfl_sync, __shfl_up_sync, __shfl_down_sync, __shfl_xor_sync,
// with native support for __half and __nv_bfloat16
//
// Note that the following are __device__ - only functions.
#if defined(EIGEN_CUDACC) && (!defined(EIGEN_CUDA_ARCH) || EIGEN_CUDA_ARCH >= 300)
#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDA_SDK_VER >= 90000
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_sync(unsigned mask, Eigen::half var, int srcLane,
int width = warpSize) {
const __half h = var;
return static_cast<Eigen::half>(__shfl_sync(mask, h, srcLane, width));
}
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_up_sync(unsigned mask, Eigen::half var, unsigned int delta,
int width = warpSize) {
const __half h = var;
return static_cast<Eigen::half>(__shfl_up_sync(mask, h, delta, width));
}
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_down_sync(unsigned mask, Eigen::half var, unsigned int delta,
int width = warpSize) {
const __half h = var;
return static_cast<Eigen::half>(__shfl_down_sync(mask, h, delta, width));
}
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor_sync(unsigned mask, Eigen::half var, int laneMask,
int width = warpSize) {
const __half h = var;
return static_cast<Eigen::half>(__shfl_xor_sync(mask, h, laneMask, width));
}
#else // CUDA SDK < 9.0
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl(Eigen::half var, int srcLane, int width = warpSize) {
return static_cast<Eigen::half>(__shfl(static_cast<float>(var), srcLane, width));
}
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_up(Eigen::half var, unsigned int delta, int width = warpSize) {
return static_cast<Eigen::half>(__shfl_up(static_cast<float>(var), delta, width));
}
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_down(Eigen::half var, unsigned int delta, int width = warpSize) {
return static_cast<Eigen::half>(__shfl_down(static_cast<float>(var), delta, width));
}
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width = warpSize) {
return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
}
#endif
#endif // __shfl*
// ldg() has an overload for __half_raw, but we also need one for Eigen::half.
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
return Eigen::half_impl::raw_uint16_to_half(
__ldg(reinterpret_cast<const unsigned short*>(ptr)));
#if defined(EIGEN_CUDACC) && (!defined(EIGEN_CUDA_ARCH) || EIGEN_CUDA_ARCH >= 350)
EIGEN_STRONG_INLINE __device__ Eigen::half __ldg(const Eigen::half* ptr) {
return Eigen::half_impl::raw_uint16_to_half(__ldg(reinterpret_cast<const Eigen::numext::uint16_t*>(ptr)));
}
#endif
#endif // __ldg
#if defined(EIGEN_CUDA_ARCH)
namespace Eigen {

View File

@@ -17,7 +17,7 @@ namespace internal {
// Make sure this is only available when targeting a GPU: we don't want to
// introduce conflicts between these packet_traits definitions and the ones
// we'll use on the host side (SSE, AVX, ...)
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 plog<float4>(const float4& a)
{

View File

@@ -17,7 +17,7 @@ namespace internal {
// Make sure this is only available when targeting a GPU: we don't want to
// introduce conflicts between these packet_traits definitions and the ones
// we'll use on the host side (SSE, AVX, ...)
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
template<> struct is_arithmetic<float4> { enum { value = true }; };
template<> struct is_arithmetic<double2> { enum { value = true }; };
@@ -167,10 +167,10 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const d
return make_double2(from[0], from[1]);
}
template<> EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) {
return make_float4(from[0], from[0], from[1], from[1]);
}
template<> EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) {
return make_double2(from[0], from[0]);
}
@@ -197,7 +197,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
return __ldg((const float4*)from);
return __ldg(reinterpret_cast<const float4*>(from));
#else
return make_float4(from[0], from[1], from[2], from[3]);
#endif
@@ -205,7 +205,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const fl
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
return __ldg((const double2*)from);
return __ldg(reinterpret_cast<const double2*>(from));
#else
return make_double2(from[0], from[1]);
#endif

View File

@@ -15,7 +15,7 @@ namespace Eigen {
namespace internal {
// Most of the following operations require arch >= 3.0
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDACC__) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
template<> struct is_arithmetic<half2> { enum { value = true }; };
@@ -41,42 +41,42 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits
template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16}; typedef half2 half; };
template<> __device__ EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
return __half2half2(from);
}
template<> __device__ EIGEN_STRONG_INLINE half2 pload<half2>(const Eigen::half* from) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload<half2>(const Eigen::half* from) {
return *reinterpret_cast<const half2*>(from);
}
template<> __device__ EIGEN_STRONG_INLINE half2 ploadu<half2>(const Eigen::half* from) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu<half2>(const Eigen::half* from) {
return __halves2half2(from[0], from[1]);
}
template<> EIGEN_STRONG_INLINE half2 ploaddup<half2>(const Eigen::half* from) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploaddup<half2>(const Eigen::half* from) {
return __halves2half2(from[0], from[0]);
}
template<> __device__ EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const half2& from) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const half2& from) {
*reinterpret_cast<half2*>(to) = from;
}
template<> __device__ EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const half2& from) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const half2& from) {
to[0] = __low2half(from);
to[1] = __high2half(from);
}
template<>
__device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const Eigen::half* from) {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const Eigen::half* from) {
#if __CUDA_ARCH__ >= 350
return __ldg((const half2*)from);
return __ldg(reinterpret_cast<const half2*>(from));
#else
return __halves2half2(*(from+0), *(from+1));
#endif
}
template<>
__device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Eigen::half* from) {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Eigen::half* from) {
#if __CUDA_ARCH__ >= 350
return __halves2half2(__ldg(from+0), __ldg(from+1));
#else
@@ -84,20 +84,20 @@ __device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Eigen::ha
#endif
}
template<> __device__ EIGEN_STRONG_INLINE half2 pgather<Eigen::half, half2>(const Eigen::half* from, Index stride) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pgather<Eigen::half, half2>(const Eigen::half* from, Index stride) {
return __halves2half2(from[0*stride], from[1*stride]);
}
template<> __device__ EIGEN_STRONG_INLINE void pscatter<Eigen::half, half2>(Eigen::half* to, const half2& from, Index stride) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<Eigen::half, half2>(Eigen::half* to, const half2& from, Index stride) {
to[stride*0] = __low2half(from);
to[stride*1] = __high2half(from);
}
template<> __device__ EIGEN_STRONG_INLINE Eigen::half pfirst<half2>(const half2& a) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst<half2>(const half2& a) {
return __low2half(a);
}
template<> __device__ EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& a) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& a) {
half2 result;
unsigned temp = *(reinterpret_cast<const unsigned*>(&(a)));
*(reinterpret_cast<unsigned*>(&(result))) = temp & 0x7FFF7FFF;
@@ -105,7 +105,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& a) {
}
__device__ EIGEN_STRONG_INLINE void
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
ptranspose(PacketBlock<half2,2>& kernel) {
__half a1 = __low2half(kernel.packet[0]);
__half a2 = __high2half(kernel.packet[0]);
@@ -115,7 +115,7 @@ ptranspose(PacketBlock<half2,2>& kernel) {
kernel.packet[1] = __halves2half2(a2, b2);
}
template<> __device__ EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half& a) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half& a) {
#if __CUDA_ARCH__ >= 530
return __halves2half2(a, __hadd(a, __float2half(1.0f)));
#else
@@ -124,7 +124,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half&
#endif
}
template<> __device__ EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) {
#if __CUDA_ARCH__ >= 530
return __hadd2(a, b);
#else
@@ -138,7 +138,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, cons
#endif
}
template<> __device__ EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) {
#if __CUDA_ARCH__ >= 530
return __hsub2(a, b);
#else
@@ -152,7 +152,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, cons
#endif
}
template<> __device__ EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
#if __CUDA_ARCH__ >= 530
return __hneg2(a);
#else
@@ -162,9 +162,9 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
#endif
}
template<> __device__ EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
template<> __device__ EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) {
#if __CUDA_ARCH__ >= 530
return __hmul2(a, b);
#else
@@ -178,7 +178,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, cons
#endif
}
template<> __device__ EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) {
#if __CUDA_ARCH__ >= 530
return __hfma2(a, b, c);
#else
@@ -194,7 +194,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, con
#endif
}
template<> __device__ EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
@@ -204,7 +204,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, cons
return __floats2half2_rn(r1, r2);
}
template<> __device__ EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
@@ -214,7 +214,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, cons
return __halves2half2(r1, r2);
}
template<> __device__ EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
@@ -224,7 +224,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, cons
return __halves2half2(r1, r2);
}
template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2& a) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2& a) {
#if __CUDA_ARCH__ >= 530
return __hadd(__low2half(a), __high2half(a));
#else
@@ -234,7 +234,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2&
#endif
}
template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const half2& a) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const half2& a) {
#if __CUDA_ARCH__ >= 530
__half first = __low2half(a);
__half second = __high2half(a);
@@ -246,7 +246,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const ha
#endif
}
template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const half2& a) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const half2& a) {
#if __CUDA_ARCH__ >= 530
__half first = __low2half(a);
__half second = __high2half(a);
@@ -258,7 +258,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const ha
#endif
}
template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const half2& a) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const half2& a) {
#if __CUDA_ARCH__ >= 530
return __hmul(__low2half(a), __high2half(a));
#else
@@ -268,7 +268,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const ha
#endif
}
template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = log1pf(a1);
@@ -276,31 +276,31 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
return __floats2half2_rn(r1, r2);
}
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
#if EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
template<> __device__ EIGEN_STRONG_INLINE
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
half2 plog<half2>(const half2& a) {
return h2log(a);
}
template<> __device__ EIGEN_STRONG_INLINE
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
half2 pexp<half2>(const half2& a) {
return h2exp(a);
}
template<> __device__ EIGEN_STRONG_INLINE
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
half2 psqrt<half2>(const half2& a) {
return h2sqrt(a);
}
template<> __device__ EIGEN_STRONG_INLINE
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
half2 prsqrt<half2>(const half2& a) {
return h2rsqrt(a);
}
#else
template<> __device__ EIGEN_STRONG_INLINE half2 plog<half2>(const half2& a) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = logf(a1);
@@ -308,7 +308,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog<half2>(const half2& a) {
return __floats2half2_rn(r1, r2);
}
template<> __device__ EIGEN_STRONG_INLINE half2 pexp<half2>(const half2& a) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexp<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = expf(a1);
@@ -316,7 +316,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pexp<half2>(const half2& a) {
return __floats2half2_rn(r1, r2);
}
template<> __device__ EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2& a) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = sqrtf(a1);
@@ -324,7 +324,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2& a) {
return __floats2half2_rn(r1, r2);
}
template<> __device__ EIGEN_STRONG_INLINE half2 prsqrt<half2>(const half2& a) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 prsqrt<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = rsqrtf(a1);

File diff suppressed because it is too large Load Diff

View File

@@ -32,7 +32,7 @@ namespace internal {
#if EIGEN_ARCH_ARM64
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
#else
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
#endif
#endif
@@ -107,7 +107,7 @@ template<> struct packet_traits<float> : default_packet_traits
AlignedOnScalar = 1,
size = 4,
HasHalfPacket=0, // Packet2f intrinsics not implemented yet
HasDiv = 1,
// FIXME check the Has*
HasSin = 0,
@@ -173,32 +173,48 @@ template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
{
#if EIGEN_ARCH_ARM64
return vdivq_f32(a,b);
#else
Packet4f inv, restep, div;
// NEON does not offer a divide instruction, we have to do a reciprocal approximation
// However NEON in contrast to other SIMD engines (AltiVec/SSE), offers
// a reciprocal estimate AND a reciprocal step -which saves a few instructions
// vrecpeq_f32() returns an estimate to 1/b, which we will finetune with
// Newton-Raphson and vrecpsq_f32()
inv = vrecpeq_f32(b);
// This returns a differential, by which we will have to multiply inv to get a better
// approximation of 1/b.
restep = vrecpsq_f32(b, inv);
inv = vmulq_f32(restep, inv);
// Finally, multiply a by 1/b and get the wanted result of the division.
div = vmulq_f32(a, inv);
return div;
#endif
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) {
return vbslq_f32(vreinterpretq_u32_f32(mask), a, b);
}
EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) {
return vreinterpretq_f32_u32(vcleq_f32(a, b));
}
EIGEN_STRONG_INLINE Packet4f preciprocal(const Packet4f& a)
{
// Compute approximate reciprocal.
float32x4_t result = vrecpeq_f32(a);
result = vmulq_f32(vrecpsq_f32(a, result), result);
result = vmulq_f32(vrecpsq_f32(a, result), result);
return result;
}
#if EIGEN_ARCH_ARM64
template<> EIGEN_STRONG_INLINE Packet4f pdiv(const Packet4f& a, const Packet4f& b) { return vdivq_f32(a, b); }
template<> EIGEN_STRONG_INLINE Packet2f pdiv(const Packet2f& a, const Packet2f& b) { return vdiv_f32(a, b); }
#else
template<typename Packet>
EIGEN_STRONG_INLINE Packet pdiv_float_common(const Packet& a, const Packet& b) {
// if b is large, NEON intrinsics will flush preciprocal(b) to zero
// avoid underflow with the following manipulation:
// a / b = f * (a * reciprocal(f * b))
const Packet cst_one = pset1<Packet>(1.0f);
const Packet cst_quarter = pset1<Packet>(0.25f);
const Packet cst_thresh = pset1<Packet>(NumTraits<float>::highest() / 4.0f);
Packet b_will_underflow = pcmp_le(cst_thresh, pabs(b));
Packet f = pselect(b_will_underflow, cst_quarter, cst_one);
Packet result = pmul(f, pmul(a, preciprocal(pmul(b, f))));
return result;
}
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) {
return pdiv_float_common(a, b);
}
#endif
template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
{ eigen_assert(false && "packet integer division are not supported by NEON");
return pset1<Packet4i>(0);
@@ -208,7 +224,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
// then implements a slow software scalar fallback calling fmaf()!
// Filed LLVM bug:
// https://llvm.org/bugs/show_bug.cgi?id=27216
#if (defined __ARM_FEATURE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM)
#if (defined EIGEN_VECTORIZE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM)
// See bug 936.
// FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4.
// FMA is a true fused multiply-add i.e. only 1 rounding at the end, no intermediate rounding.
@@ -478,7 +494,7 @@ template<> EIGEN_STRONG_INLINE int32_t predux_min<Packet4i>(const Packet4i& a)
a_hi = vget_high_s32(a);
min = vpmin_s32(a_lo, a_hi);
min = vpmin_s32(min, min);
return vget_lane_s32(min, 0);
}
@@ -595,7 +611,7 @@ template<> struct packet_traits<double> : default_packet_traits
AlignedOnScalar = 1,
size = 2,
HasHalfPacket=0,
HasDiv = 1,
// FIXME check the Has*
HasSin = 0,
@@ -628,7 +644,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const
template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vdivq_f64(a,b); }
#ifdef __ARM_FEATURE_FMA
#ifdef EIGEN_VECTORIZE_FMA
// See bug 936. See above comment about FMA for float.
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vfmaq_f64(c,a,b); }
#else
@@ -751,7 +767,7 @@ ptranspose(PacketBlock<Packet2d,2>& kernel) {
kernel.packet[0] = trn1;
kernel.packet[1] = trn2;
}
#endif // EIGEN_ARCH_ARM64
#endif // EIGEN_ARCH_ARM64
} // end namespace internal

View File

@@ -10,8 +10,6 @@
#ifndef EIGEN_PACKET_MATH_ZVECTOR_H
#define EIGEN_PACKET_MATH_ZVECTOR_H
#include <stdint.h>
namespace Eigen {
namespace internal {
@@ -46,10 +44,10 @@ typedef struct {
} Packet4f;
typedef union {
int32_t i[4];
uint32_t ui[4];
int64_t l[2];
uint64_t ul[2];
numext::int32_t i[4];
numext::uint32_t ui[4];
numext::int64_t l[2];
numext::uint64_t ul[2];
double d[2];
Packet4i v4i;
Packet4ui v4ui;

View File

@@ -10,7 +10,7 @@
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
namespace Eigen {
namespace Eigen {
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update;
@@ -27,7 +27,7 @@ namespace internal {
// forward declarations (defined at the end of this file)
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int ResInnerStride, int UpLo>
struct tribb_kernel;
/* Optimized matrix-matrix product evaluating only one triangular half */
template <typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
@@ -164,7 +164,7 @@ struct tribb_kernel
if(UpLo==Upper)
gebp_kernel1(res.getSubMapper(0, j), blockA, actual_b, j, depth, actualBlockSize, alpha,
-1, -1, 0, 0);
// selfadjoint micro block
{
Index i = j;
@@ -186,7 +186,7 @@ struct tribb_kernel
if(UpLo==Lower)
{
Index i = j+actualBlockSize;
gebp_kernel1(res.getSubMapper(i, j), blockA+depth*i, actual_b, size-i,
gebp_kernel1(res.getSubMapper(i, j), blockA+depth*i, actual_b, size-i,
depth, actualBlockSize, alpha, -1, -1, 0, 0);
}
}
@@ -207,13 +207,13 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>
static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
@@ -230,18 +230,18 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>
UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1,
UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1
};
internal::gemv_static_vector_if<Scalar,Lhs::SizeAtCompileTime,Lhs::MaxSizeAtCompileTime,!UseLhsDirectly> static_lhs;
ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(),
(UseLhsDirectly ? const_cast<Scalar*>(actualLhs.data()) : static_lhs.data()));
if(!UseLhsDirectly) Map<typename _ActualLhs::PlainObject>(actualLhsPtr, actualLhs.size()) = actualLhs;
internal::gemv_static_vector_if<Scalar,Rhs::SizeAtCompileTime,Rhs::MaxSizeAtCompileTime,!UseRhsDirectly> static_rhs;
ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(),
(UseRhsDirectly ? const_cast<Scalar*>(actualRhs.data()) : static_rhs.data()));
if(!UseRhsDirectly) Map<typename _ActualRhs::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
LhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
RhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex>
@@ -259,7 +259,7 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
@@ -300,15 +300,15 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
}
};
template<typename MatrixType, unsigned int UpLo>
template<typename _MatrixType, unsigned int _Mode>
template<typename ProductType>
TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta)
EIGEN_DEVICE_FUNC TriangularView<_MatrixType,_Mode>& TriangularViewImpl<_MatrixType,_Mode,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta)
{
EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED);
EIGEN_STATIC_ASSERT((_Mode&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED);
eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols());
general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta);
general_product_to_triangular_selector<_MatrixType, ProductType, _Mode, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta);
return derived();
}

View File

@@ -132,8 +132,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth,
ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
int errorCount = 0;
#pragma omp parallel num_threads(threads) reduction(+: errorCount)
#pragma omp parallel num_threads(threads)
{
Index i = omp_get_thread_num();
// Note that the actual number of threads might be lower than the number of request ones.
@@ -152,14 +151,11 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth,
info[i].lhs_start = r0;
info[i].lhs_length = actualBlockRows;
EIGEN_TRY {
if(transpose) func(c0, actualBlockCols, 0, rows, info);
else func(0, rows, c0, actualBlockCols, info);
} EIGEN_CATCH(...) {
++errorCount;
}
if(transpose)
func(c0, actualBlockCols, 0, rows, info);
else
func(0, rows, c0, actualBlockCols, info);
}
if (errorCount) EIGEN_THROW_X(Eigen::eigen_assert_exception());
#endif
}

View File

@@ -16,7 +16,7 @@
* It corresponds to the level 3 SYRK and level 2 SYR Blas routines.
**********************************************************************/
namespace Eigen {
namespace Eigen {
template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
@@ -68,10 +68,10 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
ei_declare_aligned_stack_constructed_variable(Scalar, actualOtherPtr, other.size(),
(UseOtherDirectly ? const_cast<Scalar*>(actualOther.data()) : static_other.data()));
if(!UseOtherDirectly)
Map<typename _ActualOtherType::PlainObject>(actualOtherPtr, actualOther.size()) = actualOther;
selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
(!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex>
@@ -120,7 +120,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
EIGEN_DEVICE_FUNC SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha)
{
selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha);

View File

@@ -10,7 +10,7 @@
#ifndef EIGEN_SELFADJOINTRANK2UPTADE_H
#define EIGEN_SELFADJOINTRANK2UPTADE_H
namespace Eigen {
namespace Eigen {
namespace internal {
@@ -57,7 +57,7 @@ template<bool Cond, typename T> struct conj_expr_if
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU, typename DerivedV>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
EIGEN_DEVICE_FUNC SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha)
{
typedef internal::blas_traits<DerivedU> UBlasTraits;

View File

@@ -0,0 +1,521 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2020, Arm Limited and Contributors
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_CONFIGURE_VECTORIZATION_H
#define EIGEN_CONFIGURE_VECTORIZATION_H
//------------------------------------------------------------------------------------------
// Static and dynamic alignment control
//
// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES
// as the maximal boundary in bytes on which dynamically and statically allocated data may be alignment respectively.
// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,
// a default value is automatically computed based on architecture, compiler, and OS.
//
// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX}
// to be used to declare statically aligned buffers.
//------------------------------------------------------------------------------------------
/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
* However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
* so that vectorization doesn't affect binary compatibility.
*
* If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
* vectorized and non-vectorized code.
*
* FIXME: this code can be cleaned up once we switch to proper C++11 only.
*/
#if (defined EIGEN_CUDACC)
#define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
#define EIGEN_ALIGNOF(x) __alignof(x)
#elif EIGEN_HAS_ALIGNAS
#define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
#define EIGEN_ALIGNOF(x) alignof(x)
#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
#define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
#define EIGEN_ALIGNOF(x) __alignof(x)
#elif EIGEN_COMP_MSVC
#define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
#define EIGEN_ALIGNOF(x) __alignof(x)
#elif EIGEN_COMP_SUNCC
// FIXME not sure about this one:
#define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
#define EIGEN_ALIGNOF(x) __alignof(x)
#else
#error Please tell me what is the equivalent of alignas(n) and alignof(x) for your compiler
#endif
// If the user explicitly disable vectorization, then we also disable alignment
#if defined(EIGEN_DONT_VECTORIZE)
#if defined(EIGEN_GPUCC)
// GPU code is always vectorized and requires memory alignment for
// statically allocated buffers.
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
#else
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
#endif
#elif defined(__AVX512F__)
// 64 bytes static alignment is preferred only if really required
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
#elif defined(__AVX__)
// 32 bytes static alignment is preferred only if really required
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
#else
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
#endif
// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense
#define EIGEN_MIN_ALIGN_BYTES 16
// Defined the boundary (in bytes) on which the data needs to be aligned. Note
// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
// aligned at all regardless of the value of this #define.
#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
#endif
// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated
// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0
#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
#ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
#undef EIGEN_MAX_STATIC_ALIGN_BYTES
#endif
#define EIGEN_MAX_STATIC_ALIGN_BYTES 0
#endif
#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
// Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES
// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
// certain common platform (compiler+architecture combinations) to avoid these problems.
// Only static alignment is really problematic (relies on nonstandard compiler extensions),
// try to keep heap alignment even when we have to disable static alignment.
#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS)
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
#elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6)
// Old versions of GCC on ARM, at least 4.4, were once seen to have buggy static alignment support.
// Not sure which version fixed it, hopefully it doesn't affect 4.7, which is still somewhat in use.
// 4.8 and newer seem definitely unaffected.
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
#else
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
#endif
// static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
&& !EIGEN_GCC3_OR_OLDER \
&& !EIGEN_COMP_SUNCC \
&& !EIGEN_OS_QNX
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
#else
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
#endif
#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#else
#define EIGEN_MAX_STATIC_ALIGN_BYTES 0
#endif
#endif
// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_STATIC_ALIGN_BYTES
#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
#undef EIGEN_MAX_STATIC_ALIGN_BYTES
#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
#endif
#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
#define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#endif
// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not.
// It takes into account both the user choice to explicitly enable/disable alignment (by setting EIGEN_MAX_STATIC_ALIGN_BYTES)
// and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT).
// Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used.
// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY
#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
#else
#define EIGEN_ALIGN_MAX
#endif
// Dynamic alignment control
#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
#endif
#ifdef EIGEN_DONT_ALIGN
#ifdef EIGEN_MAX_ALIGN_BYTES
#undef EIGEN_MAX_ALIGN_BYTES
#endif
#define EIGEN_MAX_ALIGN_BYTES 0
#elif !defined(EIGEN_MAX_ALIGN_BYTES)
#define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#endif
#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#else
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
#endif
#ifndef EIGEN_UNALIGNED_VECTORIZE
#define EIGEN_UNALIGNED_VECTORIZE 1
#endif
//----------------------------------------------------------------------
// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
#if EIGEN_MAX_ALIGN_BYTES==0
#ifndef EIGEN_DONT_VECTORIZE
#define EIGEN_DONT_VECTORIZE
#endif
#endif
// The following (except #include <malloc.h> and _M_IX86_FP ??) can likely be
// removed as gcc 4.1 and msvc 2008 are not supported anyways.
#if EIGEN_COMP_MSVC
#include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
#if (EIGEN_COMP_MSVC >= 1500) // 2008 or later
// a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
#if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
#define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
#endif
#endif
#else
#if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
#define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
#endif
#endif
#if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))
#if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
// Defines symbols for compile-time detection of which instructions are
// used.
// EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_SSE
#define EIGEN_VECTORIZE_SSE2
// Detect sse3/ssse3/sse4:
// gcc and icc defines __SSE3__, ...
// there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
// want to force the use of those instructions with msvc.
#ifdef __SSE3__
#define EIGEN_VECTORIZE_SSE3
#endif
#ifdef __SSSE3__
#define EIGEN_VECTORIZE_SSSE3
#endif
#ifdef __SSE4_1__
#define EIGEN_VECTORIZE_SSE4_1
#endif
#ifdef __SSE4_2__
#define EIGEN_VECTORIZE_SSE4_2
#endif
#ifdef __AVX__
#ifndef EIGEN_USE_SYCL
#define EIGEN_VECTORIZE_AVX
#endif
#define EIGEN_VECTORIZE_SSE3
#define EIGEN_VECTORIZE_SSSE3
#define EIGEN_VECTORIZE_SSE4_1
#define EIGEN_VECTORIZE_SSE4_2
#endif
#ifdef __AVX2__
#ifndef EIGEN_USE_SYCL
#define EIGEN_VECTORIZE_AVX2
#define EIGEN_VECTORIZE_AVX
#endif
#define EIGEN_VECTORIZE_SSE3
#define EIGEN_VECTORIZE_SSSE3
#define EIGEN_VECTORIZE_SSE4_1
#define EIGEN_VECTORIZE_SSE4_2
#endif
#if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
// MSVC does not expose a switch dedicated for FMA
// For MSVC, AVX2 => FMA
#define EIGEN_VECTORIZE_FMA
#endif
#if defined(__AVX512F__)
#ifndef EIGEN_VECTORIZE_FMA
#if EIGEN_COMP_GNUC
#error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
#else
#error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
#endif
#endif
#ifndef EIGEN_USE_SYCL
#define EIGEN_VECTORIZE_AVX512
#define EIGEN_VECTORIZE_AVX2
#define EIGEN_VECTORIZE_AVX
#endif
#define EIGEN_VECTORIZE_FMA
#define EIGEN_VECTORIZE_SSE3
#define EIGEN_VECTORIZE_SSSE3
#define EIGEN_VECTORIZE_SSE4_1
#define EIGEN_VECTORIZE_SSE4_2
#ifndef EIGEN_USE_SYCL
#ifdef __AVX512DQ__
#define EIGEN_VECTORIZE_AVX512DQ
#endif
#ifdef __AVX512ER__
#define EIGEN_VECTORIZE_AVX512ER
#endif
#ifdef __AVX512BF16__
#define EIGEN_VECTORIZE_AVX512BF16
#endif
#endif
#endif
// Disable AVX support on broken xcode versions
#if defined(__apple_build_version__) && (__apple_build_version__ == 11000033 ) && ( __MAC_OS_X_VERSION_MIN_REQUIRED == 101500 )
// A nasty bug in the clang compiler shipped with xcode in a common compilation situation
// when XCode 11.0 and Mac deployment target macOS 10.15 is https://trac.macports.org/ticket/58776#no1
#ifdef EIGEN_VECTORIZE_AVX
#undef EIGEN_VECTORIZE_AVX
#warning "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
#ifdef EIGEN_VECTORIZE_AVX2
#undef EIGEN_VECTORIZE_AVX2
#endif
#ifdef EIGEN_VECTORIZE_FMA
#undef EIGEN_VECTORIZE_FMA
#endif
#ifdef EIGEN_VECTORIZE_AVX512
#undef EIGEN_VECTORIZE_AVX512
#endif
#ifdef EIGEN_VECTORIZE_AVX512DQ
#undef EIGEN_VECTORIZE_AVX512DQ
#endif
#ifdef EIGEN_VECTORIZE_AVX512ER
#undef EIGEN_VECTORIZE_AVX512ER
#endif
#endif
// NOTE: Confirmed test failures in XCode 11.0, and XCode 11.2 with -macosx-version-min=10.15 and AVX
// NOTE using -macosx-version-min=10.15 with Xcode 11.0 results in runtime segmentation faults in many tests, 11.2 produce core dumps in 3 tests
// NOTE using -macosx-version-min=10.14 produces functioning and passing tests in all cases
// NOTE __clang_version__ "11.0.0 (clang-1100.0.33.8)" XCode 11.0 <- Produces many segfault and core dumping tests
// with -macosx-version-min=10.15 and AVX
// NOTE __clang_version__ "11.0.0 (clang-1100.0.33.12)" XCode 11.2 <- Produces 3 core dumping tests with
// -macosx-version-min=10.15 and AVX
#endif
// include files
// This extern "C" works around a MINGW-w64 compilation issue
// https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354
// In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).
// However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations
// with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;
// so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too.
// notice that since these are C headers, the extern "C" is theoretically needed anyways.
extern "C" {
// In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
// Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus:
#if EIGEN_COMP_ICC >= 1110 || EIGEN_COMP_EMSCRIPTEN
#include <immintrin.h>
#else
#include <mmintrin.h>
#include <emmintrin.h>
#include <xmmintrin.h>
#ifdef EIGEN_VECTORIZE_SSE3
#include <pmmintrin.h>
#endif
#ifdef EIGEN_VECTORIZE_SSSE3
#include <tmmintrin.h>
#endif
#ifdef EIGEN_VECTORIZE_SSE4_1
#include <smmintrin.h>
#endif
#ifdef EIGEN_VECTORIZE_SSE4_2
#include <nmmintrin.h>
#endif
#if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
#include <immintrin.h>
#endif
#endif
} // end extern "C"
#elif defined(__VSX__) && !defined(__APPLE__)
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_VSX 1
#define EIGEN_VECTORIZE_FMA
#include <altivec.h>
// We need to #undef all these ugly tokens defined in <altivec.h>
// => use __vector instead of vector
#undef bool
#undef vector
#undef pixel
#elif defined __ALTIVEC__
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_ALTIVEC
#define EIGEN_VECTORIZE_FMA
#include <altivec.h>
// We need to #undef all these ugly tokens defined in <altivec.h>
// => use __vector instead of vector
#undef bool
#undef vector
#undef pixel
#elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_NEON
#include <arm_neon.h>
// We currently require SVE to be enabled explicitly via EIGEN_ARM64_USE_SVE and
// will not select the backend automatically
#elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_SVE
#include <arm_sve.h>
// Since we depend on knowing SVE vector lengths at compile-time, we need
// to ensure a fixed lengths is set
#if defined __ARM_FEATURE_SVE_BITS
#define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
#else
#error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set."
#endif
#elif (defined __s390x__ && defined __VEC__)
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_ZVECTOR
#include <vecintrin.h>
#elif defined __mips_msa
// Limit MSA optimizations to little-endian CPUs for now.
// TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs?
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#if defined(__LP64__)
#define EIGEN_MIPS_64
#else
#define EIGEN_MIPS_32
#endif
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_MSA
#include <msa.h>
#endif
#endif
#endif
// Following the Arm ACLE arm_neon.h should also include arm_fp16.h but not all
// compilers seem to follow this. We therefore include it explicitly.
// See also: https://bugs.llvm.org/show_bug.cgi?id=47955
#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
#include <arm_fp16.h>
#endif
// Enable FMA for ARM.
#if defined(__ARM_FEATURE_FMA)
#define EIGEN_VECTORIZE_FMA
#endif
#if defined(__F16C__) && !defined(EIGEN_GPUCC) && (!EIGEN_COMP_CLANG_STRICT || EIGEN_COMP_CLANG>=380)
// We can use the optimized fp16 to float and float to fp16 conversion routines
#define EIGEN_HAS_FP16_C
#if EIGEN_COMP_GNUC
// Make sure immintrin.h is included, even if e.g. vectorization is
// explicitly disabled (see also issue #2395).
// Note that FP16C intrinsics for gcc and clang are included by immintrin.h,
// as opposed to emmintrin.h as suggested by Intel:
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=FP16C&expand=1711
#include <immintrin.h>
#endif
#endif
#if defined EIGEN_CUDACC
#define EIGEN_VECTORIZE_GPU
#include <vector_types.h>
#if EIGEN_CUDA_SDK_VER >= 70500
#define EIGEN_HAS_CUDA_FP16
#endif
#endif
#if defined(EIGEN_HAS_CUDA_FP16)
#include <cuda_runtime_api.h>
#include <cuda_fp16.h>
#endif
#if defined(EIGEN_HIPCC)
#define EIGEN_VECTORIZE_GPU
#include <hip/hip_vector_types.h>
#define EIGEN_HAS_HIP_FP16
#include <hip/hip_fp16.h>
#endif
/** \brief Namespace containing all symbols from the %Eigen library. */
namespace Eigen {
inline static const char *SimdInstructionSetsInUse(void) {
#if defined(EIGEN_VECTORIZE_AVX512)
return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_AVX)
return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_2)
return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_1)
return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
#elif defined(EIGEN_VECTORIZE_SSSE3)
return "SSE, SSE2, SSE3, SSSE3";
#elif defined(EIGEN_VECTORIZE_SSE3)
return "SSE, SSE2, SSE3";
#elif defined(EIGEN_VECTORIZE_SSE2)
return "SSE, SSE2";
#elif defined(EIGEN_VECTORIZE_ALTIVEC)
return "AltiVec";
#elif defined(EIGEN_VECTORIZE_VSX)
return "VSX";
#elif defined(EIGEN_VECTORIZE_NEON)
return "ARM NEON";
#elif defined(EIGEN_VECTORIZE_SVE)
return "ARM SVE";
#elif defined(EIGEN_VECTORIZE_ZVECTOR)
return "S390X ZVECTOR";
#elif defined(EIGEN_VECTORIZE_MSA)
return "MIPS MSA";
#else
return "None";
#endif
}
} // end namespace Eigen
#endif // EIGEN_CONFIGURE_VECTORIZATION_H

View File

@@ -1,94 +1,146 @@
#ifndef EIGEN_WARNINGS_DISABLED
#define EIGEN_WARNINGS_DISABLED
#ifdef _MSC_VER
// 4100 - unreferenced formal parameter (occurred e.g. in aligned_allocator::destroy(pointer p))
// 4101 - unreferenced local variable
// 4127 - conditional expression is constant
// 4181 - qualifier applied to reference type ignored
// 4211 - nonstandard extension used : redefined extern to static
// 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data
// 4273 - QtAlignedMalloc, inconsistent DLL linkage
// 4324 - structure was padded due to declspec(align())
// 4503 - decorated name length exceeded, name was truncated
// 4512 - assignment operator could not be generated
// 4522 - 'class' : multiple assignment operators specified
// 4700 - uninitialized local variable 'xyz' used
// 4714 - function marked as __forceinline not inlined
// 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow
// 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning)
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning( push )
#endif
#pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800)
#elif defined __INTEL_COMPILER
// 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
// ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body
// typedef that may be a reference type.
// 279 - controlling expression is constant
// ICC 12 generates this warning on assert(constant_expression_depending_on_template_params) and frankly this is a legitimate use case.
// 1684 - conversion from pointer to same-sized integral type (potential portability problem)
// 2259 - non-pointer conversion from "Eigen::Index={ptrdiff_t={long}}" to "int" may lose significant bits
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning push
#endif
#pragma warning disable 2196 279 1684 2259
#elif defined __clang__
// -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant
// this is really a stupid warning as it warns on compile-time expressions involving enums
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma clang diagnostic push
#endif
#pragma clang diagnostic ignored "-Wconstant-logical-operand"
#elif defined __GNUC__
#if (!defined(EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS)) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
#pragma GCC diagnostic push
#endif
// g++ warns about local variables shadowing member functions, which is too strict
#pragma GCC diagnostic ignored "-Wshadow"
#if __GNUC__ == 4 && __GNUC_MINOR__ < 8
// Until g++-4.7 there are warnings when comparing unsigned int vs 0, even in templated functions:
#pragma GCC diagnostic ignored "-Wtype-limits"
#endif
#if __GNUC__>=6
#pragma GCC diagnostic ignored "-Wignored-attributes"
#endif
#if __GNUC__==7
// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89325
#pragma GCC diagnostic ignored "-Wattributes"
#endif
#if defined(_MSC_VER)
// 4100 - unreferenced formal parameter (occurred e.g. in aligned_allocator::destroy(pointer p))
// 4101 - unreferenced local variable
// 4127 - conditional expression is constant
// 4181 - qualifier applied to reference type ignored
// 4211 - nonstandard extension used : redefined extern to static
// 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data
// 4273 - QtAlignedMalloc, inconsistent DLL linkage
// 4324 - structure was padded due to declspec(align())
// 4503 - decorated name length exceeded, name was truncated
// 4512 - assignment operator could not be generated
// 4522 - 'class' : multiple assignment operators specified
// 4700 - uninitialized local variable 'xyz' used
// 4714 - function marked as __forceinline not inlined
// 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow
// 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning)
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning(push)
#endif
#pragma warning(disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800)
// We currently rely on has_denorm in tests, and need it defined correctly for half/bfloat16.
#ifndef _SILENCE_CXX23_DENORM_DEPRECATION_WARNING
#define EIGEN_REENABLE_CXX23_DENORM_DEPRECATION_WARNING 1
#define _SILENCE_CXX23_DENORM_DEPRECATION_WARNING
#endif
#if defined __NVCC__
// Disable the "statement is unreachable" message
#pragma diag_suppress code_is_unreachable
// Disable the "dynamic initialization in unreachable code" message
#pragma diag_suppress initialization_not_reachable
// Disable the "invalid error number" message that we get with older versions of nvcc
#pragma diag_suppress 1222
// Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are many of them and they seem to change with every version of the compiler)
#pragma diag_suppress 2527
#pragma diag_suppress 2529
#pragma diag_suppress 2651
#pragma diag_suppress 2653
#pragma diag_suppress 2668
#pragma diag_suppress 2669
#pragma diag_suppress 2670
#pragma diag_suppress 2671
#pragma diag_suppress 2735
#pragma diag_suppress 2737
#elif defined __INTEL_COMPILER
// 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
// ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e.
// inside of class body typedef that may be a reference type.
// 279 - controlling expression is constant
// ICC 12 generates this warning on assert(constant_expression_depending_on_template_params) and frankly this is
// a legitimate use case.
// 1684 - conversion from pointer to same-sized integral type (potential portability problem)
// 2259 - non-pointer conversion from "Eigen::Index={ptrdiff_t={long}}" to "int" may lose significant bits
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning push
#endif
#pragma warning disable 2196 279 1684 2259
#elif defined __clang__
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma clang diagnostic push
#endif
#if defined(__has_warning)
// -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant
// this is really a stupid warning as it warns on compile-time expressions involving enums
#if __has_warning("-Wconstant-logical-operand")
#pragma clang diagnostic ignored "-Wconstant-logical-operand"
#endif
#if __has_warning("-Wimplicit-int-float-conversion")
#pragma clang diagnostic ignored "-Wimplicit-int-float-conversion"
#endif
#if (defined(__ALTIVEC__) || defined(__VSX__)) && (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L))
// warning: generic selections are a C11-specific feature
// ignoring warnings thrown at vec_ctf in Altivec/PacketMath.h
#if __has_warning("-Wc11-extensions")
#pragma clang diagnostic ignored "-Wc11-extensions"
#endif
#endif
#endif
#elif defined __GNUC__ && !defined(__FUJITSU)
#if (!defined(EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS)) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
#pragma GCC diagnostic push
#endif
// g++ warns about local variables shadowing member functions, which is too strict
#pragma GCC diagnostic ignored "-Wshadow"
#if __GNUC__ == 4 && __GNUC_MINOR__ < 8
// Until g++-4.7 there are warnings when comparing unsigned int vs 0, even in templated functions:
#pragma GCC diagnostic ignored "-Wtype-limits"
#endif
#if __GNUC__ >= 6
#pragma GCC diagnostic ignored "-Wignored-attributes"
#endif
#if __GNUC__ == 7
// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89325
#pragma GCC diagnostic ignored "-Wattributes"
#endif
#endif
#if defined __NVCC__ && defined __CUDACC__
// MSVC 14.16 (required by CUDA 9.*) does not support the _Pragma keyword, so
// we instead use Microsoft's __pragma extension.
#if defined _MSC_VER
#define EIGEN_MAKE_PRAGMA(X) __pragma(#X)
#else
#define EIGEN_MAKE_PRAGMA(X) _Pragma(#X)
#endif
#if defined __NVCC_DIAG_PRAGMA_SUPPORT__
#define EIGEN_NV_DIAG_SUPPRESS(X) EIGEN_MAKE_PRAGMA(nv_diag_suppress X)
#else
#define EIGEN_NV_DIAG_SUPPRESS(X) EIGEN_MAKE_PRAGMA(diag_suppress X)
#endif
EIGEN_NV_DIAG_SUPPRESS(boolean_controlling_expr_is_constant)
// Disable the "statement is unreachable" message
EIGEN_NV_DIAG_SUPPRESS(code_is_unreachable)
// Disable the "dynamic initialization in unreachable code" message
EIGEN_NV_DIAG_SUPPRESS(initialization_not_reachable)
// Disable the "invalid error number" message that we get with older versions of nvcc
EIGEN_NV_DIAG_SUPPRESS(1222)
// Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are
// many of them and they seem to change with every version of the compiler)
EIGEN_NV_DIAG_SUPPRESS(2527)
EIGEN_NV_DIAG_SUPPRESS(2529)
EIGEN_NV_DIAG_SUPPRESS(2651)
EIGEN_NV_DIAG_SUPPRESS(2653)
EIGEN_NV_DIAG_SUPPRESS(2668)
EIGEN_NV_DIAG_SUPPRESS(2669)
EIGEN_NV_DIAG_SUPPRESS(2670)
EIGEN_NV_DIAG_SUPPRESS(2671)
EIGEN_NV_DIAG_SUPPRESS(2735)
EIGEN_NV_DIAG_SUPPRESS(2737)
EIGEN_NV_DIAG_SUPPRESS(2739)
EIGEN_NV_DIAG_SUPPRESS(2885)
EIGEN_NV_DIAG_SUPPRESS(2888)
EIGEN_NV_DIAG_SUPPRESS(2976)
EIGEN_NV_DIAG_SUPPRESS(2979)
EIGEN_NV_DIAG_SUPPRESS(20011)
EIGEN_NV_DIAG_SUPPRESS(20014)
// Disable the "// __device__ annotation is ignored on a function(...) that is
// explicitly defaulted on its first declaration" message.
// The __device__ annotation seems to actually be needed in some cases,
// otherwise resulting in kernel runtime errors.
EIGEN_NV_DIAG_SUPPRESS(2886)
EIGEN_NV_DIAG_SUPPRESS(2929)
EIGEN_NV_DIAG_SUPPRESS(2977)
EIGEN_NV_DIAG_SUPPRESS(20012)
#undef EIGEN_NV_DIAG_SUPPRESS
#undef EIGEN_MAKE_PRAGMA
#endif
#else
// warnings already disabled:
# ifndef EIGEN_WARNINGS_DISABLED_2
# define EIGEN_WARNINGS_DISABLED_2
# elif defined(EIGEN_INTERNAL_DEBUGGING)
# error "Do not include \"DisableStupidWarnings.h\" recursively more than twice!"
# endif
#ifndef EIGEN_WARNINGS_DISABLED_2
#define EIGEN_WARNINGS_DISABLED_2
#elif defined(EIGEN_INTERNAL_DEBUGGING)
#error "Do not include \"DisableStupidWarnings.h\" recursively more than twice!"
#endif
#endif // not EIGEN_WARNINGS_DISABLED
#endif // not EIGEN_WARNINGS_DISABLED

View File

@@ -13,7 +13,7 @@
#define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 3
#define EIGEN_MINOR_VERSION 8
#define EIGEN_MINOR_VERSION 9
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \

View File

@@ -16,8 +16,40 @@
#include <math_constants.h>
#endif
#if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L
// Recent versions of ICC require <cstdint> for pointer types below.
#define EIGEN_ICC_NEEDS_CSTDINT (EIGEN_COMP_ICC>=1600 && EIGEN_COMP_CXXVER >= 11)
// Define portable (u)int{32,64} types
#if EIGEN_HAS_CXX11 || EIGEN_ICC_NEEDS_CSTDINT
#include <cstdint>
namespace Eigen {
namespace numext {
typedef std::uint8_t uint8_t;
typedef std::int8_t int8_t;
typedef std::uint16_t uint16_t;
typedef std::int16_t int16_t;
typedef std::uint32_t uint32_t;
typedef std::int32_t int32_t;
typedef std::uint64_t uint64_t;
typedef std::int64_t int64_t;
}
}
#else
// Without c++11, all compilers able to compile Eigen also
// provide the C99 stdint.h header file.
#include <stdint.h>
namespace Eigen {
namespace numext {
typedef ::uint8_t uint8_t;
typedef ::int8_t int8_t;
typedef ::uint16_t uint16_t;
typedef ::int16_t int16_t;
typedef ::uint32_t uint32_t;
typedef ::int32_t int32_t;
typedef ::uint64_t uint64_t;
typedef ::int64_t int64_t;
}
}
#endif
namespace Eigen {
@@ -43,13 +75,14 @@ namespace internal {
// Only recent versions of ICC complain about using ptrdiff_t to hold pointers,
// and older versions do not provide *intptr_t types.
#if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L
#if EIGEN_ICC_NEEDS_CSTDINT
typedef std::intptr_t IntPtr;
typedef std::uintptr_t UIntPtr;
#else
typedef std::ptrdiff_t IntPtr;
typedef std::size_t UIntPtr;
#endif
#undef EIGEN_ICC_NEEDS_CSTDINT
struct true_type { enum { value = 1 }; };
struct false_type { enum { value = 0 }; };
@@ -133,10 +166,8 @@ template<> struct make_unsigned<signed int> { typedef unsigned int type; }
template<> struct make_unsigned<unsigned int> { typedef unsigned int type; };
template<> struct make_unsigned<signed long> { typedef unsigned long type; };
template<> struct make_unsigned<unsigned long> { typedef unsigned long type; };
#if EIGEN_COMP_MSVC
template<> struct make_unsigned<signed __int64> { typedef unsigned __int64 type; };
template<> struct make_unsigned<unsigned __int64> { typedef unsigned __int64 type; };
#endif
template<> struct make_unsigned<signed long long> { typedef unsigned long type; };
template<> struct make_unsigned<unsigned long long> { typedef unsigned long type; };
#endif
template <typename T> struct add_const { typedef const T type; };
@@ -494,7 +525,7 @@ template<typename T, typename U> struct scalar_product_traits
} // end namespace internal
namespace numext {
#if defined(__CUDA_ARCH__)
template<typename T> EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; }
#else
@@ -510,7 +541,7 @@ using std::numeric_limits;
// Integer division with rounding up.
// T is assumed to be an integer type with a>=0, and b>0
template<typename T>
T div_ceil(const T &a, const T &b)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T div_ceil(const T &a, const T &b)
{
return (a+b-1) / b;
}
@@ -539,30 +570,4 @@ bool not_equal_strict(const double& x,const double& y) { return std::not_equal_t
} // end namespace Eigen
// Define portable (u)int{32,64} types
#if EIGEN_HAS_CXX11
#include <cstdint>
namespace Eigen {
namespace numext {
typedef std::uint32_t uint32_t;
typedef std::int32_t int32_t;
typedef std::uint64_t uint64_t;
typedef std::int64_t int64_t;
}
}
#else
// Without c++11, all compilers able to compile Eigen also
// provides the C99 stdint.h header file.
#include <stdint.h>
namespace Eigen {
namespace numext {
typedef ::uint32_t uint32_t;
typedef ::int32_t int32_t;
typedef ::uint64_t uint64_t;
typedef ::int64_t int64_t;
}
}
#endif
#endif // EIGEN_META_H

View File

@@ -11,7 +11,7 @@
#ifndef EIGEN_MATRIXBASEEIGENVALUES_H
#define EIGEN_MATRIXBASEEIGENVALUES_H
namespace Eigen {
namespace Eigen {
namespace internal {
@@ -42,13 +42,13 @@ struct eigenvalues_selector<Derived, false>
} // end namespace internal
/** \brief Computes the eigenvalues of a matrix
/** \brief Computes the eigenvalues of a matrix
* \returns Column vector containing the eigenvalues.
*
* \eigenvalues_module
* This function computes the eigenvalues with the help of the EigenSolver
* class (for real matrices) or the ComplexEigenSolver class (for complex
* matrices).
* matrices).
*
* The eigenvalues are repeated according to their algebraic multiplicity,
* so there are as many eigenvalues as rows in the matrix.
@@ -83,8 +83,8 @@ MatrixBase<Derived>::eigenvalues() const
*
* \sa SelfAdjointEigenSolver::eigenvalues(), MatrixBase::eigenvalues()
*/
template<typename MatrixType, unsigned int UpLo>
inline typename SelfAdjointView<MatrixType, UpLo>::EigenvaluesReturnType
template<typename MatrixType, unsigned int UpLo>
EIGEN_DEVICE_FUNC inline typename SelfAdjointView<MatrixType, UpLo>::EigenvaluesReturnType
SelfAdjointView<MatrixType, UpLo>::eigenvalues() const
{
PlainObject thisAsMatrix(*this);
@@ -147,7 +147,7 @@ MatrixBase<Derived>::operatorNorm() const
* \sa eigenvalues(), MatrixBase::operatorNorm()
*/
template<typename MatrixType, unsigned int UpLo>
inline typename SelfAdjointView<MatrixType, UpLo>::RealScalar
EIGEN_DEVICE_FUNC inline typename SelfAdjointView<MatrixType, UpLo>::RealScalar
SelfAdjointView<MatrixType, UpLo>::operatorNorm() const
{
return eigenvalues().cwiseAbs().maxCoeff();

2
Eigen/src/Geometry/Scaling.h Executable file → Normal file
View File

@@ -14,7 +14,7 @@ namespace Eigen {
/** \geometry_module \ingroup Geometry_Module
*
* \class Scaling
* \class UniformScaling
*
* \brief Represents a generic uniform scaling transformation
*

View File

@@ -443,7 +443,8 @@ typename internal::matrix_type_times_scalar_type<typename VectorsType::Scalar,Ot
return res;
}
/** \ingroup Householder_Module \householder_module
/** \ingroup Householder_Module
* \householder_module
* \brief Convenience function for constructing a Householder sequence.
* \returns A HouseholderSequence constructed from the specified arguments.
*/
@@ -453,7 +454,8 @@ HouseholderSequence<VectorsType,CoeffsType> householderSequence(const VectorsTyp
return HouseholderSequence<VectorsType,CoeffsType,OnTheLeft>(v, h);
}
/** \ingroup Householder_Module \householder_module
/** \ingroup Householder_Module
* \householder_module
* \brief Convenience function for constructing a Householder sequence.
* \returns A HouseholderSequence constructed from the specified arguments.
* \details This function differs from householderSequence() in that the template argument \p OnTheSide of

View File

@@ -53,7 +53,7 @@ template<typename _MatrixType> struct traits<FullPivLU<_MatrixType> >
* Output: \verbinclude class_FullPivLU.out
*
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
*
*
* \sa MatrixBase::fullPivLu(), MatrixBase::determinant(), MatrixBase::inverse()
*/
template<typename _MatrixType> class FullPivLU
@@ -744,7 +744,7 @@ struct image_retval<FullPivLU<_MatrixType> >
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename _MatrixType>
template<typename RhsType, typename DstType>
void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
EIGEN_DEVICE_FUNC void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
{
/* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}.
* So we proceed as follows:
@@ -792,7 +792,7 @@ void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
template<typename _MatrixType>
template<bool Conjugate, typename RhsType, typename DstType>
void FullPivLU<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const
EIGEN_DEVICE_FUNC void FullPivLU<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const
{
/* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1},
* and since permutations are real and unitary, we can write this
@@ -864,7 +864,7 @@ struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_
{
typedef FullPivLU<MatrixType> LuType;
typedef Inverse<LuType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename MatrixType::Scalar> &)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename MatrixType::Scalar> &)
{
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}

View File

@@ -11,7 +11,7 @@
#ifndef EIGEN_INVERSE_IMPL_H
#define EIGEN_INVERSE_IMPL_H
namespace Eigen {
namespace Eigen {
namespace internal {
@@ -72,7 +72,7 @@ struct compute_inverse_and_det_with_check<MatrixType, ResultType, 1>
****************************/
template<typename MatrixType, typename ResultType>
EIGEN_DEVICE_FUNC
EIGEN_DEVICE_FUNC
inline void compute_inverse_size2_helper(
const MatrixType& matrix, const typename ResultType::Scalar& invdet,
ResultType& result)
@@ -122,7 +122,7 @@ struct compute_inverse_and_det_with_check<MatrixType, ResultType, 2>
****************************/
template<typename MatrixType, int i, int j>
EIGEN_DEVICE_FUNC
EIGEN_DEVICE_FUNC
inline typename MatrixType::Scalar cofactor_3x3(const MatrixType& m)
{
enum {
@@ -200,7 +200,7 @@ struct compute_inverse_and_det_with_check<MatrixType, ResultType, 3>
****************************/
template<typename Derived>
EIGEN_DEVICE_FUNC
EIGEN_DEVICE_FUNC
inline const typename Derived::Scalar general_det3_helper
(const MatrixBase<Derived>& matrix, int i1, int i2, int i3, int j1, int j2, int j3)
{
@@ -209,7 +209,7 @@ inline const typename Derived::Scalar general_det3_helper
}
template<typename MatrixType, int i, int j>
EIGEN_DEVICE_FUNC
EIGEN_DEVICE_FUNC
inline typename MatrixType::Scalar cofactor_4x4(const MatrixType& matrix)
{
enum {
@@ -290,13 +290,13 @@ template<typename DstXprType, typename XprType>
struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar>, Dense2Dense>
{
typedef Inverse<XprType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar> &)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime);
EIGEN_ONLY_USED_FOR_DEBUG(Size);
eigen_assert(( (Size<=1) || (Size>4) || (extract_data(src.nestedExpression())!=extract_data(dst)))
@@ -304,14 +304,14 @@ struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<typename Dst
typedef typename internal::nested_eval<XprType,XprType::ColsAtCompileTime>::type ActualXprType;
typedef typename internal::remove_all<ActualXprType>::type ActualXprTypeCleanded;
ActualXprType actual_xpr(src.nestedExpression());
compute_inverse<ActualXprTypeCleanded, DstXprType>::run(actual_xpr, dst);
}
};
} // end namespace internal
/** \lu_module

View File

@@ -69,7 +69,7 @@ struct enable_if_ref<Ref<T>,Derived> {
* The data of the LU decomposition can be directly accessed through the methods matrixLU(), permutationP().
*
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
*
*
* \sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU
*/
template<typename _MatrixType> class PartialPivLU
@@ -572,7 +572,7 @@ struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assi
{
typedef PartialPivLU<MatrixType> LuType;
typedef Inverse<LuType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename LuType::Scalar> &)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename LuType::Scalar> &)
{
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}

View File

@@ -192,7 +192,7 @@ class PardisoImpl : public SparseSolverBase<Derived>
void pardisoInit(int type)
{
m_type = type;
EIGEN_USING_STD(abs);
EIGEN_USING_STD_MATH(abs);
bool symmetric = abs(m_type) < 10;
m_iparm[0] = 1; // No solver default
m_iparm[1] = 2; // use Metis for the ordering

View File

@@ -42,7 +42,7 @@ template<typename _MatrixType> struct traits<ColPivHouseholderQR<_MatrixType> >
* numerical stability. It is slower than HouseholderQR, and faster than FullPivHouseholderQR.
*
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
*
*
* \sa MatrixBase::colPivHouseholderQr()
*/
template<typename _MatrixType> class ColPivHouseholderQR
@@ -582,7 +582,7 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename _MatrixType>
template<typename RhsType, typename DstType>
void ColPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
EIGEN_DEVICE_FUNC void ColPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
{
eigen_assert(rhs.rows() == rows());
@@ -618,7 +618,7 @@ struct Assignment<DstXprType, Inverse<ColPivHouseholderQR<MatrixType> >, interna
{
typedef ColPivHouseholderQR<MatrixType> QrType;
typedef Inverse<QrType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename QrType::Scalar> &)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename QrType::Scalar> &)
{
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}

View File

@@ -41,7 +41,7 @@ struct traits<CompleteOrthogonalDecomposition<_MatrixType> >
* size rank-by-rank. \b A may be rank deficient.
*
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
*
*
* \sa MatrixBase::completeOrthogonalDecomposition()
*/
template <typename _MatrixType>
@@ -489,7 +489,7 @@ void CompleteOrthogonalDecomposition<MatrixType>::applyZAdjointOnTheLeftInPlace(
#ifndef EIGEN_PARSED_BY_DOXYGEN
template <typename _MatrixType>
template <typename RhsType, typename DstType>
void CompleteOrthogonalDecomposition<_MatrixType>::_solve_impl(
EIGEN_DEVICE_FUNC void CompleteOrthogonalDecomposition<_MatrixType>::_solve_impl(
const RhsType& rhs, DstType& dst) const {
eigen_assert(rhs.rows() == this->rows());
@@ -532,7 +532,7 @@ struct Assignment<DstXprType, Inverse<CompleteOrthogonalDecomposition<MatrixType
{
typedef CompleteOrthogonalDecomposition<MatrixType> CodType;
typedef Inverse<CodType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename CodType::Scalar> &)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename CodType::Scalar> &)
{
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.rows()));
}

View File

@@ -11,7 +11,7 @@
#ifndef EIGEN_FULLPIVOTINGHOUSEHOLDERQR_H
#define EIGEN_FULLPIVOTINGHOUSEHOLDERQR_H
namespace Eigen {
namespace Eigen {
namespace internal {
@@ -40,18 +40,18 @@ struct traits<FullPivHouseholderQRMatrixQReturnType<MatrixType> >
* \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition
*
* This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b P', \b Q and \b R
* such that
* such that
* \f[
* \mathbf{P} \, \mathbf{A} \, \mathbf{P}' = \mathbf{Q} \, \mathbf{R}
* \f]
* by using Householder transformations. Here, \b P and \b P' are permutation matrices, \b Q a unitary matrix
* by using Householder transformations. Here, \b P and \b P' are permutation matrices, \b Q a unitary matrix
* and \b R an upper triangular matrix.
*
* This decomposition performs a very prudent full pivoting in order to be rank-revealing and achieve optimal
* numerical stability. The trade-off is that it is slower than HouseholderQR and ColPivHouseholderQR.
*
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
*
*
* \sa MatrixBase::fullPivHouseholderQr()
*/
template<typename _MatrixType> class FullPivHouseholderQR
@@ -114,12 +114,12 @@ template<typename _MatrixType> class FullPivHouseholderQR
*
* This constructor computes the QR factorization of the matrix \a matrix by calling
* the method compute(). It is a short cut for:
*
*
* \code
* FullPivHouseholderQR<MatrixType> qr(matrix.rows(), matrix.cols());
* qr.compute(matrix);
* \endcode
*
*
* \sa compute()
*/
template<typename InputType>
@@ -317,9 +317,9 @@ template<typename _MatrixType> class FullPivHouseholderQR
inline Index rows() const { return m_qr.rows(); }
inline Index cols() const { return m_qr.cols(); }
/** \returns a const reference to the vector of Householder coefficients used to represent the factor \c Q.
*
*
* For advanced uses only.
*/
const HCoeffsType& hCoeffs() const { return m_hCoeffs; }
@@ -392,7 +392,7 @@ template<typename _MatrixType> class FullPivHouseholderQR
* diagonal coefficient of U.
*/
RealScalar maxPivot() const { return m_maxpivot; }
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename RhsType, typename DstType>
EIGEN_DEVICE_FUNC
@@ -400,14 +400,14 @@ template<typename _MatrixType> class FullPivHouseholderQR
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
void computeInPlace();
MatrixType m_qr;
HCoeffsType m_hCoeffs;
IntDiagSizeVectorType m_rows_transpositions;
@@ -463,7 +463,7 @@ void FullPivHouseholderQR<MatrixType>::computeInPlace()
Index cols = m_qr.cols();
Index size = (std::min)(rows,cols);
m_hCoeffs.resize(size);
m_temp.resize(cols);
@@ -539,7 +539,7 @@ void FullPivHouseholderQR<MatrixType>::computeInPlace()
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename _MatrixType>
template<typename RhsType, typename DstType>
void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
EIGEN_DEVICE_FUNC void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
{
eigen_assert(rhs.rows() == rows());
const Index l_rank = rank();
@@ -574,14 +574,14 @@ void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType
#endif
namespace internal {
template<typename DstXprType, typename MatrixType>
struct Assignment<DstXprType, Inverse<FullPivHouseholderQR<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename FullPivHouseholderQR<MatrixType>::Scalar>, Dense2Dense>
{
typedef FullPivHouseholderQR<MatrixType> QrType;
typedef Inverse<QrType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename QrType::Scalar> &)
{
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename QrType::Scalar> &)
{
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}
};

View File

@@ -12,7 +12,7 @@
#ifndef EIGEN_QR_H
#define EIGEN_QR_H
namespace Eigen {
namespace Eigen {
/** \ingroup QR_Module
*
@@ -24,7 +24,7 @@ namespace Eigen {
* \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition
*
* This class performs a QR decomposition of a matrix \b A into matrices \b Q and \b R
* such that
* such that
* \f[
* \mathbf{A} = \mathbf{Q} \, \mathbf{R}
* \f]
@@ -85,12 +85,12 @@ template<typename _MatrixType> class HouseholderQR
*
* This constructor computes the QR factorization of the matrix \a matrix by calling
* the method compute(). It is a short cut for:
*
*
* \code
* HouseholderQR<MatrixType> qr(matrix.rows(), matrix.cols());
* qr.compute(matrix);
* \endcode
*
*
* \sa compute()
*/
template<typename InputType>
@@ -204,13 +204,13 @@ template<typename _MatrixType> class HouseholderQR
inline Index rows() const { return m_qr.rows(); }
inline Index cols() const { return m_qr.cols(); }
/** \returns a const reference to the vector of Householder coefficients used to represent the factor \c Q.
*
*
* For advanced uses only.
*/
const HCoeffsType& hCoeffs() const { return m_hCoeffs; }
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename RhsType, typename DstType>
EIGEN_DEVICE_FUNC
@@ -218,14 +218,14 @@ template<typename _MatrixType> class HouseholderQR
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
void computeInPlace();
MatrixType m_qr;
HCoeffsType m_hCoeffs;
RowVectorType m_temp;
@@ -347,7 +347,7 @@ struct householder_qr_inplace_blocked
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename _MatrixType>
template<typename RhsType, typename DstType>
void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
EIGEN_DEVICE_FUNC void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
{
const Index rank = (std::min)(rows(), cols());
eigen_assert(rhs.rows() == rows());
@@ -379,7 +379,7 @@ template<typename MatrixType>
void HouseholderQR<MatrixType>::computeInPlace()
{
check_template_parameters();
Index rows = m_qr.rows();
Index cols = m_qr.cols();
Index size = (std::min)(rows,cols);

View File

@@ -34,12 +34,12 @@ namespace Eigen {
*
* Singular values are always sorted in decreasing order.
*
*
*
* You can ask for only \em thin \a U or \a V to be computed, meaning the following. In case of a rectangular n-by-p matrix, letting \a m be the
* smaller value among \a n and \a p, there are only \a m singular vectors; the remaining columns of \a U and \a V do not correspond to actual
* singular vectors. Asking for \em thin \a U or \a V means asking for only their \a m first columns to be formed. So \a U is then a n-by-m matrix,
* and \a V is then a p-by-m matrix. Notice that thin \a U and \a V are all you need for (least squares) solving.
*
*
* If the input matrix has inf or nan coefficients, the result of the computation is undefined, but the computation is guaranteed to
* terminate in finite (and reasonable) time.
* \sa class BDCSVD, class JacobiSVD
@@ -67,7 +67,7 @@ public:
typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime, MatrixOptions, MaxRowsAtCompileTime, MaxRowsAtCompileTime> MatrixUType;
typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime, MatrixOptions, MaxColsAtCompileTime, MaxColsAtCompileTime> MatrixVType;
typedef typename internal::plain_diag_type<MatrixType, RealScalar>::type SingularValuesType;
Derived& derived() { return *static_cast<Derived*>(this); }
const Derived& derived() const { return *static_cast<const Derived*>(this); }
@@ -120,7 +120,7 @@ public:
eigen_assert(m_isInitialized && "SVD is not initialized.");
return m_nonzeroSingularValues;
}
/** \returns the rank of the matrix of which \c *this is the SVD.
*
* \note This method has to determine which singular values should be considered nonzero.
@@ -137,7 +137,7 @@ public:
while(i>=0 && m_singularValues.coeff(i) < premultiplied_threshold) --i;
return i+1;
}
/** Allows to prescribe a threshold to be used by certain methods, such as rank() and solve(),
* which need to determine when singular values are to be considered nonzero.
* This is not used for the SVD decomposition itself.
@@ -193,7 +193,7 @@ public:
inline Index rows() const { return m_rows; }
inline Index cols() const { return m_cols; }
/** \returns a (least squares) solution of \f$ A x = b \f$ using the current SVD decomposition of A.
*
* \param b the right-hand-side of the equation to solve.
@@ -211,7 +211,7 @@ public:
eigen_assert(computeU() && computeV() && "SVD::solve() requires both unitaries U and V to be computed (thin unitaries suffice).");
return Solve<Derived, Rhs>(derived(), b.derived());
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename RhsType, typename DstType>
EIGEN_DEVICE_FUNC
@@ -219,12 +219,12 @@ public:
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
// return true if already allocated
bool allocate(Index rows, Index cols, unsigned int computationOptions) ;
@@ -258,7 +258,7 @@ protected:
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename Derived>
template<typename RhsType, typename DstType>
void SVDBase<Derived>::_solve_impl(const RhsType &rhs, DstType &dst) const
EIGEN_DEVICE_FUNC void SVDBase<Derived>::_solve_impl(const RhsType &rhs, DstType &dst) const
{
eigen_assert(rhs.rows() == rows());

View File

@@ -10,9 +10,9 @@
#ifndef EIGEN_SPARSEASSIGN_H
#define EIGEN_SPARSEASSIGN_H
namespace Eigen {
namespace Eigen {
template<typename Derived>
template<typename Derived>
template<typename OtherDerived>
Derived& SparseMatrixBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
{
@@ -104,7 +104,7 @@ void assign_sparse_to_sparse(DstXprType &dst, const SrcXprType &src)
enum { Flip = (DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit) };
DstXprType temp(src.rows(), src.cols());
temp.reserve((std::max)(src.rows(),src.cols())*2);
@@ -127,7 +127,7 @@ void assign_sparse_to_sparse(DstXprType &dst, const SrcXprType &src)
template< typename DstXprType, typename SrcXprType, typename Functor>
struct Assignment<DstXprType, SrcXprType, Functor, Sparse2Sparse>
{
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
assign_sparse_to_sparse(dst.derived(), src.derived());
}
@@ -137,15 +137,15 @@ struct Assignment<DstXprType, SrcXprType, Functor, Sparse2Sparse>
template< typename DstXprType, typename SrcXprType, typename Functor>
struct Assignment<DstXprType, SrcXprType, Functor, Sparse2Dense>
{
static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{
if(internal::is_same<Functor,internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> >::value)
dst.setZero();
internal::evaluator<SrcXprType> srcEval(src);
resize_if_allowed(dst, src, func);
internal::evaluator<DstXprType> dstEval(dst);
const Index outerEvaluationSize = (internal::evaluator<SrcXprType>::Flags&RowMajorBit) ? src.rows() : src.cols();
for (Index j=0; j<outerEvaluationSize; ++j)
for (typename internal::evaluator<SrcXprType>::InnerIterator i(srcEval,j); i; ++i)
@@ -159,7 +159,7 @@ template<typename DstXprType, typename DecType, typename RhsType, typename Scala
struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar,Scalar>, Sparse2Sparse>
{
typedef Solve<DecType,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
@@ -182,7 +182,7 @@ struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Sparse>
typedef Array<StorageIndex,Dynamic,1> ArrayXI;
typedef Array<Scalar,Dynamic,1> ArrayXS;
template<int Options>
static void run(SparseMatrix<Scalar,Options,StorageIndex> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
static EIGEN_DEVICE_FUNC void run(SparseMatrix<Scalar,Options,StorageIndex> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
@@ -196,16 +196,16 @@ struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Sparse>
Map<ArrayXI>(dst.outerIndexPtr(), size+1).setLinSpaced(0,StorageIndex(size));
Map<ArrayXS>(dst.valuePtr(), size) = src.diagonal();
}
template<typename DstDerived>
static void run(SparseMatrixBase<DstDerived> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
dst.diagonal() = src.diagonal();
}
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{ dst.diagonal() += src.diagonal(); }
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{ dst.diagonal() -= src.diagonal(); }
};

View File

@@ -101,7 +101,7 @@ public:
}
else
{
m_value = 0; // this is to avoid a compilation warning
m_value = Scalar(0); // this is to avoid a compilation warning
m_id = -1;
}
return *this;

View File

@@ -577,10 +577,12 @@ class SparseMatrix
else if (innerChange < 0)
{
// Inner size decreased: allocate a new m_innerNonZeros
m_innerNonZeros = static_cast<StorageIndex*>(std::malloc((m_outerSize+outerChange+1) * sizeof(StorageIndex)));
m_innerNonZeros = static_cast<StorageIndex*>(std::malloc((m_outerSize + outerChange) * sizeof(StorageIndex)));
if (!m_innerNonZeros) internal::throw_std_bad_alloc();
for(Index i = 0; i < m_outerSize; i++)
for(Index i = 0; i < m_outerSize + (std::min)(outerChange, Index(0)); i++)
m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i];
for(Index i = m_outerSize; i < m_outerSize + outerChange; i++)
m_innerNonZeros[i] = 0;
}
// Change the m_innerNonZeros in case of a decrease of inner size
@@ -974,7 +976,7 @@ void set_from_triplets(const InputIterator& begin, const InputIterator& end, Spa
* \code
typedef Triplet<double> T;
std::vector<T> tripletList;
triplets.reserve(estimation_of_entries);
tripletList.reserve(estimation_of_entries);
for(...)
{
// ...
@@ -1233,7 +1235,7 @@ typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& SparseMatrix<_Sca
}
m_data.index(p) = convert_index(inner);
return (m_data.value(p) = 0);
return (m_data.value(p) = Scalar(0));
}
if(m_data.size() != m_data.allocatedSize())

View File

@@ -10,7 +10,7 @@
#ifndef EIGEN_SPARSEPRODUCT_H
#define EIGEN_SPARSEPRODUCT_H
namespace Eigen {
namespace Eigen {
/** \returns an expression of the product of two sparse matrices.
* By default a conservative product preserving the symbolic non zeros is performed.
@@ -102,13 +102,13 @@ template< typename DstXprType, typename Lhs, typename Rhs>
struct Assignment<DstXprType, Product<Lhs,Rhs,AliasFreeProduct>, internal::assign_op<typename DstXprType::Scalar,typename Product<Lhs,Rhs,AliasFreeProduct>::Scalar>, Sparse2Dense>
{
typedef Product<Lhs,Rhs,AliasFreeProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
generic_product_impl<Lhs, Rhs>::evalTo(dst,src.lhs(),src.rhs());
}
};
@@ -118,7 +118,7 @@ template< typename DstXprType, typename Lhs, typename Rhs>
struct Assignment<DstXprType, Product<Lhs,Rhs,AliasFreeProduct>, internal::add_assign_op<typename DstXprType::Scalar,typename Product<Lhs,Rhs,AliasFreeProduct>::Scalar>, Sparse2Dense>
{
typedef Product<Lhs,Rhs,AliasFreeProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)
{
generic_product_impl<Lhs, Rhs>::addTo(dst,src.lhs(),src.rhs());
}
@@ -129,7 +129,7 @@ template< typename DstXprType, typename Lhs, typename Rhs>
struct Assignment<DstXprType, Product<Lhs,Rhs,AliasFreeProduct>, internal::sub_assign_op<typename DstXprType::Scalar,typename Product<Lhs,Rhs,AliasFreeProduct>::Scalar>, Sparse2Dense>
{
typedef Product<Lhs,Rhs,AliasFreeProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)
{
generic_product_impl<Lhs, Rhs>::subTo(dst,src.lhs(),src.rhs());
}

View File

@@ -10,8 +10,8 @@
#ifndef EIGEN_SPARSE_SELFADJOINTVIEW_H
#define EIGEN_SPARSE_SELFADJOINTVIEW_H
namespace Eigen {
namespace Eigen {
/** \ingroup SparseCore_Module
* \class SparseSelfAdjointView
*
@@ -27,7 +27,7 @@ namespace Eigen {
* \sa SparseMatrixBase::selfadjointView()
*/
namespace internal {
template<typename MatrixType, unsigned int Mode>
struct traits<SparseSelfAdjointView<MatrixType,Mode> > : traits<MatrixType> {
};
@@ -44,7 +44,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
: public EigenBase<SparseSelfAdjointView<MatrixType,_Mode> >
{
public:
enum {
Mode = _Mode,
TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0),
@@ -58,7 +58,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
typedef Matrix<StorageIndex,Dynamic,1> VectorI;
typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename internal::remove_all<MatrixTypeNested>::type _MatrixTypeNested;
explicit inline SparseSelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
{
eigen_assert(rows()==cols() && "SelfAdjointView is only for squared matrices");
@@ -94,7 +94,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
{
return Product<OtherDerived, SparseSelfAdjointView>(lhs.derived(), rhs);
}
/** Efficient sparse self-adjoint matrix times dense vector/matrix product */
template<typename OtherDerived>
Product<SparseSelfAdjointView,OtherDerived>
@@ -121,7 +121,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
*/
template<typename DerivedU>
SparseSelfAdjointView& rankUpdate(const SparseMatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
/** \returns an expression of P H P^-1 */
// TODO implement twists in a more evaluator friendly fashion
SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode> twistedBy(const PermutationMatrix<Dynamic,Dynamic,StorageIndex>& perm) const
@@ -148,7 +148,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
PermutationMatrix<Dynamic,Dynamic,StorageIndex> pnull;
return *this = src.twistedBy(pnull);
}
void resize(Index rows, Index cols)
{
EIGEN_ONLY_USED_FOR_DEBUG(rows);
@@ -156,7 +156,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
eigen_assert(rows == this->rows() && cols == this->cols()
&& "SparseSelfadjointView::resize() does not actually allow to resize.");
}
protected:
MatrixTypeNested m_matrix;
@@ -203,7 +203,7 @@ SparseSelfAdjointView<MatrixType,Mode>::rankUpdate(const SparseMatrixBase<Derive
}
namespace internal {
// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>
// in the future selfadjoint-ness should be defined by the expression traits
// such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
@@ -226,7 +226,7 @@ struct Assignment<DstXprType, SrcXprType, Functor, SparseSelfAdjoint2Sparse>
typedef internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> AssignOpType;
template<typename DestScalar,int StorageOrder>
static void run(SparseMatrix<DestScalar,StorageOrder,StorageIndex> &dst, const SrcXprType &src, const AssignOpType&/*func*/)
static EIGEN_DEVICE_FUNC void run(SparseMatrix<DestScalar,StorageOrder,StorageIndex> &dst, const SrcXprType &src, const AssignOpType&/*func*/)
{
internal::permute_symm_to_fullsymm<SrcXprType::Mode>(src.matrix(), dst);
}
@@ -257,7 +257,7 @@ struct Assignment<DstXprType, SrcXprType, Functor, SparseSelfAdjoint2Sparse>
run(tmp, src, AssignOpType());
dst -= tmp;
}
template<typename DestScalar>
static void run(DynamicSparseMatrix<DestScalar,ColMajor,StorageIndex>& dst, const SrcXprType &src, const AssignOpType&/*func*/)
{
@@ -280,13 +280,13 @@ template<int Mode, typename SparseLhsType, typename DenseRhsType, typename Dense
inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)
{
EIGEN_ONLY_USED_FOR_DEBUG(alpha);
typedef typename internal::nested_eval<SparseLhsType,DenseRhsType::MaxColsAtCompileTime>::type SparseLhsTypeNested;
typedef typename internal::remove_all<SparseLhsTypeNested>::type SparseLhsTypeNestedCleaned;
typedef evaluator<SparseLhsTypeNestedCleaned> LhsEval;
typedef typename LhsEval::InnerIterator LhsIterator;
typedef typename SparseLhsType::Scalar LhsScalar;
enum {
LhsIsRowMajor = (LhsEval::Flags&RowMajorBit)==RowMajorBit,
ProcessFirstHalf =
@@ -295,7 +295,7 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons
|| ( (Mode&Lower) && LhsIsRowMajor),
ProcessSecondHalf = !ProcessFirstHalf
};
SparseLhsTypeNested lhs_nested(lhs);
LhsEval lhsEval(lhs_nested);
@@ -349,7 +349,7 @@ struct generic_product_impl<LhsView, Rhs, SparseSelfAdjointShape, DenseShape, Pr
typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;
LhsNested lhsNested(lhsView.matrix());
RhsNested rhsNested(rhs);
internal::sparse_selfadjoint_time_dense_product<LhsView::Mode>(lhsNested, rhsNested, dst, alpha);
}
};
@@ -366,7 +366,7 @@ struct generic_product_impl<Lhs, RhsView, DenseShape, SparseSelfAdjointShape, Pr
typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;
LhsNested lhsNested(lhs);
RhsNested rhsNested(rhsView.matrix());
// transpose everything
Transpose<Dest> dstT(dst);
internal::sparse_selfadjoint_time_dense_product<RhsView::TransposeMode>(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha);
@@ -390,7 +390,7 @@ struct product_evaluator<Product<LhsView, Rhs, DefaultProduct>, ProductTag, Spar
::new (static_cast<Base*>(this)) Base(m_result);
generic_product_impl<typename Rhs::PlainObject, Rhs, SparseShape, SparseShape, ProductTag>::evalTo(m_result, m_lhs, xpr.rhs());
}
protected:
typename Rhs::PlainObject m_lhs;
PlainObject m_result;
@@ -410,7 +410,7 @@ struct product_evaluator<Product<Lhs, RhsView, DefaultProduct>, ProductTag, Spar
::new (static_cast<Base*>(this)) Base(m_result);
generic_product_impl<Lhs, typename Lhs::PlainObject, SparseShape, SparseShape, ProductTag>::evalTo(m_result, xpr.lhs(), m_rhs);
}
protected:
typename Lhs::PlainObject m_rhs;
PlainObject m_result;
@@ -432,13 +432,13 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri
typedef Matrix<StorageIndex,Dynamic,1> VectorI;
typedef evaluator<MatrixType> MatEval;
typedef typename evaluator<MatrixType>::InnerIterator MatIterator;
MatEval matEval(mat);
Dest& dest(_dest.derived());
enum {
StorageOrderMatch = int(Dest::IsRowMajor) == int(MatrixType::IsRowMajor)
};
Index size = mat.rows();
VectorI count;
count.resize(size);
@@ -465,7 +465,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri
}
}
Index nnz = count.sum();
// reserve space
dest.resizeNonZeros(nnz);
dest.outerIndexPtr()[0] = 0;
@@ -473,7 +473,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri
dest.outerIndexPtr()[j+1] = dest.outerIndexPtr()[j] + count[j];
for(Index j=0; j<size; ++j)
count[j] = dest.outerIndexPtr()[j];
// copy data
for(StorageIndex j = 0; j<size; ++j)
{
@@ -482,10 +482,10 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri
StorageIndex i = internal::convert_index<StorageIndex>(it.index());
Index r = it.row();
Index c = it.col();
StorageIndex jp = perm ? perm[j] : j;
StorageIndex ip = perm ? perm[i] : i;
if(Mode==int(Upper|Lower))
{
Index k = count[StorageOrderMatch ? jp : ip]++;
@@ -531,7 +531,7 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp
};
MatEval matEval(mat);
Index size = mat.rows();
VectorI count(size);
count.setZero();
@@ -544,7 +544,7 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp
StorageIndex i = it.index();
if((int(SrcMode)==int(Lower) && i<j) || (int(SrcMode)==int(Upper) && i>j))
continue;
StorageIndex ip = perm ? perm[i] : i;
count[int(DstMode)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++;
}
@@ -555,22 +555,22 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp
dest.resizeNonZeros(dest.outerIndexPtr()[size]);
for(Index j=0; j<size; ++j)
count[j] = dest.outerIndexPtr()[j];
for(StorageIndex j = 0; j<size; ++j)
{
for(MatIterator it(matEval,j); it; ++it)
{
StorageIndex i = it.index();
if((int(SrcMode)==int(Lower) && i<j) || (int(SrcMode)==int(Upper) && i>j))
continue;
StorageIndex jp = perm ? perm[j] : j;
StorageIndex ip = perm? perm[i] : i;
Index k = count[int(DstMode)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++;
dest.innerIndexPtr()[k] = int(DstMode)==int(Lower) ? (std::max)(ip,jp) : (std::min)(ip,jp);
if(!StorageOrderMatch) std::swap(ip,jp);
if( ((int(DstMode)==int(Lower) && ip<jp) || (int(DstMode)==int(Upper) && ip>jp)))
dest.valuePtr()[k] = numext::conj(it.value());
@@ -609,17 +609,17 @@ class SparseSymmetricPermutationProduct
typedef Matrix<StorageIndex,Dynamic,1> VectorI;
typedef typename MatrixType::Nested MatrixTypeNested;
typedef typename internal::remove_all<MatrixTypeNested>::type NestedExpression;
SparseSymmetricPermutationProduct(const MatrixType& mat, const Perm& perm)
: m_matrix(mat), m_perm(perm)
{}
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
const NestedExpression& matrix() const { return m_matrix; }
const Perm& perm() const { return m_perm; }
protected:
MatrixTypeNested m_matrix;
const Perm& m_perm;
@@ -627,21 +627,21 @@ class SparseSymmetricPermutationProduct
};
namespace internal {
template<typename DstXprType, typename MatrixType, int Mode, typename Scalar>
struct Assignment<DstXprType, SparseSymmetricPermutationProduct<MatrixType,Mode>, internal::assign_op<Scalar,typename MatrixType::Scalar>, Sparse2Sparse>
{
typedef SparseSymmetricPermutationProduct<MatrixType,Mode> SrcXprType;
typedef typename DstXprType::StorageIndex DstIndex;
template<int Options>
static void run(SparseMatrix<Scalar,Options,DstIndex> &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename MatrixType::Scalar> &)
static EIGEN_DEVICE_FUNC void run(SparseMatrix<Scalar,Options,DstIndex> &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename MatrixType::Scalar> &)
{
// internal::permute_symm_to_fullsymm<Mode>(m_matrix,_dest,m_perm.indices().data());
SparseMatrix<Scalar,(Options&RowMajor)==RowMajor ? ColMajor : RowMajor, DstIndex> tmp;
internal::permute_symm_to_fullsymm<Mode>(src.matrix(),tmp,src.perm().indices().data());
dst = tmp;
}
template<typename DestType,unsigned int DestMode>
static void run(SparseSelfAdjointView<DestType,DestMode>& dst, const SrcXprType &src, const internal::assign_op<Scalar,typename MatrixType::Scalar> &)
{

View File

@@ -42,31 +42,31 @@ namespace internal {
* \ingroup SparseQR_Module
* \class SparseQR
* \brief Sparse left-looking rank-revealing QR factorization
*
* This class implements a left-looking rank-revealing QR decomposition
*
* This class implements a left-looking rank-revealing QR decomposition
* of sparse matrices. When a column has a norm less than a given tolerance
* it is implicitly permuted to the end. The QR factorization thus obtained is
* given by A*P = Q*R where R is upper triangular or trapezoidal.
*
* it is implicitly permuted to the end. The QR factorization thus obtained is
* given by A*P = Q*R where R is upper triangular or trapezoidal.
*
* P is the column permutation which is the product of the fill-reducing and the
* rank-revealing permutations. Use colsPermutation() to get it.
*
* Q is the orthogonal matrix represented as products of Householder reflectors.
*
* Q is the orthogonal matrix represented as products of Householder reflectors.
* Use matrixQ() to get an expression and matrixQ().adjoint() to get the adjoint.
* You can then apply it to a vector.
*
*
* R is the sparse triangular or trapezoidal matrix. The later occurs when A is rank-deficient.
* matrixR().topLeftCorner(rank(), rank()) always returns a triangular factor of full rank.
*
*
* \tparam _MatrixType The type of the sparse matrix A, must be a column-major SparseMatrix<>
* \tparam _OrderingType The fill-reducing ordering method. See the \link OrderingMethods_Module
* \tparam _OrderingType The fill-reducing ordering method. See the \link OrderingMethods_Module
* OrderingMethods \endlink module for the list of built-in and external ordering methods.
*
*
* \implsparsesolverconcept
*
* \warning The input sparse matrix A must be in compressed mode (see SparseMatrix::makeCompressed()).
* \warning For complex matrices matrixQ().transpose() will actually return the adjoint matrix.
*
*
*/
template<typename _MatrixType, typename _OrderingType>
class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
@@ -90,26 +90,26 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
};
public:
SparseQR () : m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false)
{ }
/** Construct a QR factorization of the matrix \a mat.
*
*
* \warning The matrix \a mat must be in compressed mode (see SparseMatrix::makeCompressed()).
*
*
* \sa compute()
*/
explicit SparseQR(const MatrixType& mat) : m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false)
{
compute(mat);
}
/** Computes the QR factorization of the sparse matrix \a mat.
*
*
* \warning The matrix \a mat must be in compressed mode (see SparseMatrix::makeCompressed()).
*
*
* \sa analyzePattern(), factorize()
*/
void compute(const MatrixType& mat)
@@ -119,15 +119,15 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
}
void analyzePattern(const MatrixType& mat);
void factorize(const MatrixType& mat);
/** \returns the number of rows of the represented matrix.
/** \returns the number of rows of the represented matrix.
*/
inline Index rows() const { return m_pmat.rows(); }
/** \returns the number of columns of the represented matrix.
/** \returns the number of columns of the represented matrix.
*/
inline Index cols() const { return m_pmat.cols();}
/** \returns a const reference to the \b sparse upper triangular matrix R of the QR factorization.
* \warning The entries of the returned matrix are not sorted. This means that using it in algorithms
* expecting sorted entries will fail. This include random coefficient accesses (SpaseMatrix::coeff()),
@@ -142,7 +142,7 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
* \endcode
*/
const QRMatrixType& matrixR() const { return m_R; }
/** \returns the number of non linearly dependent columns as determined by the pivoting threshold.
*
* \sa setPivotThreshold()
@@ -150,9 +150,9 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
Index rank() const
{
eigen_assert(m_isInitialized && "The factorization should be called first, use compute()");
return m_nonzeropivots;
return m_nonzeropivots;
}
/** \returns an expression of the matrix Q as products of sparse Householder reflectors.
* The common usage of this function is to apply it to a dense matrix or vector
* \code
@@ -171,23 +171,23 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
* reflectors are stored unsorted, two transpositions are needed to sort
* them before performing the product.
*/
SparseQRMatrixQReturnType<SparseQR> matrixQ() const
SparseQRMatrixQReturnType<SparseQR> matrixQ() const
{ return SparseQRMatrixQReturnType<SparseQR>(*this); }
/** \returns a const reference to the column permutation P that was applied to A such that A*P = Q*R
* It is the combination of the fill-in reducing permutation and numerical column pivoting.
*/
const PermutationType& colsPermutation() const
{
{
eigen_assert(m_isInitialized && "Decomposition is not initialized.");
return m_outputPerm_c;
}
/** \returns A string describing the type of error.
* This method is provided to ease debugging, not to handle errors.
*/
std::string lastErrorMessage() const { return m_lastError; }
/** \internal */
template<typename Rhs, typename Dest>
bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &dest) const
@@ -196,21 +196,21 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix");
Index rank = this->rank();
// Compute Q^* * b;
typename Dest::PlainObject y, b;
y = this->matrixQ().adjoint() * B;
b = y;
// Solve with the triangular matrix R
y.resize((std::max<Index>)(cols(),y.rows()),y.cols());
y.topRows(rank) = this->matrixR().topLeftCorner(rank, rank).template triangularView<Upper>().solve(b.topRows(rank));
y.bottomRows(y.rows()-rank).setZero();
// Apply the column permutation
if (m_perm_c.size()) dest = colsPermutation() * y.topRows(cols());
else dest = y.topRows(cols());
m_info = Success;
return true;
}
@@ -225,13 +225,13 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
m_useDefaultThreshold = false;
m_threshold = threshold;
}
/** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
*
* \sa compute()
*/
template<typename Rhs>
inline const Solve<SparseQR, Rhs> solve(const MatrixBase<Rhs>& B) const
inline const Solve<SparseQR, Rhs> solve(const MatrixBase<Rhs>& B) const
{
eigen_assert(m_isInitialized && "The factorization should be called first, use compute()");
eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix");
@@ -244,14 +244,14 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix");
return Solve<SparseQR, Rhs>(*this, B.derived());
}
/** \brief Reports whether previous computation was successful.
*
* \returns \c Success if computation was successful,
* \c NumericalIssue if the QR factorization reports a numerical problem
* \c InvalidInput if the input matrix is invalid
*
* \sa iparm()
* \sa iparm()
*/
ComputationInfo info() const
{
@@ -270,7 +270,7 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
this->m_isQSorted = true;
}
protected:
bool m_analysisIsok;
bool m_factorizationIsok;
@@ -290,18 +290,18 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
IndexVector m_firstRowElt; // First element in each row
bool m_isQSorted; // whether Q is sorted or not
bool m_isEtreeOk; // whether the elimination tree match the initial input matrix
template <typename, typename > friend struct SparseQR_QProduct;
};
/** \brief Preprocessing step of a QR factorization
*
/** \brief Preprocessing step of a QR factorization
*
* \warning The matrix \a mat must be in compressed mode (see SparseMatrix::makeCompressed()).
*
*
* In this step, the fill-reducing permutation is computed and applied to the columns of A
* and the column elimination tree is computed as well. Only the sparsity pattern of \a mat is exploited.
*
*
* \note In this step it is assumed that there is no empty row in the matrix \a mat.
*/
template <typename MatrixType, typename OrderingType>
@@ -311,26 +311,26 @@ void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)
// Copy to a column major matrix if the input is rowmajor
typename internal::conditional<MatrixType::IsRowMajor,QRMatrixType,const MatrixType&>::type matCpy(mat);
// Compute the column fill reducing ordering
OrderingType ord;
ord(matCpy, m_perm_c);
OrderingType ord;
ord(matCpy, m_perm_c);
Index n = mat.cols();
Index m = mat.rows();
Index diagSize = (std::min)(m,n);
if (!m_perm_c.size())
{
m_perm_c.resize(n);
m_perm_c.indices().setLinSpaced(n, 0,StorageIndex(n-1));
}
// Compute the column elimination tree of the permuted matrix
m_outputPerm_c = m_perm_c.inverse();
internal::coletree(matCpy, m_etree, m_firstRowElt, m_outputPerm_c.indices().data());
m_isEtreeOk = true;
m_R.resize(m, n);
m_Q.resize(m, diagSize);
// Allocate space for nonzero elements : rough estimation
m_R.reserve(2*mat.nonZeros()); //FIXME Get a more accurate estimation through symbolic factorization with the etree
m_Q.reserve(2*mat.nonZeros());
@@ -339,17 +339,17 @@ void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)
}
/** \brief Performs the numerical QR factorization of the input matrix
*
*
* The function SparseQR::analyzePattern(const MatrixType&) must have been called beforehand with
* a matrix having the same sparsity pattern than \a mat.
*
*
* \param mat The sparse column-major matrix
*/
template <typename MatrixType, typename OrderingType>
void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
{
using std::abs;
eigen_assert(m_analysisIsok && "analyzePattern() should be called before this step");
StorageIndex m = StorageIndex(mat.rows());
StorageIndex n = StorageIndex(mat.cols());
@@ -359,7 +359,7 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
Index nzcolR, nzcolQ; // Number of nonzero for the current column of R and Q
ScalarVector tval(m); // The dense vector used to compute the current column
RealScalar pivotThreshold = m_threshold;
m_R.setZero();
m_Q.setZero();
m_pmat = mat;
@@ -371,12 +371,12 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
}
m_pmat.uncompress(); // To have the innerNonZeroPtr allocated
// Apply the fill-in reducing permutation lazily:
{
// If the input is row major, copy the original column indices,
// otherwise directly use the input matrix
//
//
IndexVector originalOuterIndicesCpy;
const StorageIndex *originalOuterIndices = mat.outerIndexPtr();
if(MatrixType::IsRowMajor)
@@ -384,20 +384,20 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
originalOuterIndicesCpy = IndexVector::Map(m_pmat.outerIndexPtr(),n+1);
originalOuterIndices = originalOuterIndicesCpy.data();
}
for (int i = 0; i < n; i++)
{
Index p = m_perm_c.size() ? m_perm_c.indices()(i) : i;
m_pmat.outerIndexPtr()[p] = originalOuterIndices[i];
m_pmat.innerNonZeroPtr()[p] = originalOuterIndices[i+1] - originalOuterIndices[i];
m_pmat.outerIndexPtr()[p] = originalOuterIndices[i];
m_pmat.innerNonZeroPtr()[p] = originalOuterIndices[i+1] - originalOuterIndices[i];
}
}
/* Compute the default threshold as in MatLab, see:
* Tim Davis, "Algorithm 915, SuiteSparseQR: Multifrontal Multithreaded Rank-Revealing
* Sparse QR Factorization, ACM Trans. on Math. Soft. 38(1), 2011, Page 8:3
* Sparse QR Factorization, ACM Trans. on Math. Soft. 38(1), 2011, Page 8:3
*/
if(m_useDefaultThreshold)
if(m_useDefaultThreshold)
{
RealScalar max2Norm = 0.0;
for (int j = 0; j < n; j++) max2Norm = numext::maxi(max2Norm, m_pmat.col(j).norm());
@@ -405,10 +405,10 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
max2Norm = RealScalar(1);
pivotThreshold = 20 * (m + n) * max2Norm * NumTraits<RealScalar>::epsilon();
}
// Initialize the numerical permutation
m_pivotperm.setIdentity(n);
StorageIndex nonzeroCol = 0; // Record the number of valid pivots
m_Q.startVec(0);
@@ -421,8 +421,8 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
Qidx(0) = nonzeroCol;
nzcolR = 0; nzcolQ = 1;
bool found_diag = nonzeroCol>=m;
tval.setZero();
tval.setZero();
// Symbolic factorization: find the nonzero locations of the column k of the factors R and Q, i.e.,
// all the nodes (with indexes lower than rank) reachable through the column elimination tree (etree) rooted at node k.
// Note: if the diagonal entry does not exist, then its contribution must be explicitly added,
@@ -432,7 +432,7 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
StorageIndex curIdx = nonzeroCol;
if(itp) curIdx = StorageIndex(itp.row());
if(curIdx == nonzeroCol) found_diag = true;
// Get the nonzeros indexes of the current column of R
StorageIndex st = m_firstRowElt(curIdx); // The traversal of the etree starts here
if (st < 0 )
@@ -442,7 +442,7 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
return;
}
// Traverse the etree
// Traverse the etree
Index bi = nzcolR;
for (; mark(st) != col; st = m_etree(st))
{
@@ -454,13 +454,13 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
// Reverse the list to get the topological ordering
Index nt = nzcolR-bi;
for(Index i = 0; i < nt/2; i++) std::swap(Ridx(bi+i), Ridx(nzcolR-i-1));
// Copy the current (curIdx,pcol) value of the input matrix
if(itp) tval(curIdx) = itp.value();
else tval(curIdx) = Scalar(0);
// Compute the pattern of Q(:,k)
if(curIdx > nonzeroCol && mark(curIdx) != col )
if(curIdx > nonzeroCol && mark(curIdx) != col )
{
Qidx(nzcolQ) = curIdx; // Add this row to the pattern of Q,
mark(curIdx) = col; // and mark it as visited
@@ -472,15 +472,15 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
for (Index i = nzcolR-1; i >= 0; i--)
{
Index curIdx = Ridx(i);
// Apply the curIdx-th householder vector to the current column (temporarily stored into tval)
Scalar tdot(0);
// First compute q' * tval
tdot = m_Q.col(curIdx).dot(tval);
tdot *= m_hcoeffs(curIdx);
// Then update tval = tval - q * tau
// FIXME: tval -= tdot * m_Q.col(curIdx) should amount to the same (need to check/add support for efficient "dense ?= sparse")
for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq)
@@ -500,16 +500,16 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
}
}
} // End update current column
Scalar tau = RealScalar(0);
RealScalar beta = 0;
if(nonzeroCol < diagSize)
{
// Compute the Householder reflection that eliminate the current column
// FIXME this step should call the Householder module.
Scalar c0 = nzcolQ ? tval(Qidx(0)) : Scalar(0);
// First, the squared norm of Q((col+1):m, col)
RealScalar sqrNorm = 0.;
for (Index itq = 1; itq < nzcolQ; ++itq) sqrNorm += numext::abs2(tval(Qidx(itq)));
@@ -528,7 +528,7 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
for (Index itq = 1; itq < nzcolQ; ++itq)
tval(Qidx(itq)) /= (c0 - beta);
tau = numext::conj((beta-c0) / beta);
}
}
@@ -536,7 +536,7 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
for (Index i = nzcolR-1; i >= 0; i--)
{
Index curIdx = Ridx(i);
if(curIdx < nonzeroCol)
if(curIdx < nonzeroCol)
{
m_R.insertBackByOuterInnerUnordered(col, curIdx) = tval(curIdx);
tval(curIdx) = Scalar(0.);
@@ -562,17 +562,17 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
else
{
// Zero pivot found: move implicitly this column to the end
for (Index j = nonzeroCol; j < n-1; j++)
for (Index j = nonzeroCol; j < n-1; j++)
std::swap(m_pivotperm.indices()(j), m_pivotperm.indices()[j+1]);
// Recompute the column elimination tree
internal::coletree(m_pmat, m_etree, m_firstRowElt, m_pivotperm.indices().data());
m_isEtreeOk = false;
}
}
m_hcoeffs.tail(diagSize-nonzeroCol).setZero();
// Finalize the column pointers of the sparse matrices R and Q
m_Q.finalize();
m_Q.makeCompressed();
@@ -581,18 +581,18 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
m_isQSorted = false;
m_nonzeropivots = nonzeroCol;
if(nonzeroCol<n)
{
// Permute the triangular factor to put the 'dead' columns to the end
QRMatrixType tempR(m_R);
m_R = tempR * m_pivotperm;
// Update the column permutation
m_outputPerm_c = m_outputPerm_c * m_pivotperm;
}
m_isInitialized = true;
m_isInitialized = true;
m_factorizationIsok = true;
m_info = Success;
}
@@ -602,12 +602,12 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
{
typedef typename SparseQRType::QRMatrixType MatrixType;
typedef typename SparseQRType::Scalar Scalar;
// Get the references
SparseQR_QProduct(const SparseQRType& qr, const Derived& other, bool transpose) :
// Get the references
SparseQR_QProduct(const SparseQRType& qr, const Derived& other, bool transpose) :
m_qr(qr),m_other(other),m_transpose(transpose) {}
inline Index rows() const { return m_qr.matrixQ().rows(); }
inline Index cols() const { return m_other.cols(); }
// Assign to a vector
template<typename DesType>
void evalTo(DesType& res) const
@@ -651,7 +651,7 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
}
}
}
const SparseQRType& m_qr;
const Derived& m_other;
bool m_transpose; // TODO this actually means adjoint
@@ -659,7 +659,7 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
template<typename SparseQRType>
struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<SparseQRType> >
{
{
typedef typename SparseQRType::Scalar Scalar;
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
enum {
@@ -701,7 +701,7 @@ struct SparseQRMatrixQTransposeReturnType
};
namespace internal {
template<typename SparseQRType>
struct evaluator_traits<SparseQRMatrixQReturnType<SparseQRType> >
{
@@ -716,7 +716,7 @@ struct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal:
typedef SparseQRMatrixQReturnType<SparseQRType> SrcXprType;
typedef typename DstXprType::Scalar Scalar;
typedef typename DstXprType::StorageIndex StorageIndex;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &/*func*/)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &/*func*/)
{
typename DstXprType::PlainObject idMat(src.rows(), src.cols());
idMat.setIdentity();
@@ -732,7 +732,7 @@ struct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal:
typedef SparseQRMatrixQReturnType<SparseQRType> SrcXprType;
typedef typename DstXprType::Scalar Scalar;
typedef typename DstXprType::StorageIndex StorageIndex;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &/*func*/)
static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &/*func*/)
{
dst = src.m_qr.matrixQ() * DstXprType::Identity(src.m_qr.rows(), src.m_qr.rows());
}

View File

@@ -98,19 +98,7 @@ namespace std {
{ return deque_base::insert(position,x); }
void insert(const_iterator position, size_type new_size, const value_type& x)
{ deque_base::insert(position, new_size, x); }
#elif defined(_GLIBCXX_DEQUE) && EIGEN_GNUC_AT_LEAST(4,2) && !EIGEN_GNUC_AT_LEAST(10, 1)
// workaround GCC std::deque implementation
// GCC 10.1 doesn't let us access _Deque_impl _M_impl anymore and we have to
// fall-back to the default case
void resize(size_type new_size, const value_type& x)
{
if (new_size < deque_base::size())
deque_base::_M_erase_at_end(this->_M_impl._M_start + new_size);
else
deque_base::insert(deque_base::end(), new_size - deque_base::size(), x);
}
#else
// either non-GCC or GCC between 4.1 and 10.1
// default implementation which should always work.
void resize(size_type new_size, const value_type& x)
{

View File

@@ -119,7 +119,7 @@ OP(const Scalar& s) const { \
return this->OP(Derived::PlainObject::Constant(rows(), cols(), s)); \
} \
EIGEN_DEVICE_FUNC friend EIGEN_STRONG_INLINE const RCmp ## COMPARATOR ## ReturnType \
OP(const Scalar& s, const Derived& d) { \
OP(const Scalar& s, const EIGEN_CURRENT_STORAGE_BASE_CLASS<Derived>& d) { \
return Derived::PlainObject::Constant(d.rows(), d.cols(), s).OP(d); \
}
@@ -314,7 +314,8 @@ polygamma(const EIGEN_CURRENT_STORAGE_BASE_CLASS<DerivedN> &n) const
*
* It returns the Riemann zeta function of two arguments \c *this and \a q:
*
* \param *this is the exposent, it must be > 1
* `*this` is the exponent, it must be > 1
*
* \param q is the shift, it must be > 0
*
* \note This function supports only float and double scalar types. To support other scalar types, the user has

View File

@@ -4,7 +4,6 @@ project(EigenBlas CXX)
include("../cmake/language_support.cmake")
workaround_9220(Fortran EIGEN_Fortran_COMPILER_WORKS)
if(EIGEN_Fortran_COMPILER_WORKS)
enable_language(Fortran OPTIONAL)
if(NOT CMAKE_Fortran_COMPILER)

120
ci/CTest2JUnit.xsl Normal file
View File

@@ -0,0 +1,120 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="xml" indent="yes"/>
<xsl:template match="/Site">
<xsl:variable name="Name"><xsl:value-of select="@Name"/></xsl:variable>
<xsl:variable name="Hostname"><xsl:value-of select="@Hostname"/></xsl:variable>
<xsl:variable name="TestCount"><xsl:value-of select="count(//TestList/Test)"/> </xsl:variable>
<xsl:variable name="ErrorCount"><xsl:value-of select="count(//TestList/Test[@Status='error'])"/> </xsl:variable>
<xsl:variable name="FailureCount"><xsl:value-of select="count(//Testing/Test[@Status='failed'])"/> </xsl:variable>
<testsuite name="{$Name}" hostname="{$Hostname}" errors="0" failures="{$FailureCount}" tests="{$TestCount}">
<xsl:variable name="BuildName"><xsl:value-of select="@BuildName"/></xsl:variable>
<xsl:variable name="BuildStamp"><xsl:value-of select="@BuildStamp"/></xsl:variable>
<xsl:variable name="Generator"><xsl:value-of select="@Generator"/></xsl:variable>
<xsl:variable name="CompilerName"><xsl:value-of select="@CompilerName"/></xsl:variable>
<xsl:variable name="OSName"><xsl:value-of select="@OSName"/></xsl:variable>
<xsl:variable name="OSRelease"><xsl:value-of select="@OSRelease"/></xsl:variable>
<xsl:variable name="OSVersion"><xsl:value-of select="@OSVersion"/></xsl:variable>
<xsl:variable name="OSPlatform"><xsl:value-of select="@OSPlatform"/></xsl:variable>
<xsl:variable name="Is64Bits"><xsl:value-of select="@Is64Bits"/></xsl:variable>
<xsl:variable name="VendorString"><xsl:value-of select="@VendorString"/></xsl:variable>
<xsl:variable name="VendorID"><xsl:value-of select="@VendorID"/></xsl:variable>
<xsl:variable name="FamilyID"><xsl:value-of select="@FamilyID"/></xsl:variable>
<xsl:variable name="ModelID"><xsl:value-of select="@ModelID"/></xsl:variable>
<xsl:variable name="ProcessorCacheSize"><xsl:value-of select="@ProcessorCacheSize"/></xsl:variable>
<xsl:variable name="NumberOfLogicalCPU"><xsl:value-of select="@NumberOfLogicalCPU"/></xsl:variable>
<xsl:variable name="NumberOfPhysicalCPU"><xsl:value-of select="@NumberOfPhysicalCPU"/></xsl:variable>
<xsl:variable name="TotalVirtualMemory"><xsl:value-of select="@TotalVirtualMemory"/></xsl:variable>
<xsl:variable name="TotalPhysicalMemory"><xsl:value-of select="@TotalPhysicalMemory"/></xsl:variable>
<xsl:variable name="LogicalProcessorsPerPhysical"><xsl:value-of select="@LogicalProcessorsPerPhysical"/></xsl:variable>
<xsl:variable name="ProcessorClockFrequency"><xsl:value-of select="@ProcessorClockFrequency"/></xsl:variable>
<properties>
<property name="BuildName" value="{$BuildName}" />
<property name="BuildStamp" value="{$BuildStamp}" />
<property name="Name" value="{$Name}" />
<property name="Generator" value="{$Generator}" />
<property name="CompilerName" value="{$CompilerName}" />
<property name="OSName" value="{$OSName}" />
<property name="Hostname" value="{$Hostname}" />
<property name="OSRelease" value="{$OSRelease}" />
<property name="OSVersion" value="{$OSVersion}" />
<property name="OSPlatform" value="{$OSPlatform}" />
<property name="Is64Bits" value="{$Is64Bits}" />
<property name="VendorString" value="{$VendorString}" />
<property name="VendorID" value="{$VendorID}" />
<property name="FamilyID" value="{$FamilyID}" />
<property name="ModelID" value="{$ModelID}" />
<property name="ProcessorCacheSize" value="{$ProcessorCacheSize}" />
<property name="NumberOfLogicalCPU" value="{$NumberOfLogicalCPU}" />
<property name="NumberOfPhysicalCPU" value="{$NumberOfPhysicalCPU}" />
<property name="TotalVirtualMemory" value="{$TotalVirtualMemory}" />
<property name="TotalPhysicalMemory" value="{$TotalPhysicalMemory}" />
<property name="LogicalProcessorsPerPhysical" value="{$LogicalProcessorsPerPhysical}" />
<property name="ProcessorClockFrequency" value="{$ProcessorClockFrequency}" />
</properties>
<xsl:apply-templates select="Testing/Test"/>
<system-out>
BuildName: <xsl:value-of select="$BuildName" />
BuildStamp: <xsl:value-of select="$BuildStamp" />
Name: <xsl:value-of select="$Name" />
Generator: <xsl:value-of select="$Generator" />
CompilerName: <xsl:value-of select="$CompilerName" />
OSName: <xsl:value-of select="$OSName" />
Hostname: <xsl:value-of select="$Hostname" />
OSRelease: <xsl:value-of select="$OSRelease" />
OSVersion: <xsl:value-of select="$OSVersion" />
OSPlatform: <xsl:value-of select="$OSPlatform" />
Is64Bits: <xsl:value-of select="$Is64Bits" />
VendorString: <xsl:value-of select="$VendorString" />
VendorID: <xsl:value-of select="$VendorID" />
FamilyID: <xsl:value-of select="$FamilyID" />
ModelID: <xsl:value-of select="$ModelID" />
ProcessorCacheSize: <xsl:value-of select="$ProcessorCacheSize" />
NumberOfLogicalCPU: <xsl:value-of select="$NumberOfLogicalCPU" />
NumberOfPhysicalCPU: <xsl:value-of select="$NumberOfPhysicalCPU" />
TotalVirtualMemory: <xsl:value-of select="$TotalVirtualMemory" />
TotalPhysicalMemory: <xsl:value-of select="$TotalPhysicalMemory" />
LogicalProcessorsPerPhysical: <xsl:value-of select="$LogicalProcessorsPerPhysical" />
ProcessorClockFrequency: <xsl:value-of select="$ProcessorClockFrequency" />
</system-out>
</testsuite>
</xsl:template>
<xsl:template match="Testing/Test">
<xsl:variable name="testcasename"><xsl:value-of select= "Name"/></xsl:variable>
<xsl:variable name="testclassname"><xsl:value-of select= " concat('this', substring(Path,2))"/></xsl:variable>
<xsl:variable name="exectime">
<xsl:for-each select="Results/NamedMeasurement">
<xsl:if test="@name = 'Execution Time'">
<xsl:value-of select="."/>
</xsl:if>
</xsl:for-each>
</xsl:variable>
<testcase name="{$testcasename}" classname="{$testclassname}" time="{$exectime}">
<xsl:if test="@Status = 'passed'">
</xsl:if>
<xsl:if test="@Status = 'failed'">
<xsl:variable name="failtype">
<xsl:for-each select="Results/NamedMeasurement">
<xsl:if test="@name = 'Exit Code'">
<xsl:value-of select="."/>
</xsl:if>
</xsl:for-each>
</xsl:variable>
<xsl:variable name="failcode">
<xsl:for-each select="Results/NamedMeasurement">
<xsl:if test="@name = 'Exit Value'">
<xsl:value-of select="."/>
</xsl:if>
</xsl:for-each>
</xsl:variable>
<failure message="{$failtype} ({$failcode})"><xsl:value-of select="Results/Measurement/Value/text()" /></failure>
</xsl:if>
<xsl:if test="@Status = 'notrun'">
<skipped><xsl:value-of select="Results/Measurement/Value/text()" /></skipped>
</xsl:if>
</testcase>
</xsl:template>
</xsl:stylesheet>

12
ci/README.md Normal file
View File

@@ -0,0 +1,12 @@
## Eigen CI infrastructure
Eigen's CI infrastructure uses three stages:
1. A `checkformat` stage to verify MRs satisfy proper formatting style, as
defined by `clang-format`.
2. A `build` stage to build the unit-tests.
3. A `test` stage to run the unit-tests.
For merge requests, only a small subset of tests are built/run, and only on a
small subset of platforms. This is to reduce our overall testing infrastructure
resource usage. In addition, we have nightly jobs that build and run the full
suite of tests on most officially supported platforms.

View File

@@ -0,0 +1,331 @@
# Base configuration for linux cross-compilation.
.build:linux:cross:
extends: .common:linux:cross
stage: build
variables:
EIGEN_CI_BUILD_TARGET: buildtests
script:
- . ci/scripts/build.linux.script.sh
tags:
- linux
- eigen-runner
- cross-compiler
rules:
- if: $CI_PIPELINE_SOURCE == "schedule" && $CI_PROJECT_NAMESPACE == "libeigen"
- if: $CI_PIPELINE_SOURCE == "web" && $CI_PROJECT_NAMESPACE == "libeigen"
- if: $CI_PIPELINE_SOURCE == "push" && $CI_PROJECT_NAMESPACE == "libeigen"
cache:
key: "$CI_JOB_NAME_SLUG-$CI_COMMIT_REF_SLUG-BUILD"
paths:
- ${EIGEN_CI_BUILDDIR}/
######## x86-64 ################################################################
.build:linux:cross:x86-64:
extends: .build:linux:cross
variables:
EIGEN_CI_TARGET_ARCH: x86_64
EIGEN_CI_CROSS_TARGET_TRIPLE: x86_64-linux-gnu
# GCC-6 (minimum on Ubuntu 18.04)
build:linux:cross:x86-64:gcc-6:default:
extends: .build:linux:cross:x86-64
image: ubuntu:18.04
variables:
EIGEN_CI_C_COMPILER: gcc-6
EIGEN_CI_CXX_COMPILER: g++-6
EIGEN_CI_CROSS_INSTALL: g++-6-x86-64-linux-gnu
EIGEN_CI_CROSS_C_COMPILER: x86_64-linux-gnu-gcc-6
EIGEN_CI_CROSS_CXX_COMPILER: x86_64-linux-gnu-g++-6
# GCC-10 (stable recent version)
build:linux:cross:x86-64:gcc-10:default:
extends: .build:linux:cross:x86-64
variables:
EIGEN_CI_C_COMPILER: gcc-10
EIGEN_CI_CXX_COMPILER: g++-10
EIGEN_CI_CROSS_INSTALL: g++-10-x86-64-linux-gnu
EIGEN_CI_CROSS_C_COMPILER: x86_64-linux-gnu-gcc-10
EIGEN_CI_CROSS_CXX_COMPILER: x86_64-linux-gnu-g++-10
build:linux:cross:x86-64:gcc-10:cxx03:
extends: build:linux:cross:x86-64:gcc-10:default
variables:
EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_CXX11=off"
build:linux:cross:x86-64:gcc-10:avx:
extends: build:linux:cross:x86-64:gcc-10:default
variables:
EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX=on"
build:linux:cross:x86-64:gcc-10:avx2:
extends: build:linux:cross:x86-64:gcc-10:default
variables:
EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX2=on"
build:linux:cross:x86-64:gcc-10:avx512dq:
extends: build:linux:cross:x86-64:gcc-10:default
variables:
EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX512DQ=on"
# Clang-6 (minimum on Ubuntu 20.04)
build:linux:cross:x86-64:clang-6:default:
extends: .build:linux:cross:x86-64
image: ubuntu:20.04
variables:
EIGEN_CI_INSTALL: g++-8 clang-6.0 lld-6.0
EIGEN_CI_C_COMPILER: clang-6.0
EIGEN_CI_CXX_COMPILER: clang++-6.0
EIGEN_CI_CROSS_INSTALL: g++-8-x86-64-linux-gnu clang-6.0 lld-6.0
EIGEN_CI_ADDITIONAL_ARGS: -DCMAKE_EXE_LINKER_FLAGS=-fuse-ld=lld-6.0
# Clang-12 (stable recent version)
build:linux:cross:x86-64:clang-12:default:
extends: .build:linux:cross:x86-64
variables:
EIGEN_CI_INSTALL: clang-12
EIGEN_CI_C_COMPILER: clang-12
EIGEN_CI_CXX_COMPILER: clang++-12
EIGEN_CI_CROSS_INSTALL: g++-10-x86-64-linux-gnu clang-12
build:linux:cross:x86-64:clang-12:avx:
extends: build:linux:cross:x86-64:clang-12:default
variables:
EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX=on"
build:linux:cross:x86-64:clang-12:avx2:
extends: build:linux:cross:x86-64:clang-12:default
variables:
EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX2=on"
build:linux:cross:x86-64:clang-12:avx512dq:
extends: build:linux:cross:x86-64:clang-12:default
variables:
EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX512DQ=on"
build:linux:doc:
extends: .build:linux:cross
variables:
EIGEN_CI_TARGET_ARCH: any
EIGEN_CI_BUILD_TARGET: doc
EIGEN_CI_INSTALL: ca-certificates clang flex python3 bison graphviz
EIGEN_CI_C_COMPILER: clang
EIGEN_CI_CXX_COMPILER: clang++
EIGEN_CI_BEFORE_SCRIPT: ". ci/scripts/build_and_install_doxygen.sh Release_1_13_2"
rules:
- if: $CI_PIPELINE_SOURCE == "push" && $CI_PROJECT_NAMESPACE == "libeigen"
# # Sanitizers (Disabled because ASAN hangs and MSAN requires instrumented libc++)
# build:linux:cross:x86-64:clang-12:default:asan:
# extends: build:linux:cross:x86-64:clang-12:default
# variables:
# EIGEN_CI_ADDITIONAL_ARGS:
# -DEIGEN_TEST_CUSTOM_CXX_FLAGS=-fsanitize=address,undefined
# -DEIGEN_TEST_CUSTOM_LINKER_FLAGS=-fsanitize=address,undefined
# build:linux:cross:x86-64:clang-12:default:msan:
# extends: build:linux:cross:x86-64:clang-12:default
# variables:
# EIGEN_CI_ADDITIONAL_ARGS:
# -DEIGEN_TEST_CUSTOM_CXX_FLAGS=-fsanitize=memory
# -DEIGEN_TEST_CUSTOM_LINKER_FLAGS=-fsanitize=memory
######## CUDA ##################################################################
.build:linux:cuda:
extends: .build:linux:cross:x86-64
variables:
# Additional flags passed to the cuda compiler.
EIGEN_CI_CUDA_CXX_FLAGS: ""
# Compute architectures present in the GitLab CI runners.
EIGEN_CI_CUDA_COMPUTE_ARCH: "50;75"
EIGEN_CI_BUILD_TARGET: buildtests_gpu
EIGEN_CI_TEST_CUDA_CLANG: "off"
EIGEN_CI_ADDITIONAL_ARGS:
-DEIGEN_TEST_CUDA=on
-DEIGEN_CUDA_CXX_FLAGS=${EIGEN_CI_CUDA_CXX_FLAGS}
-DEIGEN_CUDA_COMPUTE_ARCH=${EIGEN_CI_CUDA_COMPUTE_ARCH}
-DEIGEN_TEST_CUDA_CLANG=${EIGEN_CI_TEST_CUDA_CLANG}
tags:
- eigen-runner
- linux
# Requires intel runner for cuda docker image support.
- x86-64
# NVidia no longer provides docker images < CUDA 11.0.3.
# # GCC-7, CUDA-9.2
# build:linux:cuda-9.2:gcc-7:
# extends: .build:linux:cuda
# image: nvidia/cuda:9.2-devel-ubuntu18.04
# variables:
# # cuda 9.2 doesn't support sm_75, so lower to 70.
# EIGEN_CI_CUDA_COMPUTE_ARCH: "50;70"
# EIGEN_CI_C_COMPILER: gcc-7
# EIGEN_CI_CXX_COMPILER: g++-7
# # Clang-10, CUDA-9.2
# build:linux:cuda-9.2:clang-10:
# extends: build:linux:cuda-9.2:gcc-7
# variables:
# EIGEN_CI_C_COMPILER: clang-10
# EIGEN_CI_CXX_COMPILER: clang++-10
# EIGEN_CI_TEST_CUDA_CLANG: "on"
# # GCC-8, CUDA-10.2
# build:linux:cuda-10.2:gcc-8:
# extends: .build:linux:cuda
# image: nvidia/cuda:10.2-devel-ubuntu18.04
# variables:
# EIGEN_CI_C_COMPILER: gcc-8
# EIGEN_CI_CXX_COMPILER: g++-8
# # Clang-10, CUDA-10.2
# build:linux:cuda-10.2:clang-10:
# extends: build:linux:cuda-10.2:gcc-8
# variables:
# EIGEN_CI_C_COMPILER: clang-10
# EIGEN_CI_CXX_COMPILER: clang++-10
# EIGEN_CI_TEST_CUDA_CLANG: "on"
# GCC-10, CUDA-11.4
build:linux:cuda-11.4:gcc-10:
extends: .build:linux:cuda
image: nvidia/cuda:11.4.3-devel-ubuntu20.04
variables:
EIGEN_CI_C_COMPILER: gcc-10
EIGEN_CI_CXX_COMPILER: g++-10
# Clang-12, CUDA-11.4
build:linux:cuda-11.4:clang-12:
extends: build:linux:cuda-11.4:gcc-10
variables:
EIGEN_CI_C_COMPILER: clang-12
EIGEN_CI_CXX_COMPILER: clang++-12
EIGEN_CI_TEST_CUDA_CLANG: "on"
# GCC-10, CUDA-12.2
build:linux:cuda-12.2:gcc-10:
extends: .build:linux:cuda
image: nvidia/cuda:12.2.0-devel-ubuntu20.04
variables:
EIGEN_CI_C_COMPILER: gcc-10
EIGEN_CI_CXX_COMPILER: g++-10
# Clang-12, CUDA-12.2
build:linux:cuda-12.2:clang-12:
extends: build:linux:cuda-12.2:gcc-10
variables:
EIGEN_CI_C_COMPILER: clang-12
EIGEN_CI_CXX_COMPILER: clang++-12
EIGEN_CI_TEST_CUDA_CLANG: "on"
# ######## HIP ###################################################################
# Note: these are currently build-only, until we get an AMD-supported runner.
# ROCm HIP
build:linux:rocm-latest:gcc-10:
extends: .build:linux:cross
image: rocm/dev-ubuntu-24.04:latest
variables:
EIGEN_CI_C_COMPILER: gcc-10
EIGEN_CI_CXX_COMPILER: g++-10
EIGEN_CI_BUILD_TARGET: buildtests_gpu
EIGEN_CI_ADDITIONAL_ARGS: -DEIGEN_TEST_HIP=on
tags:
- eigen-runner
- linux
- x86-64
######## Arm ###################################################################
.build:linux:cross:arm:
extends: .build:linux:cross
variables:
EIGEN_CI_TARGET_ARCH: arm
EIGEN_CI_CROSS_TARGET_TRIPLE: arm-linux-gnueabihf
EIGEN_CI_ADDITIONAL_ARGS: >
-DEIGEN_TEST_CUSTOM_CXX_FLAGS=-march=armv7-a;-mfpu=neon-vfpv4
build:linux:cross:arm:gcc-10:default:
extends: .build:linux:cross:arm
variables:
EIGEN_CI_CROSS_INSTALL: g++-10-arm-linux-gnueabihf
EIGEN_CI_CROSS_C_COMPILER: arm-linux-gnueabihf-gcc-10
EIGEN_CI_CROSS_CXX_COMPILER: arm-linux-gnueabihf-g++-10
build:linux:cross:arm:clang-12:default:
extends: .build:linux:cross:arm
variables:
EIGEN_CI_INSTALL: clang-12
EIGEN_CI_C_COMPILER: clang-12
EIGEN_CI_CXX_COMPILER: clang++-12
EIGEN_CI_CROSS_INSTALL: g++-10-arm-linux-gnueabihf clang-12
######## aarch64 ###############################################################
.build:linux:cross:aarch64:
extends: .build:linux:cross
variables:
EIGEN_CI_TARGET_ARCH: aarch64
EIGEN_CI_CROSS_TARGET_TRIPLE: aarch64-linux-gnu
EIGEN_CI_ADDITIONAL_ARGS: -DEIGEN_TEST_CUSTOM_CXX_FLAGS=-march=armv8.2-a+fp16
build:linux:cross:aarch64:gcc-10:default:
extends: .build:linux:cross:aarch64
variables:
EIGEN_CI_C_COMPILER: gcc-10
EIGEN_CI_CXX_COMPILER: g++-10
EIGEN_CI_CROSS_INSTALL: g++-10-aarch64-linux-gnu
EIGEN_CI_CROSS_C_COMPILER: aarch64-linux-gnu-gcc-10
EIGEN_CI_CROSS_CXX_COMPILER: aarch64-linux-gnu-g++-10
build:linux:cross:aarch64:clang-12:default:
extends: .build:linux:cross:aarch64
variables:
EIGEN_CI_INSTALL: clang-12
EIGEN_CI_C_COMPILER: clang-12
EIGEN_CI_CXX_COMPILER: clang++-12
EIGEN_CI_CROSS_INSTALL: g++-10-aarch64-linux-gnu clang-12
######## ppc64le ###############################################################
.build:linux:cross:ppc64le:
extends: .build:linux:cross
variables:
EIGEN_CI_TARGET_ARCH: ppc64le
EIGEN_CI_CROSS_TARGET_TRIPLE: powerpc64le-linux-gnu
build:linux:cross:ppc64le:gcc-10:default:
extends: .build:linux:cross:ppc64le
variables:
EIGEN_CI_C_COMPILER: gcc-10
EIGEN_CI_CXX_COMPILER: g++-10
EIGEN_CI_CROSS_INSTALL: g++-10-powerpc64le-linux-gnu
EIGEN_CI_CROSS_C_COMPILER: powerpc64le-linux-gnu-gcc-10
EIGEN_CI_CROSS_CXX_COMPILER: powerpc64le-linux-gnu-g++-10
# Temporarily disable MMA until #2457 is resolved.
EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_ALTIVEC_DISABLE_MMA=1"
build:linux:cross:ppc64le:clang-12:default:
extends: .build:linux:cross:ppc64le
variables:
EIGEN_CI_INSTALL: clang-12
EIGEN_CI_C_COMPILER: clang-12
EIGEN_CI_CXX_COMPILER: clang++-12
EIGEN_CI_CROSS_INSTALL: g++-10-powerpc64le-linux-gnu clang-12
######## MR Smoke Tests ########################################################
build:linux:cross:x86-64:gcc-10:default:smoketest:
extends: build:linux:cross:x86-64:gcc-10:default
variables:
EIGEN_CI_BUILD_TARGET: buildsmoketests
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
build:linux:cross:x86-64:clang-12:default:smoketest:
extends: build:linux:cross:x86-64:clang-12:default
variables:
EIGEN_CI_BUILD_TARGET: buildsmoketests
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"

View File

@@ -0,0 +1,114 @@
# Base configuration for windows builds.
.build:windows:
extends: .common:windows
stage: build
variables:
EIGEN_CI_BUILD_TARGET: buildtests
# Reduce overall build size and compile time.
# Note: /d2ReducedOptimizeHugeFunctions is only available in VS 2019.
EIGEN_CI_TEST_CUSTOM_CXX_FLAGS: "/d2ReducedOptimizeHugeFunctions;/DEIGEN_STRONG_INLINE=inline;/Os"
script:
- ./ci/scripts/build.windows.script.ps1
tags:
- eigen-runner
- windows
- x86-64
rules:
- if: $CI_PIPELINE_SOURCE == "schedule" && $CI_PROJECT_NAMESPACE == "libeigen"
- if: $CI_PIPELINE_SOURCE == "web" && $CI_PROJECT_NAMESPACE == "libeigen"
- if: $CI_PIPELINE_SOURCE == "push" && $CI_PROJECT_NAMESPACE == "libeigen"
cache:
key: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG-BUILD"
paths:
- ${EIGEN_CI_BUILDDIR}/
######### MSVC #################################################################
# MSVC 14.16 (VS 2017)
build:windows:x86-64:msvc-14.16:default:
extends: .build:windows
variables:
EIGEN_CI_MSVC_VER: "14.16"
# Override to remove unsupported /d2ReducedOptimizeHugeFunctions.
EIGEN_CI_TEST_CUSTOM_CXX_FLAGS: "/DEIGEN_STRONG_INLINE=inline;/Os"
# MSVC 14.29 (VS 2019)
build:windows:x86-64:msvc-14.29:default:
extends: .build:windows
variables:
EIGEN_CI_MSVC_VER: "14.29"
build:windows:x86-64:msvc-14.29:cxx03:
extends: build:windows:x86-64:msvc-14.29:default
variables:
EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_CXX11=off"
build:windows:x86-64:msvc-14.29:avx2:
extends: build:windows:x86-64:msvc-14.29:default
variables:
EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX2=on"
build:windows:x86-64:msvc-14.29:avx512dq:
extends: build:windows:x86-64:msvc-14.29:default
variables:
EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX512DQ=on"
######### MSVC + CUDA ##########################################################
.build:windows:cuda:
extends: .build:windows
variables:
# Compute architectures present in the GitLab CI runners.
EIGEN_CI_CUDA_COMPUTE_ARCH: "50;75"
EIGEN_CI_BUILD_TARGET: buildtests_gpu
EIGEN_CI_ADDITIONAL_ARGS:
-DEIGEN_TEST_CUDA=on
-DEIGEN_CUDA_COMPUTE_ARCH=${EIGEN_CI_CUDA_COMPUTE_ARCH}
tags:
- eigen-runner
- windows
- x86-64
- cuda
# The CUDA 9.2 compiler crashes with an internal error.
# # MSVC 14.16 + CUDA 9.2
# build:windows:x86-64:cuda-9.2:msvc-14.16:
# extends: .build:windows:cuda
# variables:
# # CUDA 9.2 doesn't support sm_75.
# EIGEN_CI_CUDA_COMPUTE_ARCH: "50;70"
# # CUDA 9.2 only supports up to VS 2017.
# EIGEN_CI_MSVC_VER: "14.16"
# EIGEN_CI_TEST_CUSTOM_CXX_FLAGS: "/DEIGEN_STRONG_INLINE=inline;/Os"
# EIGEN_CI_BEFORE_SCRIPT: $$env:CUDA_PATH=$$env:CUDA_PATH_V9_2
# MSVC 14.29 + CUDA 10.2
build:windows:x86-64:cuda-10.2:msvc-14.29:
extends: .build:windows:cuda
variables:
EIGEN_CI_MSVC_VER: "14.29"
EIGEN_CI_BEFORE_SCRIPT: $$env:CUDA_PATH=$$env:CUDA_PATH_V10_2
# MSVC 14.29 + CUDA 11.4
build:windows:x86-64:cuda-11.4:msvc-14.29:
extends: .build:windows:cuda
variables:
EIGEN_CI_MSVC_VER: "14.29"
EIGEN_CI_BEFORE_SCRIPT: $$env:CUDA_PATH=$$env:CUDA_PATH_V11_4
######## MR Smoke Tests ########################################################
# MSVC 14.29 64-bit (VS 2019)
build:windows:x86-64:msvc-14.29:avx512dq:smoketest:
extends: build:windows:x86-64:msvc-14.29:avx512dq
variables:
EIGEN_CI_BUILD_TARGET: buildsmoketests
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
# MSVC 14.29 32-bit (VS 2019)
build:windows:x86:msvc-14.29:avx512dq:smoketest:
extends: build:windows:x86-64:msvc-14.29:avx512dq:smoketest
variables:
EIGEN_CI_MSVC_ARCH: "x86"
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"

View File

@@ -0,0 +1,10 @@
checkformat:clangformat:
stage: checkformat
image: alpine:3.19
only:
- merge_requests
allow_failure: true
before_script:
- apk add --no-cache git clang17-extra-tools python3
script:
- git clang-format --diff --commit ${CI_MERGE_REQUEST_DIFF_BASE_SHA}

40
ci/common.gitlab-ci.yml Normal file
View File

@@ -0,0 +1,40 @@
# Base configuration for linux builds and tests.
.common:linux:cross:
image: ubuntu:20.04
variables:
EIGEN_CI_TARGET_ARCH: ""
EIGEN_CI_ADDITIONAL_ARGS: ""
# If host matches target, use the following:
EIGEN_CI_C_COMPILER: ""
EIGEN_CI_CXX_COMPILER: ""
EIGEN_CI_INSTALL: "${EIGEN_CI_C_COMPILER} ${EIGEN_CI_CXX_COMPILER}"
# If host does not match the target, use the following:
EIGEN_CI_CROSS_TARGET_TRIPLE: ""
EIGEN_CI_CROSS_C_COMPILER: ${EIGEN_CI_C_COMPILER}
EIGEN_CI_CROSS_CXX_COMPILER: ${EIGEN_CI_CXX_COMPILER}
EIGEN_CI_CROSS_INSTALL: "${EIGEN_CI_CROSS_C_COMPILER} ${EIGEN_CI_CROSS_CXX_COMPILER}"
before_script:
# Call script in current shell - it sets up some environment variables.
- . ci/scripts/common.linux.before_script.sh
artifacts:
when: always
name: "$CI_JOB_NAME_SLUG-$CI_COMMIT_REF_SLUG"
paths:
- ${EIGEN_CI_BUILDDIR}/
expire_in: 5 days
# Base configuration for Windows builds and tests.
.common:windows:
variables:
EIGEN_CI_MSVC_ARCH: x64
EIGEN_CI_MSVC_VER: "14.29"
EIGEN_CI_ADDITIONAL_ARGS: ""
EIGEN_CI_BEFORE_SCRIPT: ""
before_script:
- . ci/scripts/common.windows.before_script.ps1
artifacts:
when: always
name: "$CI_JOB_NAME_SLUG-$CI_COMMIT_REF_NAME"
paths:
- ${EIGEN_CI_BUILDDIR}/
expire_in: 5 days

43
ci/deploy.gitlab-ci.yml Normal file
View File

@@ -0,0 +1,43 @@
# Push a nightly tag if the pipeline succeeded.
deploy:tag:nightly:
stage: deploy
image: alpine:edge
dependencies: []
before_script:
- apk add git
script:
- git tag -f nightly $CI_COMMIT_SHORT_SHA
- git push -f $EIGEN_CI_GIT_PUSH_URL tag nightly
tags:
- linux
- eigen-runner
rules:
- if: $CI_PIPELINE_SOURCE == "schedule" && $CI_PROJECT_NAMESPACE == "libeigen" && $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: $CI_PIPELINE_SOURCE == "web" && $CI_PROJECT_NAMESPACE == "libeigen" && $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
# Upload docs if pipeline succeeded.
deploy:docs:
stage: deploy
image: busybox
dependencies: [ build:linux:doc ]
variables:
PAGES_PREFIX: docs-nightly
script:
- echo "Deploying site to $CI_PAGES_URL"
- mv ${EIGEN_CI_BUILDDIR}/doc/html public
pages:
path_prefix: $PAGES_PREFIX
expire_in: never
artifacts:
name: "$CI_JOB_NAME_SLUG-$CI_COMMIT_REF_SLUG"
paths:
- public
tags:
- eigen-runner
- linux
rules:
- if: $CI_PIPELINE_SOURCE == "schedule" && $CI_PROJECT_NAMESPACE == "libeigen"
- if: $CI_PIPELINE_SOURCE == "web" && $CI_PROJECT_NAMESPACE == "libeigen"
- if: $CI_PIPELINE_SOURCE == "push" && $CI_PROJECT_NAMESPACE == "libeigen"
variables:
PAGES_PREFIX: docs-$CI_COMMIT_REF_NAME

View File

@@ -0,0 +1,31 @@
#!/bin/bash
set -x
# Create and enter build directory.
rootdir=`pwd`
mkdir -p ${EIGEN_CI_BUILDDIR}
cd ${EIGEN_CI_BUILDDIR}
# Configure build.
cmake -G Ninja \
-DCMAKE_CXX_COMPILER=${EIGEN_CI_CXX_COMPILER} \
-DCMAKE_C_COMPILER=${EIGEN_CI_C_COMPILER} \
-DCMAKE_CXX_COMPILER_TARGET=${EIGEN_CI_CXX_COMPILER_TARGET} \
${EIGEN_CI_ADDITIONAL_ARGS} ${rootdir}
target=""
if [[ ${EIGEN_CI_BUILD_TARGET} ]]; then
target="--target ${EIGEN_CI_BUILD_TARGET}"
fi
# Builds (particularly gcc) sometimes get killed, potentially when running
# out of resources. In that case, keep trying to build the remaining
# targets (k0), then try to build again with a single thread (j1) to minimize
# resource use.
cmake --build . ${target} -- -k0 || cmake --build . ${target} -- -k0 -j1
# Return to root directory.
cd ${rootdir}
set +x

View File

@@ -0,0 +1,44 @@
# Find Visual Studio installation directory.
$VS_INSTALL_DIR = &"${Env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -latest -property installationPath
# Run VCVarsAll.bat initialization script and extract environment variables.
# http://allen-mack.blogspot.com/2008/03/replace-visual-studio-command-prompt.html
cmd.exe /c "`"${VS_INSTALL_DIR}\VC\Auxiliary\Build\vcvarsall.bat`" $EIGEN_CI_MSVC_ARCH -vcvars_ver=$EIGEN_CI_MSVC_VER & set" |
foreach {
if ($_ -match "=") {
$v = $_.split("="); set-item -force -path "ENV:\$($v[0])" -value "$($v[1])"
}
}
# Create and enter build directory.
$rootdir = Get-Location
if (-Not (Test-Path ${EIGEN_CI_BUILDDIR})) {
mkdir $EIGEN_CI_BUILDDIR
}
cd $EIGEN_CI_BUILDDIR
# We need to split EIGEN_CI_ADDITIONAL_ARGS, otherwise they are interpreted
# as a single argument. Split by space, unless double-quoted.
$split_args = [regex]::Split(${EIGEN_CI_ADDITIONAL_ARGS}, ' (?=(?:[^"]|"[^"]*")*$)' )
# Configure build.
cmake -G Ninja -DCMAKE_BUILD_TYPE=MinSizeRel `
-DEIGEN_TEST_CUSTOM_CXX_FLAGS="${EIGEN_CI_TEST_CUSTOM_CXX_FLAGS}" `
${split_args} "${rootdir}"
$target = ""
if (${EIGEN_CI_BUILD_TARGET}) {
$target = "--target ${EIGEN_CI_BUILD_TARGET}"
}
# Windows builds sometimes fail due heap errors. In that case, try
# building the rest, then try to build again with a single thread.
cmake --build . ${target} -- -k0 || cmake --build . ${target} -- -k0 -j1
$success = $LASTEXITCODE
# Return to root directory.
cd ${rootdir}
# Explicitly propagate exit code to indicate pass/failure of build command.
if($success -ne 0) { Exit $success }

View File

@@ -0,0 +1,6 @@
git clone --depth 1 --branch $1 https://github.com/doxygen/doxygen.git
cmake -B doxygen/.build -G Ninja \
-DCMAKE_CXX_COMPILER=${EIGEN_CI_CXX_COMPILER} \
-DCMAKE_C_COMPILER=${EIGEN_CI_C_COMPILER} \
doxygen
cmake --build doxygen/.build -t install

View File

@@ -0,0 +1,46 @@
#!/bin/bash
set -x
echo "Running ${CI_JOB_NAME}"
# Get architecture and display CI configuration.
export ARCH=`uname -m`
export NPROC=`nproc`
echo "arch=$ARCH, target=${EIGEN_CI_TARGET_ARCH}"
echo "Processors: ${NPROC}"
echo "CI Variables:"
export | grep EIGEN
# Set noninteractive, otherwise tzdata may be installed and prompt for a
# geographical region.
export DEBIAN_FRONTEND=noninteractive
apt-get update -y > /dev/null
apt-get install -y --no-install-recommends ninja-build cmake git > /dev/null
# Install required dependencies and set up compilers.
# These are required even for testing to ensure that dynamic runtime libraries
# are available.
if [[ "$ARCH" == "${EIGEN_CI_TARGET_ARCH}" || "${EIGEN_CI_TARGET_ARCH}" == "any" ]]; then
apt-get install -y --no-install-recommends ${EIGEN_CI_INSTALL} > /dev/null;
export EIGEN_CI_CXX_IMPLICIT_INCLUDE_DIRECTORIES="";
export EIGEN_CI_CXX_COMPILER_TARGET="";
else
apt-get install -y --no-install-recommends ${EIGEN_CI_CROSS_INSTALL} > /dev/null;
export EIGEN_CI_C_COMPILER=${EIGEN_CI_CROSS_C_COMPILER};
export EIGEN_CI_CXX_COMPILER=${EIGEN_CI_CROSS_CXX_COMPILER};
export EIGEN_CI_CXX_COMPILER_TARGET=${EIGEN_CI_CROSS_TARGET_TRIPLE};
# Tell the compiler where to find headers and libraries if using clang.
# NOTE: this breaks GCC since it messes with include path order
# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70129)
if [[ "${EIGEN_CI_CROSS_CXX_COMPILER}" == *"clang"* ]]; then
export CPLUS_INCLUDE_PATH="/usr/${EIGEN_CI_CROSS_TARGET_TRIPLE}/include";
export LIBRARY_PATH="/usr/${EIGEN_CI_CROSS_TARGET_TRIPLE}/lib64";
fi
fi
echo "Compilers: ${EIGEN_CI_C_COMPILER} ${EIGEN_CI_CXX_COMPILER}"
if [ -n "$EIGEN_CI_BEFORE_SCRIPT" ]; then eval "$EIGEN_CI_BEFORE_SCRIPT"; fi
set +x

View File

@@ -0,0 +1,8 @@
echo "Running ${CI_JOB_NAME}"
# Print configuration variables.
Get-Variable EIGEN* | Format-Table -Wrap
Get-Variable CMAKE* | Format-Table -Wrap
# Run a custom before-script command.
if ("${EIGEN_CI_BEFORE_SCRIPT}") { Invoke-Expression -Command "${EIGEN_CI_BEFORE_SCRIPT}" }

View File

@@ -0,0 +1,19 @@
#!/bin/bash
set -x
# Enter build directory.
rootdir=`pwd`
cd ${EIGEN_CI_BUILDDIR}
# Install xml processor.
apt-get update -y
apt-get install --no-install-recommends -y xsltproc
# Generate test results.
xsltproc ${rootdir}/ci/CTest2JUnit.xsl Testing/`head -n 1 < Testing/TAG`/Test.xml > "JUnitTestResults_$CI_JOB_ID.xml"
# Return to root directory.
cd ${rootdir}
set +x

View File

@@ -0,0 +1,31 @@
#!/bin/bash
set -x
# Enter build directory.
rootdir=`pwd`
cd ${EIGEN_CI_BUILDDIR}
target=""
if [[ ${EIGEN_CI_CTEST_REGEX} ]]; then
target="-R ${EIGEN_CI_CTEST_REGEX}"
elif [[ ${EIGEN_CI_CTEST_LABEL} ]]; then
target="-L ${EIGEN_CI_CTEST_LABEL}"
fi
# Repeat tests up to three times to ignore flakes. Do not re-run with -T test,
# otherwise we lose test results for those that passed.
# Note: starting with CMake 3.17, we can use --repeat until-pass:3, but we have
# no way of easily installing this on ppc64le.
ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${NPROC} \
--output-on-failure --no-compress-output \
--build-no-clean -T test ${target} || \
ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${NPROC} \
--output-on-failure --no-compress-output --rerun-failed || \
ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${NPROC} \
--output-on-failure --no-compress-output --rerun-failed
# Return to root directory.
cd ${rootdir}
set +x

View File

@@ -0,0 +1,17 @@
# Change to build directory.
$rootdir = Get-Location
cd ${EIGEN_CI_BUILDDIR}
# Determine the appropriate test tag and results file.
$TEST_TAG = Get-Content Testing\TAG | select -first 1
# PowerShell equivalent to xsltproc:
$XSL_FILE = Resolve-Path "..\ci\CTest2JUnit.xsl"
$INPUT_FILE = Resolve-Path Testing\$TEST_TAG\Test.xml
$OUTPUT_FILE = Join-Path -Path $pwd -ChildPath JUnitTestResults_$CI_JOB_ID.xml
$xslt = New-Object System.Xml.Xsl.XslCompiledTransform;
$xslt.Load($XSL_FILE)
$xslt.Transform($INPUT_FILE,$OUTPUT_FILE)
# Return to root directory.
cd ${rootdir}

View File

@@ -0,0 +1,30 @@
# Change to build directory.
$rootdir = Get-Location
cd $EIGEN_CI_BUILDDIR
# Determine number of processors for parallel tests.
$NPROC=${Env:NUMBER_OF_PROCESSORS}
# Set target based on regex or label.
$target = ""
if (${EIGEN_CI_CTEST_REGEX}) {
$target = "-R","${EIGEN_CI_CTEST_REGEX}"
} elseif (${EIGEN_CI_CTEST_LABEL}) {
$target = "-L","${EIGEN_CI_CTEST_LABEL}"
}
# Repeat tests up to three times to ignore flakes. Do not re-run with -T test,
# otherwise we lose test results for those that passed.
# Note: starting with CMake 3.17, we can use --repeat until-pass:3, but we have
# no way of easily installing this on ppc64le.
ctest $EIGEN_CI_CTEST_ARGS -j$NPROC --output-on-failure --no-compress-output --build-no-clean -T test $target || `
ctest $EIGEN_CI_CTEST_ARGS -j$NPROC --output-on-failure --no-compress-output --rerun-failed || `
ctest $EIGEN_CI_CTEST_ARGS -j$NPROC --output-on-failure --no-compress-output --rerun-failed
$success = $LASTEXITCODE
# Return to root directory.
cd ${rootdir}
# Explicitly propagate exit code to indicate pass/failure of test command.
if($success -ne 0) { Exit $success }

14
ci/scripts/vars.linux.sh Normal file
View File

@@ -0,0 +1,14 @@
#!/bin/bash
# Initialize default variables used by the CI.
export EIGEN_CI_ADDITIONAL_ARGS=""
export EIGEN_CI_BEFORE_SCRIPT=""
export EIGEN_CI_BUILD_TARGET="buildtests"
export EIGEN_CI_BUILDDIR=".build"
export EIGEN_CI_C_COMPILER="clang"
export EIGEN_CI_CXX_COMPILER="clang++"
export EIGEN_CI_TEST_CUSTOM_CXX_FLAGS=""
export EIGEN_CI_CTEST_LABEL="Official"
export EIGEN_CI_CTEST_REGEX=""
export EIGEN_CI_CTEST_ARGS=""

View File

@@ -0,0 +1,11 @@
# Initialize default variables used by the CI.
$EIGEN_CI_ADDITIONAL_ARGS = ""
$EIGEN_CI_BEFORE_SCRIPT = ""
$EIGEN_CI_BUILD_TARGET = "buildtests"
$EIGEN_CI_BUILDDIR = ".build"
$EIGEN_CI_MSVC_ARCH = "x64"
$EIGEN_CI_MSVC_VER = "14.29"
$EIGEN_CI_TEST_CUSTOM_CXX_FLAGS = "/d2ReducedOptimizeHugeFunctions /DEIGEN_STRONG_INLINE=inline /Os"
$EIGEN_CI_CTEST_LABEL = "Official"
$EIGEN_CI_CTEST_REGEX = ""
$EIGEN_CI_CTEST_ARGS = ""

107
ci/smoketests.gitlab-ci.yml Normal file
View File

@@ -0,0 +1,107 @@
.buildsmoketests:linux:base:
stage: buildsmoketests
image: ubuntu:18.04
before_script:
- apt-get update -y
- apt-get install -y --no-install-recommends software-properties-common
- add-apt-repository -y ppa:ubuntu-toolchain-r/test
- apt-get update
- apt-get install --no-install-recommends -y ${EIGEN_CI_CXX_COMPILER}
${EIGEN_CI_CC_COMPILER} cmake ninja-build
script:
- mkdir -p ${BUILDDIR} && cd ${BUILDDIR}
- CXX=${EIGEN_CI_CXX_COMPILER} CC=${EIGEN_CI_CC_COMPILER} cmake -G
${EIGEN_CI_CMAKE_GENEATOR} -DEIGEN_TEST_CXX11=${EIGEN_TEST_CXX11}
${EIGEN_CI_ADDITIONAL_ARGS} ..
- cmake --build . --target buildsmoketests
artifacts:
name: "$CI_JOB_NAME-$CI_COMMIT_REF_NAME"
paths:
- ${BUILDDIR}/
expire_in: 5 days
only:
- merge_requests
buildsmoketests:x86-64:linux:gcc-10:cxx11-off:
extends: .buildsmoketests:linux:base
variables:
EIGEN_CI_CXX_COMPILER: "g++-10"
EIGEN_CI_CC_COMPILER: "gcc-10"
EIGEN_TEST_CXX11: "off"
buildsmoketests:x86-64:linux:gcc-10:cxx11-on:
extends: .buildsmoketests:linux:base
variables:
EIGEN_CI_CXX_COMPILER: "g++-10"
EIGEN_CI_CC_COMPILER: "gcc-10"
EIGEN_TEST_CXX11: "on"
buildsmoketests:x86-64:linux:clang-10:cxx11-off:
extends: .buildsmoketests:linux:base
variables:
EIGEN_CI_CXX_COMPILER: "clang++-10"
EIGEN_CI_CC_COMPILER: "clang-10"
EIGEN_TEST_CXX11: "off"
buildsmoketests:x86-64:linux:clang-10:cxx11-on:
extends: .buildsmoketests:linux:base
variables:
EIGEN_CI_CXX_COMPILER: "clang++-10"
EIGEN_CI_CC_COMPILER: "clang-10"
EIGEN_TEST_CXX11: "on"
.smoketests:linux:base:
stage: smoketests
image: ubuntu:18.04
before_script:
- apt-get update -y
- apt-get install -y --no-install-recommends software-properties-common
- add-apt-repository -y ppa:ubuntu-toolchain-r/test
- apt-get update
- apt-get install --no-install-recommends -y ${EIGEN_CI_CXX_COMPILER}
${EIGEN_CI_CC_COMPILER} cmake ninja-build xsltproc
script:
- export CXX=${EIGEN_CI_CXX_COMPILER}
- export CC=${EIGEN_CI_CC_COMPILER}
- cd ${BUILDDIR} && ctest --output-on-failure --no-compress-output
--build-no-clean -T test -L smoketest
after_script:
- apt-get update -y
- apt-get install --no-install-recommends -y xsltproc
- cd ${BUILDDIR}
- xsltproc ../ci/CTest2JUnit.xsl Testing/`head -n 1 < Testing/TAG`/Test.xml > "JUnitTestResults_$CI_JOB_ID.xml"
artifacts:
reports:
junit:
- ${BUILDDIR}/JUnitTestResults_$CI_JOB_ID.xml
expire_in: 5 days
only:
- merge_requests
smoketests:x86-64:linux:gcc-10:cxx11-off:
extends: .smoketests:linux:base
variables:
EIGEN_CI_CXX_COMPILER: g++-10
EIGEN_CI_CC_COMPILER: gcc-10
needs: [ "buildsmoketests:x86-64:linux:gcc-10:cxx11-off" ]
smoketests:x86-64:linux:gcc-10:cxx11-on:
extends: .smoketests:linux:base
variables:
EIGEN_CI_CXX_COMPILER: g++-10
EIGEN_CI_CC_COMPILER: gcc-10
needs: [ "buildsmoketests:x86-64:linux:gcc-10:cxx11-on" ]
smoketests:x86-64:linux:clang-10:cxx11-off:
extends: .smoketests:linux:base
variables:
EIGEN_CI_CXX_COMPILER: clang++-10
EIGEN_CI_CC_COMPILER: clang-10
needs: [ "buildsmoketests:x86-64:linux:clang-10:cxx11-off" ]
smoketests:x86-64:linux:clang-10:cxx11-on:
extends: .smoketests:linux:base
variables:
EIGEN_CI_CXX_COMPILER: clang++-10
EIGEN_CI_CC_COMPILER: clang-10
needs: [ "buildsmoketests:x86-64:linux:clang-10:cxx11-on" ]

451
ci/test.linux.gitlab-ci.yml Normal file
View File

@@ -0,0 +1,451 @@
.test:linux:
extends: .common:linux:cross
stage: test
script:
- . ci/scripts/test.linux.script.sh
after_script:
- . ci/scripts/test.linux.after_script.sh
rules:
- if: $CI_PIPELINE_SOURCE == "schedule" && $CI_PROJECT_NAMESPACE == "libeigen"
- if: $CI_PIPELINE_SOURCE == "web" && $CI_PROJECT_NAMESPACE == "libeigen"
- if: $CI_PIPELINE_SOURCE == "push" && $CI_PROJECT_NAMESPACE == "libeigen"
##### x86-64 ###################################################################
.test:linux:x86-64:
extends: .test:linux
variables:
EIGEN_CI_TARGET_ARCH: x86_64
EIGEN_CI_CROSS_TARGET_TRIPLE: x86_64-linux-gnu
tags:
- eigen-runner
- linux
- x86-64
# GCC-6 (minimum on Ubuntu 18.04)
.test:linux:x86-64:gcc-6:default:
extends: .test:linux:x86-64
image: ubuntu:18.04
needs: [ build:linux:cross:x86-64:gcc-6:default ]
variables:
EIGEN_CI_INSTALL: g++-6
test:linux:x86-64:gcc-6:default:official:
extends: .test:linux:x86-64:gcc-6:default
variables:
EIGEN_CI_CTEST_LABEL: Official
test:linux:x86-64:gcc-6:default:unsupported:
extends: .test:linux:x86-64:gcc-6:default
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
# GCC-10 (modern stable)
.test:linux:x86-64:gcc-10:default:
extends: .test:linux:x86-64
needs: [ build:linux:cross:x86-64:gcc-10:default ]
variables:
EIGEN_CI_INSTALL: g++-10
test:linux:x86-64:gcc-10:default:official:
extends: .test:linux:x86-64:gcc-10:default
variables:
EIGEN_CI_CTEST_LABEL: Official
test:linux:x86-64:gcc-10:default:unsupported:
extends: .test:linux:x86-64:gcc-10:default
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
.test:linux:x86-64:gcc-10:cxx03:
extends: .test:linux:x86-64
needs: [ build:linux:cross:x86-64:gcc-10:cxx03 ]
variables:
EIGEN_CI_INSTALL: g++-10
test:linux:x86-64:gcc-10:cxx03:official:
extends: .test:linux:x86-64:gcc-10:cxx03
variables:
EIGEN_CI_CTEST_LABEL: Official
test:linux:x86-64:gcc-10:cxx03:unsupported:
extends: .test:linux:x86-64:gcc-10:cxx03
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
.test:linux:x86-64:gcc-10:avx:
extends: .test:linux:x86-64
needs: [ build:linux:cross:x86-64:gcc-10:avx ]
variables:
EIGEN_CI_INSTALL: g++-10
test:linux:x86-64:gcc-10:avx:official:
extends: .test:linux:x86-64:gcc-10:avx
variables:
EIGEN_CI_CTEST_LABEL: Official
test:linux:x86-64:gcc-10:avx:unsupported:
extends: .test:linux:x86-64:gcc-10:avx
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
.test:linux:x86-64:gcc-10:avx2:
extends: .test:linux:x86-64
needs: [ build:linux:cross:x86-64:gcc-10:avx2 ]
variables:
EIGEN_CI_INSTALL: g++-10
test:linux:x86-64:gcc-10:avx2:official:
extends: .test:linux:x86-64:gcc-10:avx2
variables:
EIGEN_CI_CTEST_LABEL: Official
test:linux:x86-64:gcc-10:avx2:unsupported:
extends: .test:linux:x86-64:gcc-10:avx2
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
.test:linux:x86-64:gcc-10:avx512dq:
extends: .test:linux:x86-64
needs: [ build:linux:cross:x86-64:gcc-10:avx512dq ]
variables:
EIGEN_CI_INSTALL: g++-10
tags:
- eigen-runner
- linux
- x86-64
- avx512
test:linux:x86-64:gcc-10:avx512dq:official:
extends: .test:linux:x86-64:gcc-10:avx512dq
variables:
EIGEN_CI_CTEST_LABEL: Official
test:linux:x86-64:gcc-10:avx512dq:unsupported:
extends: .test:linux:x86-64:gcc-10:avx512dq
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
# Clang-6 (minimum on Ubuntu 20.04)
.test:linux:x86-64:clang-6:default:
extends: .test:linux:x86-64
image: ubuntu:20.04
needs: [ build:linux:cross:x86-64:clang-6:default ]
variables:
EIGEN_CI_INSTALL: g++-8 clang-6.0 lld-6.0
test:linux:x86-64:clang-6:default:official:
extends: .test:linux:x86-64:clang-6:default
variables:
EIGEN_CI_CTEST_LABEL: Official
test:linux:x86-64:clang-6:default:unsupported:
extends: .test:linux:x86-64:clang-6:default
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
# Clang-12 (modern stable)
.test:linux:x86-64:clang-12:default:
extends: .test:linux:x86-64
needs: [ build:linux:cross:x86-64:clang-12:default ]
variables:
EIGEN_CI_INSTALL: clang-12
test:linux:x86-64:clang-12:default:official:
extends: .test:linux:x86-64:clang-12:default
variables:
EIGEN_CI_CTEST_LABEL: Official
test:linux:x86-64:clang-12:default:unsupported:
extends: .test:linux:x86-64:clang-12:default
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
.test:linux:x86-64:clang-12:avx:
extends: .test:linux:x86-64
needs: [ build:linux:cross:x86-64:clang-12:avx ]
variables:
EIGEN_CI_INSTALL: clang-12
test:linux:x86-64:clang-12:avx:official:
extends: .test:linux:x86-64:clang-12:avx
variables:
EIGEN_CI_CTEST_LABEL: Official
test:linux:x86-64:clang-12:avx:unsupported:
extends: .test:linux:x86-64:clang-12:avx
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
.test:linux:x86-64:clang-12:avx2:
extends: .test:linux:x86-64
needs: [ build:linux:cross:x86-64:clang-12:avx2 ]
variables:
EIGEN_CI_INSTALL: clang-12
test:linux:x86-64:clang-12:avx2:official:
extends: .test:linux:x86-64:clang-12:avx2
variables:
EIGEN_CI_CTEST_LABEL: Official
test:linux:x86-64:clang-12:avx2:unsupported:
extends: .test:linux:x86-64:clang-12:avx2
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
.test:linux:x86-64:clang-12:avx512dq:
extends: .test:linux:x86-64
needs: [ build:linux:cross:x86-64:clang-12:avx512dq ]
variables:
EIGEN_CI_INSTALL: clang-12
tags:
- eigen-runner
- linux
- x86-64
- avx512
test:linux:x86-64:clang-12:avx512dq:official:
extends: .test:linux:x86-64:clang-12:avx512dq
variables:
EIGEN_CI_CTEST_LABEL: Official
test:linux:x86-64:clang-12:avx512dq:unsupported:
extends: .test:linux:x86-64:clang-12:avx512dq
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
##### CUDA #####################################################################
.test:linux:cuda:
extends: .test:linux
allow_failure: true
variables:
EIGEN_CI_CTEST_LABEL: gpu
tags:
- eigen-runner
- linux
- x86-64
- cuda
# NVidia no longer provides docker images < CUDA 11.0.3.
# # GCC-7, CUDA-9.2
# test:linux:cuda-9.2:gcc-7:
# extends: .test:linux:cuda
# image: nvidia/cuda:9.2-devel-ubuntu18.04
# needs: [ build:linux:cuda-9.2:gcc-7 ]
# variables:
# EIGEN_CI_CXX_COMPILER: g++-7
# EIGEN_CI_CC_COMPILER: gcc-7
# # Clang-10, CUDA-9.2
# test:linux:cuda-9.2:clang-10:
# extends: .test:linux:cuda
# image: nvidia/cuda:9.2-devel-ubuntu18.04
# needs: [ build:linux:cuda-9.2:clang-10 ]
# variables:
# EIGEN_CI_CXX_COMPILER: clang++-10
# EIGEN_CI_CC_COMPILER: clang-10
# # GCC-8, CUDA-10.2
# test:linux:cuda-10.2:gcc-8:
# extends: .test:linux:cuda
# image: nvidia/cuda:10.2-devel-ubuntu18.04
# needs: [ build:linux:cuda-10.2:gcc-8 ]
# variables:
# EIGEN_CI_CXX_COMPILER: g++-8
# EIGEN_CI_CC_COMPILER: gcc-8
# # Clang-10, CUDA-10.2
# test:linux:cuda-10.2:clang-10:
# extends: .test:linux:cuda
# image: nvidia/cuda:10.2-devel-ubuntu18.04
# needs: [ build:linux:cuda-10.2:clang-10 ]
# variables:
# EIGEN_CI_CXX_COMPILER: clang++-10
# EIGEN_CI_CC_COMPILER: clang-10
# GCC-10, CUDA-11.4
test:linux:cuda-11.4:gcc-10:
extends: .test:linux:cuda
image: nvidia/cuda:11.4.3-devel-ubuntu20.04
needs: [ build:linux:cuda-11.4:gcc-10 ]
variables:
EIGEN_CI_CXX_COMPILER: g++-10
EIGEN_CI_CC_COMPILER: gcc-10
# Clang-12, CUDA-11.4
test:linux:cuda-11.4:clang-12:
extends: .test:linux:cuda
image: nvidia/cuda:11.4.3-devel-ubuntu20.04
needs: [ build:linux:cuda-11.4:clang-12 ]
variables:
EIGEN_CI_CXX_COMPILER: clang++-12
EIGEN_CI_CC_COMPILER: clang-12
# GCC-10, CUDA-12.2
test:linux:cuda-12.2:gcc-10:
extends: .test:linux:cuda
image: nvidia/cuda:12.2.0-devel-ubuntu20.04
needs: [ build:linux:cuda-12.2:gcc-10 ]
variables:
EIGEN_CI_CXX_COMPILER: g++-10
EIGEN_CI_CC_COMPILER: gcc-10
# Clang-12, CUDA-12.2
test:linux:cuda-12.2:clang-12:
extends: .test:linux:cuda
image: nvidia/cuda:12.2.0-devel-ubuntu20.04
needs: [ build:linux:cuda-12.2:clang-12 ]
variables:
EIGEN_CI_CXX_COMPILER: clang++-12
EIGEN_CI_CC_COMPILER: clang-12
##### arm ######################################################################
.test:linux:arm:
extends: .test:linux
variables:
EIGEN_CI_TARGET_ARCH: arm
EIGEN_CI_CROSS_TARGET_TRIPLE: arm-linux-gnueabihf
# Enable cross-compiled arm binary to run on aarch64.
EIGEN_CI_BEFORE_SCRIPT: "ln -s /usr/arm-linux-gnueabihf/lib/ld-linux-armhf.so.3 /lib/ && export LD_LIBRARY_PATH=/usr/arm-linux-gnueabihf/lib/"
tags:
- eigen-runner
- linux
- aarch64
.test:linux:arm:gcc-10:default:
extends: .test:linux:arm
needs: [ build:linux:cross:arm:gcc-10:default ]
variables:
EIGEN_CI_CROSS_INSTALL: g++-10-arm-linux-gnueabihf
test:linux:arm:gcc-10:default:official:
extends: .test:linux:arm:gcc-10:default
variables:
EIGEN_CI_CTEST_LABEL: Official
test:linux:arm:gcc-10:default:unsupported:
extends: .test:linux:arm:gcc-10:default
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
.test:linux:arm:clang-12:default:
extends: .test:linux:arm
needs: [ build:linux:cross:arm:clang-12:default ]
variables:
EIGEN_CI_CROSS_INSTALL: g++-10-arm-linux-gnueabihf clang-12
test:linux:arm:clang-12:default:official:
extends: .test:linux:arm:clang-12:default
variables:
EIGEN_CI_CTEST_LABEL: Official
test:linux:arm:clang-12:default:unsupported:
extends: .test:linux:arm:clang-12:default
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
##### aarch64 ##################################################################
.test:linux:aarch64:
extends: .test:linux
variables:
EIGEN_CI_TARGET_ARCH: aarch64
EIGEN_CI_CROSS_TARGET_TRIPLE: aarch64-linux-gnu
tags:
- eigen-runner
- linux
- aarch64
.test:linux:aarch64:gcc-10:default:
extends: .test:linux:aarch64
needs: [ build:linux:cross:aarch64:gcc-10:default ]
variables:
EIGEN_CI_INSTALL: g++-10
test:linux:aarch64:gcc-10:default:official:
extends: .test:linux:aarch64:gcc-10:default
variables:
EIGEN_CI_CTEST_LABEL: Official
test:linux:aarch64:gcc-10:default:unsupported:
extends: .test:linux:aarch64:gcc-10:default
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
.test:linux:aarch64:clang-12:default:
extends: .test:linux:aarch64
needs: [ build:linux:cross:aarch64:clang-12:default ]
variables:
EIGEN_CI_INSTALL: clang-12
test:linux:aarch64:clang-12:default:official:
extends: .test:linux:aarch64:clang-12:default
variables:
EIGEN_CI_CTEST_LABEL: Official
test:linux:aarch64:clang-12:default:unsupported:
extends: .test:linux:aarch64:clang-12:default
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
##### ppc64le ##################################################################
.test:linux:ppc64le:
extends: .test:linux
variables:
EIGEN_CI_TARGET_ARCH: ppc64le
EIGEN_CI_CROSS_TARGET_TRIPLE: powerpc64le-linux-gnu
tags:
- eigen-runner
- linux
- ppc64le
.test:linux:ppc64le:gcc-10:default:
extends: .test:linux:ppc64le
needs: [ build:linux:cross:ppc64le:gcc-10:default ]
variables:
EIGEN_CI_INSTALL: g++-10
test:linux:ppc64le:gcc-10:default:official:
extends: .test:linux:ppc64le:gcc-10:default
variables:
EIGEN_CI_CTEST_LABEL: Official
test:linux:ppc64le:gcc-10:default:unsupported:
extends: .test:linux:ppc64le:gcc-10:default
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
.test:linux:ppc64le:clang-12:default:
extends: .test:linux:ppc64le
needs: [ build:linux:cross:ppc64le:clang-12:default ]
variables:
EIGEN_CI_INSTALL: clang-12
test:linux:ppc64le:clang-12:default:official:
extends: .test:linux:ppc64le:clang-12:default
variables:
EIGEN_CI_CTEST_LABEL: Official
test:linux:ppc64le:clang-12:default:unsupported:
extends: .test:linux:ppc64le:clang-12:default
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
##### MR Smoke Tests ###########################################################
test:linux:x86-64:gcc-10:default:smoketest:
extends: .test:linux:x86-64:gcc-10:default
needs: [ build:linux:cross:x86-64:gcc-10:default:smoketest ]
variables:
EIGEN_CI_CTEST_LABEL: smoketest
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
test:linux:x86-64:clang-12:default:smoketest:
extends: .test:linux:x86-64:clang-12:default
needs: [ build:linux:cross:x86-64:clang-12:default:smoketest ]
variables:
EIGEN_CI_CTEST_LABEL: smoketest
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"

View File

@@ -0,0 +1,112 @@
.test:windows:
extends: .common:windows
stage: test
script:
- ./ci/scripts/test.windows.script.ps1
after_script:
- ./ci/scripts/test.windows.after_script.ps1
rules:
- if: $CI_PIPELINE_SOURCE == "schedule" && $CI_PROJECT_NAMESPACE == "libeigen"
- if: $CI_PIPELINE_SOURCE == "web" && $CI_PROJECT_NAMESPACE == "libeigen"
- if: $CI_PIPELINE_SOURCE == "push" && $CI_PROJECT_NAMESPACE == "libeigen"
tags:
- eigen-runner
- windows
- x86-64
##### MSVC #####################################################################
# MSVC 14.16 (VS 2017)
.test:windows:x86-64:msvc-14.16:default:
extends: .test:windows
needs: [ build:windows:x86-64:msvc-14.16:default ]
test:windows:x86-64:msvc-14.16:default:official:
extends: .test:windows:x86-64:msvc-14.16:default
variables:
EIGEN_CI_CTEST_LABEL: Official
test:windows:x86-64:msvc-14.16:default:unsupported:
extends: .test:windows:x86-64:msvc-14.16:default
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
# MSVC 14.29 (VS 2019)
.test:windows:x86-64:msvc-14.29:default:
extends: .test:windows
needs: [ build:windows:x86-64:msvc-14.29:default ]
test:windows:x86-64:msvc-14.29:default:official:
extends: .test:windows:x86-64:msvc-14.29:default
needs: [ build:windows:x86-64:msvc-14.29:cxx03 ]
test:windows:x86-64:msvc-14.29:default:official:
extends: .test:windows:x86-64:msvc-14.29:default
variables:
EIGEN_CI_CTEST_LABEL: Official
test:windows:x86-64:msvc-14.29:default:unsupported:
extends: .test:windows:x86-64:msvc-14.29:default
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
.test:windows:x86-64:msvc-14.29:avx2:
extends: .test:windows
needs: [ build:windows:x86-64:msvc-14.29:avx2 ]
test:windows:x86-64:msvc-14.29:avx2:official:
extends: .test:windows:x86-64:msvc-14.29:avx2
variables:
EIGEN_CI_CTEST_LABEL: Official
test:windows:x86-64:msvc-14.29:avx2:unsupported:
extends: .test:windows:x86-64:msvc-14.29:avx2
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
.test:windows:x86-64:msvc-14.29:avx512dq:
extends: .test:windows
needs: [ build:windows:x86-64:msvc-14.29:avx512dq ]
tags:
- eigen-runner
- windows
- x86-64
- avx512
test:windows:x86-64:msvc-14.29:avx512dq:official:
extends: .test:windows:x86-64:msvc-14.29:avx512dq
variables:
EIGEN_CI_CTEST_LABEL: Official
test:windows:x86-64:msvc-14.29:avx512dq:unsupported:
extends: .test:windows:x86-64:msvc-14.29:avx512dq
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
##### MSVC + CUDA ##############################################################
.test:windows:cuda:
extends: .test:windows
allow_failure: true
variables:
EIGEN_CI_CTEST_LABEL: gpu
tags:
- eigen-runner
- windows
- x86-64
- cuda
# The CUDA 9.2 compiler crashes with an internal error.
# # MSVC 14.16 + CUDA 9.2
# test:windows:x86-64:cuda-9.2:msvc-14.16:
# extends: .test:windows:cuda
# needs: [ build:windows:x86-64:cuda-9.2:msvc-14.16 ]
# MSVC 14.29 + CUDA 10.2
test:windows:x86-64:cuda-10.2:msvc-14.29:
extends: .test:windows:cuda
needs: [ build:windows:x86-64:cuda-10.2:msvc-14.29 ]
# MSVC 14.29 + CUDA 11.4
test:windows:x86-64:cuda-11.4:msvc-14.29:
extends: .test:windows:cuda
needs: [ build:windows:x86-64:cuda-11.4:msvc-14.29 ]

View File

@@ -3,7 +3,9 @@
@PACKAGE_INIT@
include ("${CMAKE_CURRENT_LIST_DIR}/Eigen3Targets.cmake")
if (NOT TARGET eigen)
include ("${CMAKE_CURRENT_LIST_DIR}/Eigen3Targets.cmake")
endif ()
# Legacy variables, do *not* use. May be removed in the future.

View File

@@ -1,7 +1,7 @@
include(EigenTesting)
include(CheckCXXSourceCompiles)
# configure the "site" and "buildname"
# configure the "site" and "buildname"
ei_set_sitename()
# retrieve and store the build string
@@ -11,6 +11,15 @@ add_custom_target(buildtests)
add_custom_target(check COMMAND "ctest")
add_dependencies(check buildtests)
# Convenience target for only building GPU tests.
add_custom_target(buildtests_gpu)
add_custom_target(check_gpu COMMAND "ctest" "--output-on-failure"
"--no-compress-output"
"--build-no-clean"
"-T" "test"
"-L" "gpu")
add_dependencies(check_gpu buildtests_gpu)
# check whether /bin/bash exists (disabled as not used anymore)
# find_file(EIGEN_BIN_BASH_EXISTS "/bin/bash" PATHS "/" NO_DEFAULT_PATH)
@@ -50,7 +59,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
set(CTEST_CUSTOM_COVERAGE_EXCLUDE "/test/")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COVERAGE_FLAGS}")
endif(EIGEN_COVERAGE_TESTING)
elseif(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS /D_SCL_SECURE_NO_WARNINGS")
endif(CMAKE_COMPILER_IS_GNUCXX)

View File

@@ -18,26 +18,26 @@ macro(ei_add_test_internal testname testname_with_suffix)
set(filename ${testname}.cpp)
endif()
set(is_gpu_test OFF)
if(EIGEN_ADD_TEST_FILENAME_EXTENSION STREQUAL cu)
set(is_gpu_test ON)
if(EIGEN_TEST_CUDA_CLANG)
set_source_files_properties(${filename} PROPERTIES LANGUAGE CXX)
if(CUDA_64_BIT_DEVICE_CODE)
if(CUDA_64_BIT_DEVICE_CODE AND (EXISTS "${CUDA_TOOLKIT_ROOT_DIR}/lib64"))
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
else()
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib")
endif()
if (${ARGC} GREATER 2)
add_executable(${targetname} ${filename})
else()
add_executable(${targetname} ${filename} OPTIONS ${ARGV2})
add_executable(${targetname} ${filename})
set(CUDA_CLANG_LINK_LIBRARIES "cudart_static" "cuda" "dl" "pthread")
if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
set(CUDA_CLANG_LINK_LIBRARIES ${CUDA_CLANG_LINK_LIBRARIES} "rt")
endif()
target_link_libraries(${targetname} "cudart_static" "cuda" "dl" "rt" "pthread")
target_link_libraries(${targetname} ${CUDA_CLANG_LINK_LIBRARIES})
else()
if (${ARGC} GREATER 2)
cuda_add_executable(${targetname} ${filename} OPTIONS ${ARGV2})
else()
cuda_add_executable(${targetname} ${filename})
endif()
cuda_add_executable(${targetname} ${filename})
endif()
else()
add_executable(${targetname} ${filename})
@@ -46,7 +46,10 @@ macro(ei_add_test_internal testname testname_with_suffix)
if (targetname MATCHES "^eigen2_")
add_dependencies(eigen2_buildtests ${targetname})
else()
add_dependencies(buildtests ${targetname})
add_dependencies(buildtests ${targetname})
endif()
if (is_gpu_test)
add_dependencies(buildtests_gpu ${targetname})
endif()
if(EIGEN_NO_ASSERTION_CHECKING)
@@ -71,7 +74,7 @@ macro(ei_add_test_internal testname testname_with_suffix)
endif(${ARGC} GREATER 2)
if(EIGEN_TEST_CUSTOM_CXX_FLAGS)
ei_add_target_property(${targetname} COMPILE_FLAGS "${EIGEN_TEST_CUSTOM_CXX_FLAGS}")
target_compile_options(${targetname} PRIVATE ${EIGEN_TEST_CUSTOM_CXX_FLAGS})
endif()
if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO)
@@ -97,7 +100,11 @@ macro(ei_add_test_internal testname testname_with_suffix)
endif()
endif()
add_test(${testname_with_suffix} "${targetname}")
add_test(NAME ${testname_with_suffix} COMMAND "${targetname}")
if (is_gpu_test)
# Add gpu tag for testing only GPU tests.
set_property(TEST ${testname_with_suffix} APPEND PROPERTY LABELS "gpu")
endif()
# Specify target and test labels accoirding to EIGEN_CURRENT_SUBPROJECT
get_property(current_subproject GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT)
@@ -145,10 +152,10 @@ macro(ei_add_test_internal_sycl testname testname_with_suffix)
endif()
if(EIGEN_NO_ASSERTION_CHECKING)
ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_NO_ASSERTION_CHECKING=1")
target_compile_options(${targetname} PRIVATE "-DEIGEN_NO_ASSERTION_CHECKING=1")
else(EIGEN_NO_ASSERTION_CHECKING)
if(EIGEN_DEBUG_ASSERTS)
ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_DEBUG_ASSERTS=1")
target_compile_options(${targetname} PRIVATE "-DEIGEN_DEBUG_ASSERTS=1")
endif(EIGEN_DEBUG_ASSERTS)
endif(EIGEN_NO_ASSERTION_CHECKING)
@@ -166,7 +173,7 @@ macro(ei_add_test_internal_sycl testname testname_with_suffix)
endif(${ARGC} GREATER 2)
if(EIGEN_TEST_CUSTOM_CXX_FLAGS)
ei_add_target_property(${targetname} COMPILE_FLAGS "${EIGEN_TEST_CUSTOM_CXX_FLAGS}")
target_compile_options(${targetname} PRIVATE ${EIGEN_TEST_CUSTOM_CXX_FLAGS})
endif()
if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO)
@@ -192,7 +199,7 @@ macro(ei_add_test_internal_sycl testname testname_with_suffix)
endif()
endif()
add_test(${testname_with_suffix} "${targetname}")
add_test(NAME ${testname_with_suffix} COMMAND "${targetname}")
# Specify target and test labels according to EIGEN_CURRENT_SUBPROJECT
get_property(current_subproject GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT)

View File

@@ -19,8 +19,11 @@ include(CheckCXXSourceCompiles)
# notice the std:: is required on some platforms such as QNX
set(find_standard_math_library_test_program
"#include<cmath>
int main() { std::sin(0.0); std::log(0.0f); }")
"
#include<cmath>
int main(int argc, char **){
return int(std::sin(double(argc)) + std::log(double(argc)));
}")
# first try compiling/linking the test program without any linker flags

Some files were not shown because too many files have changed in this diff Show More