mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Compare commits
260 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3f79884f03 | ||
|
|
26129229ec | ||
|
|
fcee1903be | ||
|
|
6dc478fd77 | ||
|
|
65c01e2bf7 | ||
|
|
8f0e80fe30 | ||
|
|
47197065da | ||
|
|
bcb9068268 | ||
|
|
c8ecc897c0 | ||
|
|
3a2bb7f782 | ||
|
|
bf402dd9b8 | ||
|
|
8eb0fc1e72 | ||
|
|
dbedc70012 | ||
|
|
12a152031d | ||
|
|
75e60121f4 | ||
|
|
0308f64515 | ||
|
|
fb30bb9e59 | ||
|
|
20be8ad91e | ||
|
|
b8bb804007 | ||
|
|
5c3d21693b | ||
|
|
eb105cace8 | ||
|
|
d229f99ba2 | ||
|
|
8ba8d90063 | ||
|
|
6fad2eb97b | ||
|
|
58e0cce0f7 | ||
|
|
4a98cada26 | ||
|
|
a76ce042e6 | ||
|
|
af22364988 | ||
|
|
d9c131de5b | ||
|
|
423f88aa1e | ||
|
|
c6503e03eb | ||
|
|
e3d01f85b2 | ||
|
|
b5f32830fd | ||
|
|
01fad14d78 | ||
|
|
2334291157 | ||
|
|
71f023de3e | ||
|
|
94ea1eed9a | ||
|
|
327ed3d1d3 | ||
|
|
72d4d45133 | ||
|
|
316dadc8e4 | ||
|
|
053261de88 | ||
|
|
1c54514bfc | ||
|
|
c253cc3d53 | ||
|
|
947f84633b | ||
|
|
62bf04b339 | ||
|
|
82e4a16759 | ||
|
|
77c943670e | ||
|
|
91e9344be9 | ||
|
|
f9123df772 | ||
|
|
d591b0466d | ||
|
|
9bb75937cc | ||
|
|
62eb4dc99b | ||
|
|
4824db6444 | ||
|
|
d17bb02ccd | ||
|
|
e0ea25fc21 | ||
|
|
b49dde01dc | ||
|
|
dd94f10442 | ||
|
|
dcff9ba785 | ||
|
|
cb7a72d5b0 | ||
|
|
e17d17cea3 | ||
|
|
bd8d06033d | ||
|
|
a47bbf664c | ||
|
|
548ecc2fe5 | ||
|
|
ad9a7c69bc | ||
|
|
6924d4eec5 | ||
|
|
6261f4629f | ||
|
|
474c2996bd | ||
|
|
d1111d625c | ||
|
|
103b9351fd | ||
|
|
a6da803873 | ||
|
|
60aad09878 | ||
|
|
92b1674c79 | ||
|
|
610d79e686 | ||
|
|
a64aabf73c | ||
|
|
55c7848877 | ||
|
|
d4b664c4cd | ||
|
|
5354ffbb4f | ||
|
|
6264755dd3 | ||
|
|
ab41c18d60 | ||
|
|
216c9125e9 | ||
|
|
ddbbd7065d | ||
|
|
85fdcdf055 | ||
|
|
87aafc9169 | ||
|
|
19d9c835e0 | ||
|
|
b37551f62a | ||
|
|
c625a6a85b | ||
|
|
453d54325e | ||
|
|
ba212aeaa9 | ||
|
|
aa2b46aa91 | ||
|
|
853c0e15df | ||
|
|
8566ef805b | ||
|
|
3a30a2bc3e | ||
|
|
b80d9dd42e | ||
|
|
8bbe556e35 | ||
|
|
97ced33b33 | ||
|
|
76fbe94279 | ||
|
|
530b328769 | ||
|
|
3dd8225862 | ||
|
|
976d7c19e8 | ||
|
|
5c7cb3c05c | ||
|
|
d558e84f0b | ||
|
|
224dd66e10 | ||
|
|
d90d7a006f | ||
|
|
cc25edd5de | ||
|
|
508b51cb62 | ||
|
|
a9fe75efc4 | ||
|
|
7cefa75901 | ||
|
|
e92993d7b9 | ||
|
|
6b89ee0095 | ||
|
|
2f0e8904f1 | ||
|
|
b038a4bb71 | ||
|
|
1420f8b3a1 | ||
|
|
3d9764ee24 | ||
|
|
425444428c | ||
|
|
2b5a0060b4 | ||
|
|
072ee3c07d | ||
|
|
ae8425c74c | ||
|
|
145830e067 | ||
|
|
40f6e26a24 | ||
|
|
d0f6b1c21f | ||
|
|
9daa66f262 | ||
|
|
5d98fa235d | ||
|
|
403e672587 | ||
|
|
7020f30da3 | ||
|
|
b9edd6fb85 | ||
|
|
96ba7cd655 | ||
|
|
fa6d36e0f7 | ||
|
|
734469e43f | ||
|
|
c7f40e522e | ||
|
|
06250a154c | ||
|
|
bec3f9bfe4 | ||
|
|
0916d69ca5 | ||
|
|
0dfc5b296b | ||
|
|
8a96b0080d | ||
|
|
8e21cef80a | ||
|
|
4393f20fea | ||
|
|
f1104a3b0f | ||
|
|
35f0bc70d8 | ||
|
|
b5f2b7d087 | ||
|
|
7dbbc6ffd1 | ||
|
|
ced1a45f82 | ||
|
|
193eedbfe2 | ||
|
|
d7fa09bf05 | ||
|
|
4824ac1363 | ||
|
|
b551a2d77a | ||
|
|
10a7668035 | ||
|
|
7b23fad4c9 | ||
|
|
44cb1e4802 | ||
|
|
872523844a | ||
|
|
76eb9c9fd9 | ||
|
|
70b1ce11c6 | ||
|
|
8b0b121c9e | ||
|
|
08c841eb87 | ||
|
|
1ed4233fd2 | ||
|
|
c2ee454df4 | ||
|
|
6e157dd7c6 | ||
|
|
f8aae7a908 | ||
|
|
cd0e5dca9b | ||
|
|
45362f4eae | ||
|
|
3f532edc6d | ||
|
|
1dc9aaaf36 | ||
|
|
36d9b51a44 | ||
|
|
b72b7ab76f | ||
|
|
f8678272a4 | ||
|
|
8e3c4283f5 | ||
|
|
ff96c94043 | ||
|
|
4161b8be67 | ||
|
|
e5bc9526f1 | ||
|
|
c4ef69b5bd | ||
|
|
6dcd373b9d | ||
|
|
6ad3f1ab1f | ||
|
|
96f9015807 | ||
|
|
b2effa2b2c | ||
|
|
642cc27eb1 | ||
|
|
f6bd508351 | ||
|
|
d9e134c73c | ||
|
|
26cfe5a958 | ||
|
|
2c03ca3325 | ||
|
|
b1a17dbfe4 | ||
|
|
551cb9b7b4 | ||
|
|
504d3a3586 | ||
|
|
51ec188da0 | ||
|
|
951da96f14 | ||
|
|
cb3aad1d91 | ||
|
|
9852e7b9cb | ||
|
|
300a226ffa | ||
|
|
2a1500915a | ||
|
|
2066ed91de | ||
|
|
d89925e6de | ||
|
|
02fd3acd81 | ||
|
|
31a36aa9c4 | ||
|
|
fc3fd8ab57 | ||
|
|
861962c55f | ||
|
|
0f2d480af0 | ||
|
|
a2415388ef | ||
|
|
65257f6b29 | ||
|
|
dd18b22f0b | ||
|
|
845994f18f | ||
|
|
e07c0f6bb5 | ||
|
|
3a7f16a655 | ||
|
|
b0896382a3 | ||
|
|
74cf12cbe0 | ||
|
|
d5e0efaf69 | ||
|
|
c851044eae | ||
|
|
55495dcbae | ||
|
|
e38fc9692d | ||
|
|
f8d3b4c060 | ||
|
|
bfa606d16f | ||
|
|
38d0a0d5d6 | ||
|
|
2dba4b7ce7 | ||
|
|
bc57c68cf5 | ||
|
|
e04c3f2cc0 | ||
|
|
d6454788d9 | ||
|
|
291fef5760 | ||
|
|
49747fa4a9 | ||
|
|
3428d80d20 | ||
|
|
d849bc4401 | ||
|
|
5322b670c8 | ||
|
|
7d23e7f9f1 | ||
|
|
d1243b393e | ||
|
|
c69a226192 | ||
|
|
e1eccfad3f | ||
|
|
c64c0f382f | ||
|
|
5c58582a08 | ||
|
|
6e5bed69dc | ||
|
|
464fc297cf | ||
|
|
4b474fdb34 | ||
|
|
95f2e7f3f5 | ||
|
|
3abbdfd621 | ||
|
|
abd5faf784 | ||
|
|
cac147ba10 | ||
|
|
78d3c54631 | ||
|
|
ea27678153 | ||
|
|
2a820d41df | ||
|
|
dd27e10360 | ||
|
|
2d78023815 | ||
|
|
cbd6fe323c | ||
|
|
f59226e901 | ||
|
|
4c19024fbf | ||
|
|
fb041c260c | ||
|
|
883a8cbb2c | ||
|
|
6ab9e8632f | ||
|
|
044424b0e2 | ||
|
|
6a370f50c7 | ||
|
|
b08c26aefa | ||
|
|
84fdbded4d | ||
|
|
87e89fea4e | ||
|
|
bfbe61454e | ||
|
|
cf9edd9958 | ||
|
|
b6fac91998 | ||
|
|
d4d4382b18 | ||
|
|
90d6fc0e28 | ||
|
|
b0bd1cfa05 | ||
|
|
e4f3759c4d | ||
|
|
c36316f284 | ||
|
|
140ad0908d | ||
|
|
6ba5d2c90c | ||
|
|
8e776c94c1 | ||
|
|
19a70ae939 | ||
|
|
850c6d8a2b |
@@ -150,7 +150,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
|
||||
option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
|
||||
if(EIGEN_TEST_NEON)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp -mfpu=neon -mcpu=cortex-a8")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=hard -mfpu=neon -mcpu=cortex-a8")
|
||||
message("Enabling NEON in tests/examples")
|
||||
endif()
|
||||
|
||||
@@ -198,6 +198,18 @@ if(MSVC)
|
||||
endif(MSVC)
|
||||
|
||||
option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF)
|
||||
option(EIGEN_TEST_X87 "Force using X87 instructions. Implies no vectorization." OFF)
|
||||
|
||||
if(EIGEN_TEST_X87)
|
||||
set(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION ON)
|
||||
if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpmath=387")
|
||||
message("Forcing use of x87 instructions in tests/examples")
|
||||
else()
|
||||
message("EIGEN_TEST_X87 ignored on your compiler")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
|
||||
add_definitions(-DEIGEN_DONT_VECTORIZE=1)
|
||||
message("Disabling vectorization in tests/examples")
|
||||
|
||||
33
Eigen/Core
33
Eigen/Core
@@ -26,13 +26,21 @@
|
||||
#ifndef EIGEN_CORE_H
|
||||
#define EIGEN_CORE_H
|
||||
|
||||
#define EIGEN_NO_STATIC_ASSERT
|
||||
|
||||
// first thing Eigen does: prevent MSVC from committing suicide
|
||||
#include "src/Core/util/DisableMSVCWarnings.h"
|
||||
|
||||
// then include this file where all our macros are defined. It's really important to do it first because
|
||||
// it's where we do all the alignment settings (platform detection and honoring the user's will if he
|
||||
// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
|
||||
#include "src/Core/util/Macros.h"
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
#include "src/Core/util/Macros.h"
|
||||
#else
|
||||
namespace Eigen { // for some reason Doxygen needs this namespace
|
||||
#include "src/Core/util/Macros.h"
|
||||
}
|
||||
#endif
|
||||
|
||||
// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into
|
||||
// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks
|
||||
@@ -87,7 +95,14 @@
|
||||
#endif
|
||||
|
||||
// include files
|
||||
|
||||
#if (defined __GNUC__) && (defined __MINGW32__)
|
||||
#include <intrin.h>
|
||||
//including intrin.h works around a MINGW bug http://sourceforge.net/tracker/?func=detail&atid=102435&aid=2962480&group_id=2435
|
||||
//in essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do). However,
|
||||
//intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations with conflicting linkage. The linkage for intrinsics
|
||||
//doesn't matter, but at that stage the compiler doesn't know; so, to avoid compile errors when windows.h is included after Eigen/Core,
|
||||
//include intrin here.
|
||||
#endif
|
||||
#include <emmintrin.h>
|
||||
#include <xmmintrin.h>
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
@@ -126,7 +141,14 @@
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
// MSVC for windows mobile does not have the errno.h file
|
||||
#if !(defined(_MSC_VER) && defined(_WIN32_WCE))
|
||||
#define EIGEN_HAS_ERRNO
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_HAS_ERRNO
|
||||
#include <cerrno>
|
||||
#endif
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
@@ -145,7 +167,7 @@
|
||||
#endif
|
||||
|
||||
// required for __cpuid, needs to be included after cmath
|
||||
#ifdef _MSC_VER
|
||||
#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_IX64))
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
@@ -221,10 +243,13 @@ using std::size_t;
|
||||
#if defined EIGEN_VECTORIZE_SSE
|
||||
#include "src/Core/arch/SSE/PacketMath.h"
|
||||
#include "src/Core/arch/SSE/MathFunctions.h"
|
||||
#include "src/Core/arch/SSE/Complex.h"
|
||||
#elif defined EIGEN_VECTORIZE_ALTIVEC
|
||||
#include "src/Core/arch/AltiVec/PacketMath.h"
|
||||
#include "src/Core/arch/AltiVec/Complex.h"
|
||||
#elif defined EIGEN_VECTORIZE_NEON
|
||||
#include "src/Core/arch/NEON/PacketMath.h"
|
||||
#include "src/Core/arch/NEON/Complex.h"
|
||||
#endif
|
||||
|
||||
#include "src/Core/arch/Default/Settings.h"
|
||||
@@ -248,11 +273,11 @@ using std::size_t;
|
||||
#include "src/Core/NoAlias.h"
|
||||
#include "src/Core/DenseStorageBase.h"
|
||||
#include "src/Core/Matrix.h"
|
||||
#include "src/Core/SelfCwiseBinaryOp.h"
|
||||
#include "src/Core/CwiseBinaryOp.h"
|
||||
#include "src/Core/CwiseUnaryOp.h"
|
||||
#include "src/Core/CwiseNullaryOp.h"
|
||||
#include "src/Core/CwiseUnaryView.h"
|
||||
#include "src/Core/SelfCwiseBinaryOp.h"
|
||||
#include "src/Core/Dot.h"
|
||||
#include "src/Core/StableNorm.h"
|
||||
#include "src/Core/MapBase.h"
|
||||
|
||||
@@ -4,4 +4,4 @@
|
||||
#include "QR"
|
||||
#include "SVD"
|
||||
#include "Geometry"
|
||||
#include "Eigenvalues"
|
||||
#include "Eigenvalues"
|
||||
|
||||
@@ -23,7 +23,6 @@ namespace Eigen {
|
||||
*/
|
||||
|
||||
#include "src/misc/Solve.h"
|
||||
#include "src/SVD/SVD.h"
|
||||
#include "src/SVD/JacobiSVD.h"
|
||||
#include "src/SVD/UpperBidiagonalization.h"
|
||||
|
||||
|
||||
42
Eigen/StdDeque
Normal file
42
Eigen/StdDeque
Normal file
@@ -0,0 +1,42 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2009 Hauke Heibel <hauke.heibel@googlemail.com>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// Alternatively, you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License as
|
||||
// published by the Free Software Foundation; either version 2 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef EIGEN_STDDEQUE_MODULE_H
|
||||
#define EIGEN_STDDEQUE_MODULE_H
|
||||
|
||||
#include "Core"
|
||||
#include <deque>
|
||||
|
||||
#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */
|
||||
|
||||
#define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...)
|
||||
|
||||
#else
|
||||
|
||||
#include "src/StlSupport/StdDeque.h"
|
||||
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_STDDEQUE_MODULE_H
|
||||
@@ -1,12 +1,6 @@
|
||||
ADD_SUBDIRECTORY(Core)
|
||||
ADD_SUBDIRECTORY(LU)
|
||||
ADD_SUBDIRECTORY(QR)
|
||||
ADD_SUBDIRECTORY(SVD)
|
||||
ADD_SUBDIRECTORY(Cholesky)
|
||||
ADD_SUBDIRECTORY(Geometry)
|
||||
ADD_SUBDIRECTORY(Sparse)
|
||||
ADD_SUBDIRECTORY(Jacobi)
|
||||
ADD_SUBDIRECTORY(Householder)
|
||||
ADD_SUBDIRECTORY(Eigenvalues)
|
||||
ADD_SUBDIRECTORY(misc)
|
||||
ADD_SUBDIRECTORY(plugins)
|
||||
file(GLOB Eigen_src_subdirectories "*")
|
||||
foreach(f ${Eigen_src_subdirectories})
|
||||
if(NOT f MATCHES ".txt")
|
||||
add_subdirectory(${f})
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
@@ -363,7 +363,9 @@ struct ei_solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
|
||||
}
|
||||
};
|
||||
|
||||
/** This is the \em in-place version of solve().
|
||||
/** \internal use x = ldlt_object.solve(x);
|
||||
*
|
||||
* This is the \em in-place version of solve().
|
||||
*
|
||||
* \param bAndX represents both the right-hand side matrix b and result x.
|
||||
*
|
||||
|
||||
@@ -76,11 +76,11 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
typedef LLT_Traits<MatrixType,UpLo> Traits;
|
||||
|
||||
/**
|
||||
* \brief Default Constructor.
|
||||
*
|
||||
* The default constructor is useful in cases in which the user intends to
|
||||
* perform decompositions via LLT::compute(const MatrixType&).
|
||||
*/
|
||||
* \brief Default Constructor.
|
||||
*
|
||||
* The default constructor is useful in cases in which the user intends to
|
||||
* perform decompositions via LLT::compute(const MatrixType&).
|
||||
*/
|
||||
LLT() : m_matrix(), m_isInitialized(false) {}
|
||||
|
||||
/** \brief Default Constructor with memory preallocation
|
||||
@@ -134,7 +134,7 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
bool solveInPlace(MatrixBase<Derived> &bAndX) const;
|
||||
void solveInPlace(MatrixBase<Derived> &bAndX) const;
|
||||
|
||||
LLT& compute(const MatrixType& matrix);
|
||||
|
||||
@@ -309,7 +309,9 @@ struct ei_solve_retval<LLT<_MatrixType, UpLo>, Rhs>
|
||||
}
|
||||
};
|
||||
|
||||
/** This is the \em in-place version of solve().
|
||||
/** \internal use x = llt_object.solve(x);
|
||||
*
|
||||
* This is the \em in-place version of solve().
|
||||
*
|
||||
* \param bAndX represents both the right-hand side matrix b and result x.
|
||||
*
|
||||
@@ -322,13 +324,12 @@ struct ei_solve_retval<LLT<_MatrixType, UpLo>, Rhs>
|
||||
*/
|
||||
template<typename MatrixType, int _UpLo>
|
||||
template<typename Derived>
|
||||
bool LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
|
||||
void LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
|
||||
{
|
||||
ei_assert(m_isInitialized && "LLT is not initialized.");
|
||||
ei_assert(m_matrix.rows()==bAndX.rows());
|
||||
matrixL().solveInPlace(bAndX);
|
||||
matrixU().solveInPlace(bAndX);
|
||||
return true;
|
||||
}
|
||||
|
||||
/** \returns the matrix represented by the decomposition,
|
||||
|
||||
@@ -25,6 +25,20 @@
|
||||
#ifndef EIGEN_ARRAY_H
|
||||
#define EIGEN_ARRAY_H
|
||||
|
||||
/** \class Array
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief General-purpose arrays with easy API for coefficient-wise operations
|
||||
*
|
||||
* The %Array class is very similar to the Matrix class. It provides
|
||||
* general-purpose one- and two-dimensional arrays. The difference between the
|
||||
* %Array and the %Matrix class is primarily in the API: the API for the
|
||||
* %Array class provides easy access to coefficient-wise operations, while the
|
||||
* API for the %Matrix class provides easy access to linear-algebra
|
||||
* operations.
|
||||
*
|
||||
* \sa \ref TutorialArrayClass, \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct ei_traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : ei_traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
{
|
||||
@@ -231,6 +245,7 @@ class Array
|
||||
};
|
||||
|
||||
/** \defgroup arraytypedefs Global array typedefs
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* Eigen defines several typedef shortcuts for most common 1D and 2D array types.
|
||||
*
|
||||
@@ -251,7 +266,7 @@ class Array
|
||||
#define EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \
|
||||
/** \ingroup arraytypedefs */ \
|
||||
typedef Array<Type, Size, Size> Array##SizeSuffix##SizeSuffix##TypeSuffix; \
|
||||
/** \ingroup matrixtypedefs */ \
|
||||
/** \ingroup arraytypedefs */ \
|
||||
typedef Array<Type, Size, 1> Array##SizeSuffix##TypeSuffix;
|
||||
|
||||
#define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
template<typename ExpressionType> class MatrixWrapper;
|
||||
|
||||
/** \class ArrayBase
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for all 1D and 2D array, and related expressions
|
||||
*
|
||||
@@ -43,7 +44,7 @@ template<typename ExpressionType> class MatrixWrapper;
|
||||
*
|
||||
* \param Derived is the derived type, e.g., an array or an expression type.
|
||||
*
|
||||
* \sa class MatrixBase
|
||||
* \sa class MatrixBase, \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived> class ArrayBase
|
||||
: public DenseBase<Derived>
|
||||
@@ -166,6 +167,13 @@ template<typename Derived> class ArrayBase
|
||||
explicit ArrayBase(Index);
|
||||
ArrayBase(Index,Index);
|
||||
template<typename OtherDerived> explicit ArrayBase(const ArrayBase<OtherDerived>&);
|
||||
protected:
|
||||
// mixing arrays and matrices is not legal
|
||||
template<typename OtherDerived> Derived& operator+=(const MatrixBase<OtherDerived>& mat)
|
||||
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
|
||||
// mixing arrays and matrices is not legal
|
||||
template<typename OtherDerived> Derived& operator-=(const MatrixBase<OtherDerived>& mat)
|
||||
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
|
||||
};
|
||||
|
||||
/** replaces \c *this by \c *this - \a other.
|
||||
@@ -177,7 +185,7 @@ template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
|
||||
{
|
||||
SelfCwiseBinaryOp<ei_scalar_difference_op<Scalar>, Derived> tmp(derived());
|
||||
SelfCwiseBinaryOp<ei_scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
|
||||
tmp = other;
|
||||
return derived();
|
||||
}
|
||||
@@ -191,7 +199,7 @@ template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
|
||||
{
|
||||
SelfCwiseBinaryOp<ei_scalar_sum_op<Scalar>, Derived> tmp(derived());
|
||||
SelfCwiseBinaryOp<ei_scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
|
||||
tmp = other.derived();
|
||||
return derived();
|
||||
}
|
||||
@@ -205,7 +213,7 @@ template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
|
||||
{
|
||||
SelfCwiseBinaryOp<ei_scalar_product_op<Scalar>, Derived> tmp(derived());
|
||||
SelfCwiseBinaryOp<ei_scalar_product_op<Scalar>, Derived, OtherDerived> tmp(derived());
|
||||
tmp = other.derived();
|
||||
return derived();
|
||||
}
|
||||
@@ -219,7 +227,7 @@ template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
|
||||
{
|
||||
SelfCwiseBinaryOp<ei_scalar_quotient_op<Scalar>, Derived> tmp(derived());
|
||||
SelfCwiseBinaryOp<ei_scalar_quotient_op<Scalar>, Derived, OtherDerived> tmp(derived());
|
||||
tmp = other.derived();
|
||||
return derived();
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#define EIGEN_ARRAYWRAPPER_H
|
||||
|
||||
/** \class ArrayWrapper
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of a mathematical vector or matrix as an array object
|
||||
*
|
||||
@@ -110,6 +111,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
};
|
||||
|
||||
/** \class MatrixWrapper
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of an array as a mathematical vector or matrix
|
||||
*
|
||||
|
||||
@@ -256,6 +256,12 @@ struct ei_assign_impl;
|
||||
*** Default traversal ***
|
||||
************************/
|
||||
|
||||
template<typename Derived1, typename Derived2, int Unrolling>
|
||||
struct ei_assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling>
|
||||
{
|
||||
inline static void run(Derived1 &, const Derived2 &) { }
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
|
||||
{
|
||||
@@ -397,7 +403,12 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const Index size = dst.size();
|
||||
const Index packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
|
||||
typedef ei_packet_traits<typename Derived1::Scalar> PacketTraits;
|
||||
enum {
|
||||
packetSize = PacketTraits::size,
|
||||
dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(ei_assign_traits<Derived1,Derived2>::DstIsAligned) ,
|
||||
srcAlignment = ei_assign_traits<Derived1,Derived2>::JointAlignment
|
||||
};
|
||||
const Index alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
|
||||
: ei_first_aligned(&dst.coeffRef(0), size);
|
||||
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
|
||||
@@ -406,7 +417,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling
|
||||
|
||||
for(Index index = alignedStart; index < alignedEnd; index += packetSize)
|
||||
{
|
||||
dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::JointAlignment>(index, src);
|
||||
dst.template copyPacket<Derived2, dstAlignment, srcAlignment>(index, src);
|
||||
}
|
||||
|
||||
ei_unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
|
||||
@@ -438,12 +449,18 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
|
||||
typedef typename Derived1::Index Index;
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const Index packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
|
||||
typedef ei_packet_traits<typename Derived1::Scalar> PacketTraits;
|
||||
enum {
|
||||
packetSize = PacketTraits::size,
|
||||
alignable = PacketTraits::AlignedOnScalar,
|
||||
dstAlignment = alignable ? Aligned : int(ei_assign_traits<Derived1,Derived2>::DstIsAligned) ,
|
||||
srcAlignment = ei_assign_traits<Derived1,Derived2>::JointAlignment
|
||||
};
|
||||
const Index packetAlignedMask = packetSize - 1;
|
||||
const Index innerSize = dst.innerSize();
|
||||
const Index outerSize = dst.outerSize();
|
||||
const Index alignedStep = (packetSize - dst.outerStride() % packetSize) & packetAlignedMask;
|
||||
Index alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
|
||||
const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
|
||||
Index alignedStart = ((!alignable) || ei_assign_traits<Derived1,Derived2>::DstIsAligned) ? 0
|
||||
: ei_first_aligned(&dst.coeffRef(0,0), innerSize);
|
||||
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
@@ -475,14 +492,21 @@ template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
|
||||
::lazyAssign(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
enum{
|
||||
SameType = ei_is_same_type<typename Derived::Scalar,typename OtherDerived::Scalar>::ret
|
||||
};
|
||||
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
|
||||
EIGEN_STATIC_ASSERT((ei_is_same_type<typename Derived::Scalar, typename OtherDerived::Scalar>::ret),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
|
||||
|
||||
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
ei_assign_traits<Derived, OtherDerived>::debug();
|
||||
#endif
|
||||
ei_assert(rows() == other.rows() && cols() == other.cols());
|
||||
ei_assign_impl<Derived, OtherDerived>::run(derived(),other.derived());
|
||||
ei_assign_impl<Derived, OtherDerived, int(SameType) ? int(ei_assign_traits<Derived, OtherDerived>::Traversal)
|
||||
: int(InvalidTraversal)>::run(derived(),other.derived());
|
||||
#ifndef EIGEN_NO_DEBUG
|
||||
checkTransposeAliasing(other.derived());
|
||||
#endif
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
|
||||
/**
|
||||
* \class BandMatrix
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Represents a rectangular matrix with a banded storage
|
||||
*
|
||||
@@ -54,7 +55,7 @@ struct ei_traits<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
|
||||
ColsAtCompileTime = Cols,
|
||||
MaxRowsAtCompileTime = Rows,
|
||||
MaxColsAtCompileTime = Cols,
|
||||
Flags = 0
|
||||
Flags = LvalueBit
|
||||
};
|
||||
};
|
||||
|
||||
@@ -205,6 +206,7 @@ class BandMatrix : public EigenBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Opt
|
||||
|
||||
/**
|
||||
* \class TridiagonalMatrix
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Represents a tridiagonal matrix
|
||||
*
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define EIGEN_BLOCK_H
|
||||
|
||||
/** \class Block
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of a fixed-size or dynamic-size block
|
||||
*
|
||||
@@ -57,8 +58,8 @@
|
||||
*
|
||||
* \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
|
||||
*/
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess>
|
||||
struct ei_traits<Block<XprType, BlockRows, BlockCols, HasDirectAccess> > : ei_traits<XprType>
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess>
|
||||
struct ei_traits<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> > : ei_traits<XprType>
|
||||
{
|
||||
typedef typename ei_traits<XprType>::Scalar Scalar;
|
||||
typedef typename ei_traits<XprType>::StorageKind StorageKind;
|
||||
@@ -91,15 +92,16 @@ struct ei_traits<Block<XprType, BlockRows, BlockCols, HasDirectAccess> > : ei_tr
|
||||
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % ei_packet_traits<Scalar>::size) == 0)
|
||||
&& (InnerStrideAtCompileTime == 1)
|
||||
? PacketAccessBit : 0,
|
||||
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && ((OuterStrideAtCompileTime % ei_packet_traits<Scalar>::size) == 0)) ? AlignedBit : 0,
|
||||
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
|
||||
Flags0 = ei_traits<XprType>::Flags & (HereditaryBits | MaskPacketAccessBit | DirectAccessBit),
|
||||
Flags0 = ei_traits<XprType>::Flags & (HereditaryBits | MaskPacketAccessBit | LvalueBit | DirectAccessBit | MaskAlignedBit),
|
||||
Flags1 = Flags0 | FlagsLinearAccessBit,
|
||||
Flags = (Flags1 & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0)
|
||||
};
|
||||
};
|
||||
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess> class Block
|
||||
: public ei_dense_xpr_base<Block<XprType, BlockRows, BlockCols, HasDirectAccess> >::type
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> class Block
|
||||
: public ei_dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> >::type
|
||||
{
|
||||
public:
|
||||
|
||||
@@ -228,9 +230,9 @@ template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess> c
|
||||
};
|
||||
|
||||
/** \internal */
|
||||
template<typename XprType, int BlockRows, int BlockCols>
|
||||
class Block<XprType,BlockRows,BlockCols,true>
|
||||
: public MapBase<Block<XprType, BlockRows, BlockCols,true> >
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
|
||||
class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
: public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel, true> >
|
||||
{
|
||||
public:
|
||||
|
||||
@@ -321,546 +323,5 @@ class Block<XprType,BlockRows,BlockCols,true>
|
||||
int m_outerStride;
|
||||
};
|
||||
|
||||
/** \returns a dynamic-size expression of a block in *this.
|
||||
*
|
||||
* \param startRow the first row in the block
|
||||
* \param startCol the first column in the block
|
||||
* \param blockRows the number of rows in the block
|
||||
* \param blockCols the number of columns in the block
|
||||
*
|
||||
* Example: \include MatrixBase_block_int_int_int_int.cpp
|
||||
* Output: \verbinclude MatrixBase_block_int_int_int_int.out
|
||||
*
|
||||
* \note Even though the returned expression has dynamic size, in the case
|
||||
* when it is applied to a fixed-size matrix, it inherits a fixed maximal size,
|
||||
* which means that evaluating it does not cause a dynamic memory allocation.
|
||||
*
|
||||
* \sa class Block, block(Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline Block<Derived> DenseBase<Derived>
|
||||
::block(Index startRow, Index startCol, Index blockRows, Index blockCols)
|
||||
{
|
||||
return Block<Derived>(derived(), startRow, startCol, blockRows, blockCols);
|
||||
}
|
||||
|
||||
/** This is the const version of block(Index,Index,Index,Index). */
|
||||
template<typename Derived>
|
||||
inline const Block<Derived> DenseBase<Derived>
|
||||
::block(Index startRow, Index startCol, Index blockRows, Index blockCols) const
|
||||
{
|
||||
return Block<Derived>(derived(), startRow, startCol, blockRows, blockCols);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/** \returns a dynamic-size expression of a top-right corner of *this.
|
||||
*
|
||||
* \param cRows the number of rows in the corner
|
||||
* \param cCols the number of columns in the corner
|
||||
*
|
||||
* Example: \include MatrixBase_topRightCorner_int_int.cpp
|
||||
* Output: \verbinclude MatrixBase_topRightCorner_int_int.out
|
||||
*
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline Block<Derived> DenseBase<Derived>
|
||||
::topRightCorner(Index cRows, Index cCols)
|
||||
{
|
||||
return Block<Derived>(derived(), 0, cols() - cCols, cRows, cCols);
|
||||
}
|
||||
|
||||
/** This is the const version of topRightCorner(Index, Index).*/
|
||||
template<typename Derived>
|
||||
inline const Block<Derived>
|
||||
DenseBase<Derived>::topRightCorner(Index cRows, Index cCols) const
|
||||
{
|
||||
return Block<Derived>(derived(), 0, cols() - cCols, cRows, cCols);
|
||||
}
|
||||
|
||||
/** \returns an expression of a fixed-size top-right corner of *this.
|
||||
*
|
||||
* The template parameters CRows and CCols are the number of rows and columns in the corner.
|
||||
*
|
||||
* Example: \include MatrixBase_template_int_int_topRightCorner.cpp
|
||||
* Output: \verbinclude MatrixBase_template_int_int_topRightCorner.out
|
||||
*
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<int CRows, int CCols>
|
||||
inline Block<Derived, CRows, CCols>
|
||||
DenseBase<Derived>::topRightCorner()
|
||||
{
|
||||
return Block<Derived, CRows, CCols>(derived(), 0, cols() - CCols);
|
||||
}
|
||||
|
||||
/** This is the const version of topRightCorner<int, int>().*/
|
||||
template<typename Derived>
|
||||
template<int CRows, int CCols>
|
||||
inline const Block<Derived, CRows, CCols>
|
||||
DenseBase<Derived>::topRightCorner() const
|
||||
{
|
||||
return Block<Derived, CRows, CCols>(derived(), 0, cols() - CCols);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/** \returns a dynamic-size expression of a top-left corner of *this.
|
||||
*
|
||||
* \param cRows the number of rows in the corner
|
||||
* \param cCols the number of columns in the corner
|
||||
*
|
||||
* Example: \include MatrixBase_topLeftCorner_int_int.cpp
|
||||
* Output: \verbinclude MatrixBase_topLeftCorner_int_int.out
|
||||
*
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline Block<Derived> DenseBase<Derived>
|
||||
::topLeftCorner(Index cRows, Index cCols)
|
||||
{
|
||||
return Block<Derived>(derived(), 0, 0, cRows, cCols);
|
||||
}
|
||||
|
||||
/** This is the const version of topLeftCorner(Index, Index).*/
|
||||
template<typename Derived>
|
||||
inline const Block<Derived>
|
||||
DenseBase<Derived>::topLeftCorner(Index cRows, Index cCols) const
|
||||
{
|
||||
return Block<Derived>(derived(), 0, 0, cRows, cCols);
|
||||
}
|
||||
|
||||
/** \returns an expression of a fixed-size top-left corner of *this.
|
||||
*
|
||||
* The template parameters CRows and CCols are the number of rows and columns in the corner.
|
||||
*
|
||||
* Example: \include MatrixBase_template_int_int_topLeftCorner.cpp
|
||||
* Output: \verbinclude MatrixBase_template_int_int_topLeftCorner.out
|
||||
*
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<int CRows, int CCols>
|
||||
inline Block<Derived, CRows, CCols>
|
||||
DenseBase<Derived>::topLeftCorner()
|
||||
{
|
||||
return Block<Derived, CRows, CCols>(derived(), 0, 0);
|
||||
}
|
||||
|
||||
/** This is the const version of topLeftCorner<int, int>().*/
|
||||
template<typename Derived>
|
||||
template<int CRows, int CCols>
|
||||
inline const Block<Derived, CRows, CCols>
|
||||
DenseBase<Derived>::topLeftCorner() const
|
||||
{
|
||||
return Block<Derived, CRows, CCols>(derived(), 0, 0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/** \returns a dynamic-size expression of a bottom-right corner of *this.
|
||||
*
|
||||
* \param cRows the number of rows in the corner
|
||||
* \param cCols the number of columns in the corner
|
||||
*
|
||||
* Example: \include MatrixBase_bottomRightCorner_int_int.cpp
|
||||
* Output: \verbinclude MatrixBase_bottomRightCorner_int_int.out
|
||||
*
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline Block<Derived> DenseBase<Derived>
|
||||
::bottomRightCorner(Index cRows, Index cCols)
|
||||
{
|
||||
return Block<Derived>(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
|
||||
}
|
||||
|
||||
/** This is the const version of bottomRightCorner(Index, Index).*/
|
||||
template<typename Derived>
|
||||
inline const Block<Derived>
|
||||
DenseBase<Derived>::bottomRightCorner(Index cRows, Index cCols) const
|
||||
{
|
||||
return Block<Derived>(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
|
||||
}
|
||||
|
||||
/** \returns an expression of a fixed-size bottom-right corner of *this.
|
||||
*
|
||||
* The template parameters CRows and CCols are the number of rows and columns in the corner.
|
||||
*
|
||||
* Example: \include MatrixBase_template_int_int_bottomRightCorner.cpp
|
||||
* Output: \verbinclude MatrixBase_template_int_int_bottomRightCorner.out
|
||||
*
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<int CRows, int CCols>
|
||||
inline Block<Derived, CRows, CCols>
|
||||
DenseBase<Derived>::bottomRightCorner()
|
||||
{
|
||||
return Block<Derived, CRows, CCols>(derived(), rows() - CRows, cols() - CCols);
|
||||
}
|
||||
|
||||
/** This is the const version of bottomRightCorner<int, int>().*/
|
||||
template<typename Derived>
|
||||
template<int CRows, int CCols>
|
||||
inline const Block<Derived, CRows, CCols>
|
||||
DenseBase<Derived>::bottomRightCorner() const
|
||||
{
|
||||
return Block<Derived, CRows, CCols>(derived(), rows() - CRows, cols() - CCols);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/** \returns a dynamic-size expression of a bottom-left corner of *this.
|
||||
*
|
||||
* \param cRows the number of rows in the corner
|
||||
* \param cCols the number of columns in the corner
|
||||
*
|
||||
* Example: \include MatrixBase_bottomLeftCorner_int_int.cpp
|
||||
* Output: \verbinclude MatrixBase_bottomLeftCorner_int_int.out
|
||||
*
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline Block<Derived> DenseBase<Derived>
|
||||
::bottomLeftCorner(Index cRows, Index cCols)
|
||||
{
|
||||
return Block<Derived>(derived(), rows() - cRows, 0, cRows, cCols);
|
||||
}
|
||||
|
||||
/** This is the const version of bottomLeftCorner(Index, Index).*/
|
||||
template<typename Derived>
|
||||
inline const Block<Derived>
|
||||
DenseBase<Derived>::bottomLeftCorner(Index cRows, Index cCols) const
|
||||
{
|
||||
return Block<Derived>(derived(), rows() - cRows, 0, cRows, cCols);
|
||||
}
|
||||
|
||||
/** \returns an expression of a fixed-size bottom-left corner of *this.
|
||||
*
|
||||
* The template parameters CRows and CCols are the number of rows and columns in the corner.
|
||||
*
|
||||
* Example: \include MatrixBase_template_int_int_bottomLeftCorner.cpp
|
||||
* Output: \verbinclude MatrixBase_template_int_int_bottomLeftCorner.out
|
||||
*
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<int CRows, int CCols>
|
||||
inline Block<Derived, CRows, CCols>
|
||||
DenseBase<Derived>::bottomLeftCorner()
|
||||
{
|
||||
return Block<Derived, CRows, CCols>(derived(), rows() - CRows, 0);
|
||||
}
|
||||
|
||||
/** This is the const version of bottomLeftCorner<int, int>().*/
|
||||
template<typename Derived>
|
||||
template<int CRows, int CCols>
|
||||
inline const Block<Derived, CRows, CCols>
|
||||
DenseBase<Derived>::bottomLeftCorner() const
|
||||
{
|
||||
return Block<Derived, CRows, CCols>(derived(), rows() - CRows, 0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** \returns a block consisting of the top rows of *this.
|
||||
*
|
||||
* \param n the number of rows in the block
|
||||
*
|
||||
* Example: \include MatrixBase_topRows_int.cpp
|
||||
* Output: \verbinclude MatrixBase_topRows_int.out
|
||||
*
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename DenseBase<Derived>::RowsBlockXpr DenseBase<Derived>
|
||||
::topRows(Index n)
|
||||
{
|
||||
return RowsBlockXpr(derived(), 0, 0, n, cols());
|
||||
}
|
||||
|
||||
/** This is the const version of topRows(Index).*/
|
||||
template<typename Derived>
|
||||
inline const typename DenseBase<Derived>::RowsBlockXpr
|
||||
DenseBase<Derived>::topRows(Index n) const
|
||||
{
|
||||
return RowsBlockXpr(derived(), 0, 0, n, cols());
|
||||
}
|
||||
|
||||
/** \returns a block consisting of the top rows of *this.
|
||||
*
|
||||
* \param N the number of rows in the block
|
||||
*
|
||||
* Example: \include MatrixBase_template_int_topRows.cpp
|
||||
* Output: \verbinclude MatrixBase_template_int_topRows.out
|
||||
*
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<int N>
|
||||
inline typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type
|
||||
DenseBase<Derived>::topRows()
|
||||
{
|
||||
return typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type(derived(), 0, 0, N, cols());
|
||||
}
|
||||
|
||||
/** This is the const version of topRows<int>().*/
|
||||
template<typename Derived>
|
||||
template<int N>
|
||||
inline const typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type
|
||||
DenseBase<Derived>::topRows() const
|
||||
{
|
||||
return typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type(derived(), 0, 0, N, cols());
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/** \returns a block consisting of the bottom rows of *this.
|
||||
*
|
||||
* \param n the number of rows in the block
|
||||
*
|
||||
* Example: \include MatrixBase_bottomRows_int.cpp
|
||||
* Output: \verbinclude MatrixBase_bottomRows_int.out
|
||||
*
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename DenseBase<Derived>::RowsBlockXpr DenseBase<Derived>
|
||||
::bottomRows(Index n)
|
||||
{
|
||||
return RowsBlockXpr(derived(), rows() - n, 0, n, cols());
|
||||
}
|
||||
|
||||
/** This is the const version of bottomRows(Index).*/
|
||||
template<typename Derived>
|
||||
inline const typename DenseBase<Derived>::RowsBlockXpr
|
||||
DenseBase<Derived>::bottomRows(Index n) const
|
||||
{
|
||||
return RowsBlockXpr(derived(), rows() - n, 0, n, cols());
|
||||
}
|
||||
|
||||
/** \returns a block consisting of the bottom rows of *this.
|
||||
*
|
||||
* \param N the number of rows in the block
|
||||
*
|
||||
* Example: \include MatrixBase_template_int_bottomRows.cpp
|
||||
* Output: \verbinclude MatrixBase_template_int_bottomRows.out
|
||||
*
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<int N>
|
||||
inline typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type
|
||||
DenseBase<Derived>::bottomRows()
|
||||
{
|
||||
return typename NRowsBlockXpr<N>::Type(derived(), rows() - N, 0, N, cols());
|
||||
}
|
||||
|
||||
/** This is the const version of bottomRows<int>().*/
|
||||
template<typename Derived>
|
||||
template<int N>
|
||||
inline const typename DenseBase<Derived>::template NRowsBlockXpr<N>::Type
|
||||
DenseBase<Derived>::bottomRows() const
|
||||
{
|
||||
return typename NRowsBlockXpr<N>::Type(derived(), rows() - N, 0, N, cols());
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/** \returns a block consisting of the left columns of *this.
|
||||
*
|
||||
* \param n the number of columns in the block
|
||||
*
|
||||
* Example: \include MatrixBase_leftCols_int.cpp
|
||||
* Output: \verbinclude MatrixBase_leftCols_int.out
|
||||
*
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename DenseBase<Derived>::ColsBlockXpr DenseBase<Derived>
|
||||
::leftCols(Index n)
|
||||
{
|
||||
return ColsBlockXpr(derived(), 0, 0, rows(), n);
|
||||
}
|
||||
|
||||
/** This is the const version of leftCols(Index).*/
|
||||
template<typename Derived>
|
||||
inline const typename DenseBase<Derived>::ColsBlockXpr
|
||||
DenseBase<Derived>::leftCols(Index n) const
|
||||
{
|
||||
return ColsBlockXpr(derived(), 0, 0, rows(), n);
|
||||
}
|
||||
|
||||
/** \returns a block consisting of the left columns of *this.
|
||||
*
|
||||
* \param N the number of columns in the block
|
||||
*
|
||||
* Example: \include MatrixBase_template_int_leftCols.cpp
|
||||
* Output: \verbinclude MatrixBase_template_int_leftCols.out
|
||||
*
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<int N>
|
||||
inline typename DenseBase<Derived>::template NColsBlockXpr<N>::Type
|
||||
DenseBase<Derived>::leftCols()
|
||||
{
|
||||
return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), N);
|
||||
}
|
||||
|
||||
/** This is the const version of leftCols<int>().*/
|
||||
template<typename Derived>
|
||||
template<int N>
|
||||
inline const typename DenseBase<Derived>::template NColsBlockXpr<N>::Type
|
||||
DenseBase<Derived>::leftCols() const
|
||||
{
|
||||
return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), N);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/** \returns a block consisting of the right columns of *this.
|
||||
*
|
||||
* \param n the number of columns in the block
|
||||
*
|
||||
* Example: \include MatrixBase_rightCols_int.cpp
|
||||
* Output: \verbinclude MatrixBase_rightCols_int.out
|
||||
*
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename DenseBase<Derived>::ColsBlockXpr DenseBase<Derived>
|
||||
::rightCols(Index n)
|
||||
{
|
||||
return ColsBlockXpr(derived(), 0, cols() - n, rows(), n);
|
||||
}
|
||||
|
||||
/** This is the const version of rightCols(Index).*/
|
||||
template<typename Derived>
|
||||
inline const typename DenseBase<Derived>::ColsBlockXpr
|
||||
DenseBase<Derived>::rightCols(Index n) const
|
||||
{
|
||||
return ColsBlockXpr(derived(), 0, cols() - n, rows(), n);
|
||||
}
|
||||
|
||||
/** \returns a block consisting of the right columns of *this.
|
||||
*
|
||||
* \param N the number of columns in the block
|
||||
*
|
||||
* Example: \include MatrixBase_template_int_rightCols.cpp
|
||||
* Output: \verbinclude MatrixBase_template_int_rightCols.out
|
||||
*
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<int N>
|
||||
inline typename DenseBase<Derived>::template NColsBlockXpr<N>::Type
|
||||
DenseBase<Derived>::rightCols()
|
||||
{
|
||||
return typename DenseBase<Derived>::template NColsBlockXpr<N>::Type(derived(), 0, cols() - N, rows(), N);
|
||||
}
|
||||
|
||||
/** This is the const version of rightCols<int>().*/
|
||||
template<typename Derived>
|
||||
template<int N>
|
||||
inline const typename DenseBase<Derived>::template NColsBlockXpr<N>::Type
|
||||
DenseBase<Derived>::rightCols() const
|
||||
{
|
||||
return typename DenseBase<Derived>::template NColsBlockXpr<N>::Type(derived(), 0, cols() - N, rows(), N);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/** \returns a fixed-size expression of a block in *this.
|
||||
*
|
||||
* The template parameters \a BlockRows and \a BlockCols are the number of
|
||||
* rows and columns in the block.
|
||||
*
|
||||
* \param startRow the first row in the block
|
||||
* \param startCol the first column in the block
|
||||
*
|
||||
* Example: \include MatrixBase_block_int_int.cpp
|
||||
* Output: \verbinclude MatrixBase_block_int_int.out
|
||||
*
|
||||
* \note since block is a templated member, the keyword template has to be used
|
||||
* if the matrix type is also a template parameter: \code m.template block<3,3>(1,1); \endcode
|
||||
*
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<int BlockRows, int BlockCols>
|
||||
inline Block<Derived, BlockRows, BlockCols>
|
||||
DenseBase<Derived>::block(Index startRow, Index startCol)
|
||||
{
|
||||
return Block<Derived, BlockRows, BlockCols>(derived(), startRow, startCol);
|
||||
}
|
||||
|
||||
/** This is the const version of block<>(Index, Index). */
|
||||
template<typename Derived>
|
||||
template<int BlockRows, int BlockCols>
|
||||
inline const Block<Derived, BlockRows, BlockCols>
|
||||
DenseBase<Derived>::block(Index startRow, Index startCol) const
|
||||
{
|
||||
return Block<Derived, BlockRows, BlockCols>(derived(), startRow, startCol);
|
||||
}
|
||||
|
||||
/** \returns an expression of the \a i-th column of *this. Note that the numbering starts at 0.
|
||||
*
|
||||
* Example: \include MatrixBase_col.cpp
|
||||
* Output: \verbinclude MatrixBase_col.out
|
||||
*
|
||||
* \sa row(), class Block */
|
||||
template<typename Derived>
|
||||
inline typename DenseBase<Derived>::ColXpr
|
||||
DenseBase<Derived>::col(Index i)
|
||||
{
|
||||
return ColXpr(derived(), i);
|
||||
}
|
||||
|
||||
/** This is the const version of col(). */
|
||||
template<typename Derived>
|
||||
inline const typename DenseBase<Derived>::ColXpr
|
||||
DenseBase<Derived>::col(Index i) const
|
||||
{
|
||||
return ColXpr(derived(), i);
|
||||
}
|
||||
|
||||
/** \returns an expression of the \a i-th row of *this. Note that the numbering starts at 0.
|
||||
*
|
||||
* Example: \include MatrixBase_row.cpp
|
||||
* Output: \verbinclude MatrixBase_row.out
|
||||
*
|
||||
* \sa col(), class Block */
|
||||
template<typename Derived>
|
||||
inline typename DenseBase<Derived>::RowXpr
|
||||
DenseBase<Derived>::row(Index i)
|
||||
{
|
||||
return RowXpr(derived(), i);
|
||||
}
|
||||
|
||||
/** This is the const version of row(). */
|
||||
template<typename Derived>
|
||||
inline const typename DenseBase<Derived>::RowXpr
|
||||
DenseBase<Derived>::row(Index i) const
|
||||
{
|
||||
return RowXpr(derived(), i);
|
||||
}
|
||||
|
||||
#endif // EIGEN_BLOCK_H
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define EIGEN_COMMAINITIALIZER_H
|
||||
|
||||
/** \class CommaInitializer
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Helper class used by the comma initializer operator
|
||||
*
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define EIGEN_CWISE_BINARY_OP_H
|
||||
|
||||
/** \class CwiseBinaryOp
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
|
||||
*
|
||||
@@ -79,13 +80,14 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
|
||||
RhsCoeffReadCost = _RhsNested::CoeffReadCost,
|
||||
LhsFlags = _LhsNested::Flags,
|
||||
RhsFlags = _RhsNested::Flags,
|
||||
SameType = ei_is_same_type<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::ret,
|
||||
StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit),
|
||||
Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
|
||||
HereditaryBits
|
||||
| (int(LhsFlags) & int(RhsFlags) &
|
||||
( AlignedBit
|
||||
| (StorageOrdersAgree ? LinearAccessBit : 0)
|
||||
| (ei_functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree ? PacketAccessBit : 0)
|
||||
| (ei_functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
|
||||
)
|
||||
)
|
||||
),
|
||||
@@ -94,6 +96,19 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
|
||||
};
|
||||
};
|
||||
|
||||
// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor
|
||||
// that would take two operands of different types. If there were such an example, then this check should be
|
||||
// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as
|
||||
// currently they take only one typename Scalar template parameter.
|
||||
// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
|
||||
// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
|
||||
// add together a float matrix and a double matrix.
|
||||
#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
|
||||
EIGEN_STATIC_ASSERT((ei_functor_allows_mixing_real_and_complex<BINOP>::ret \
|
||||
? int(ei_is_same_type<typename NumTraits<LHS>::Real, typename NumTraits<RHS>::Real>::ret) \
|
||||
: int(ei_is_same_type<LHS, RHS>::ret)), \
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
|
||||
template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
|
||||
class CwiseBinaryOpImpl;
|
||||
|
||||
@@ -120,17 +135,7 @@ class CwiseBinaryOp : ei_no_assignment_operator,
|
||||
EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& lhs, const Rhs& rhs, const BinaryOp& func = BinaryOp())
|
||||
: m_lhs(lhs), m_rhs(rhs), m_functor(func)
|
||||
{
|
||||
// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor
|
||||
// that would take two operands of different types. If there were such an example, then this check should be
|
||||
// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as
|
||||
// currently they take only one typename Scalar template parameter.
|
||||
// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
|
||||
// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
|
||||
// add together a float matrix and a double matrix.
|
||||
EIGEN_STATIC_ASSERT((ei_functor_allows_mixing_real_and_complex<BinaryOp>::ret
|
||||
? int(ei_is_same_type<typename Lhs::RealScalar, typename Rhs::RealScalar>::ret)
|
||||
: int(ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret)),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar);
|
||||
// require the sizes to match
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs)
|
||||
ei_assert(lhs.rows() == rhs.rows() && lhs.cols() == rhs.cols());
|
||||
@@ -210,8 +215,8 @@ template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
|
||||
{
|
||||
SelfCwiseBinaryOp<ei_scalar_difference_op<Scalar>, Derived> tmp(derived());
|
||||
tmp = other;
|
||||
SelfCwiseBinaryOp<ei_scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
|
||||
tmp = other.derived();
|
||||
return derived();
|
||||
}
|
||||
|
||||
@@ -224,7 +229,7 @@ template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
|
||||
{
|
||||
SelfCwiseBinaryOp<ei_scalar_sum_op<Scalar>, Derived> tmp(derived());
|
||||
SelfCwiseBinaryOp<ei_scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
|
||||
tmp = other.derived();
|
||||
return derived();
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#define EIGEN_CWISE_NULLARY_OP_H
|
||||
|
||||
/** \class CwiseNullaryOp
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic expression of a matrix where all coefficients are defined by a functor
|
||||
*
|
||||
@@ -239,16 +240,29 @@ DenseBase<Derived>::Constant(const Scalar& value)
|
||||
* Example: \include DenseBase_LinSpaced_seq.cpp
|
||||
* Output: \verbinclude DenseBase_LinSpaced_seq.out
|
||||
*
|
||||
* \sa setLinSpaced(const Scalar&,const Scalar&,Index), LinSpaced(Scalar,Scalar,Index), CwiseNullaryOp
|
||||
* \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Index,Scalar,Scalar), CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high, Index size)
|
||||
DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(size, ei_linspaced_op<Scalar,false>(low,high,size));
|
||||
}
|
||||
|
||||
/**
|
||||
* \copydoc DenseBase::LinSpaced(Sequential_t, Index, const Scalar&, const Scalar&)
|
||||
* Special version for fixed size types which does not require the size parameter.
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, ei_linspaced_op<Scalar,false>(low,high,Derived::SizeAtCompileTime));
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Sets a linearly spaced vector.
|
||||
*
|
||||
@@ -259,16 +273,29 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
|
||||
* Example: \include DenseBase_LinSpaced.cpp
|
||||
* Output: \verbinclude DenseBase_LinSpaced.out
|
||||
*
|
||||
* \sa setLinSpaced(const Scalar&,const Scalar&,Index), LinSpaced(Sequential_t,const Scalar&,const Scalar&,Index), CwiseNullaryOp
|
||||
* \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Sequential_t,Index,const Scalar&,const Scalar&), CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high, Index size)
|
||||
DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(size, ei_linspaced_op<Scalar,true>(low,high,size));
|
||||
}
|
||||
|
||||
/**
|
||||
* \copydoc DenseBase::LinSpaced(Index, const Scalar&, const Scalar&)
|
||||
* Special version for fixed size types which does not require the size parameter.
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, ei_linspaced_op<Scalar,true>(low,high,Derived::SizeAtCompileTime));
|
||||
}
|
||||
|
||||
/** \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
|
||||
template<typename Derived>
|
||||
bool DenseBase<Derived>::isApproxToConstant
|
||||
@@ -332,6 +359,7 @@ DenseStorageBase<Derived>::setConstant(Index size, const Scalar& value)
|
||||
*
|
||||
* \param rows the new number of rows
|
||||
* \param cols the new number of columns
|
||||
* \param value the value to which all coefficients are set
|
||||
*
|
||||
* Example: \include Matrix_setConstant_int_int.cpp
|
||||
* Output: \verbinclude Matrix_setConstant_int_int.out
|
||||
@@ -359,7 +387,7 @@ DenseStorageBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val
|
||||
* \sa CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high, Index size)
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index size, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return derived() = Derived::NullaryExpr(size, ei_linspaced_op<Scalar,false>(low,high,size));
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define EIGEN_CWISE_UNARY_OP_H
|
||||
|
||||
/** \class CwiseUnaryOp
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic expression where a coefficient-wise unary operator is applied to an expression
|
||||
*
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#define EIGEN_CWISE_UNARY_VIEW_H
|
||||
|
||||
/** \class CwiseUnaryView
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
|
||||
*
|
||||
@@ -47,7 +48,7 @@ struct ei_traits<CwiseUnaryView<ViewOp, MatrixType> >
|
||||
typedef typename MatrixType::Nested MatrixTypeNested;
|
||||
typedef typename ei_cleantype<MatrixTypeNested>::type _MatrixTypeNested;
|
||||
enum {
|
||||
Flags = (ei_traits<_MatrixTypeNested>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)),
|
||||
Flags = (ei_traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)),
|
||||
CoeffReadCost = ei_traits<_MatrixTypeNested>::CoeffReadCost + ei_functor_traits<ViewOp>::Cost,
|
||||
MatrixTypeInnerStride = ei_inner_stride_at_compile_time<MatrixType>::ret,
|
||||
// need to cast the sizeof's from size_t to int explicitly, otherwise:
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define EIGEN_DENSEBASE_H
|
||||
|
||||
/** \class DenseBase
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for all dense matrices, vectors, and arrays
|
||||
*
|
||||
@@ -34,6 +35,8 @@
|
||||
* and related expression types). The common Eigen API for dense objects is contained in this class.
|
||||
*
|
||||
* \param Derived is the derived type, e.g., a matrix type or an expression.
|
||||
*
|
||||
* \sa \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived> class DenseBase
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
@@ -44,14 +47,13 @@ template<typename Derived> class DenseBase
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
{
|
||||
public:
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
using ei_special_scalar_op_base<Derived,typename ei_traits<Derived>::Scalar,
|
||||
typename NumTraits<typename ei_traits<Derived>::Scalar>::Real>::operator*;
|
||||
|
||||
class InnerIterator;
|
||||
|
||||
typedef typename ei_traits<Derived>::StorageKind StorageKind;
|
||||
typedef typename ei_traits<Derived>::Index Index;
|
||||
typedef typename ei_traits<Derived>::Index Index; /**< The type of indices */
|
||||
typedef typename ei_traits<Derived>::Scalar Scalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
@@ -87,9 +89,7 @@ template<typename Derived> class DenseBase
|
||||
using Base::outerStride;
|
||||
using Base::rowStride;
|
||||
using Base::colStride;
|
||||
using typename Base::CoeffReturnType;
|
||||
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
typedef typename Base::CoeffReturnType CoeffReturnType;
|
||||
|
||||
enum {
|
||||
|
||||
@@ -234,19 +234,6 @@ template<typename Derived> class DenseBase
|
||||
typedef CwiseNullaryOp<ei_linspaced_op<Scalar,true>,Derived> RandomAccessLinSpacedReturnType;
|
||||
/** \internal the return type of MatrixBase::eigenvalues() */
|
||||
typedef Matrix<typename NumTraits<typename ei_traits<Derived>::Scalar>::Real, ei_traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
|
||||
/** \internal expression type of a column */
|
||||
typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, 1> ColXpr;
|
||||
/** \internal expression type of a row */
|
||||
typedef Block<Derived, 1, ei_traits<Derived>::ColsAtCompileTime> RowXpr;
|
||||
/** \internal expression type of a block of whole columns */
|
||||
typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, Dynamic> ColsBlockXpr;
|
||||
/** \internal expression type of a block of whole rows */
|
||||
typedef Block<Derived, Dynamic, ei_traits<Derived>::ColsAtCompileTime> RowsBlockXpr;
|
||||
/** \internal expression type of a block of whole columns */
|
||||
template<int N> struct NColsBlockXpr { typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, N> Type; };
|
||||
/** \internal expression type of a block of whole rows */
|
||||
template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, ei_traits<Derived>::ColsAtCompileTime> Type; };
|
||||
|
||||
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
@@ -295,15 +282,6 @@ template<typename Derived> class DenseBase
|
||||
public:
|
||||
#endif
|
||||
|
||||
RowXpr row(Index i);
|
||||
const RowXpr row(Index i) const;
|
||||
|
||||
ColXpr col(Index i);
|
||||
const ColXpr col(Index i) const;
|
||||
|
||||
Block<Derived> block(Index startRow, Index startCol, Index blockRows, Index blockCols);
|
||||
const Block<Derived> block(Index startRow, Index startCol, Index blockRows, Index blockCols) const;
|
||||
|
||||
VectorBlock<Derived> segment(Index start, Index size);
|
||||
const VectorBlock<Derived> segment(Index start, Index size) const;
|
||||
|
||||
@@ -313,47 +291,6 @@ template<typename Derived> class DenseBase
|
||||
VectorBlock<Derived> tail(Index size);
|
||||
const VectorBlock<Derived> tail(Index size) const;
|
||||
|
||||
Block<Derived> topLeftCorner(Index cRows, Index cCols);
|
||||
const Block<Derived> topLeftCorner(Index cRows, Index cCols) const;
|
||||
Block<Derived> topRightCorner(Index cRows, Index cCols);
|
||||
const Block<Derived> topRightCorner(Index cRows, Index cCols) const;
|
||||
Block<Derived> bottomLeftCorner(Index cRows, Index cCols);
|
||||
const Block<Derived> bottomLeftCorner(Index cRows, Index cCols) const;
|
||||
Block<Derived> bottomRightCorner(Index cRows, Index cCols);
|
||||
const Block<Derived> bottomRightCorner(Index cRows, Index cCols) const;
|
||||
|
||||
RowsBlockXpr topRows(Index n);
|
||||
const RowsBlockXpr topRows(Index n) const;
|
||||
RowsBlockXpr bottomRows(Index n);
|
||||
const RowsBlockXpr bottomRows(Index n) const;
|
||||
ColsBlockXpr leftCols(Index n);
|
||||
const ColsBlockXpr leftCols(Index n) const;
|
||||
ColsBlockXpr rightCols(Index n);
|
||||
const ColsBlockXpr rightCols(Index n) const;
|
||||
|
||||
template<int CRows, int CCols> Block<Derived, CRows, CCols> topLeftCorner();
|
||||
template<int CRows, int CCols> const Block<Derived, CRows, CCols> topLeftCorner() const;
|
||||
template<int CRows, int CCols> Block<Derived, CRows, CCols> topRightCorner();
|
||||
template<int CRows, int CCols> const Block<Derived, CRows, CCols> topRightCorner() const;
|
||||
template<int CRows, int CCols> Block<Derived, CRows, CCols> bottomLeftCorner();
|
||||
template<int CRows, int CCols> const Block<Derived, CRows, CCols> bottomLeftCorner() const;
|
||||
template<int CRows, int CCols> Block<Derived, CRows, CCols> bottomRightCorner();
|
||||
template<int CRows, int CCols> const Block<Derived, CRows, CCols> bottomRightCorner() const;
|
||||
|
||||
template<int NRows> typename NRowsBlockXpr<NRows>::Type topRows();
|
||||
template<int NRows> const typename NRowsBlockXpr<NRows>::Type topRows() const;
|
||||
template<int NRows> typename NRowsBlockXpr<NRows>::Type bottomRows();
|
||||
template<int NRows> const typename NRowsBlockXpr<NRows>::Type bottomRows() const;
|
||||
template<int NCols> typename NColsBlockXpr<NCols>::Type leftCols();
|
||||
template<int NCols> const typename NColsBlockXpr<NCols>::Type leftCols() const;
|
||||
template<int NCols> typename NColsBlockXpr<NCols>::Type rightCols();
|
||||
template<int NCols> const typename NColsBlockXpr<NCols>::Type rightCols() const;
|
||||
|
||||
template<int BlockRows, int BlockCols>
|
||||
Block<Derived, BlockRows, BlockCols> block(Index startRow, Index startCol);
|
||||
template<int BlockRows, int BlockCols>
|
||||
const Block<Derived, BlockRows, BlockCols> block(Index startRow, Index startCol) const;
|
||||
|
||||
template<int Size> VectorBlock<Derived,Size> head(void);
|
||||
template<int Size> const VectorBlock<Derived,Size> head() const;
|
||||
|
||||
@@ -389,9 +326,13 @@ template<typename Derived> class DenseBase
|
||||
Constant(const Scalar& value);
|
||||
|
||||
static const SequentialLinSpacedReturnType
|
||||
LinSpaced(Sequential_t, const Scalar& low, const Scalar& high, Index size);
|
||||
LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high);
|
||||
static const RandomAccessLinSpacedReturnType
|
||||
LinSpaced(const Scalar& low, const Scalar& high, Index size);
|
||||
LinSpaced(Index size, const Scalar& low, const Scalar& high);
|
||||
static const SequentialLinSpacedReturnType
|
||||
LinSpaced(Sequential_t, const Scalar& low, const Scalar& high);
|
||||
static const RandomAccessLinSpacedReturnType
|
||||
LinSpaced(const Scalar& low, const Scalar& high);
|
||||
|
||||
template<typename CustomNullaryOp>
|
||||
static const CwiseNullaryOp<CustomNullaryOp, Derived>
|
||||
@@ -412,7 +353,8 @@ template<typename Derived> class DenseBase
|
||||
|
||||
void fill(const Scalar& value);
|
||||
Derived& setConstant(const Scalar& value);
|
||||
Derived& setLinSpaced(const Scalar& low, const Scalar& high, Index size);
|
||||
Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
|
||||
Derived& setLinSpaced(const Scalar& low, const Scalar& high);
|
||||
Derived& setZero();
|
||||
Derived& setOnes();
|
||||
Derived& setRandom();
|
||||
@@ -518,6 +460,13 @@ template<typename Derived> class DenseBase
|
||||
const Eigen::Reverse<Derived, BothDirections> reverse() const;
|
||||
void reverseInPlace();
|
||||
|
||||
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
|
||||
# include "../plugins/BlockMethods.h"
|
||||
# ifdef EIGEN_DENSEBASE_PLUGIN
|
||||
# include EIGEN_DENSEBASE_PLUGIN
|
||||
# endif
|
||||
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
|
||||
|
||||
#ifdef EIGEN2_SUPPORT
|
||||
|
||||
Block<Derived> corner(CornerType type, Index cRows, Index cCols);
|
||||
@@ -529,9 +478,6 @@ template<typename Derived> class DenseBase
|
||||
|
||||
#endif // EIGEN2_SUPPORT
|
||||
|
||||
#ifdef EIGEN_DENSEBASE_PLUGIN
|
||||
#include EIGEN_DENSEBASE_PLUGIN
|
||||
#endif
|
||||
|
||||
// disable the use of evalTo for dense objects with a nice compilation error
|
||||
template<typename Dest> inline void evalTo(Dest& ) const
|
||||
|
||||
@@ -25,15 +25,26 @@
|
||||
#ifndef EIGEN_DENSECOEFFSBASE_H
|
||||
#define EIGEN_DENSECOEFFSBASE_H
|
||||
|
||||
template<typename Derived, bool EnableDirectAccessAPI>
|
||||
class DenseCoeffsBase : public EigenBase<Derived>
|
||||
/** \brief Base class providing read-only coefficient access to matrices and arrays.
|
||||
* \ingroup Core_Module
|
||||
* \tparam Derived Type of the derived class
|
||||
* \tparam ReadOnlyAccessors Constant indicating read-only access
|
||||
*
|
||||
* This class defines the \c operator() \c const function and friends, which can be used to read specific
|
||||
* entries of a matrix or array.
|
||||
*
|
||||
* \sa DenseCoeffsBase<Derived, WriteAccessors>, DenseCoeffsBase<Derived, DirectAccessors>,
|
||||
* \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived>
|
||||
class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
{
|
||||
public:
|
||||
typedef typename ei_traits<Derived>::StorageKind StorageKind;
|
||||
typedef typename ei_traits<Derived>::Index Index;
|
||||
typedef typename ei_traits<Derived>::Scalar Scalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename ei_meta_if<ei_has_direct_access<Derived>::ret,
|
||||
typedef typename ei_meta_if<bool(ei_traits<Derived>::Flags&LvalueBit),
|
||||
const Scalar&,
|
||||
typename ei_meta_if<ei_is_arithmetic<Scalar>::ret, Scalar, const Scalar>::ret
|
||||
>::ret CoeffReturnType;
|
||||
@@ -238,12 +249,23 @@ class DenseCoeffsBase : public EigenBase<Derived>
|
||||
void colStride();
|
||||
};
|
||||
|
||||
/** \brief Base class providing read/write coefficient access to matrices and arrays.
|
||||
* \ingroup Core_Module
|
||||
* \tparam Derived Type of the derived class
|
||||
* \tparam WriteAccessors Constant indicating read/write access
|
||||
*
|
||||
* This class defines the non-const \c operator() function and friends, which can be used to write specific
|
||||
* entries of a matrix or array. This class inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which
|
||||
* defines the const variant for reading specific entries.
|
||||
*
|
||||
* \sa DenseCoeffsBase<Derived, DirectAccessors>, \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived>
|
||||
class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
|
||||
class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
|
||||
{
|
||||
public:
|
||||
|
||||
typedef DenseCoeffsBase<Derived, false> Base;
|
||||
typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
|
||||
|
||||
typedef typename ei_traits<Derived>::StorageKind StorageKind;
|
||||
typedef typename ei_traits<Derived>::Index Index;
|
||||
@@ -512,6 +534,34 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
|
||||
}
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
/** \brief Base class providing direct coefficient access to matrices and arrays.
|
||||
* \ingroup Core_Module
|
||||
* \tparam Derived Type of the derived class
|
||||
* \tparam DirectAccessors Constant indicating direct access
|
||||
*
|
||||
* This class defines functions to work with strides which can be used to access entries directly. This class
|
||||
* inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries using
|
||||
* \c operator() .
|
||||
*
|
||||
* \sa \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived>
|
||||
class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, WriteAccessors>
|
||||
{
|
||||
public:
|
||||
|
||||
typedef DenseCoeffsBase<Derived, WriteAccessors> Base;
|
||||
typedef typename ei_traits<Derived>::Index Index;
|
||||
typedef typename ei_traits<Derived>::Scalar Scalar;
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
|
||||
using Base::rows;
|
||||
using Base::cols;
|
||||
using Base::size;
|
||||
using Base::derived;
|
||||
|
||||
/** \returns the pointer increment between two consecutive elements within a slice in the inner direction.
|
||||
*
|
||||
* \sa outerStride(), rowStride(), colStride()
|
||||
@@ -531,6 +581,7 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false>
|
||||
return derived().outerStride();
|
||||
}
|
||||
|
||||
// FIXME shall we remove it ?
|
||||
inline Index stride() const
|
||||
{
|
||||
return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
|
||||
|
||||
@@ -36,8 +36,9 @@ template <typename Derived, typename OtherDerived = Derived, bool IsVector = sta
|
||||
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct ei_matrix_swap_impl;
|
||||
|
||||
/**
|
||||
* \brief Dense storage base class for matrices and arrays.
|
||||
**/
|
||||
* \brief %Dense storage base class for matrices and arrays.
|
||||
* \sa \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived>
|
||||
class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
|
||||
{
|
||||
@@ -108,7 +109,7 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
|
||||
{
|
||||
return ei_ploadt<Scalar, LoadMode>
|
||||
return ei_ploadt<PacketScalar, LoadMode>
|
||||
(m_storage.data() + (Flags & RowMajorBit
|
||||
? col + row * m_storage.cols()
|
||||
: row + col * m_storage.rows()));
|
||||
@@ -117,7 +118,7 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
|
||||
{
|
||||
return ei_ploadt<Scalar, LoadMode>(m_storage.data() + index);
|
||||
return ei_ploadt<PacketScalar, LoadMode>(m_storage.data() + index);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
@@ -432,8 +433,9 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
|
||||
ei_assert((this->size()==0 || (IsVectorAtCompileTime ? (this->size() == other.size())
|
||||
: (rows() == other.rows() && cols() == other.cols())))
|
||||
&& "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
|
||||
#endif
|
||||
#else
|
||||
resizeLike(other);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -482,8 +484,8 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type
|
||||
template<typename T0, typename T1>
|
||||
EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename ei_enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
|
||||
{
|
||||
ei_assert(rows > 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
|
||||
&& cols > 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
|
||||
ei_assert(rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
|
||||
&& cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
|
||||
m_storage.resize(rows*cols,rows,cols);
|
||||
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#define EIGEN_DIAGONAL_H
|
||||
|
||||
/** \class Diagonal
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of a diagonal/subdiagonal/superdiagonal in a matrix
|
||||
*
|
||||
@@ -61,7 +62,7 @@ struct ei_traits<Diagonal<MatrixType,DiagIndex> >
|
||||
MatrixType::MaxColsAtCompileTime)
|
||||
: (EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::MaxRowsAtCompileTime, MatrixType::MaxColsAtCompileTime) - AbsDiagIndex),
|
||||
MaxColsAtCompileTime = 1,
|
||||
Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit,
|
||||
Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | LvalueBit | DirectAccessBit) & ~RowMajorBit,
|
||||
CoeffReadCost = _MatrixTypeNested::CoeffReadCost,
|
||||
MatrixTypeOuterStride = ei_outer_stride_at_compile_time<MatrixType>::ret,
|
||||
InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1,
|
||||
@@ -125,6 +126,9 @@ template<typename MatrixType, int DiagIndex> class Diagonal
|
||||
EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); }
|
||||
EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
|
||||
EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
|
||||
// trigger a compile-time error if someone tries to call packet
|
||||
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
|
||||
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index,Index) const;
|
||||
};
|
||||
|
||||
/** \returns an expression of the main diagonal of the matrix \c *this
|
||||
|
||||
@@ -87,6 +87,7 @@ void DiagonalBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
|
||||
#endif
|
||||
|
||||
/** \class DiagonalMatrix
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Represents a diagonal matrix with its storage
|
||||
*
|
||||
@@ -104,6 +105,9 @@ struct ei_traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime>
|
||||
typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType;
|
||||
typedef Dense StorageKind;
|
||||
typedef DenseIndex Index;
|
||||
enum {
|
||||
Flags = LvalueBit
|
||||
};
|
||||
};
|
||||
|
||||
template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime>
|
||||
@@ -170,7 +174,7 @@ class DiagonalMatrix
|
||||
*/
|
||||
DiagonalMatrix& operator=(const DiagonalMatrix& other)
|
||||
{
|
||||
m_diagonal = other.m_diagonal();
|
||||
m_diagonal = other.diagonal();
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
@@ -188,6 +192,7 @@ class DiagonalMatrix
|
||||
};
|
||||
|
||||
/** \class DiagonalWrapper
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of a diagonal matrix
|
||||
*
|
||||
@@ -211,7 +216,7 @@ struct ei_traits<DiagonalWrapper<_DiagonalVectorType> >
|
||||
ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
|
||||
MaxRowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
|
||||
MaxColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
|
||||
Flags = 0
|
||||
Flags = ei_traits<DiagonalVectorType>::Flags & LvalueBit
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@@ -36,8 +36,16 @@ struct ei_traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
|
||||
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
|
||||
Flags = (HereditaryBits & (unsigned int)(MatrixType::Flags))
|
||||
| (PacketAccessBit & (unsigned int)(MatrixType::Flags) & (unsigned int)(DiagonalType::DiagonalVectorType::Flags)),
|
||||
|
||||
_StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor,
|
||||
_PacketOnDiag = !((int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft)
|
||||
||(int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)),
|
||||
_SameTypes = ei_is_same_type<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ret,
|
||||
// FIXME currently we need same types, but in the future the next rule should be the one
|
||||
//_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::Flags)&PacketAccessBit))),
|
||||
_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && ((!_PacketOnDiag) || (bool(int(DiagonalType::Flags)&PacketAccessBit))),
|
||||
|
||||
Flags = (HereditaryBits & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0),
|
||||
CoeffReadCost = NumTraits<Scalar>::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost
|
||||
};
|
||||
};
|
||||
@@ -69,26 +77,34 @@ class DiagonalProduct : ei_no_assignment_operator,
|
||||
EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
|
||||
{
|
||||
enum {
|
||||
StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor,
|
||||
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
|
||||
DiagonalVectorPacketLoadMode = (LoadMode == Aligned && ((InnerSize%16) == 0)) ? Aligned : Unaligned
|
||||
StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor
|
||||
};
|
||||
const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? row : col;
|
||||
|
||||
if((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft)
|
||||
||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight))
|
||||
{
|
||||
return ei_pmul(m_matrix.template packet<LoadMode>(row, col),
|
||||
ei_pset1(m_diagonal.diagonal().coeff(indexInDiagonalVector)));
|
||||
}
|
||||
else
|
||||
{
|
||||
return ei_pmul(m_matrix.template packet<LoadMode>(row, col),
|
||||
m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(indexInDiagonalVector));
|
||||
}
|
||||
return packet_impl<LoadMode>(row,col,indexInDiagonalVector,typename ei_meta_if<
|
||||
((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft)
|
||||
||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), ei_meta_true, ei_meta_false>::ret());
|
||||
}
|
||||
|
||||
protected:
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, ei_meta_true) const
|
||||
{
|
||||
return ei_pmul(m_matrix.template packet<LoadMode>(row, col),
|
||||
ei_pset1<PacketScalar>(m_diagonal.diagonal().coeff(id)));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, ei_meta_false) const
|
||||
{
|
||||
enum {
|
||||
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
|
||||
DiagonalVectorPacketLoadMode = (LoadMode == Aligned && ((InnerSize%16) == 0)) ? Aligned : Unaligned
|
||||
};
|
||||
return ei_pmul(m_matrix.template packet<LoadMode>(row, col),
|
||||
m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(id));
|
||||
}
|
||||
|
||||
const typename MatrixType::Nested m_matrix;
|
||||
const typename DiagonalType::Nested m_diagonal;
|
||||
};
|
||||
|
||||
@@ -41,7 +41,7 @@ struct ei_dot_nocheck
|
||||
{
|
||||
static inline typename ei_traits<T>::Scalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
|
||||
{
|
||||
return a.conjugate().cwiseProduct(b).sum();
|
||||
return a.template binaryExpr<ei_scalar_conj_product_op<typename ei_traits<T>::Scalar> >(b).sum();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -50,7 +50,7 @@ struct ei_dot_nocheck<T, U, true>
|
||||
{
|
||||
static inline typename ei_traits<T>::Scalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
|
||||
{
|
||||
return a.adjoint().cwiseProduct(b).sum();
|
||||
return a.transpose().template binaryExpr<ei_scalar_conj_product_op<typename ei_traits<T>::Scalar> >(b).sum();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -34,6 +34,8 @@
|
||||
* Besides MatrixBase-derived classes, this also includes special matrix classes such as diagonal matrices, etc.
|
||||
*
|
||||
* Notice that this class is trivial, it is only used to disambiguate overloaded functions.
|
||||
*
|
||||
* \sa \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived> struct EigenBase
|
||||
{
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#define EIGEN_FLAGGED_H
|
||||
|
||||
/** \class Flagged
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression with modified flags
|
||||
*
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#define EIGEN_FORCEALIGNEDACCESS_H
|
||||
|
||||
/** \class ForceAlignedAccess
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Enforce aligned packet loads and stores regardless of what is requested
|
||||
*
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
@@ -35,18 +35,18 @@
|
||||
template<typename Scalar> struct ei_scalar_sum_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_sum_op)
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; }
|
||||
template<typename PacketScalar>
|
||||
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return ei_padd(a,b); }
|
||||
template<typename PacketScalar>
|
||||
EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
|
||||
{ return ei_predux(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_sum_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
||||
PacketAccess = ei_packet_traits<Scalar>::HasAdd
|
||||
};
|
||||
};
|
||||
|
||||
@@ -55,21 +55,47 @@ struct ei_functor_traits<ei_scalar_sum_op<Scalar> > {
|
||||
*
|
||||
* \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux()
|
||||
*/
|
||||
template<typename Scalar> struct ei_scalar_product_op {
|
||||
template<typename LhsScalar,typename RhsScalar> struct ei_scalar_product_op {
|
||||
enum {
|
||||
Vectorizable = ei_is_same_type<LhsScalar,RhsScalar>::ret && ei_packet_traits<LhsScalar>::HasMul && ei_packet_traits<RhsScalar>::HasMul
|
||||
};
|
||||
typedef typename ei_scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_product_op)
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a * b; }
|
||||
template<typename PacketScalar>
|
||||
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
|
||||
EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return ei_pmul(a,b); }
|
||||
template<typename PacketScalar>
|
||||
EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
|
||||
{ return ei_predux_mul(a); }
|
||||
};
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct ei_functor_traits<ei_scalar_product_op<LhsScalar,RhsScalar> > {
|
||||
enum {
|
||||
Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
|
||||
PacketAccess = ei_scalar_product_op<LhsScalar,RhsScalar>::Vectorizable
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the conjugate product of two scalars
|
||||
*
|
||||
* This is a shortcut for ei_conj(x) * y, which is needed for optimization purposes
|
||||
*/
|
||||
template<typename Scalar> struct ei_scalar_conj_product_op {
|
||||
enum { Conj = NumTraits<Scalar>::IsComplex };
|
||||
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_conj_product_op)
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const
|
||||
{ return ei_conj_helper<Scalar,Scalar,Conj,false>().pmul(a,b); }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return ei_conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_product_op<Scalar> > {
|
||||
struct ei_functor_traits<ei_scalar_conj_product_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
||||
PacketAccess = ei_packet_traits<Scalar>::HasMul
|
||||
};
|
||||
};
|
||||
|
||||
@@ -81,18 +107,18 @@ struct ei_functor_traits<ei_scalar_product_op<Scalar> > {
|
||||
template<typename Scalar> struct ei_scalar_min_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_min_op)
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::min(a, b); }
|
||||
template<typename PacketScalar>
|
||||
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return ei_pmin(a,b); }
|
||||
template<typename PacketScalar>
|
||||
EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
|
||||
{ return ei_predux_min(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_min_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
||||
PacketAccess = ei_packet_traits<Scalar>::HasMin
|
||||
};
|
||||
};
|
||||
|
||||
@@ -104,18 +130,18 @@ struct ei_functor_traits<ei_scalar_min_op<Scalar> > {
|
||||
template<typename Scalar> struct ei_scalar_max_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_max_op)
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::max(a, b); }
|
||||
template<typename PacketScalar>
|
||||
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return ei_pmax(a,b); }
|
||||
template<typename PacketScalar>
|
||||
EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
|
||||
{ return ei_predux_max(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_max_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
||||
PacketAccess = ei_packet_traits<Scalar>::HasMax
|
||||
};
|
||||
};
|
||||
|
||||
@@ -150,15 +176,15 @@ struct ei_functor_traits<ei_scalar_hypot_op<Scalar> > {
|
||||
template<typename Scalar> struct ei_scalar_difference_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_difference_op)
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; }
|
||||
template<typename PacketScalar>
|
||||
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return ei_psub(a,b); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_difference_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
||||
PacketAccess = ei_packet_traits<Scalar>::HasSub
|
||||
};
|
||||
};
|
||||
|
||||
@@ -170,18 +196,15 @@ struct ei_functor_traits<ei_scalar_difference_op<Scalar> > {
|
||||
template<typename Scalar> struct ei_scalar_quotient_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_quotient_op)
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a / b; }
|
||||
template<typename PacketScalar>
|
||||
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return ei_pdiv(a,b); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_quotient_op<Scalar> > {
|
||||
enum {
|
||||
Cost = 2 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
||||
#if (defined EIGEN_VECTORIZE)
|
||||
&& !NumTraits<Scalar>::IsInteger
|
||||
#endif
|
||||
PacketAccess = ei_packet_traits<Scalar>::HasDiv
|
||||
};
|
||||
};
|
||||
|
||||
@@ -195,15 +218,15 @@ struct ei_functor_traits<ei_scalar_quotient_op<Scalar> > {
|
||||
template<typename Scalar> struct ei_scalar_opposite_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_opposite_op)
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
|
||||
template<typename PacketScalar>
|
||||
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return ei_pnegate(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_opposite_op<Scalar> >
|
||||
{ enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = int(ei_packet_traits<Scalar>::size)>1 };
|
||||
PacketAccess = ei_packet_traits<Scalar>::HasNegate };
|
||||
};
|
||||
|
||||
/** \internal
|
||||
@@ -215,8 +238,8 @@ template<typename Scalar> struct ei_scalar_abs_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_abs_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return ei_abs(a); }
|
||||
template<typename PacketScalar>
|
||||
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return ei_pabs(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
@@ -224,7 +247,7 @@ struct ei_functor_traits<ei_scalar_abs_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = int(ei_packet_traits<Scalar>::size)>1
|
||||
PacketAccess = ei_packet_traits<Scalar>::HasAbs
|
||||
};
|
||||
};
|
||||
|
||||
@@ -237,13 +260,13 @@ template<typename Scalar> struct ei_scalar_abs2_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_abs2_op)
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return ei_abs2(a); }
|
||||
template<typename PacketScalar>
|
||||
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return ei_pmul(a,a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_abs2_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasAbs2 }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the conjugate of a complex value
|
||||
@@ -253,15 +276,15 @@ struct ei_functor_traits<ei_scalar_abs2_op<Scalar> >
|
||||
template<typename Scalar> struct ei_scalar_conjugate_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_conjugate_op)
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return ei_conj(a); }
|
||||
template<typename PacketScalar>
|
||||
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const { return a; }
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return ei_pconj(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_conjugate_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0,
|
||||
PacketAccess = int(ei_packet_traits<Scalar>::size)>1
|
||||
PacketAccess = ei_packet_traits<Scalar>::HasConj
|
||||
};
|
||||
};
|
||||
|
||||
@@ -378,27 +401,27 @@ struct ei_functor_traits<ei_scalar_log_op<Scalar> >
|
||||
* \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/
|
||||
*/
|
||||
/* NOTE why doing the ei_pset1() in packetOp *is* an optimization ?
|
||||
* indeed it seems better to declare m_other as a PacketScalar and do the ei_pset1() once
|
||||
* indeed it seems better to declare m_other as a Packet and do the ei_pset1() once
|
||||
* in the constructor. However, in practice:
|
||||
* - GCC does not like m_other as a PacketScalar and generate a load every time it needs it
|
||||
* - GCC does not like m_other as a Packet and generate a load every time it needs it
|
||||
* - on the other hand GCC is able to moves the ei_pset1() away the loop :)
|
||||
* - simpler code ;)
|
||||
* (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y)
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct ei_scalar_multiple_op {
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
EIGEN_STRONG_INLINE ei_scalar_multiple_op(const ei_scalar_multiple_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_STRONG_INLINE ei_scalar_multiple_op(const Scalar& other) : m_other(other) { }
|
||||
EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
|
||||
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
|
||||
{ return ei_pmul(a, ei_pset1(m_other)); }
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return ei_pmul(a, ei_pset1<Packet>(m_other)); }
|
||||
typename ei_makeconst<typename NumTraits<Scalar>::Nested>::type m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_multiple_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; };
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
|
||||
|
||||
template<typename Scalar1, typename Scalar2>
|
||||
struct ei_scalar_multiple2_op {
|
||||
@@ -414,18 +437,18 @@ struct ei_functor_traits<ei_scalar_multiple2_op<Scalar1,Scalar2> >
|
||||
|
||||
template<typename Scalar, bool IsInteger>
|
||||
struct ei_scalar_quotient1_impl {
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const ei_scalar_quotient1_impl& other) : m_other(other.m_other) { }
|
||||
EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const Scalar& other) : m_other(static_cast<Scalar>(1) / other) {}
|
||||
EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
|
||||
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
|
||||
{ return ei_pmul(a, ei_pset1(m_other)); }
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return ei_pmul(a, ei_pset1<Packet>(m_other)); }
|
||||
const Scalar m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_quotient1_impl<Scalar,false> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; };
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
|
||||
|
||||
template<typename Scalar>
|
||||
struct ei_scalar_quotient1_impl<Scalar,true> {
|
||||
@@ -461,18 +484,19 @@ struct ei_functor_traits<ei_scalar_quotient1_op<Scalar> >
|
||||
|
||||
template<typename Scalar>
|
||||
struct ei_scalar_constant_op {
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||
EIGEN_STRONG_INLINE ei_scalar_constant_op(const ei_scalar_constant_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_STRONG_INLINE ei_scalar_constant_op(const Scalar& other) : m_other(other) { }
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; }
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const PacketScalar packetOp(Index, Index = 0) const { return ei_pset1(m_other); }
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return ei_pset1<Packet>(m_other); }
|
||||
const Scalar m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_constant_op<Scalar> >
|
||||
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };
|
||||
// FIXME replace this packet test by a safe one
|
||||
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };
|
||||
|
||||
template<typename Scalar> struct ei_scalar_identity_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_identity_op)
|
||||
@@ -493,22 +517,22 @@ template <typename Scalar, bool RandomAccess> struct ei_linspaced_op_impl;
|
||||
template <typename Scalar>
|
||||
struct ei_linspaced_op_impl<Scalar,false>
|
||||
{
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||
|
||||
ei_linspaced_op_impl(Scalar low, Scalar step) :
|
||||
m_low(low), m_step(step),
|
||||
m_packetStep(ei_pset1(ei_packet_traits<Scalar>::size*step)),
|
||||
m_base(ei_padd(ei_pset1(low),ei_pmul(ei_pset1(step),ei_plset<Scalar>(-ei_packet_traits<Scalar>::size)))) {}
|
||||
m_packetStep(ei_pset1<Packet>(ei_packet_traits<Scalar>::size*step)),
|
||||
m_base(ei_padd(ei_pset1<Packet>(low),ei_pmul(ei_pset1<Packet>(step),ei_plset<Scalar>(-ei_packet_traits<Scalar>::size)))) {}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const PacketScalar packetOp(Index) const { return m_base = ei_padd(m_base,m_packetStep); }
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = ei_padd(m_base,m_packetStep); }
|
||||
|
||||
const Scalar m_low;
|
||||
const Scalar m_step;
|
||||
const PacketScalar m_packetStep;
|
||||
mutable PacketScalar m_base;
|
||||
const Packet m_packetStep;
|
||||
mutable Packet m_base;
|
||||
};
|
||||
|
||||
// random access for packet ops:
|
||||
@@ -517,23 +541,23 @@ struct ei_linspaced_op_impl<Scalar,false>
|
||||
template <typename Scalar>
|
||||
struct ei_linspaced_op_impl<Scalar,true>
|
||||
{
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||
|
||||
ei_linspaced_op_impl(Scalar low, Scalar step) :
|
||||
m_low(low), m_step(step),
|
||||
m_lowPacket(ei_pset1(m_low)), m_stepPacket(ei_pset1(m_step)), m_interPacket(ei_plset<Scalar>(0)) {}
|
||||
m_lowPacket(ei_pset1<Packet>(m_low)), m_stepPacket(ei_pset1<Packet>(m_step)), m_interPacket(ei_plset<Scalar>(0)) {}
|
||||
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const PacketScalar packetOp(Index i) const
|
||||
{ return ei_padd(m_lowPacket, ei_pmul(m_stepPacket, ei_padd(ei_pset1<Scalar>(i),m_interPacket))); }
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index i) const
|
||||
{ return ei_padd(m_lowPacket, ei_pmul(m_stepPacket, ei_padd(ei_pset1<Packet>(i),m_interPacket))); }
|
||||
|
||||
const Scalar m_low;
|
||||
const Scalar m_step;
|
||||
const PacketScalar m_lowPacket;
|
||||
const PacketScalar m_stepPacket;
|
||||
const PacketScalar m_interPacket;
|
||||
const Packet m_lowPacket;
|
||||
const Packet m_stepPacket;
|
||||
const Packet m_interPacket;
|
||||
};
|
||||
|
||||
// ----- Linspace functor ----------------------------------------------------------------
|
||||
@@ -543,17 +567,17 @@ struct ei_linspaced_op_impl<Scalar,true>
|
||||
// nested expressions).
|
||||
template <typename Scalar, bool RandomAccess = true> struct ei_linspaced_op;
|
||||
template <typename Scalar, bool RandomAccess> struct ei_functor_traits< ei_linspaced_op<Scalar,RandomAccess> >
|
||||
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };
|
||||
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
|
||||
template <typename Scalar, bool RandomAccess> struct ei_linspaced_op
|
||||
{
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||
ei_linspaced_op(Scalar low, Scalar high, int num_steps) : impl(low, (high-low)/(num_steps-1)) {}
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Scalar operator() (Index i, Index = 0) const { return impl(i); }
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const PacketScalar packetOp(Index i, Index = 0) const { return impl.packetOp(i); }
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index i, Index = 0) const { return impl.packetOp(i); }
|
||||
// This proxy object handles the actual required temporaries, the different
|
||||
// implementations (random vs. sequential access) as well as the piping
|
||||
// implementations (random vs. sequential access) as well as the
|
||||
// correct piping to size 2/4 packet operations.
|
||||
const ei_linspaced_op_impl<Scalar,RandomAccess> impl;
|
||||
};
|
||||
@@ -561,13 +585,15 @@ template <typename Scalar, bool RandomAccess> struct ei_linspaced_op
|
||||
// all functors allow linear access, except ei_scalar_identity_op. So we fix here a quick meta
|
||||
// to indicate whether a functor allows linear access, just always answering 'yes' except for
|
||||
// ei_scalar_identity_op.
|
||||
// FIXME move this to ei_functor_traits adding a ei_functor_default
|
||||
template<typename Functor> struct ei_functor_has_linear_access { enum { ret = 1 }; };
|
||||
template<typename Scalar> struct ei_functor_has_linear_access<ei_scalar_identity_op<Scalar> > { enum { ret = 0 }; };
|
||||
|
||||
// in CwiseBinaryOp, we require the Lhs and Rhs to have the same scalar type, except for multiplication
|
||||
// where we only require them to have the same _real_ scalar type so one may multiply, say, float by complex<float>.
|
||||
// FIXME move this to ei_functor_traits adding a ei_functor_default
|
||||
template<typename Functor> struct ei_functor_allows_mixing_real_and_complex { enum { ret = 0 }; };
|
||||
template<typename Scalar> struct ei_functor_allows_mixing_real_and_complex<ei_scalar_product_op<Scalar> > { enum { ret = 1 }; };
|
||||
template<typename LhsScalar,typename RhsScalar> struct ei_functor_allows_mixing_real_and_complex<ei_scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
|
||||
|
||||
|
||||
/** \internal
|
||||
@@ -577,18 +603,18 @@ template<typename Scalar> struct ei_functor_allows_mixing_real_and_complex<ei_sc
|
||||
/* If you wonder why doing the ei_pset1() in packetOp() is an optimization check ei_scalar_multiple_op */
|
||||
template<typename Scalar>
|
||||
struct ei_scalar_add_op {
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||
// FIXME default copy constructors seems bugged with std::complex<>
|
||||
inline ei_scalar_add_op(const ei_scalar_add_op& other) : m_other(other.m_other) { }
|
||||
inline ei_scalar_add_op(const Scalar& other) : m_other(other) { }
|
||||
inline Scalar operator() (const Scalar& a) const { return a + m_other; }
|
||||
inline const PacketScalar packetOp(const PacketScalar& a) const
|
||||
{ return ei_padd(a, ei_pset1(m_other)); }
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return ei_padd(a, ei_pset1<Packet>(m_other)); }
|
||||
const Scalar m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_add_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; };
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = ei_packet_traits<Scalar>::HasAdd }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the square root of a scalar
|
||||
@@ -670,13 +696,13 @@ template<typename Scalar>
|
||||
struct ei_scalar_inverse_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_inverse_op)
|
||||
inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
|
||||
template<typename PacketScalar>
|
||||
inline const PacketScalar packetOp(const PacketScalar& a) const
|
||||
{ return ei_pdiv(ei_pset1(Scalar(1)),a); }
|
||||
template<typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return ei_pdiv(ei_pset1<Packet>(Scalar(1)),a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_inverse_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasDiv }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the square of a scalar
|
||||
@@ -686,13 +712,13 @@ template<typename Scalar>
|
||||
struct ei_scalar_square_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_square_op)
|
||||
inline Scalar operator() (const Scalar& a) const { return a*a; }
|
||||
template<typename PacketScalar>
|
||||
inline const PacketScalar packetOp(const PacketScalar& a) const
|
||||
template<typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return ei_pmul(a,a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_square_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the cube of a scalar
|
||||
@@ -702,13 +728,13 @@ template<typename Scalar>
|
||||
struct ei_scalar_cube_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_cube_op)
|
||||
inline Scalar operator() (const Scalar& a) const { return a*a*a; }
|
||||
template<typename PacketScalar>
|
||||
inline const PacketScalar packetOp(const PacketScalar& a) const
|
||||
template<typename Packet>
|
||||
inline const Packet packetOp(const Packet& a) const
|
||||
{ return ei_pmul(a,ei_pmul(a,a)); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_cube_op<Scalar> >
|
||||
{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };
|
||||
{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
|
||||
|
||||
// default functor traits for STL functors:
|
||||
|
||||
|
||||
@@ -58,8 +58,11 @@ struct ei_default_packet_traits
|
||||
HasMul = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 1,
|
||||
HasAbs2 = 1,
|
||||
HasMin = 1,
|
||||
HasMax = 1,
|
||||
HasConj = 1,
|
||||
HasSetLinear = 1,
|
||||
|
||||
HasDiv = 0,
|
||||
HasSqrt = 0,
|
||||
@@ -79,15 +82,22 @@ struct ei_default_packet_traits
|
||||
template<typename T> struct ei_packet_traits : ei_default_packet_traits
|
||||
{
|
||||
typedef T type;
|
||||
enum {size=1};
|
||||
enum {
|
||||
Vectorizable = 0,
|
||||
size = 1,
|
||||
AlignedOnScalar = 0
|
||||
};
|
||||
enum {
|
||||
HasAdd = 0,
|
||||
HasSub = 0,
|
||||
HasMul = 0,
|
||||
HasNegate = 0,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0
|
||||
HasMax = 0,
|
||||
HasConj = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
@@ -105,6 +115,10 @@ ei_psub(const Packet& a,
|
||||
template<typename Packet> inline Packet
|
||||
ei_pnegate(const Packet& a) { return -a; }
|
||||
|
||||
/** \internal \returns conj(a) (coeff-wise) */
|
||||
template<typename Packet> inline Packet
|
||||
ei_pconj(const Packet& a) { return ei_conj(a); }
|
||||
|
||||
/** \internal \returns a * b (coeff-wise) */
|
||||
template<typename Packet> inline Packet
|
||||
ei_pmul(const Packet& a,
|
||||
@@ -146,16 +160,20 @@ template<typename Packet> inline Packet
|
||||
ei_pandnot(const Packet& a, const Packet& b) { return a & (!b); }
|
||||
|
||||
/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
|
||||
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type
|
||||
ei_pload(const Scalar* from) { return *from; }
|
||||
template<typename Packet> inline Packet
|
||||
ei_pload(const typename ei_unpacket_traits<Packet>::type* from) { return *from; }
|
||||
|
||||
/** \internal \returns a packet version of \a *from, (un-aligned load) */
|
||||
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type
|
||||
ei_ploadu(const Scalar* from) { return *from; }
|
||||
template<typename Packet> inline Packet
|
||||
ei_ploadu(const typename ei_unpacket_traits<Packet>::type* from) { return *from; }
|
||||
|
||||
/** \internal \returns a packet with elements of \a *from duplicated, e.g.: (from[0],from[0],from[1],from[1]) */
|
||||
template<typename Packet> inline Packet
|
||||
ei_ploaddup(const typename ei_unpacket_traits<Packet>::type* from) { return *from; }
|
||||
|
||||
/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
|
||||
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type
|
||||
ei_pset1(const Scalar& a) { return a; }
|
||||
template<typename Packet> inline Packet
|
||||
ei_pset1(const typename ei_unpacket_traits<Packet>::type& a) { return a; }
|
||||
|
||||
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
|
||||
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type
|
||||
@@ -242,13 +260,13 @@ ei_pmadd(const Packet& a,
|
||||
|
||||
/** \internal \returns a packet version of \a *from.
|
||||
* \If LoadMode equals Aligned, \a from must be 16 bytes aligned */
|
||||
template<typename Scalar, int LoadMode>
|
||||
inline typename ei_packet_traits<Scalar>::type ei_ploadt(const Scalar* from)
|
||||
template<typename Packet, int LoadMode>
|
||||
inline Packet ei_ploadt(const typename ei_unpacket_traits<Packet>::type* from)
|
||||
{
|
||||
if(LoadMode == Aligned)
|
||||
return ei_pload(from);
|
||||
return ei_pload<Packet>(from);
|
||||
else
|
||||
return ei_ploadu(from);
|
||||
return ei_ploadu<Packet>(from);
|
||||
}
|
||||
|
||||
/** \internal copy the packet \a from to \a *to.
|
||||
|
||||
@@ -31,6 +31,7 @@ enum { StreamPrecision = -1,
|
||||
FullPrecision = -2 };
|
||||
|
||||
/** \class IOFormat
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Stores a set of parameters controlling the way matrices are printed
|
||||
*
|
||||
@@ -80,6 +81,7 @@ struct IOFormat
|
||||
};
|
||||
|
||||
/** \class WithFormat
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Pseudo expression providing matrix output with given format
|
||||
*
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define EIGEN_MAP_H
|
||||
|
||||
/** \class Map
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief A matrix or vector expression mapping an existing array of data.
|
||||
*
|
||||
@@ -99,7 +100,7 @@ struct ei_traits<Map<PlainObjectType, MapOptions, StrideType> >
|
||||
|| ( OuterStrideAtCompileTime!=Dynamic
|
||||
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ),
|
||||
Flags0 = ei_traits<PlainObjectType>::Flags,
|
||||
Flags1 = IsAligned ? int(Flags0) | AlignedBit : int(Flags0) & ~AlignedBit,
|
||||
Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
|
||||
Flags2 = HasNoStride ? int(Flags1) : int(Flags1 & ~LinearAccessBit),
|
||||
Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit)
|
||||
};
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define EIGEN_MAPBASE_H
|
||||
|
||||
/** \class MapBase
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for Map and Block expression with direct access
|
||||
*
|
||||
@@ -123,14 +124,14 @@ template<typename Derived> class MapBase
|
||||
template<int LoadMode>
|
||||
inline PacketScalar packet(Index row, Index col) const
|
||||
{
|
||||
return ei_ploadt<Scalar, LoadMode>
|
||||
return ei_ploadt<PacketScalar, LoadMode>
|
||||
(m_data + (col * colStride() + row * rowStride()));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline PacketScalar packet(Index index) const
|
||||
{
|
||||
return ei_ploadt<Scalar, LoadMode>(m_data + index * innerStride());
|
||||
return ei_ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
@@ -188,8 +189,8 @@ template<typename Derived> class MapBase
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(ei_traits<Derived>::Flags&PacketAccessBit,
|
||||
ei_inner_stride_at_compile_time<Derived>::ret==1),
|
||||
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
|
||||
ei_assert(EIGEN_IMPLIES(ei_traits<Derived>::Flags&AlignedBit, (size_t(m_data)&0xf)==0)
|
||||
&& "data is not aligned");
|
||||
ei_assert(EIGEN_IMPLIES(ei_traits<Derived>::Flags&AlignedBit, (size_t(m_data) % (sizeof(Scalar)*ei_packet_traits<Scalar>::size)) == 0)
|
||||
&& "data is not aligned");
|
||||
}
|
||||
|
||||
const Scalar* EIGEN_RESTRICT m_data;
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define EIGEN_MATRIX_H
|
||||
|
||||
/** \class Matrix
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief The matrix class, also used for vectors and row-vectors
|
||||
*
|
||||
@@ -106,7 +107,7 @@
|
||||
* are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.</dd>
|
||||
* </dl>
|
||||
*
|
||||
* \see MatrixBase for the majority of the API methods for matrices
|
||||
* \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define EIGEN_MATRIXBASE_H
|
||||
|
||||
/** \class MatrixBase
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for all dense matrices, vectors, and expressions
|
||||
*
|
||||
@@ -51,6 +52,8 @@
|
||||
cout << x.row(0) << endl;
|
||||
}
|
||||
* \endcode
|
||||
*
|
||||
* \sa \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived> class MatrixBase
|
||||
: public DenseBase<Derived>
|
||||
@@ -180,7 +183,7 @@ template<typename Derived> class MatrixBase
|
||||
operator*(const MatrixBase<OtherDerived> &other) const;
|
||||
|
||||
template<typename OtherDerived>
|
||||
const typename ProductReturnType<Derived,OtherDerived,LazyCoeffBasedProductMode>::Type
|
||||
const typename LazyProductReturnType<Derived,OtherDerived>::Type
|
||||
lazyProduct(const MatrixBase<OtherDerived> &other) const;
|
||||
|
||||
template<typename OtherDerived>
|
||||
@@ -328,8 +331,6 @@ template<typename Derived> class MatrixBase
|
||||
|
||||
/////////// SVD module ///////////
|
||||
|
||||
SVD<PlainObject> svd() const;
|
||||
|
||||
/////////// Geometry module ///////////
|
||||
|
||||
template<typename OtherDerived>
|
||||
@@ -338,7 +339,7 @@ template<typename Derived> class MatrixBase
|
||||
PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
|
||||
PlainObject unitOrthogonal(void) const;
|
||||
Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
|
||||
const ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
|
||||
ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
|
||||
enum {
|
||||
SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
|
||||
};
|
||||
@@ -348,9 +349,13 @@ template<typename Derived> class MatrixBase
|
||||
typedef CwiseUnaryOp<ei_scalar_quotient1_op<typename ei_traits<Derived>::Scalar>,
|
||||
StartMinusOne > HNormalizedReturnType;
|
||||
|
||||
const HNormalizedReturnType hnormalized() const;
|
||||
typedef Homogeneous<Derived,MatrixBase<Derived>::ColsAtCompileTime==1?Vertical:Horizontal> HomogeneousReturnType;
|
||||
const HomogeneousReturnType homogeneous() const;
|
||||
HNormalizedReturnType hnormalized() const;
|
||||
|
||||
// put this as separate enum value to work around possible GCC 4.3 bug (?)
|
||||
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1?Vertical:Horizontal };
|
||||
typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
|
||||
|
||||
HomogeneousReturnType homogeneous() const;
|
||||
|
||||
////////// Householder module ///////////
|
||||
|
||||
@@ -426,6 +431,13 @@ template<typename Derived> class MatrixBase
|
||||
explicit MatrixBase(int);
|
||||
MatrixBase(int,int);
|
||||
template<typename OtherDerived> explicit MatrixBase(const MatrixBase<OtherDerived>&);
|
||||
protected:
|
||||
// mixing arrays and matrices is not legal
|
||||
template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& array)
|
||||
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
|
||||
// mixing arrays and matrices is not legal
|
||||
template<typename OtherDerived> Derived& operator-=(const ArrayBase<OtherDerived>& array)
|
||||
{EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
|
||||
};
|
||||
|
||||
#endif // EIGEN_MATRIXBASE_H
|
||||
|
||||
@@ -79,6 +79,7 @@ struct ei_matrix_array<T, 0, MatrixOptions, Alignment>
|
||||
/** \internal
|
||||
*
|
||||
* \class ei_matrix_storage
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Stores the data of a matrix
|
||||
*
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define EIGEN_NESTBYVALUE_H
|
||||
|
||||
/** \class NestByValue
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression which must be nested by value
|
||||
*
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#define EIGEN_NOALIAS_H
|
||||
|
||||
/** \class NoAlias
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Pseudo expression providing an operator = assuming no aliasing
|
||||
*
|
||||
@@ -42,6 +43,7 @@
|
||||
template<typename ExpressionType, template <typename> class StorageBase>
|
||||
class NoAlias
|
||||
{
|
||||
typedef typename ExpressionType::Scalar Scalar;
|
||||
public:
|
||||
NoAlias(ExpressionType& expression) : m_expression(expression) {}
|
||||
|
||||
@@ -49,17 +51,31 @@ class NoAlias
|
||||
* \sa MatrixBase::lazyAssign() */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
|
||||
{ return m_expression.lazyAssign(other.derived()); }
|
||||
{ return ei_assign_selector<ExpressionType,OtherDerived,false>::run(m_expression,other.derived()); }
|
||||
|
||||
/** \sa MatrixBase::operator+= */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
|
||||
{ return m_expression.lazyAssign(m_expression + other.derived()); }
|
||||
{
|
||||
typedef SelfCwiseBinaryOp<ei_scalar_sum_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
|
||||
SelfAdder tmp(m_expression);
|
||||
typedef typename ei_nested<OtherDerived>::type OtherDerivedNested;
|
||||
typedef typename ei_cleantype<OtherDerivedNested>::type _OtherDerivedNested;
|
||||
ei_assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
|
||||
return m_expression;
|
||||
}
|
||||
|
||||
/** \sa MatrixBase::operator-= */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
|
||||
{ return m_expression.lazyAssign(m_expression - other.derived()); }
|
||||
{
|
||||
typedef SelfCwiseBinaryOp<ei_scalar_difference_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
|
||||
SelfAdder tmp(m_expression);
|
||||
typedef typename ei_nested<OtherDerived>::type OtherDerivedNested;
|
||||
typedef typename ei_cleantype<OtherDerivedNested>::type _OtherDerivedNested;
|
||||
ei_assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
|
||||
return m_expression;
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#define EIGEN_NUMTRAITS_H
|
||||
|
||||
/** \class NumTraits
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
|
||||
*
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define EIGEN_PERMUTATIONMATRIX_H
|
||||
|
||||
/** \class PermutationMatrix
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Permutation matrix
|
||||
*
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define EIGEN_PRODUCT_H
|
||||
|
||||
/** \class GeneralProduct
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the product of two general matrices or vectors
|
||||
*
|
||||
@@ -120,6 +121,7 @@ template<> struct ei_product_type_selector<Small,Large,Small> { en
|
||||
template<> struct ei_product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
|
||||
|
||||
/** \class ProductReturnType
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Helper class to get the correct and optimized returned type of operator*
|
||||
*
|
||||
@@ -161,6 +163,10 @@ struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
|
||||
typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
|
||||
};
|
||||
|
||||
// this is a workaround for sun CC
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
|
||||
{};
|
||||
|
||||
/***********************************************************************
|
||||
* Implementation of Inner Vector Vector Product
|
||||
@@ -280,10 +286,13 @@ class GeneralProduct<Lhs, Rhs, GemvProduct>
|
||||
public:
|
||||
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
|
||||
|
||||
typedef typename Lhs::Scalar LhsScalar;
|
||||
typedef typename Rhs::Scalar RhsScalar;
|
||||
|
||||
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
// EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret),
|
||||
// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
}
|
||||
|
||||
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
|
||||
@@ -317,42 +326,66 @@ template<> struct ei_gemv_selector<OnTheRight,ColMajor,true>
|
||||
template<typename ProductType, typename Dest>
|
||||
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
|
||||
{
|
||||
typedef typename ProductType::Scalar Scalar;
|
||||
typedef typename ProductType::Index Index;
|
||||
typedef typename ProductType::LhsScalar LhsScalar;
|
||||
typedef typename ProductType::RhsScalar RhsScalar;
|
||||
typedef typename ProductType::Scalar ResScalar;
|
||||
typedef typename ProductType::RealScalar RealScalar;
|
||||
typedef typename ProductType::ActualLhsType ActualLhsType;
|
||||
typedef typename ProductType::ActualRhsType ActualRhsType;
|
||||
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
|
||||
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
|
||||
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
|
||||
|
||||
ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
|
||||
ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
|
||||
|
||||
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
|
||||
* RhsBlasTraits::extractScalarFactor(prod.rhs());
|
||||
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
|
||||
* RhsBlasTraits::extractScalarFactor(prod.rhs());
|
||||
|
||||
enum {
|
||||
// FIXME find a way to allow an inner stride on the result if ei_packet_traits<Scalar>::size==1
|
||||
EvalToDest = Dest::InnerStrideAtCompileTime==1
|
||||
EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
|
||||
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex)
|
||||
};
|
||||
|
||||
Scalar* EIGEN_RESTRICT actualDest;
|
||||
if (EvalToDest)
|
||||
bool alphaIsCompatible = (!ComplexByReal) || (ei_imag(actualAlpha)==RealScalar(0));
|
||||
bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
||||
|
||||
RhsScalar compatibleAlpha = ei_get_factor<ResScalar,RhsScalar>::run(actualAlpha);
|
||||
|
||||
ResScalar* actualDest;
|
||||
if (evalToDest)
|
||||
{
|
||||
actualDest = &dest.coeffRef(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
actualDest = ei_aligned_stack_new(Scalar,dest.size());
|
||||
Map<typename Dest::PlainObject>(actualDest, dest.size()) = dest;
|
||||
actualDest = ei_aligned_stack_new(ResScalar,dest.size());
|
||||
if(!alphaIsCompatible)
|
||||
{
|
||||
MappedDest(actualDest, dest.size()).setZero();
|
||||
compatibleAlpha = RhsScalar(1);
|
||||
}
|
||||
else
|
||||
MappedDest(actualDest, dest.size()) = dest;
|
||||
}
|
||||
|
||||
ei_cache_friendly_product_colmajor_times_vector
|
||||
<LhsBlasTraits::NeedToConjugate,RhsBlasTraits::NeedToConjugate>(
|
||||
dest.size(),
|
||||
&actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(),
|
||||
actualRhs, actualDest, actualAlpha);
|
||||
ei_general_matrix_vector_product
|
||||
<Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
&actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(),
|
||||
actualRhs.data(), actualRhs.innerStride(),
|
||||
actualDest, 1,
|
||||
compatibleAlpha);
|
||||
|
||||
if (!EvalToDest)
|
||||
if (!evalToDest)
|
||||
{
|
||||
dest = Map<typename Dest::PlainObject>(actualDest, dest.size());
|
||||
ei_aligned_stack_delete(Scalar, actualDest, dest.size());
|
||||
if(!alphaIsCompatible)
|
||||
dest += actualAlpha * MappedDest(actualDest, dest.size());
|
||||
else
|
||||
dest = MappedDest(actualDest, dest.size());
|
||||
ei_aligned_stack_delete(ResScalar, actualDest, dest.size());
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -362,7 +395,10 @@ template<> struct ei_gemv_selector<OnTheRight,RowMajor,true>
|
||||
template<typename ProductType, typename Dest>
|
||||
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
|
||||
{
|
||||
typedef typename ProductType::Scalar Scalar;
|
||||
typedef typename ProductType::LhsScalar LhsScalar;
|
||||
typedef typename ProductType::RhsScalar RhsScalar;
|
||||
typedef typename ProductType::Scalar ResScalar;
|
||||
typedef typename ProductType::Index Index;
|
||||
typedef typename ProductType::ActualLhsType ActualLhsType;
|
||||
typedef typename ProductType::ActualRhsType ActualRhsType;
|
||||
typedef typename ProductType::_ActualRhsType _ActualRhsType;
|
||||
@@ -372,29 +408,34 @@ template<> struct ei_gemv_selector<OnTheRight,RowMajor,true>
|
||||
ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
|
||||
ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
|
||||
|
||||
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
|
||||
* RhsBlasTraits::extractScalarFactor(prod.rhs());
|
||||
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
|
||||
* RhsBlasTraits::extractScalarFactor(prod.rhs());
|
||||
|
||||
enum {
|
||||
DirectlyUseRhs = ((ei_packet_traits<Scalar>::size==1) || (_ActualRhsType::Flags&ActualPacketAccessBit))
|
||||
// FIXME I think here we really have to check for ei_packet_traits<Scalar>::size==1
|
||||
// because in this case it is fine to have an inner stride
|
||||
DirectlyUseRhs = ((ei_packet_traits<RhsScalar>::size==1) || (_ActualRhsType::Flags&ActualPacketAccessBit))
|
||||
&& (!(_ActualRhsType::Flags & RowMajorBit))
|
||||
};
|
||||
|
||||
Scalar* EIGEN_RESTRICT rhs_data;
|
||||
RhsScalar* rhs_data;
|
||||
if (DirectlyUseRhs)
|
||||
rhs_data = reinterpret_cast<Scalar* EIGEN_RESTRICT>(&actualRhs.const_cast_derived().coeffRef(0));
|
||||
rhs_data = &actualRhs.const_cast_derived().coeffRef(0);
|
||||
else
|
||||
{
|
||||
rhs_data = ei_aligned_stack_new(Scalar, actualRhs.size());
|
||||
Map<typename _ActualRhsType::PlainObject>(reinterpret_cast<Scalar*>(rhs_data), actualRhs.size()) = actualRhs;
|
||||
rhs_data = ei_aligned_stack_new(RhsScalar, actualRhs.size());
|
||||
Map<typename _ActualRhsType::PlainObject>(rhs_data, actualRhs.size()) = actualRhs;
|
||||
}
|
||||
|
||||
ei_cache_friendly_product_rowmajor_times_vector
|
||||
<LhsBlasTraits::NeedToConjugate,RhsBlasTraits::NeedToConjugate>(
|
||||
ei_general_matrix_vector_product
|
||||
<Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
&actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(),
|
||||
rhs_data, prod.rhs().size(), dest, actualAlpha);
|
||||
rhs_data, 1,
|
||||
&dest.coeffRef(0,0), dest.innerStride(),
|
||||
actualAlpha);
|
||||
|
||||
if (!DirectlyUseRhs) ei_aligned_stack_delete(Scalar, rhs_data, prod.rhs().size());
|
||||
if (!DirectlyUseRhs) ei_aligned_stack_delete(RhsScalar, rhs_data, prod.rhs().size());
|
||||
}
|
||||
};
|
||||
|
||||
@@ -477,7 +518,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
const typename ProductReturnType<Derived,OtherDerived,LazyCoeffBasedProductMode>::Type
|
||||
const typename LazyProductReturnType<Derived,OtherDerived>::Type
|
||||
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
|
||||
{
|
||||
enum {
|
||||
@@ -496,7 +537,7 @@ MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
|
||||
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
|
||||
|
||||
return typename ProductReturnType<Derived,OtherDerived,LazyCoeffBasedProductMode>::Type(derived(), other.derived());
|
||||
return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
|
||||
}
|
||||
|
||||
#endif // EIGEN_PRODUCT_H
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#define EIGEN_PRODUCTBASE_H
|
||||
|
||||
/** \class ProductBase
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
*/
|
||||
template<typename Derived, typename _Lhs, typename _Rhs>
|
||||
@@ -124,7 +125,7 @@ class ProductBase : public MatrixBase<Derived>
|
||||
operator const PlainObject& () const
|
||||
{
|
||||
m_result.resize(m_lhs.rows(), m_rhs.cols());
|
||||
this->evalTo(m_result);
|
||||
derived().evalTo(m_result);
|
||||
return m_result;
|
||||
}
|
||||
|
||||
@@ -215,6 +216,7 @@ class ScaledProduct
|
||||
typename NestedProduct::_LhsNested,
|
||||
typename NestedProduct::_RhsNested> Base;
|
||||
typedef typename Base::Scalar Scalar;
|
||||
typedef typename Base::PlainObject PlainObject;
|
||||
// EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct)
|
||||
|
||||
ScaledProduct(const NestedProduct& prod, Scalar x)
|
||||
@@ -231,7 +233,7 @@ class ScaledProduct
|
||||
|
||||
template<typename Dest>
|
||||
inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { m_prod.derived().scaleAndAddTo(dst,alpha); }
|
||||
|
||||
|
||||
protected:
|
||||
const NestedProduct& m_prod;
|
||||
Scalar m_alpha;
|
||||
|
||||
@@ -183,7 +183,7 @@ struct ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
|
||||
typedef typename Derived::Index Index;
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
|
||||
{
|
||||
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix");
|
||||
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
|
||||
Scalar res;
|
||||
res = mat.coeffByOuterInner(0, 0);
|
||||
for(Index i = 1; i < mat.innerSize(); ++i)
|
||||
@@ -210,6 +210,7 @@ struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
|
||||
static Scalar run(const Derived& mat, const Func& func)
|
||||
{
|
||||
const Index size = mat.size();
|
||||
ei_assert(size && "you are using an empty matrix");
|
||||
const Index packetSize = ei_packet_traits<Scalar>::size;
|
||||
const Index alignedStart = ei_first_aligned(mat);
|
||||
enum {
|
||||
@@ -253,6 +254,7 @@ struct ei_redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
|
||||
|
||||
static Scalar run(const Derived& mat, const Func& func)
|
||||
{
|
||||
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
|
||||
const Index innerSize = mat.innerSize();
|
||||
const Index outerSize = mat.outerSize();
|
||||
enum {
|
||||
@@ -294,6 +296,7 @@ struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling
|
||||
};
|
||||
EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func)
|
||||
{
|
||||
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
|
||||
Scalar res = func.predux(ei_redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
|
||||
if (VectorizedSize != Size)
|
||||
res = func(res,ei_redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
|
||||
@@ -345,6 +348,8 @@ template<typename Derived>
|
||||
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
|
||||
DenseBase<Derived>::sum() const
|
||||
{
|
||||
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
|
||||
return Scalar(0);
|
||||
return this->redux(Eigen::ei_scalar_sum_op<Scalar>());
|
||||
}
|
||||
|
||||
@@ -370,6 +375,8 @@ template<typename Derived>
|
||||
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
|
||||
DenseBase<Derived>::prod() const
|
||||
{
|
||||
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
|
||||
return Scalar(1);
|
||||
return this->redux(Eigen::ei_scalar_product_op<Scalar>());
|
||||
}
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
|
||||
/**
|
||||
* \class Replicate
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the multiple replication of a matrix or vector
|
||||
*
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define EIGEN_RETURNBYVALUE_H
|
||||
|
||||
/** \class ReturnByValue
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
*/
|
||||
template<typename Derived>
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#define EIGEN_REVERSE_H
|
||||
|
||||
/** \class Reverse
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the reverse of a vector or matrix
|
||||
*
|
||||
@@ -58,7 +59,7 @@ struct ei_traits<Reverse<MatrixType, Direction> >
|
||||
LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) )
|
||||
? LinearAccessBit : 0,
|
||||
|
||||
Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | PacketAccessBit | LinearAccess),
|
||||
Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess),
|
||||
|
||||
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
|
||||
};
|
||||
@@ -108,6 +109,11 @@ template<typename MatrixType, int Direction> class Reverse
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
|
||||
inline Index innerStride() const
|
||||
{
|
||||
return -m_matrix.innerStride();
|
||||
}
|
||||
|
||||
inline Scalar& operator()(Index row, Index col)
|
||||
{
|
||||
ei_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
|
||||
@@ -120,13 +126,13 @@ template<typename MatrixType, int Direction> class Reverse
|
||||
ReverseCol ? m_matrix.cols() - col - 1 : col);
|
||||
}
|
||||
|
||||
inline const Scalar coeff(Index row, Index col) const
|
||||
inline CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row,
|
||||
ReverseCol ? m_matrix.cols() - col - 1 : col);
|
||||
}
|
||||
|
||||
inline const Scalar coeff(Index index) const
|
||||
inline CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_matrix.coeff(m_matrix.size() - index - 1);
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#define EIGEN_SELECT_H
|
||||
|
||||
/** \class Select
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of a coefficient wise version of the C++ ternary operator ?:
|
||||
*
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#define EIGEN_SELFADJOINTMATRIX_H
|
||||
|
||||
/** \class SelfAdjointView
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
*
|
||||
* \brief Expression of a selfadjoint matrix from a triangular part of a dense matrix
|
||||
@@ -64,7 +65,10 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
public:
|
||||
|
||||
typedef TriangularBase<SelfAdjointView> Base;
|
||||
typedef typename ei_traits<SelfAdjointView>::Scalar Scalar;
|
||||
|
||||
/** \brief The type of coefficients in this matrix */
|
||||
typedef typename ei_traits<SelfAdjointView>::Scalar Scalar;
|
||||
|
||||
typedef typename MatrixType::Index Index;
|
||||
|
||||
enum {
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#define EIGEN_SELFCWISEBINARYOP_H
|
||||
|
||||
/** \class SelfCwiseBinaryOp
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \internal
|
||||
*
|
||||
@@ -38,14 +39,21 @@
|
||||
*
|
||||
* \sa class SwapWrapper for a similar trick.
|
||||
*/
|
||||
template<typename BinaryOp, typename MatrixType>
|
||||
struct ei_traits<SelfCwiseBinaryOp<BinaryOp,MatrixType> > : ei_traits<MatrixType>
|
||||
template<typename BinaryOp, typename Lhs, typename Rhs>
|
||||
struct ei_traits<SelfCwiseBinaryOp<BinaryOp,Lhs,Rhs> >
|
||||
: ei_traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >
|
||||
{
|
||||
|
||||
enum {
|
||||
// Note that it is still a good idea to preserve the DirectAccessBit
|
||||
// so that assign can correctly align the data.
|
||||
Flags = ei_traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit),
|
||||
OuterStrideAtCompileTime = Lhs::OuterStrideAtCompileTime,
|
||||
InnerStrideAtCompileTime = Lhs::InnerStrideAtCompileTime
|
||||
};
|
||||
};
|
||||
|
||||
template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
|
||||
: public ei_dense_xpr_base< SelfCwiseBinaryOp<BinaryOp, MatrixType> >::type
|
||||
template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
|
||||
: public ei_dense_xpr_base< SelfCwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
|
||||
{
|
||||
public:
|
||||
|
||||
@@ -54,9 +62,7 @@ template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
|
||||
|
||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||
|
||||
using Base::operator=;
|
||||
|
||||
inline SelfCwiseBinaryOp(MatrixType& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}
|
||||
inline SelfCwiseBinaryOp(Lhs& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}
|
||||
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
@@ -121,12 +127,8 @@ template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
|
||||
template<typename RhsDerived>
|
||||
EIGEN_STRONG_INLINE SelfCwiseBinaryOp& lazyAssign(const DenseBase<RhsDerived>& rhs)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(MatrixType,RhsDerived)
|
||||
|
||||
EIGEN_STATIC_ASSERT((ei_functor_allows_mixing_real_and_complex<BinaryOp>::ret
|
||||
? int(ei_is_same_type<typename MatrixType::RealScalar, typename RhsDerived::RealScalar>::ret)
|
||||
: int(ei_is_same_type<typename MatrixType::Scalar, typename RhsDerived::Scalar>::ret)),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs,RhsDerived)
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename RhsDerived::Scalar);
|
||||
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
ei_assign_traits<SelfCwiseBinaryOp, RhsDerived>::debug();
|
||||
@@ -138,9 +140,18 @@ template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
|
||||
// overloaded to honor evaluation of special matrices
|
||||
// maybe another solution would be to not use SelfCwiseBinaryOp
|
||||
// at first...
|
||||
SelfCwiseBinaryOp& operator=(const Rhs& _rhs)
|
||||
{
|
||||
typename ei_nested<Rhs>::type rhs(_rhs);
|
||||
return Base::operator=(rhs);
|
||||
}
|
||||
|
||||
protected:
|
||||
MatrixType& m_matrix;
|
||||
Lhs& m_matrix;
|
||||
const BinaryOp& m_functor;
|
||||
|
||||
private:
|
||||
@@ -150,8 +161,8 @@ template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
|
||||
template<typename Derived>
|
||||
inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
|
||||
{
|
||||
SelfCwiseBinaryOp<ei_scalar_product_op<Scalar>, Derived> tmp(derived());
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
SelfCwiseBinaryOp<ei_scalar_product_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
|
||||
tmp = PlainObject::Constant(rows(),cols(),other);
|
||||
return derived();
|
||||
}
|
||||
@@ -159,10 +170,11 @@ inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
|
||||
template<typename Derived>
|
||||
inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
|
||||
{
|
||||
SelfCwiseBinaryOp<typename ei_meta_if<NumTraits<Scalar>::IsInteger,
|
||||
typedef typename ei_meta_if<NumTraits<Scalar>::IsInteger,
|
||||
ei_scalar_quotient_op<Scalar>,
|
||||
ei_scalar_product_op<Scalar> >::ret, Derived> tmp(derived());
|
||||
ei_scalar_product_op<Scalar> >::ret BinOp;
|
||||
typedef typename Derived::PlainObject PlainObject;
|
||||
SelfCwiseBinaryOp<BinOp, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
|
||||
tmp = PlainObject::Constant(rows(),cols(), NumTraits<Scalar>::IsInteger ? other : Scalar(1)/other);
|
||||
return derived();
|
||||
}
|
||||
|
||||
@@ -53,7 +53,8 @@ struct ei_triangular_solver_selector;
|
||||
template<typename Lhs, typename Rhs, int Mode>
|
||||
struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,RowMajor,1>
|
||||
{
|
||||
typedef typename Rhs::Scalar Scalar;
|
||||
typedef typename Lhs::Scalar LhsScalar;
|
||||
typedef typename Rhs::Scalar RhsScalar;
|
||||
typedef ei_blas_traits<Lhs> LhsProductTraits;
|
||||
typedef typename LhsProductTraits::ExtractType ActualLhsType;
|
||||
typedef typename Lhs::Index Index;
|
||||
@@ -80,12 +81,13 @@ struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,RowMajor
|
||||
// 2 - it is slightly faster at runtime
|
||||
Index startRow = IsLower ? pi : pi-actualPanelWidth;
|
||||
Index startCol = IsLower ? 0 : pi;
|
||||
VectorBlock<Rhs,Dynamic> target(other,startRow,actualPanelWidth);
|
||||
|
||||
ei_cache_friendly_product_rowmajor_times_vector<LhsProductTraits::NeedToConjugate,false>(
|
||||
ei_general_matrix_vector_product<Index,LhsScalar,RowMajor,LhsProductTraits::NeedToConjugate,RhsScalar,false>::run(
|
||||
actualPanelWidth, r,
|
||||
&(actualLhs.const_cast_derived().coeffRef(startRow,startCol)), actualLhs.outerStride(),
|
||||
&(other.coeffRef(startCol)), r,
|
||||
target, Scalar(-1));
|
||||
&(other.coeffRef(startCol)), other.innerStride(),
|
||||
&other.coeffRef(startRow), other.innerStride(),
|
||||
RhsScalar(-1));
|
||||
}
|
||||
|
||||
for(Index k=0; k<actualPanelWidth; ++k)
|
||||
@@ -106,13 +108,12 @@ struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,RowMajor
|
||||
template<typename Lhs, typename Rhs, int Mode>
|
||||
struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,ColMajor,1>
|
||||
{
|
||||
typedef typename Rhs::Scalar Scalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||
typedef typename Lhs::Scalar LhsScalar;
|
||||
typedef typename Rhs::Scalar RhsScalar;
|
||||
typedef ei_blas_traits<Lhs> LhsProductTraits;
|
||||
typedef typename LhsProductTraits::ExtractType ActualLhsType;
|
||||
typedef typename Lhs::Index Index;
|
||||
enum {
|
||||
PacketSize = ei_packet_traits<Scalar>::size,
|
||||
IsLower = ((Mode&Lower)==Lower)
|
||||
};
|
||||
|
||||
@@ -147,12 +148,11 @@ struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,ColMajor
|
||||
// let's directly call the low level product function because:
|
||||
// 1 - it is faster to compile
|
||||
// 2 - it is slightly faster at runtime
|
||||
ei_cache_friendly_product_colmajor_times_vector<LhsProductTraits::NeedToConjugate,false>(
|
||||
r,
|
||||
&(actualLhs.const_cast_derived().coeffRef(endBlock,startBlock)), actualLhs.outerStride(),
|
||||
other.segment(startBlock, actualPanelWidth),
|
||||
&(other.coeffRef(endBlock, 0)),
|
||||
Scalar(-1));
|
||||
ei_general_matrix_vector_product<Index,LhsScalar,ColMajor,LhsProductTraits::NeedToConjugate,RhsScalar,false>::run(
|
||||
r, actualPanelWidth,
|
||||
&(actualLhs.const_cast_derived().coeffRef(endBlock,startBlock)), actualLhs.outerStride(),
|
||||
&other.coeff(startBlock), other.innerStride(),
|
||||
&(other.coeffRef(endBlock, 0)), other.innerStride(), RhsScalar(-1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#define EIGEN_STRIDE_H
|
||||
|
||||
/** \class Stride
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Holds strides information for Map
|
||||
*
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#define EIGEN_SWAP_H
|
||||
|
||||
/** \class SwapWrapper
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \internal
|
||||
*
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define EIGEN_TRANSPOSE_H
|
||||
|
||||
/** \class Transpose
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the transpose of a matrix
|
||||
*
|
||||
@@ -302,11 +303,11 @@ inline void MatrixBase<Derived>::adjointInPlace()
|
||||
|
||||
// The following is to detect aliasing problems in most common cases.
|
||||
|
||||
template<typename BinOp,typename NestedXpr>
|
||||
struct ei_blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr> >
|
||||
template<typename BinOp,typename NestedXpr,typename Rhs>
|
||||
struct ei_blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> >
|
||||
: ei_blas_traits<NestedXpr>
|
||||
{
|
||||
typedef SelfCwiseBinaryOp<BinOp,NestedXpr> XprType;
|
||||
typedef SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> XprType;
|
||||
static inline const XprType extract(const XprType& x) { return x; }
|
||||
};
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#define EIGEN_TRANSPOSITIONS_H
|
||||
|
||||
/** \class Transpositions
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Represents a sequence of transpositions (row/column interchange)
|
||||
*
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
/** \internal
|
||||
*
|
||||
* \class TriangularBase
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for triangular part in a matrix
|
||||
*/
|
||||
@@ -89,7 +90,7 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
|
||||
|
||||
protected:
|
||||
|
||||
void check_coordinates(Index row, Index col)
|
||||
void check_coordinates(Index row, Index col) const
|
||||
{
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(row);
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(col);
|
||||
@@ -101,17 +102,18 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
|
||||
}
|
||||
|
||||
#ifdef EIGEN_INTERNAL_DEBUGGING
|
||||
void check_coordinates_internal(Index row, Index col)
|
||||
void check_coordinates_internal(Index row, Index col) const
|
||||
{
|
||||
check_coordinates(row, col);
|
||||
}
|
||||
#else
|
||||
void check_coordinates_internal(Index , Index ) {}
|
||||
void check_coordinates_internal(Index , Index ) const {}
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
/** \class TriangularView
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for triangular part in a matrix
|
||||
*
|
||||
@@ -152,11 +154,18 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
|
||||
typedef TriangularBase<TriangularView> Base;
|
||||
typedef typename ei_traits<TriangularView>::Scalar Scalar;
|
||||
|
||||
typedef _MatrixType MatrixType;
|
||||
typedef typename MatrixType::PlainObject DenseMatrixType;
|
||||
|
||||
protected:
|
||||
typedef typename MatrixType::Nested MatrixTypeNested;
|
||||
typedef typename ei_cleantype<MatrixTypeNested>::type _MatrixTypeNested;
|
||||
typedef typename ei_cleantype<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
|
||||
|
||||
public:
|
||||
using Base::evalToLazy;
|
||||
|
||||
|
||||
typedef typename ei_traits<TriangularView>::StorageKind StorageKind;
|
||||
typedef typename ei_traits<TriangularView>::Index Index;
|
||||
@@ -233,6 +242,12 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
template<typename OtherDerived>
|
||||
void lazyAssign(const MatrixBase<OtherDerived>& other);
|
||||
|
||||
/** \sa MatrixBase::conjugate() */
|
||||
inline TriangularView<MatrixConjugateReturnType,Mode> conjugate()
|
||||
{ return m_matrix.conjugate(); }
|
||||
/** \sa MatrixBase::conjugate() const */
|
||||
inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const
|
||||
{ return m_matrix.conjugate(); }
|
||||
|
||||
/** \sa MatrixBase::adjoint() */
|
||||
inline TriangularView<typename MatrixType::AdjointReturnType,TransposeMode> adjoint()
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define EIGEN_VECTORBLOCK_H
|
||||
|
||||
/** \class VectorBlock
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of a fixed-size or dynamic-size sub-vector
|
||||
*
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define EIGEN_PARTIAL_REDUX_H
|
||||
|
||||
/** \class PartialReduxExpr
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic expression of a partially reduxed matrix
|
||||
*
|
||||
@@ -154,6 +155,7 @@ struct ei_member_redux {
|
||||
};
|
||||
|
||||
/** \class VectorwiseOp
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Pseudo expression providing partial reduction operations
|
||||
*
|
||||
@@ -438,7 +440,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
}
|
||||
|
||||
/** Returns the expression of the sum of the vector \a other to each subvector of \c *this */
|
||||
template<typename OtherDerived>
|
||||
template<typename OtherDerived> EIGEN_STRONG_INLINE
|
||||
CwiseBinaryOp<ei_scalar_sum_op<Scalar>,
|
||||
ExpressionType,
|
||||
typename ExtendedType<OtherDerived>::Type>
|
||||
@@ -461,7 +463,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
|
||||
/////////// Geometry module ///////////
|
||||
|
||||
const Homogeneous<ExpressionType,Direction> homogeneous() const;
|
||||
Homogeneous<ExpressionType,Direction> homogeneous() const;
|
||||
|
||||
typedef typename ExpressionType::PlainObject CrossReturnType;
|
||||
template<typename OtherDerived>
|
||||
@@ -489,7 +491,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >
|
||||
HNormalizedReturnType;
|
||||
|
||||
const HNormalizedReturnType hnormalized() const;
|
||||
HNormalizedReturnType hnormalized() const;
|
||||
|
||||
protected:
|
||||
ExpressionTypeNested m_matrix;
|
||||
|
||||
215
Eigen/src/Core/arch/AltiVec/Complex.h
Normal file
215
Eigen/src/Core/arch/AltiVec/Complex.h
Normal file
@@ -0,0 +1,215 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// Alternatively, you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License as
|
||||
// published by the Free Software Foundation; either version 2 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef EIGEN_COMPLEX_ALTIVEC_H
|
||||
#define EIGEN_COMPLEX_ALTIVEC_H
|
||||
|
||||
static Packet4ui ei_p4ui_CONJ_XOR = vec_mergeh((Packet4ui)ei_p4i_ZERO, (Packet4ui)ei_p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
||||
static Packet16uc ei_p16uc_COMPLEX_RE = vec_sld((Packet16uc) vec_splat((Packet4ui)ei_p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)ei_p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
|
||||
static Packet16uc ei_p16uc_COMPLEX_IM = vec_sld((Packet16uc) vec_splat((Packet4ui)ei_p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)ei_p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
|
||||
static Packet16uc ei_p16uc_COMPLEX_REV = vec_sld(ei_p16uc_REVERSE, ei_p16uc_REVERSE, 8);//{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
|
||||
static Packet16uc ei_p16uc_COMPLEX_REV2 = vec_sld(ei_p16uc_FORWARD, ei_p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
|
||||
static Packet16uc ei_p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)ei_p16uc_FORWARD, 0), (Packet4ui) vec_splat((Packet4ui)ei_p16uc_FORWARD, 1));//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
|
||||
static Packet16uc ei_p16uc_PSET_LO = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)ei_p16uc_FORWARD, 2), (Packet4ui) vec_splat((Packet4ui)ei_p16uc_FORWARD, 3));//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
|
||||
|
||||
//---------- float ----------
|
||||
struct Packet2cf
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
||||
Packet4f v;
|
||||
};
|
||||
|
||||
template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits
|
||||
{
|
||||
typedef Packet2cf type;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
size = 2,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
|
||||
|
||||
// Broadcasts one std::complex<float> into both complex slots of a Packet2cf.
template<> EIGEN_STRONG_INLINE Packet2cf ei_pset1<Packet2cf>(const std::complex<float>& from)
{
  Packet2cf res;
  /* On AltiVec we cannot load 64-bit registers, so we have to take care of alignment */
  if ((ptrdiff_t)&from % 16 == 0) {
    res.v = ei_pload((const float *)&from);
  } else {
    res.v = ei_ploadu((const float *)&from);
  }
  // Both loads return a vector that starts at &from (the unaligned load
  // realigns the data), so the value to broadcast always sits in bytes 0..7.
  // Permuting with PSET_LO in the unaligned case would broadcast the 8 bytes
  // that follow `from` in memory instead.
  res.v = vec_perm(res.v, res.v, ei_p16uc_PSET_HI);
  return res;
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_add(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_sub(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a) { return Packet2cf(ei_psub<Packet4f>(ei_p4f_ZERO, a.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a) { return Packet2cf((Packet4f)vec_xor((Packet4ui)a.v, ei_p4ui_CONJ_XOR)); }
|
||||
|
||||
// Slot-wise complex product: each slot yields
// (a_re*b_re - a_im*b_im, a_re*b_im + a_im*b_re).
template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // Broadcast the real part, then the imaginary part, of each complex in a.
  const Packet4f a_re = vec_perm(a.v, a.v, ei_p16uc_COMPLEX_RE);
  const Packet4f a_im = vec_perm(a.v, a.v, ei_p16uc_COMPLEX_IM);

  // a_re * b
  const Packet4f re_term = vec_madd(a_re, b.v, ei_p4f_ZERO);

  // a_im * b, sign-flipped on the odd lanes, then swapped inside each pair
  // so that it becomes (-a_im*b_im, a_im*b_re).
  Packet4f im_term = vec_madd(a_im, b.v, ei_p4f_ZERO);
  im_term = (Packet4f) vec_xor((Packet4ui)im_term, ei_p4ui_CONJ_XOR);
  im_term = vec_perm(im_term, im_term, ei_p16uc_COMPLEX_REV);

  return Packet2cf(vec_add(re_term, im_term));
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_or(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_xor(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v, vec_nor(b.v,b.v))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pload <std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(ei_pload((const float*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_ploadu<std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ei_ploadu((const float*)from)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstore((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void ei_pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((float*)to, from.v); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void ei_prefetch<std::complex<float> >(const std::complex<float> * addr) { vec_dstt((float *)addr, DST_CTRL(2,2,32), DST_CHAN); }
|
||||
|
||||
// Returns the first complex value held in the packet.
template<> EIGEN_STRONG_INLINE std::complex<float> ei_pfirst<Packet2cf>(const Packet2cf& a)
{
  // Spill the whole vector to a 16-byte aligned scratch buffer and read slot 0.
  std::complex<float> EIGEN_ALIGN16 scratch[2];
  ei_pstore((float *)&scratch, a.v);
  return scratch[0];
}
|
||||
|
||||
// Swaps the two complex slots of the packet (the two 8-byte halves trade places).
template<> EIGEN_STRONG_INLINE Packet2cf ei_preverse(const Packet2cf& a)
{
  return Packet2cf(vec_perm(a.v, a.v, ei_p16uc_COMPLEX_REV2));
}
|
||||
|
||||
// Horizontal sum: returns the sum of the two complex values held in the packet.
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux<Packet2cf>(const Packet2cf& a)
{
  Packet4f b;
  // Rotate by 8 bytes so the two complex slots trade places ...
  b = (Packet4f) vec_sld(a.v, a.v, 8);
  // ... then add: every slot now holds slot0 + slot1.
  b = ei_padd(a.v, b);
  // The accumulated vector is `b`; the original returned an undeclared `sum`.
  return ei_pfirst(Packet2cf(b));
}
|
||||
|
||||
// Reduces two packets at once: slot 0 of the result is the sum of the two
// complex values of vecs[0], slot 1 is the sum of the two values of vecs[1].
template<> EIGEN_STRONG_INLINE Packet2cf ei_preduxp<Packet2cf>(const Packet2cf* vecs)
{
  // { high half of vecs[0], low half of vecs[1] }
  const Packet4f crossed = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8);

  // { high half of vecs[1], low half of vecs[0] }, rotated by 8 bytes to give
  // { low half of vecs[0], high half of vecs[1] }
  Packet4f straight = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8);
  straight = (Packet4f) vec_sld(straight, straight, 8);

  return Packet2cf(ei_padd(crossed, straight));
}
|
||||
|
||||
// Horizontal product: returns the product of the two complex values in the packet.
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux_mul<Packet2cf>(const Packet2cf& a)
{
  // Rotate by 8 bytes so the two complex slots trade places, then multiply
  // slot-wise; slot 0 of the product is slot0 * slot1.
  const Packet4f swapped = (Packet4f) vec_sld(a.v, a.v, 8);
  return ei_pfirst(ei_pmul(a, Packet2cf(swapped)));
}
|
||||
|
||||
template<int Offset>
|
||||
struct ei_palign_impl<Offset,Packet2cf>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
first.v = vec_sld(first.v, second.v, 8);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<Packet2cf, Packet2cf, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return ei_padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return ei_pmul(a, ei_pconj(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return ei_padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return ei_pmul(ei_pconj(a), b);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return ei_padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return ei_pconj(ei_pmul(a, b));
|
||||
}
|
||||
};
|
||||
|
||||
// Slot-wise complex division, computed as a * conj(b) / |b|^2.
template<> EIGEN_STRONG_INLINE Packet2cf ei_pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // TODO optimize it for AltiVec
  // Numerator: a * conj(b).
  const Packet2cf numer = ei_conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
  // Squares of the components of b: (re^2, im^2) per slot.
  const Packet4f squares = vec_madd(b.v, b.v, ei_p4f_ZERO);
  // Adding the pair-swapped squares puts re^2 + im^2 in both lanes of each slot.
  const Packet4f norm2 = vec_add(squares, vec_perm(squares, squares, ei_p16uc_COMPLEX_REV));
  return Packet2cf(ei_pdiv(numer.v, norm2));
}
|
||||
|
||||
#endif // EIGEN_COMPLEX_ALTIVEC_H
|
||||
@@ -59,13 +59,13 @@ typedef __vector unsigned char Packet16uc;
|
||||
Packet4i ei_p4i_##NAME = vec_splat_s32(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
|
||||
Packet4f ei_p4f_##NAME = ei_pset1<float>(X)
|
||||
Packet4f ei_p4f_##NAME = ei_pset1<Packet4f>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
|
||||
Packet4f ei_p4f_##NAME = vreinterpretq_f32_u32(ei_pset1<int>(X))
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
|
||||
Packet4i ei_p4i_##NAME = ei_pset1<int>(X)
|
||||
Packet4i ei_p4i_##NAME = ei_pset1<Packet4i>(X)
|
||||
|
||||
#define DST_CHAN 1
|
||||
#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
|
||||
@@ -74,6 +74,7 @@ typedef __vector unsigned char Packet16uc;
|
||||
static Packet4f ei_p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 };
|
||||
static Packet4i ei_p4i_COUNTDOWN = { 3, 2, 1, 0 };
|
||||
static Packet16uc ei_p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
|
||||
static Packet16uc ei_p16uc_FORWARD = vec_lvsl(0, (float*)0);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0);
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0);
|
||||
@@ -85,8 +86,13 @@ static Packet4f ei_p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)ei_p4i_MINUS1, (Pack
|
||||
|
||||
template<> struct ei_packet_traits<float> : ei_default_packet_traits
|
||||
{
|
||||
typedef Packet4f type; enum {size=4};
|
||||
typedef Packet4f type;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
|
||||
// FIXME check the Has*
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
@@ -95,7 +101,15 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
|
||||
};
|
||||
};
|
||||
template<> struct ei_packet_traits<int> : ei_default_packet_traits
|
||||
{ typedef Packet4i type; enum {size=4}; };
|
||||
{
|
||||
typedef Packet4i type;
|
||||
enum {
|
||||
// FIXME check the Has*
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct ei_unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
|
||||
template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
|
||||
@@ -144,7 +158,7 @@ inline std::ostream & operator <<(std::ostream & s, const Packetbi & v)
|
||||
return s;
|
||||
}
|
||||
*/
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) {
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) {
|
||||
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
|
||||
float EIGEN_ALIGN16 af[4];
|
||||
af[0] = from;
|
||||
@@ -153,7 +167,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) {
|
||||
return vc;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) {
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<Packet4i>(const int& from) {
|
||||
int EIGEN_ALIGN16 ai[4];
|
||||
ai[0] = from;
|
||||
Packet4i vc = vec_ld(0, ai);
|
||||
@@ -161,8 +175,8 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) {
|
||||
return vc;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) { return vec_add(ei_pset1(a), ei_p4f_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) { return vec_add(ei_pset1(a), ei_p4i_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) { return vec_add(ei_pset1<Packet4f>(a), ei_p4f_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) { return vec_add(ei_pset1<Packet4i>(a), ei_p4i_COUNTDOWN); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); }
|
||||
@@ -227,7 +241,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, con
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
|
||||
{ ei_assert(false && "packet integer division are not supported by AltiVec");
|
||||
return ei_pset1<int>(0);
|
||||
return ei_pset1<Packet4i>(0);
|
||||
}
|
||||
|
||||
// for some weird reasons, it has to be overloaded for packet of integers
|
||||
@@ -253,10 +267,10 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pxor<Packet4i>(const Packet4i& a, con
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu<Packet4f>(const float* from)
|
||||
{
|
||||
EIGEN_DEBUG_ALIGNED_LOAD
|
||||
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
|
||||
@@ -268,7 +282,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from)
|
||||
return (Packet4f) vec_perm(MSQ, LSQ, mask); // align the data
|
||||
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<Packet4i>(const int* from)
|
||||
{
|
||||
EIGEN_DEBUG_ALIGNED_LOAD
|
||||
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
|
||||
|
||||
@@ -67,12 +67,7 @@
|
||||
* Currently it must be 8 or 16. Other values will fail.
|
||||
*/
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#if (defined __i386__)
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8
|
||||
#else
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_DEFAULT_SETTINGS_H
|
||||
|
||||
262
Eigen/src/Core/arch/NEON/Complex.h
Normal file
262
Eigen/src/Core/arch/NEON/Complex.h
Normal file
@@ -0,0 +1,262 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// Alternatively, you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License as
|
||||
// published by the Free Software Foundation; either version 2 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// Include guard for the NEON complex packet header. It needs its own macro:
// reusing the AltiVec header's guard name would silently drop one of the two
// headers if both were ever seen in one translation unit.
#ifndef EIGEN_COMPLEX_NEON_H
#define EIGEN_COMPLEX_NEON_H
|
||||
|
||||
static uint32x4_t ei_p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
||||
static uint32x2_t ei_p2ui_CONJ_XOR = { 0x00000000, 0x80000000 };
|
||||
|
||||
//---------- float ----------
|
||||
struct Packet2cf
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
||||
Packet4f v;
|
||||
};
|
||||
|
||||
template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits
|
||||
{
|
||||
typedef Packet2cf type;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
size = 2,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
|
||||
|
||||
// Broadcasts one std::complex<float> into both complex slots of a Packet2cf.
template<> EIGEN_STRONG_INLINE Packet2cf ei_pset1<Packet2cf>(const std::complex<float>& from)
{
  // Load the (re, im) pair as one 64-bit half and use it for both halves.
  const float32x2_t pair = vld1_f32((float *)&from);
  return Packet2cf(vcombine_f32(pair, pair));
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(ei_padd<Packet4f>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(ei_psub<Packet4f>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a) { return Packet2cf(ei_pnegate<Packet4f>(a.v)); }
|
||||
// Complex conjugate of each slot: XOR with a mask that has only the sign bit
// of the odd (imaginary) lanes set.
template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a)
{
  const uint32x4_t bits = vreinterpretq_u32_f32(a.v);
  return Packet2cf(vreinterpretq_f32_u32(veorq_u32(bits, ei_p4ui_CONJ_XOR)));
}
|
||||
|
||||
// Slot-wise complex product: each slot yields
// (a_re*b_re - a_im*b_im, a_re*b_im + a_im*b_re).
template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const float32x2_t a_lo = vget_low_f32(a.v);
  const float32x2_t a_hi = vget_high_f32(a.v);

  // Real parts of a:      | a1_re | a1_re | a2_re | a2_re |
  const Packet4f a_re = vcombine_f32(vdup_lane_f32(a_lo, 0), vdup_lane_f32(a_hi, 0));
  // Imaginary parts of a: | a1_im | a1_im | a2_im | a2_im |
  const Packet4f a_im = vcombine_f32(vdup_lane_f32(a_lo, 1), vdup_lane_f32(a_hi, 1));

  // a_re * b
  const Packet4f re_term = vmulq_f32(a_re, b.v);

  // a_im * b, sign-flipped on the odd lanes, then swapped inside each 64-bit
  // pair so that it becomes (-a_im*b_im, a_im*b_re).
  Packet4f im_term = vmulq_f32(a_im, b.v);
  im_term = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(im_term), ei_p4ui_CONJ_XOR));
  im_term = vrev64q_f32(im_term);

  return Packet2cf(vaddq_f32(re_term, im_term));
}
|
||||
|
||||
// Bitwise AND of the raw bits of two packets.
template<> EIGEN_STRONG_INLINE Packet2cf ei_pand   <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // vandq_u32 is the AND intrinsic; the original called vorrq_u32, which made
  // ei_pand behave exactly like ei_por.
  return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_por <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pload <std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(ei_pload((const float*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_ploadu<std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ei_ploadu((const float*)from)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstore((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void ei_pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((float*)to, from.v); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void ei_prefetch<std::complex<float> >(const std::complex<float> * addr) { __pld((float *)addr); }
|
||||
|
||||
// Returns the first complex value held in the packet.
template<> EIGEN_STRONG_INLINE std::complex<float> ei_pfirst<Packet2cf>(const Packet2cf& a)
{
  // Spill all four floats to an aligned scratch buffer and read slot 0.
  std::complex<float> EIGEN_ALIGN16 scratch[2];
  vst1q_f32((float *)scratch, a.v);
  return scratch[0];
}
|
||||
|
||||
// Swaps the two complex slots of the packet (high half first, then low half).
template<> EIGEN_STRONG_INLINE Packet2cf ei_preverse(const Packet2cf& a)
{
  return Packet2cf(vcombine_f32(vget_high_f32(a.v), vget_low_f32(a.v)));
}
|
||||
|
||||
// Swaps the real and imaginary parts inside each complex slot.
EIGEN_STRONG_INLINE Packet2cf ei_pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
{
  // The parameter is named `x`; the original body read an undeclared `a`.
  return Packet2cf(vrev64q_f32(x.v));
}
|
||||
|
||||
// Horizontal sum: returns the sum of the two complex values held in the packet.
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux<Packet2cf>(const Packet2cf& a)
{
  std::complex<float> s;
  // Add the low (first complex) and high (second complex) 64-bit halves.
  const float32x2_t total = vadd_f32(vget_low_f32(a.v), vget_high_f32(a.v));
  vst1_f32((float *)&s, total);
  return s;
}
|
||||
|
||||
// Reduces two packets at once: slot 0 of the result is the sum of the two
// complex values of vecs[0], slot 1 is the sum of the two values of vecs[1].
template<> EIGEN_STRONG_INLINE Packet2cf ei_preduxp<Packet2cf>(const Packet2cf* vecs)
{
  // { first complex of vecs[0], first complex of vecs[1] }
  const Packet4f firsts  = vcombine_f32(vget_low_f32(vecs[0].v),  vget_low_f32(vecs[1].v));
  // { second complex of vecs[0], second complex of vecs[1] }
  const Packet4f seconds = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));

  return Packet2cf(vaddq_f32(firsts, seconds));
}
|
||||
|
||||
// Horizontal product: returns the product of the two complex values in the packet.
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux_mul<Packet2cf>(const Packet2cf& a)
{
  std::complex<float> s;

  const float32x2_t first  = vget_low_f32(a.v);   // (re1, im1)
  const float32x2_t second = vget_high_f32(a.v);  // (re2, im2)

  // (re1, re1) * second
  const float32x2_t re_term = vmul_f32(vdup_lane_f32(first, 0), second);

  // (im1, im1) * second, sign-flipped on the odd lane, then lane-swapped so
  // that it becomes (-im1*im2, im1*re2).
  float32x2_t im_term = vmul_f32(vdup_lane_f32(first, 1), second);
  im_term = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(im_term), ei_p2ui_CONJ_XOR));
  im_term = vrev64_f32(im_term);

  vst1_f32((float *)&s, vadd_f32(re_term, im_term));
  return s;
}
|
||||
|
||||
template<int Offset>
|
||||
struct ei_palign_impl<Offset,Packet2cf>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
first.v = vextq_f32(first.v, second.v, 2);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<Packet2cf, Packet2cf, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return ei_padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return ei_pmul(a, ei_pconj(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return ei_padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return ei_pmul(ei_pconj(a), b);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return ei_padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return ei_pconj(ei_pmul(a, b));
|
||||
}
|
||||
};
|
||||
|
||||
// Slot-wise complex division, computed as a * conj(b) / |b|^2.
template<> EIGEN_STRONG_INLINE Packet2cf ei_pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // TODO optimize it for NEON
  // Numerator: a * conj(b).
  const Packet2cf numer = ei_conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);

  // Squares of the components of b: (re^2, im^2) per slot.
  const Packet4f squares = vmulq_f32(b.v, b.v);
  // Swapping the lanes inside each 64-bit pair and adding puts re^2 + im^2
  // in both lanes of each slot.
  const Packet4f norm2 = vaddq_f32(squares, vrev64q_f32(squares));

  return Packet2cf(ei_pdiv(numer.v, norm2));
}
|
||||
|
||||
#endif // EIGEN_COMPLEX_NEON_H
|
||||
@@ -45,13 +45,13 @@ typedef float32x4_t Packet4f;
|
||||
typedef int32x4_t Packet4i;
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
|
||||
const Packet4f ei_p4f_##NAME = ei_pset1<float>(X)
|
||||
const Packet4f ei_p4f_##NAME = ei_pset1<Packet4f>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
|
||||
const Packet4f ei_p4f_##NAME = vreinterpretq_f32_u32(ei_pset1<int>(X))
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
|
||||
const Packet4i ei_p4i_##NAME = ei_pset1<int>(X)
|
||||
const Packet4i ei_p4i_##NAME = ei_pset1<Packet4i>(X)
|
||||
|
||||
#ifndef __pld
|
||||
#define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" );
|
||||
@@ -59,8 +59,14 @@ typedef int32x4_t Packet4i;
|
||||
|
||||
template<> struct ei_packet_traits<float> : ei_default_packet_traits
|
||||
{
|
||||
typedef Packet4f type; enum {size=4};
|
||||
typedef Packet4f type;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 4,
|
||||
|
||||
HasDiv = 1,
|
||||
// FIXME check the Has*
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
@@ -69,23 +75,31 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
|
||||
};
|
||||
};
|
||||
template<> struct ei_packet_traits<int> : ei_default_packet_traits
|
||||
{ typedef Packet4i type; enum {size=4}; };
|
||||
{
|
||||
typedef Packet4i type;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4
|
||||
// FIXME check the Has*
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct ei_unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
|
||||
template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { return vdupq_n_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) { return vdupq_n_s32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a)
|
||||
{
|
||||
Packet4f countdown = { 3, 2, 1, 0 };
|
||||
return vaddq_f32(ei_pset1(a), countdown);
|
||||
return vaddq_f32(ei_pset1<Packet4f>(a), countdown);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a)
|
||||
{
|
||||
Packet4i countdown = { 3, 2, 1, 0 };
|
||||
return vaddq_s32(ei_pset1(a), countdown);
|
||||
return vaddq_s32(ei_pset1<Packet4i>(a), countdown);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); }
|
||||
@@ -123,7 +137,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, con
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
|
||||
{ ei_assert(false && "packet integer division are not supported by NEON");
|
||||
return ei_pset1<int>(0);
|
||||
return ei_pset1<Packet4i>(0);
|
||||
}
|
||||
|
||||
// for some weird reasons, it has to be overloaded for packet of integers
|
||||
@@ -163,8 +177,23 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a,
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); }
|
||||
|
||||
// Loads two floats and duplicates each one: (from[0], from[0], from[1], from[1]).
template<> EIGEN_STRONG_INLINE Packet4f ei_ploaddup<Packet4f>(const float* from)
{
  // The original declared `ho` but assigned `hi` (undeclared), and filled both
  // halves from from[0], which would make this identical to ei_pset1.
  float32x2_t lo, hi;
  lo = vdup_n_f32(from[0]);
  hi = vdup_n_f32(from[1]);
  return vcombine_f32(lo, hi);
}
|
||||
// Loads two ints and duplicates each one: (from[0], from[0], from[1], from[1]).
template<> EIGEN_STRONG_INLINE Packet4i ei_ploaddup<Packet4i>(const int* from)
{
  // The integer packet reads from an int buffer; the original signature took
  // const float*. It also declared `ho` but assigned `hi` (undeclared), and
  // filled both halves from from[0].
  int32x2_t lo, hi;
  lo = vdup_n_s32(from[0]);
  hi = vdup_n_s32(from[1]);
  return vcombine_s32(lo, hi);
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
|
||||
@@ -181,25 +210,21 @@ template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { i
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a) {
|
||||
float32x2_t a_lo, a_hi;
|
||||
Packet4f a_r64, a_r128;
|
||||
Packet4f a_r64;
|
||||
|
||||
a_r64 = vrev64q_f32(a);
|
||||
a_lo = vget_low_f32(a_r64);
|
||||
a_hi = vget_high_f32(a_r64);
|
||||
a_r128 = vcombine_f32(a_hi, a_lo);
|
||||
|
||||
return a_r128;
|
||||
return vcombine_f32(a_hi, a_lo);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a) {
|
||||
int32x2_t a_lo, a_hi;
|
||||
Packet4i a_r64, a_r128;
|
||||
Packet4i a_r64;
|
||||
|
||||
a_r64 = vrev64q_s32(a);
|
||||
a_lo = vget_low_s32(a_r64);
|
||||
a_hi = vget_high_s32(a_r64);
|
||||
a_r128 = vcombine_s32(a_hi, a_lo);
|
||||
|
||||
return a_r128;
|
||||
return vcombine_s32(a_hi, a_lo);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pabs(const Packet4f& a) { return vabsq_f32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a) { return vabsq_s32(a); }
|
||||
|
||||
426
Eigen/src/Core/arch/SSE/Complex.h
Normal file
426
Eigen/src/Core/arch/SSE/Complex.h
Normal file
@@ -0,0 +1,426 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// Alternatively, you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License as
|
||||
// published by the Free Software Foundation; either version 2 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef EIGEN_COMPLEX_SSE_H
|
||||
#define EIGEN_COMPLEX_SSE_H
|
||||
|
||||
//---------- float ----------
|
||||
struct Packet2cf
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
|
||||
__m128 v;
|
||||
};
|
||||
|
||||
template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits
|
||||
{
|
||||
typedef Packet2cf type;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 2,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a)
|
||||
{
|
||||
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
|
||||
return Packet2cf(_mm_xor_ps(a.v,mask));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a)
|
||||
{
|
||||
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
|
||||
return Packet2cf(_mm_xor_ps(a.v,mask));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
// TODO optimize it for SSE3 and 4
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
|
||||
_mm_mul_ps(_mm_movehdup_ps(a.v),
|
||||
ei_vec4f_swizzle1(b.v, 1, 0, 3, 2))));
|
||||
// return Packet2cf(_mm_addsub_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
|
||||
// _mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
|
||||
// ei_vec4f_swizzle1(b.v, 1, 0, 3, 2))));
|
||||
#else
|
||||
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
|
||||
return Packet2cf(_mm_add_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
|
||||
_mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
|
||||
ei_vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(ei_pload<Packet4f>(&ei_real_ref(*from))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ei_ploadu<Packet4f>(&ei_real_ref(*from))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstore(&ei_real_ref(*to), from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void ei_pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu(&ei_real_ref(*to), from.v); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void ei_prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
|
||||
return Packet2cf(_mm_movelh_ps(res.v,res.v));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> ei_pfirst<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
std::complex<float> res;
|
||||
_mm_storel_pi((__m64*)&res, a.v);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(ei_preverse(_mm_castps_pd(a.v)))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
return ei_pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_preduxp<Packet2cf>(const Packet2cf* vecs)
|
||||
{
|
||||
return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux_mul<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
return ei_pfirst(ei_pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct ei_palign_impl<Offset,Packet2cf>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
first.v = _mm_movehl_ps(first.v, first.v);
|
||||
first.v = _mm_movelh_ps(first.v, second.v);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<Packet2cf, Packet2cf, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return ei_padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return ei_pmul(a, ei_pconj(b));
|
||||
#else
|
||||
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
|
||||
return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
|
||||
_mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
|
||||
ei_vec4f_swizzle1(b.v, 1, 0, 3, 2))));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return ei_padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return ei_pmul(ei_pconj(a), b);
|
||||
#else
|
||||
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
|
||||
return Packet2cf(_mm_add_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
|
||||
_mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
|
||||
ei_vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return ei_padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return ei_pconj(ei_pmul(a, b));
|
||||
#else
|
||||
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
|
||||
return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
|
||||
_mm_mul_ps(ei_vec4f_swizzle1(a.v, 1, 1, 3, 3),
|
||||
ei_vec4f_swizzle1(b.v, 1, 0, 3, 2))));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<Packet4f, Packet2cf, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return ei_padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
|
||||
{ return Packet2cf(ei_pmul(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<Packet2cf, Packet4f, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
|
||||
{ return ei_padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
|
||||
{ return Packet2cf(ei_pmul(x.v, y)); }
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
// TODO optimize it for SSE3 and 4
|
||||
Packet2cf res = ei_conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
|
||||
__m128 s = _mm_mul_ps(b.v,b.v);
|
||||
return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf ei_pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
|
||||
{
|
||||
return Packet2cf(ei_vec4f_swizzle1(x.v, 1, 0, 3, 2));
|
||||
}
|
||||
|
||||
|
||||
//---------- double ----------
|
||||
struct Packet1cd
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
|
||||
__m128d v;
|
||||
};
|
||||
|
||||
template<> struct ei_packet_traits<std::complex<double> > : ei_default_packet_traits
|
||||
{
|
||||
typedef Packet1cd type;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 0,
|
||||
size = 1,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct ei_unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_pnegate(const Packet1cd& a) { return Packet1cd(ei_pnegate(a.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_pconj(const Packet1cd& a)
|
||||
{
|
||||
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
|
||||
return Packet1cd(_mm_xor_pd(a.v,mask));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
// TODO optimize it for SSE3 and 4
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return Packet1cd(_mm_addsub_pd(_mm_mul_pd(ei_vec2d_swizzle1(a.v, 0, 0), b.v),
|
||||
_mm_mul_pd(ei_vec2d_swizzle1(a.v, 1, 1),
|
||||
ei_vec2d_swizzle1(b.v, 1, 0))));
|
||||
#else
|
||||
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
|
||||
return Packet1cd(_mm_add_pd(_mm_mul_pd(ei_vec2d_swizzle1(a.v, 0, 0), b.v),
|
||||
_mm_xor_pd(_mm_mul_pd(ei_vec2d_swizzle1(a.v, 1, 1),
|
||||
ei_vec2d_swizzle1(b.v, 1, 0)), mask)));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); }
|
||||
|
||||
// FIXME force unaligned load, this is a temporary fix
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_pload <Packet1cd>(const std::complex<double>* from)
|
||||
{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(ei_pload<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_ploadu<Packet1cd>(const std::complex<double>* from)
|
||||
{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ei_ploadu<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_pset1<Packet1cd>(const std::complex<double>& from)
|
||||
{ /* here we really have to use unaligned loads :( */ return ei_ploadu<Packet1cd>(&from); }
|
||||
|
||||
// FIXME force unaligned store, this is a temporary fix
|
||||
template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstore((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void ei_pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((double*)to, from.v); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void ei_prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> ei_pfirst<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
EIGEN_ALIGN16 double res[2];
|
||||
_mm_store_pd(res, a.v);
|
||||
return std::complex<double>(res[0],res[1]);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_preverse(const Packet1cd& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> ei_predux<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
return ei_pfirst(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_preduxp<Packet1cd>(const Packet1cd* vecs)
|
||||
{
|
||||
return vecs[0];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> ei_predux_mul<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
return ei_pfirst(a);
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct ei_palign_impl<Offset,Packet1cd>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
|
||||
{
|
||||
// FIXME are we sure that we never have to align a Packet1cd?
|
||||
// Even though a std::complex<double> occupies 16 bytes, it is not necessarily aligned on a 16-byte boundary...
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<Packet1cd, Packet1cd, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return ei_padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return ei_pmul(a, ei_pconj(b));
|
||||
#else
|
||||
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
|
||||
return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(ei_vec2d_swizzle1(a.v, 0, 0), b.v), mask),
|
||||
_mm_mul_pd(ei_vec2d_swizzle1(a.v, 1, 1),
|
||||
ei_vec2d_swizzle1(b.v, 1, 0))));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<Packet1cd, Packet1cd, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return ei_padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return ei_pmul(ei_pconj(a), b);
|
||||
#else
|
||||
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
|
||||
return Packet1cd(_mm_add_pd(_mm_mul_pd(ei_vec2d_swizzle1(a.v, 0, 0), b.v),
|
||||
_mm_xor_pd(_mm_mul_pd(ei_vec2d_swizzle1(a.v, 1, 1),
|
||||
ei_vec2d_swizzle1(b.v, 1, 0)), mask)));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<Packet1cd, Packet1cd, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return ei_padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return ei_pconj(ei_pmul(a, b));
|
||||
#else
|
||||
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
|
||||
return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(ei_vec2d_swizzle1(a.v, 0, 0), b.v), mask),
|
||||
_mm_mul_pd(ei_vec2d_swizzle1(a.v, 1, 1),
|
||||
ei_vec2d_swizzle1(b.v, 1, 0))));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<Packet2d, Packet1cd, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return ei_padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
|
||||
{ return Packet1cd(ei_pmul(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<Packet1cd, Packet2d, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
|
||||
{ return ei_padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
|
||||
{ return Packet1cd(ei_pmul(x.v, y)); }
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
// TODO optimize it for SSE3 and 4
|
||||
Packet1cd res = ei_conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
|
||||
__m128d s = _mm_mul_pd(b.v,b.v);
|
||||
return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd ei_pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
|
||||
{
|
||||
return Packet1cd(ei_preverse(x.v));
|
||||
}
|
||||
|
||||
#endif // EIGEN_COMPLEX_SSE_H
|
||||
@@ -373,19 +373,19 @@ Packet4f ei_pcos<Packet4f>(const Packet4f& _x)
|
||||
return _mm_xor_ps(y, sign_bit);
|
||||
}
|
||||
|
||||
// This is Quake3's fast inverse square root.
|
||||
// This is based on Quake3's fast inverse square root.
|
||||
// For detail see here: http://www.beyond3d.com/content/articles/8/
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f ei_psqrt<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
Packet4f half = ei_pmul(_x, ei_pset1(.5f));
|
||||
|
||||
/* select only the inverse sqrt of non-zero inputs */
|
||||
Packet4f non_zero_mask = _mm_cmpgt_ps(_x, ei_pset1(std::numeric_limits<float>::epsilon()));
|
||||
Packet4f x = _mm_and_ps(non_zero_mask, _mm_rsqrt_ps(_x));
|
||||
Packet4f half = ei_pmul(_x, ei_pset1<Packet4f>(.5f));
|
||||
|
||||
x = ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x))));
|
||||
return ei_pmul(_x,x);
|
||||
/* select only the inverse sqrt of non-zero inputs */
|
||||
Packet4f non_zero_mask = _mm_cmpgt_ps(_x, ei_pset1<Packet4f>(std::numeric_limits<float>::epsilon()));
|
||||
Packet4f x = _mm_and_ps(non_zero_mask, _mm_rsqrt_ps(_x));
|
||||
|
||||
x = ei_pmul(x, ei_psub(ei_pset1<Packet4f>(1.5f), ei_pmul(half, ei_pmul(x,x))));
|
||||
return ei_pmul(_x,x);
|
||||
}
|
||||
|
||||
#endif // EIGEN_MATH_FUNCTIONS_SSE_H
|
||||
|
||||
@@ -29,6 +29,10 @@
|
||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
|
||||
#endif
|
||||
|
||||
typedef __m128 Packet4f;
|
||||
typedef __m128i Packet4i;
|
||||
typedef __m128d Packet2d;
|
||||
@@ -43,6 +47,9 @@ template<> struct ei_is_arithmetic<__m128d> { enum { ret = true }; };
|
||||
#define ei_vec4i_swizzle1(v,p,q,r,s) \
|
||||
(_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
|
||||
|
||||
#define ei_vec2d_swizzle1(v,p,q) \
|
||||
(_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
|
||||
|
||||
#define ei_vec4f_swizzle2(a,b,p,q,r,s) \
|
||||
(_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
|
||||
|
||||
@@ -50,18 +57,24 @@ template<> struct ei_is_arithmetic<__m128d> { enum { ret = true }; };
|
||||
(_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
|
||||
const Packet4f ei_p4f_##NAME = ei_pset1<float>(X)
|
||||
const Packet4f ei_p4f_##NAME = ei_pset1<Packet4f>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
|
||||
const Packet4f ei_p4f_##NAME = _mm_castsi128_ps(ei_pset1<int>(X))
|
||||
const Packet4f ei_p4f_##NAME = _mm_castsi128_ps(ei_pset1<Packet4i>(X))
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
|
||||
const Packet4i ei_p4i_##NAME = ei_pset1<int>(X)
|
||||
const Packet4i ei_p4i_##NAME = ei_pset1<Packet4i>(X)
|
||||
|
||||
|
||||
template<> struct ei_packet_traits<float> : ei_default_packet_traits
|
||||
{
|
||||
typedef Packet4f type; enum {size=4};
|
||||
typedef Packet4f type;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
|
||||
HasDiv = 1,
|
||||
HasSin = EIGEN_FAST_MATH,
|
||||
HasCos = EIGEN_FAST_MATH,
|
||||
HasLog = 1,
|
||||
@@ -70,9 +83,26 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
|
||||
};
|
||||
};
|
||||
template<> struct ei_packet_traits<double> : ei_default_packet_traits
|
||||
{ typedef Packet2d type; enum {size=2}; };
|
||||
{
|
||||
typedef Packet2d type;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=2,
|
||||
|
||||
HasDiv = 1
|
||||
};
|
||||
};
|
||||
template<> struct ei_packet_traits<int> : ei_default_packet_traits
|
||||
{ typedef Packet4i type; enum {size=4}; };
|
||||
{
|
||||
typedef Packet4i type;
|
||||
enum {
|
||||
// FIXME check the Has*
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct ei_unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
|
||||
template<> struct ei_unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
|
||||
@@ -81,23 +111,24 @@ template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size
|
||||
#ifdef __GNUC__
|
||||
// Sometimes GCC implements _mm_set1_p* using multiple moves,
|
||||
// which is inefficient :( (e.g., see ei_gemm_pack_rhs)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) {
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) {
|
||||
Packet4f res = _mm_set_ss(from);
|
||||
return _mm_shuffle_ps(res,res,0);
|
||||
return ei_vec4f_swizzle1(res,0,0,0,0);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) {
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<Packet2d>(const double& from) {
|
||||
// NOTE the SSE3 intrinsic _mm_loaddup_pd is never faster but sometimes much slower
|
||||
Packet2d res = _mm_set_sd(from);
|
||||
return _mm_unpacklo_pd(res,res);
|
||||
return ei_vec2d_swizzle1(res, 0, 0);
|
||||
}
|
||||
#else
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { return _mm_set1_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) { return _mm_set1_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
|
||||
#endif
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) { return _mm_set1_epi32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) { return _mm_add_ps(ei_pset1(a), _mm_set_ps(3,2,1,0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_plset<double>(const double& a) { return _mm_add_pd(ei_pset1(a),_mm_set_pd(1,0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) { return _mm_add_epi32(ei_pset1(a),_mm_set_epi32(3,2,1,0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) { return _mm_add_ps(ei_pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_plset<double>(const double& a) { return _mm_add_pd(ei_pset1<Packet2d>(a),_mm_set_pd(1,0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) { return _mm_add_epi32(ei_pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
|
||||
@@ -144,7 +175,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, con
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
|
||||
{ ei_assert(false && "packet integer division are not supported by SSE");
|
||||
return ei_pset1<int>(0);
|
||||
return ei_pset1<Packet4i>(0);
|
||||
}
|
||||
|
||||
// for some weird reasons, it has to be overloaded for packets of integers
|
||||
@@ -184,14 +215,14 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a,
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_pload<double>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); }
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu<double>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<int>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); }
|
||||
#else
|
||||
// Fast unaligned loads. Note that here we cannot directly use intrinsics: this would
|
||||
// require pointer casting to incompatible pointer types and lead to invalid code
|
||||
@@ -199,7 +230,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_D
|
||||
// a correct instruction dependency.
|
||||
// TODO: do the same for MSVC (ICC is compatible)
|
||||
// NOTE: with the code below, MSVC's compiler crashes!
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu<Packet4f>(const float* from)
|
||||
{
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD
|
||||
__m128d res;
|
||||
@@ -207,7 +238,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from)
|
||||
res = _mm_loadh_pd(res, (const double*)(from+2)) ;
|
||||
return _mm_castpd_ps(res);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu(const double* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu<Packet2d>(const double* from)
|
||||
{
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD
|
||||
__m128d res;
|
||||
@@ -215,7 +246,7 @@ template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu(const double* from)
|
||||
res = _mm_loadh_pd(res,from+1);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from)
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<Packet4i>(const int* from)
|
||||
{
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD
|
||||
__m128d res;
|
||||
@@ -225,6 +256,19 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from)
|
||||
}
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_ploaddup<Packet4f>(const float* from)
|
||||
{
|
||||
return ei_vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd((const double*)from)), 0, 0, 1, 1);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_ploaddup<Packet2d>(const double* from)
|
||||
{ return ei_pset1<Packet2d>(from[0]); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_ploaddup<Packet4i>(const int* from)
|
||||
{
|
||||
Packet4i tmp;
|
||||
tmp = _mm_loadl_epi64(reinterpret_cast<const Packet4i*>(from));
|
||||
return ei_vec4i_swizzle1(tmp, 0, 0, 1, 1);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void ei_pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<Packet4i*>(to), from); }
|
||||
@@ -241,13 +285,13 @@ template<> EIGEN_STRONG_INLINE void ei_prefetch<float>(const float* addr) { _m
|
||||
template<> EIGEN_STRONG_INLINE void ei_prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void ei_prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64)
|
||||
#if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64) && !defined(__INTEL_COMPILER)
|
||||
// The temporary variable fixes an internal compilation error.
|
||||
// Direct access to the struct members fixed bug #62.
|
||||
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
|
||||
template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
|
||||
template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
|
||||
#elif defined(_MSC_VER) && (_MSC_VER <= 1500)
|
||||
#elif defined(_MSC_VER) && (_MSC_VER <= 1500) && !defined(__INTEL_COMPILER)
|
||||
// The temporary variable fixes an internal compilation error.
|
||||
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
|
||||
template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct ei_product_coeff_impl;
|
||||
|
||||
template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
|
||||
template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct ei_product_packet_impl;
|
||||
|
||||
template<typename LhsNested, typename RhsNested, int NestingFlags>
|
||||
@@ -73,6 +73,8 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
|
||||
LhsRowMajor = LhsFlags & RowMajorBit,
|
||||
RhsRowMajor = RhsFlags & RowMajorBit,
|
||||
|
||||
SameType = ei_is_same_type<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::ret,
|
||||
|
||||
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
|
||||
&& (ColsAtCompileTime == Dynamic
|
||||
|| ( (ColsAtCompileTime % ei_packet_traits<Scalar>::size) == 0
|
||||
@@ -94,7 +96,8 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
|
||||
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
|
||||
| (EvalToRowMajor ? RowMajorBit : 0)
|
||||
| NestingFlags
|
||||
| (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0),
|
||||
// TODO enable vectorization for mixed types
|
||||
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
|
||||
|
||||
CoeffReadCost = InnerSize == Dynamic ? Dynamic
|
||||
: InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
|
||||
@@ -105,7 +108,8 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
|
||||
* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
|
||||
* the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
|
||||
*/
|
||||
CanVectorizeInner = LhsRowMajor
|
||||
CanVectorizeInner = SameType
|
||||
&& LhsRowMajor
|
||||
&& (!RhsRowMajor)
|
||||
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
|
||||
&& (LhsFlags & RhsFlags & AlignedBit)
|
||||
@@ -195,7 +199,7 @@ class CoeffBasedProduct
|
||||
}
|
||||
|
||||
// Implicit conversion to the nested type (triggers the evaluation of the product)
|
||||
operator const PlainObject& () const
|
||||
EIGEN_STRONG_INLINE operator const PlainObject& () const
|
||||
{
|
||||
m_result.lazyAssign(*this);
|
||||
return m_result;
|
||||
@@ -275,20 +279,20 @@ struct ei_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
|
||||
*** Scalar path with inner vectorization ***
|
||||
*******************************************/
|
||||
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename PacketScalar>
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
|
||||
struct ei_product_coeff_vectorized_unroller
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size };
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
|
||||
{
|
||||
ei_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres);
|
||||
ei_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
|
||||
pres = ei_padd(pres, ei_pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename PacketScalar>
|
||||
struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, PacketScalar>
|
||||
template<typename Lhs, typename Rhs, typename Packet>
|
||||
struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
|
||||
@@ -300,13 +304,13 @@ struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, PacketScalar>
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct ei_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::PacketScalar PacketScalar;
|
||||
typedef typename Lhs::PacketScalar Packet;
|
||||
typedef typename Lhs::Index Index;
|
||||
enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size };
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
||||
{
|
||||
PacketScalar pres;
|
||||
ei_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres);
|
||||
Packet pres;
|
||||
ei_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
|
||||
ei_product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
|
||||
res = ei_predux(pres);
|
||||
}
|
||||
@@ -318,7 +322,7 @@ struct ei_product_coeff_vectorized_dyn_selector
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.row(row).cwiseProduct(rhs.col(col)).sum();
|
||||
res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -330,7 +334,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.cwiseProduct(rhs.col(col)).sum();
|
||||
res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -340,7 +344,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.row(row).cwiseProduct(rhs).sum();
|
||||
res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -350,7 +354,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.cwiseProduct(rhs).sum();
|
||||
res = lhs.transpose().cwiseProduct(rhs).sum();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -368,71 +372,71 @@ struct ei_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetSca
|
||||
*** Packet path ***
|
||||
*******************/
|
||||
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
|
||||
struct ei_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, PacketScalar, LoadMode>
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct ei_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
||||
{
|
||||
ei_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res);
|
||||
res = ei_pmadd(ei_pset1(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
|
||||
ei_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
|
||||
res = ei_pmadd(ei_pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
|
||||
}
|
||||
};
|
||||
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
|
||||
struct ei_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, PacketScalar, LoadMode>
|
||||
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct ei_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
||||
{
|
||||
ei_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res);
|
||||
res = ei_pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), ei_pset1(rhs.coeff(UnrollingIndex, col)), res);
|
||||
ei_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
|
||||
res = ei_pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), ei_pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
|
||||
struct ei_product_packet_impl<RowMajor, 0, Lhs, Rhs, PacketScalar, LoadMode>
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct ei_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
||||
{
|
||||
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
|
||||
res = ei_pmul(ei_pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
|
||||
struct ei_product_packet_impl<ColMajor, 0, Lhs, Rhs, PacketScalar, LoadMode>
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct ei_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
||||
{
|
||||
res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1(rhs.coeff(0, col)));
|
||||
res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1<Packet>(rhs.coeff(0, col)));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
|
||||
struct ei_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, PacketScalar, LoadMode>
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct ei_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
|
||||
{
|
||||
ei_assert(lhs.cols()>0 && "you are using a non initialized matrix");
|
||||
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
|
||||
res = ei_pmul(ei_pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
|
||||
for(Index i = 1; i < lhs.cols(); ++i)
|
||||
res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
|
||||
res = ei_pmadd(ei_pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
|
||||
struct ei_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, PacketScalar, LoadMode>
|
||||
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||
struct ei_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
typedef typename Lhs::Index Index;
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
|
||||
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
|
||||
{
|
||||
ei_assert(lhs.cols()>0 && "you are using a non initialized matrix");
|
||||
res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1(rhs.coeff(0, col)));
|
||||
res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1<Packet>(rhs.coeff(0, col)));
|
||||
for(Index i = 1; i < lhs.cols(); ++i)
|
||||
res = ei_pmadd(lhs.template packet<LoadMode>(row, i), ei_pset1(rhs.coeff(i, col)), res);
|
||||
res = ei_pmadd(lhs.template packet<LoadMode>(row, i), ei_pset1<Packet>(rhs.coeff(i, col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -29,26 +29,25 @@ template<typename _LhsScalar, typename _RhsScalar> class ei_level3_blocking;
|
||||
|
||||
/* Specialization for a row-major destination matrix => simple transposition of the product */
|
||||
template<
|
||||
typename Scalar, typename Index,
|
||||
int LhsStorageOrder, bool ConjugateLhs,
|
||||
int RhsStorageOrder, bool ConjugateRhs>
|
||||
struct ei_general_matrix_matrix_product<Scalar,Index,LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,RowMajor>
|
||||
typename Index,
|
||||
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
|
||||
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
|
||||
struct ei_general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
|
||||
{
|
||||
typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
static EIGEN_STRONG_INLINE void run(
|
||||
Index rows, Index cols, Index depth,
|
||||
const Scalar* lhs, Index lhsStride,
|
||||
const Scalar* rhs, Index rhsStride,
|
||||
Scalar* res, Index resStride,
|
||||
Scalar alpha,
|
||||
ei_level3_blocking<Scalar,Scalar>& blocking,
|
||||
const LhsScalar* lhs, Index lhsStride,
|
||||
const RhsScalar* rhs, Index rhsStride,
|
||||
ResScalar* res, Index resStride,
|
||||
ResScalar alpha,
|
||||
ei_level3_blocking<RhsScalar,LhsScalar>& blocking,
|
||||
GemmParallelInfo<Index>* info = 0)
|
||||
{
|
||||
// transpose the product such that the result is column major
|
||||
ei_general_matrix_matrix_product<Scalar, Index,
|
||||
RhsStorageOrder==RowMajor ? ColMajor : RowMajor,
|
||||
ConjugateRhs,
|
||||
LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
|
||||
ConjugateLhs,
|
||||
ei_general_matrix_matrix_product<Index,
|
||||
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
|
||||
LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
|
||||
ColMajor>
|
||||
::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking,info);
|
||||
}
|
||||
@@ -57,35 +56,32 @@ struct ei_general_matrix_matrix_product<Scalar,Index,LhsStorageOrder,ConjugateLh
|
||||
/* Specialization for a col-major destination matrix
|
||||
* => Blocking algorithm following Goto's paper */
|
||||
template<
|
||||
typename Scalar, typename Index,
|
||||
int LhsStorageOrder, bool ConjugateLhs,
|
||||
int RhsStorageOrder, bool ConjugateRhs>
|
||||
struct ei_general_matrix_matrix_product<Scalar,Index,LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor>
|
||||
typename Index,
|
||||
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
|
||||
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
|
||||
struct ei_general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
|
||||
{
|
||||
typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
static void run(Index rows, Index cols, Index depth,
|
||||
const Scalar* _lhs, Index lhsStride,
|
||||
const Scalar* _rhs, Index rhsStride,
|
||||
Scalar* res, Index resStride,
|
||||
Scalar alpha,
|
||||
ei_level3_blocking<Scalar,Scalar>& blocking,
|
||||
const LhsScalar* _lhs, Index lhsStride,
|
||||
const RhsScalar* _rhs, Index rhsStride,
|
||||
ResScalar* res, Index resStride,
|
||||
ResScalar alpha,
|
||||
ei_level3_blocking<LhsScalar,RhsScalar>& blocking,
|
||||
GemmParallelInfo<Index>* info = 0)
|
||||
{
|
||||
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
|
||||
ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
|
||||
ei_const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
|
||||
ei_const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
|
||||
|
||||
if (ConjugateRhs)
|
||||
alpha = ei_conj(alpha);
|
||||
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketType;
|
||||
typedef ei_product_blocking_traits<Scalar> Blocking;
|
||||
typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
|
||||
Index kc = blocking.kc(); // cache block size along the K direction
|
||||
Index mc = std::min(rows,blocking.mc()); // cache block size along the M direction
|
||||
//Index nc = blocking.nc(); // cache block size along the N direction
|
||||
|
||||
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr, RhsStorageOrder> pack_rhs;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr, LhsStorageOrder> pack_lhs;
|
||||
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp;
|
||||
ei_gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
||||
ei_gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
|
||||
ei_gebp_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
|
||||
|
||||
#ifdef EIGEN_HAS_OPENMP
|
||||
if(info)
|
||||
@@ -94,10 +90,10 @@ static void run(Index rows, Index cols, Index depth,
|
||||
Index tid = omp_get_thread_num();
|
||||
Index threads = omp_get_num_threads();
|
||||
|
||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||
std::size_t sizeW = kc*Blocking::PacketSize*Blocking::nr*8;
|
||||
Scalar* w = ei_aligned_stack_new(Scalar, sizeW);
|
||||
Scalar* blockB = blocking.blockB();
|
||||
LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, kc*mc);
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
RhsScalar* w = ei_aligned_stack_new(RhsScalar, sizeW);
|
||||
RhsScalar* blockB = blocking.blockB();
|
||||
ei_internal_assert(blockB!=0);
|
||||
|
||||
// For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
|
||||
@@ -118,7 +114,7 @@ static void run(Index rows, Index cols, Index depth,
|
||||
while(info[tid].users!=0) {}
|
||||
info[tid].users += threads;
|
||||
|
||||
pack_rhs(blockB+info[tid].rhs_start*kc, &rhs(k,info[tid].rhs_start), rhsStride, alpha, actual_kc, info[tid].rhs_length);
|
||||
pack_rhs(blockB+info[tid].rhs_start*kc, &rhs(k,info[tid].rhs_start), rhsStride, actual_kc, info[tid].rhs_length);
|
||||
|
||||
// Notify the other threads that the part B'_j is ready to go.
|
||||
info[tid].sync = k;
|
||||
@@ -134,7 +130,7 @@ static void run(Index rows, Index cols, Index depth,
|
||||
if(shift>0)
|
||||
while(info[j].sync!=k) {}
|
||||
|
||||
gebp(res+info[j].rhs_start*resStride, resStride, blockA, blockB+info[j].rhs_start*kc, mc, actual_kc, info[j].rhs_length, -1,-1,0,0, w);
|
||||
gebp(res+info[j].rhs_start*resStride, resStride, blockA, blockB+info[j].rhs_start*kc, mc, actual_kc, info[j].rhs_length, alpha, -1,-1,0,0, w);
|
||||
}
|
||||
|
||||
// Then keep going as usual with the remaining A'
|
||||
@@ -146,7 +142,7 @@ static void run(Index rows, Index cols, Index depth,
|
||||
pack_lhs(blockA, &lhs(i,k), lhsStride, actual_kc, actual_mc);
|
||||
|
||||
// C_i += A' * B'
|
||||
gebp(res+i, resStride, blockA, blockB, actual_mc, actual_kc, cols, -1,-1,0,0, w);
|
||||
gebp(res+i, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1,-1,0,0, w);
|
||||
}
|
||||
|
||||
// Release all the sub blocks B'_j of B' for the current thread,
|
||||
@@ -156,8 +152,8 @@ static void run(Index rows, Index cols, Index depth,
|
||||
--(info[j].users);
|
||||
}
|
||||
|
||||
ei_aligned_stack_delete(Scalar, blockA, kc*mc);
|
||||
ei_aligned_stack_delete(Scalar, w, sizeW);
|
||||
ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
|
||||
ei_aligned_stack_delete(RhsScalar, w, sizeW);
|
||||
}
|
||||
else
|
||||
#endif // EIGEN_HAS_OPENMP
|
||||
@@ -167,10 +163,10 @@ static void run(Index rows, Index cols, Index depth,
|
||||
// this is the sequential version!
|
||||
std::size_t sizeA = kc*mc;
|
||||
std::size_t sizeB = kc*cols;
|
||||
std::size_t sizeW = kc*Blocking::PacketSize*Blocking::nr;
|
||||
Scalar *blockA = blocking.blockA()==0 ? ei_aligned_stack_new(Scalar, sizeA) : blocking.blockA();
|
||||
Scalar *blockB = blocking.blockB()==0 ? ei_aligned_stack_new(Scalar, sizeB) : blocking.blockB();
|
||||
Scalar *blockW = blocking.blockW()==0 ? ei_aligned_stack_new(Scalar, sizeW) : blocking.blockW();
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
LhsScalar *blockA = blocking.blockA()==0 ? ei_aligned_stack_new(LhsScalar, sizeA) : blocking.blockA();
|
||||
RhsScalar *blockB = blocking.blockB()==0 ? ei_aligned_stack_new(RhsScalar, sizeB) : blocking.blockB();
|
||||
RhsScalar *blockW = blocking.blockW()==0 ? ei_aligned_stack_new(RhsScalar, sizeW) : blocking.blockW();
|
||||
|
||||
// For each horizontal panel of the rhs, and corresponding panel of the lhs...
|
||||
// (==GEMM_VAR1)
|
||||
@@ -182,7 +178,7 @@ static void run(Index rows, Index cols, Index depth,
|
||||
// => Pack rhs's panel into a sequential chunk of memory (L2 caching)
|
||||
// Note that this panel will be read as many times as the number of blocks in the lhs's
|
||||
// vertical panel which is, in practice, a very low number.
|
||||
pack_rhs(blockB, &rhs(k2,0), rhsStride, alpha, actual_kc, cols);
|
||||
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
|
||||
|
||||
|
||||
// For each mc x kc block of the lhs's vertical panel...
|
||||
@@ -197,14 +193,14 @@ static void run(Index rows, Index cols, Index depth,
|
||||
pack_lhs(blockA, &lhs(i2,k2), lhsStride, actual_kc, actual_mc);
|
||||
|
||||
// Everything is packed, we can now call the block * panel kernel:
|
||||
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, -1, -1, 0, 0, blockW);
|
||||
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if(blocking.blockA()==0) ei_aligned_stack_delete(Scalar, blockA, kc*mc);
|
||||
if(blocking.blockB()==0) ei_aligned_stack_delete(Scalar, blockB, sizeB);
|
||||
if(blocking.blockW()==0) ei_aligned_stack_delete(Scalar, blockW, sizeW);
|
||||
if(blocking.blockA()==0) ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
|
||||
if(blocking.blockB()==0) ei_aligned_stack_delete(RhsScalar, blockB, sizeB);
|
||||
if(blocking.blockW()==0) ei_aligned_stack_delete(RhsScalar, blockW, sizeW);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -237,10 +233,10 @@ struct ei_gemm_functor
|
||||
{
|
||||
if(cols==-1)
|
||||
cols = m_rhs.cols();
|
||||
|
||||
|
||||
Gemm::run(rows, cols, m_lhs.cols(),
|
||||
(const Scalar*)&(m_lhs.const_cast_derived().coeffRef(row,0)), m_lhs.outerStride(),
|
||||
(const Scalar*)&(m_rhs.const_cast_derived().coeffRef(0,col)), m_rhs.outerStride(),
|
||||
/*(const Scalar*)*/&(m_lhs.const_cast_derived().coeffRef(row,0)), m_lhs.outerStride(),
|
||||
/*(const Scalar*)*/&(m_rhs.const_cast_derived().coeffRef(0,col)), m_rhs.outerStride(),
|
||||
(Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
|
||||
m_actualAlpha, m_blocking, info);
|
||||
}
|
||||
@@ -299,11 +295,11 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
|
||||
};
|
||||
typedef typename ei_meta_if<Transpose,_RhsScalar,_LhsScalar>::ret LhsScalar;
|
||||
typedef typename ei_meta_if<Transpose,_LhsScalar,_RhsScalar>::ret RhsScalar;
|
||||
typedef ei_product_blocking_traits<RhsScalar> Blocking;
|
||||
typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
enum {
|
||||
SizeA = ActualRows * MaxDepth,
|
||||
SizeB = ActualCols * MaxDepth,
|
||||
SizeW = MaxDepth * Blocking::nr * ei_packet_traits<RhsScalar>::size
|
||||
SizeW = MaxDepth * Traits::WorkSpaceFactor
|
||||
};
|
||||
|
||||
EIGEN_ALIGN16 LhsScalar m_staticA[SizeA];
|
||||
@@ -339,7 +335,7 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
|
||||
};
|
||||
typedef typename ei_meta_if<Transpose,_RhsScalar,_LhsScalar>::ret LhsScalar;
|
||||
typedef typename ei_meta_if<Transpose,_LhsScalar,_RhsScalar>::ret RhsScalar;
|
||||
typedef ei_product_blocking_traits<RhsScalar> Blocking;
|
||||
typedef ei_gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
|
||||
DenseIndex m_sizeA;
|
||||
DenseIndex m_sizeB;
|
||||
@@ -356,7 +352,7 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
|
||||
computeProductBlockingSizes<LhsScalar,RhsScalar>(this->m_kc, this->m_mc, this->m_nc);
|
||||
m_sizeA = this->m_mc * this->m_kc;
|
||||
m_sizeB = this->m_kc * this->m_nc;
|
||||
m_sizeW = this->m_kc*ei_packet_traits<RhsScalar>::size*Blocking::nr;
|
||||
m_sizeW = this->m_kc*Traits::WorkSpaceFactor;
|
||||
}
|
||||
|
||||
void allocateA()
|
||||
@@ -401,11 +397,15 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
|
||||
};
|
||||
public:
|
||||
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
|
||||
|
||||
typedef typename Lhs::Scalar LhsScalar;
|
||||
typedef typename Rhs::Scalar RhsScalar;
|
||||
typedef Scalar ResScalar;
|
||||
|
||||
GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
typedef ei_scalar_product_op<LhsScalar,RhsScalar> BinOp;
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
|
||||
}
|
||||
|
||||
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
|
||||
@@ -418,15 +418,15 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
|
||||
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
|
||||
* RhsBlasTraits::extractScalarFactor(m_rhs);
|
||||
|
||||
typedef ei_gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
|
||||
typedef ei_gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
|
||||
Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
|
||||
|
||||
typedef ei_gemm_functor<
|
||||
Scalar, Index,
|
||||
ei_general_matrix_matrix_product<
|
||||
Scalar, Index,
|
||||
(_ActualLhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
|
||||
(_ActualRhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
|
||||
Index,
|
||||
LhsScalar, (_ActualLhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
|
||||
RhsScalar, (_ActualRhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
|
||||
(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
|
||||
_ActualLhsType, _ActualRhsType, Dest, BlockingType> GemmFunctor;
|
||||
|
||||
|
||||
@@ -30,52 +30,80 @@
|
||||
* the number of load/stores of the result by a factor 4 and to reduce
|
||||
* the instruction dependency. Moreover, we know that all bands have the
|
||||
* same alignment pattern.
|
||||
* TODO: since rhs gets evaluated only once, no need to evaluate it
|
||||
*
|
||||
* Mixing type logic: C += alpha * A * B
|
||||
* | A | B |alpha| comments
|
||||
* |real |cplx |cplx | no vectorization
|
||||
* |real |cplx |real | alpha is converted to a cplx when calling the run function, no vectorization
|
||||
* |cplx |real |cplx | invalid, the caller has to do tmp = A * B; C += alpha*tmp
|
||||
* |cplx |real |real | optimal case, vectorization possible via real-cplx mul
|
||||
*/
|
||||
template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index, typename RhsType>
|
||||
static EIGEN_DONT_INLINE
|
||||
void ei_cache_friendly_product_colmajor_times_vector(
|
||||
Index size,
|
||||
const Scalar* lhs, Index lhsStride,
|
||||
const RhsType& rhs,
|
||||
Scalar* res,
|
||||
Scalar alpha)
|
||||
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
|
||||
struct ei_general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
|
||||
{
|
||||
typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
|
||||
enum {
|
||||
Vectorizable = ei_packet_traits<LhsScalar>::Vectorizable && ei_packet_traits<RhsScalar>::Vectorizable
|
||||
&& int(ei_packet_traits<LhsScalar>::size)==int(ei_packet_traits<RhsScalar>::size),
|
||||
LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1,
|
||||
RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1,
|
||||
ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1
|
||||
};
|
||||
|
||||
typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket;
|
||||
typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket;
|
||||
typedef typename ei_packet_traits<ResScalar>::type _ResPacket;
|
||||
|
||||
typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
|
||||
typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
|
||||
typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
|
||||
|
||||
EIGEN_DONT_INLINE static void run(
|
||||
Index rows, Index cols,
|
||||
const LhsScalar* lhs, Index lhsStride,
|
||||
const RhsScalar* rhs, Index rhsIncr,
|
||||
ResScalar* res, Index
|
||||
#ifdef EIGEN_INTERNAL_DEBUGGING
|
||||
resIncr
|
||||
#endif
|
||||
, RhsScalar alpha)
|
||||
{
|
||||
ei_internal_assert(resIncr==1);
|
||||
#ifdef _EIGEN_ACCUMULATE_PACKETS
|
||||
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
|
||||
#endif
|
||||
#define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) \
|
||||
ei_pstore(&res[j], \
|
||||
ei_padd(ei_pload(&res[j]), \
|
||||
ei_padd(ei_pload<ResPacket>(&res[j]), \
|
||||
ei_padd( \
|
||||
ei_padd(cj.pmul(EIGEN_CAT(ei_ploa , A0)(&lhs0[j]), ptmp0), \
|
||||
cj.pmul(EIGEN_CAT(ei_ploa , A13)(&lhs1[j]), ptmp1)), \
|
||||
ei_padd(cj.pmul(EIGEN_CAT(ei_ploa , A2)(&lhs2[j]), ptmp2), \
|
||||
cj.pmul(EIGEN_CAT(ei_ploa , A13)(&lhs3[j]), ptmp3)) )))
|
||||
ei_padd(pcj.pmul(EIGEN_CAT(ei_ploa , A0)<LhsPacket>(&lhs0[j]), ptmp0), \
|
||||
pcj.pmul(EIGEN_CAT(ei_ploa , A13)<LhsPacket>(&lhs1[j]), ptmp1)), \
|
||||
ei_padd(pcj.pmul(EIGEN_CAT(ei_ploa , A2)<LhsPacket>(&lhs2[j]), ptmp2), \
|
||||
pcj.pmul(EIGEN_CAT(ei_ploa , A13)<LhsPacket>(&lhs3[j]), ptmp3)) )))
|
||||
|
||||
ei_conj_helper<ConjugateLhs,ConjugateRhs> cj;
|
||||
ei_conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
|
||||
ei_conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
|
||||
if(ConjugateRhs)
|
||||
alpha = ei_conj(alpha);
|
||||
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||
const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
|
||||
|
||||
enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned };
|
||||
const Index columnsAtOnce = 4;
|
||||
const Index peels = 2;
|
||||
const Index PacketAlignedMask = PacketSize-1;
|
||||
const Index PeelAlignedMask = PacketSize*peels-1;
|
||||
|
||||
const Index LhsPacketAlignedMask = LhsPacketSize-1;
|
||||
const Index ResPacketAlignedMask = ResPacketSize-1;
|
||||
const Index PeelAlignedMask = ResPacketSize*peels-1;
|
||||
const Index size = rows;
|
||||
|
||||
// How many coeffs of the result do we have to skip to be aligned.
|
||||
// Here we assume data are at least aligned on the base scalar type.
|
||||
Index alignedStart = ei_first_aligned(res,size);
|
||||
Index alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0;
|
||||
Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
|
||||
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
|
||||
|
||||
const Index alignmentStep = PacketSize>1 ? (PacketSize - lhsStride % PacketSize) & PacketAlignedMask : 0;
|
||||
const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
|
||||
Index alignmentPattern = alignmentStep==0 ? AllAligned
|
||||
: alignmentStep==(PacketSize/2) ? EvenAligned
|
||||
: alignmentStep==(LhsPacketSize/2) ? EvenAligned
|
||||
: FirstAligned;
|
||||
|
||||
// we cannot assume the first element is aligned because of sub-matrices
|
||||
@@ -84,19 +112,19 @@ void ei_cache_friendly_product_colmajor_times_vector(
|
||||
// find how many columns do we have to skip to be aligned with the result (if possible)
|
||||
Index skipColumns = 0;
|
||||
// if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
|
||||
if( (size_t(lhs)%sizeof(RealScalar)) || (size_t(res)%sizeof(RealScalar)) )
|
||||
if( (size_t(lhs)%sizeof(LhsScalar)) || (size_t(res)%sizeof(ResScalar)) )
|
||||
{
|
||||
alignedSize = 0;
|
||||
alignedStart = 0;
|
||||
}
|
||||
else if (PacketSize>1)
|
||||
else if (LhsPacketSize>1)
|
||||
{
|
||||
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || size<PacketSize);
|
||||
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize);
|
||||
|
||||
while (skipColumns<PacketSize &&
|
||||
alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%PacketSize))
|
||||
while (skipColumns<LhsPacketSize &&
|
||||
alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%LhsPacketSize))
|
||||
++skipColumns;
|
||||
if (skipColumns==PacketSize)
|
||||
if (skipColumns==LhsPacketSize)
|
||||
{
|
||||
// nothing can be aligned, no need to skip any column
|
||||
alignmentPattern = NoneAligned;
|
||||
@@ -104,30 +132,38 @@ void ei_cache_friendly_product_colmajor_times_vector(
|
||||
}
|
||||
else
|
||||
{
|
||||
skipColumns = std::min(skipColumns,rhs.size());
|
||||
skipColumns = std::min(skipColumns,cols);
|
||||
// note that the skiped columns are processed later.
|
||||
}
|
||||
|
||||
ei_internal_assert( (alignmentPattern==NoneAligned)
|
||||
|| (skipColumns + columnsAtOnce >= rhs.size())
|
||||
|| PacketSize > size
|
||||
|| (size_t(lhs+alignedStart+lhsStride*skipColumns)%sizeof(Packet))==0);
|
||||
|| (skipColumns + columnsAtOnce >= cols)
|
||||
|| LhsPacketSize > size
|
||||
|| (size_t(lhs+alignedStart+lhsStride*skipColumns)%sizeof(LhsPacket))==0);
|
||||
}
|
||||
else if(Vectorizable)
|
||||
{
|
||||
alignedStart = 0;
|
||||
alignedSize = size;
|
||||
alignmentPattern = AllAligned;
|
||||
}
|
||||
|
||||
Index offset1 = (FirstAligned && alignmentStep==1?3:1);
|
||||
Index offset3 = (FirstAligned && alignmentStep==1?1:3);
|
||||
|
||||
Index columnBound = ((rhs.size()-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
|
||||
Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
|
||||
for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
|
||||
{
|
||||
Packet ptmp0 = ei_pset1(alpha*rhs[i]), ptmp1 = ei_pset1(alpha*rhs[i+offset1]),
|
||||
ptmp2 = ei_pset1(alpha*rhs[i+2]), ptmp3 = ei_pset1(alpha*rhs[i+offset3]);
|
||||
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]),
|
||||
ptmp1 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset1)*rhsIncr]),
|
||||
ptmp2 = ei_pset1<RhsPacket>(alpha*rhs[(i+2)*rhsIncr]),
|
||||
ptmp3 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset3)*rhsIncr]);
|
||||
|
||||
// this helps a lot generating better binary code
|
||||
const Scalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
|
||||
*lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride;
|
||||
const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
|
||||
*lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride;
|
||||
|
||||
if (PacketSize>1)
|
||||
if (Vectorizable)
|
||||
{
|
||||
/* explicit vectorization */
|
||||
// process initial unaligned coeffs
|
||||
@@ -144,51 +180,52 @@ void ei_cache_friendly_product_colmajor_times_vector(
|
||||
switch(alignmentPattern)
|
||||
{
|
||||
case AllAligned:
|
||||
for (Index j = alignedStart; j<alignedSize; j+=PacketSize)
|
||||
for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
|
||||
_EIGEN_ACCUMULATE_PACKETS(d,d,d);
|
||||
break;
|
||||
case EvenAligned:
|
||||
for (Index j = alignedStart; j<alignedSize; j+=PacketSize)
|
||||
for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
|
||||
_EIGEN_ACCUMULATE_PACKETS(d,du,d);
|
||||
break;
|
||||
case FirstAligned:
|
||||
if(peels>1)
|
||||
{
|
||||
Packet A00, A01, A02, A03, A10, A11, A12, A13;
|
||||
LhsPacket A00, A01, A02, A03, A10, A11, A12, A13;
|
||||
ResPacket T0, T1;
|
||||
|
||||
A01 = ei_pload(&lhs1[alignedStart-1]);
|
||||
A02 = ei_pload(&lhs2[alignedStart-2]);
|
||||
A03 = ei_pload(&lhs3[alignedStart-3]);
|
||||
A01 = ei_pload<LhsPacket>(&lhs1[alignedStart-1]);
|
||||
A02 = ei_pload<LhsPacket>(&lhs2[alignedStart-2]);
|
||||
A03 = ei_pload<LhsPacket>(&lhs3[alignedStart-3]);
|
||||
|
||||
for (Index j = alignedStart; j<peeledSize; j+=peels*PacketSize)
|
||||
for (Index j = alignedStart; j<peeledSize; j+=peels*ResPacketSize)
|
||||
{
|
||||
A11 = ei_pload(&lhs1[j-1+PacketSize]); ei_palign<1>(A01,A11);
|
||||
A12 = ei_pload(&lhs2[j-2+PacketSize]); ei_palign<2>(A02,A12);
|
||||
A13 = ei_pload(&lhs3[j-3+PacketSize]); ei_palign<3>(A03,A13);
|
||||
A11 = ei_pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); ei_palign<1>(A01,A11);
|
||||
A12 = ei_pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); ei_palign<2>(A02,A12);
|
||||
A13 = ei_pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); ei_palign<3>(A03,A13);
|
||||
|
||||
A00 = ei_pload (&lhs0[j]);
|
||||
A10 = ei_pload (&lhs0[j+PacketSize]);
|
||||
A00 = cj.pmadd(A00, ptmp0, ei_pload(&res[j]));
|
||||
A10 = cj.pmadd(A10, ptmp0, ei_pload(&res[j+PacketSize]));
|
||||
A00 = ei_pload<LhsPacket>(&lhs0[j]);
|
||||
A10 = ei_pload<LhsPacket>(&lhs0[j+LhsPacketSize]);
|
||||
T0 = pcj.pmadd(A00, ptmp0, ei_pload<ResPacket>(&res[j]));
|
||||
T1 = pcj.pmadd(A10, ptmp0, ei_pload<ResPacket>(&res[j+ResPacketSize]));
|
||||
|
||||
A00 = cj.pmadd(A01, ptmp1, A00);
|
||||
A01 = ei_pload(&lhs1[j-1+2*PacketSize]); ei_palign<1>(A11,A01);
|
||||
A00 = cj.pmadd(A02, ptmp2, A00);
|
||||
A02 = ei_pload(&lhs2[j-2+2*PacketSize]); ei_palign<2>(A12,A02);
|
||||
A00 = cj.pmadd(A03, ptmp3, A00);
|
||||
ei_pstore(&res[j],A00);
|
||||
A03 = ei_pload(&lhs3[j-3+2*PacketSize]); ei_palign<3>(A13,A03);
|
||||
A10 = cj.pmadd(A11, ptmp1, A10);
|
||||
A10 = cj.pmadd(A12, ptmp2, A10);
|
||||
A10 = cj.pmadd(A13, ptmp3, A10);
|
||||
ei_pstore(&res[j+PacketSize],A10);
|
||||
T0 = pcj.pmadd(A01, ptmp1, T0);
|
||||
A01 = ei_pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); ei_palign<1>(A11,A01);
|
||||
T0 = pcj.pmadd(A02, ptmp2, T0);
|
||||
A02 = ei_pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); ei_palign<2>(A12,A02);
|
||||
T0 = pcj.pmadd(A03, ptmp3, T0);
|
||||
ei_pstore(&res[j],T0);
|
||||
A03 = ei_pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); ei_palign<3>(A13,A03);
|
||||
T1 = pcj.pmadd(A11, ptmp1, T1);
|
||||
T1 = pcj.pmadd(A12, ptmp2, T1);
|
||||
T1 = pcj.pmadd(A13, ptmp3, T1);
|
||||
ei_pstore(&res[j+ResPacketSize],T1);
|
||||
}
|
||||
}
|
||||
for (Index j = peeledSize; j<alignedSize; j+=PacketSize)
|
||||
for (Index j = peeledSize; j<alignedSize; j+=ResPacketSize)
|
||||
_EIGEN_ACCUMULATE_PACKETS(d,du,du);
|
||||
break;
|
||||
default:
|
||||
for (Index j = alignedStart; j<alignedSize; j+=PacketSize)
|
||||
for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
|
||||
_EIGEN_ACCUMULATE_PACKETS(du,du,du);
|
||||
break;
|
||||
}
|
||||
@@ -206,34 +243,33 @@ void ei_cache_friendly_product_colmajor_times_vector(
|
||||
}
|
||||
|
||||
// process remaining first and last columns (at most columnsAtOnce-1)
|
||||
Index end = rhs.size();
|
||||
Index end = cols;
|
||||
Index start = columnBound;
|
||||
do
|
||||
{
|
||||
for (Index i=start; i<end; ++i)
|
||||
for (Index k=start; k<end; ++k)
|
||||
{
|
||||
Packet ptmp0 = ei_pset1(alpha*rhs[i]);
|
||||
const Scalar* lhs0 = lhs + i*lhsStride;
|
||||
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[k*rhsIncr]);
|
||||
const LhsScalar* lhs0 = lhs + k*lhsStride;
|
||||
|
||||
if (PacketSize>1)
|
||||
if (Vectorizable)
|
||||
{
|
||||
/* explicit vectorization */
|
||||
// process first unaligned result's coeffs
|
||||
for (Index j=0; j<alignedStart; ++j)
|
||||
res[j] += cj.pmul(lhs0[j], ei_pfirst(ptmp0));
|
||||
|
||||
// process aligned result's coeffs
|
||||
if ((size_t(lhs0+alignedStart)%sizeof(Packet))==0)
|
||||
for (Index j = alignedStart;j<alignedSize;j+=PacketSize)
|
||||
ei_pstore(&res[j], cj.pmadd(ei_pload(&lhs0[j]), ptmp0, ei_pload(&res[j])));
|
||||
if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0)
|
||||
for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
|
||||
ei_pstore(&res[i], pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[i]), ptmp0, ei_pload<ResPacket>(&res[i])));
|
||||
else
|
||||
for (Index j = alignedStart;j<alignedSize;j+=PacketSize)
|
||||
ei_pstore(&res[j], cj.pmadd(ei_ploadu(&lhs0[j]), ptmp0, ei_pload(&res[j])));
|
||||
for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
|
||||
ei_pstore(&res[i], pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[i]), ptmp0, ei_pload<ResPacket>(&res[i])));
|
||||
}
|
||||
|
||||
// process remaining scalars (or all if no explicit vectorization)
|
||||
for (Index j=alignedSize; j<size; ++j)
|
||||
res[j] += cj.pmul(lhs0[j], ei_pfirst(ptmp0));
|
||||
for (Index i=alignedSize; i<size; ++i)
|
||||
res[i] += cj.pmul(lhs0[i], ei_pfirst(ptmp0));
|
||||
}
|
||||
if (skipColumns)
|
||||
{
|
||||
@@ -243,73 +279,104 @@ void ei_cache_friendly_product_colmajor_times_vector(
|
||||
}
|
||||
else
|
||||
break;
|
||||
} while(PacketSize>1);
|
||||
} while(Vectorizable);
|
||||
#undef _EIGEN_ACCUMULATE_PACKETS
|
||||
}
|
||||
};
|
||||
|
||||
// TODO add peeling to mask unaligned load/stores
|
||||
template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index, typename ResType>
|
||||
static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
|
||||
const Scalar* lhs, Index lhsStride,
|
||||
const Scalar* rhs, Index rhsSize,
|
||||
ResType& res,
|
||||
Scalar alpha)
|
||||
/* Optimized row-major matrix * vector product:
|
||||
* This algorithm processes 4 rows at onces that allows to both reduce
|
||||
* the number of load/stores of the result by a factor 4 and to reduce
|
||||
* the instruction dependency. Moreover, we know that all bands have the
|
||||
* same alignment pattern.
|
||||
*
|
||||
* Mixing type logic:
|
||||
* - alpha is always a complex (or converted to a complex)
|
||||
* - no vectorization
|
||||
*/
|
||||
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
|
||||
struct ei_general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
|
||||
{
|
||||
typedef typename ei_scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
|
||||
enum {
|
||||
Vectorizable = ei_packet_traits<LhsScalar>::Vectorizable && ei_packet_traits<RhsScalar>::Vectorizable
|
||||
&& int(ei_packet_traits<LhsScalar>::size)==int(ei_packet_traits<RhsScalar>::size),
|
||||
LhsPacketSize = Vectorizable ? ei_packet_traits<LhsScalar>::size : 1,
|
||||
RhsPacketSize = Vectorizable ? ei_packet_traits<RhsScalar>::size : 1,
|
||||
ResPacketSize = Vectorizable ? ei_packet_traits<ResScalar>::size : 1
|
||||
};
|
||||
|
||||
typedef typename ei_packet_traits<LhsScalar>::type _LhsPacket;
|
||||
typedef typename ei_packet_traits<RhsScalar>::type _RhsPacket;
|
||||
typedef typename ei_packet_traits<ResScalar>::type _ResPacket;
|
||||
|
||||
typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
|
||||
typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
|
||||
typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
|
||||
|
||||
EIGEN_DONT_INLINE static void run(
|
||||
Index rows, Index cols,
|
||||
const LhsScalar* lhs, Index lhsStride,
|
||||
const RhsScalar* rhs, Index rhsIncr,
|
||||
ResScalar* res, Index resIncr,
|
||||
ResScalar alpha)
|
||||
{
|
||||
EIGEN_UNUSED_VARIABLE(rhsIncr);
|
||||
ei_internal_assert(rhsIncr==1);
|
||||
#ifdef _EIGEN_ACCUMULATE_PACKETS
|
||||
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
|
||||
#endif
|
||||
|
||||
#define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) {\
|
||||
Packet b = ei_pload(&rhs[j]); \
|
||||
ptmp0 = cj.pmadd(EIGEN_CAT(ei_ploa,A0) (&lhs0[j]), b, ptmp0); \
|
||||
ptmp1 = cj.pmadd(EIGEN_CAT(ei_ploa,A13)(&lhs1[j]), b, ptmp1); \
|
||||
ptmp2 = cj.pmadd(EIGEN_CAT(ei_ploa,A2) (&lhs2[j]), b, ptmp2); \
|
||||
ptmp3 = cj.pmadd(EIGEN_CAT(ei_ploa,A13)(&lhs3[j]), b, ptmp3); }
|
||||
RhsPacket b = ei_pload<RhsPacket>(&rhs[j]); \
|
||||
ptmp0 = pcj.pmadd(EIGEN_CAT(ei_ploa,A0) <LhsPacket>(&lhs0[j]), b, ptmp0); \
|
||||
ptmp1 = pcj.pmadd(EIGEN_CAT(ei_ploa,A13)<LhsPacket>(&lhs1[j]), b, ptmp1); \
|
||||
ptmp2 = pcj.pmadd(EIGEN_CAT(ei_ploa,A2) <LhsPacket>(&lhs2[j]), b, ptmp2); \
|
||||
ptmp3 = pcj.pmadd(EIGEN_CAT(ei_ploa,A13)<LhsPacket>(&lhs3[j]), b, ptmp3); }
|
||||
|
||||
ei_conj_helper<ConjugateLhs,ConjugateRhs> cj;
|
||||
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||
const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
|
||||
ei_conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
|
||||
ei_conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
|
||||
|
||||
enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 };
|
||||
const Index rowsAtOnce = 4;
|
||||
const Index peels = 2;
|
||||
const Index PacketAlignedMask = PacketSize-1;
|
||||
const Index PeelAlignedMask = PacketSize*peels-1;
|
||||
const Index size = rhsSize;
|
||||
const Index RhsPacketAlignedMask = RhsPacketSize-1;
|
||||
const Index LhsPacketAlignedMask = LhsPacketSize-1;
|
||||
const Index PeelAlignedMask = RhsPacketSize*peels-1;
|
||||
const Index depth = cols;
|
||||
|
||||
// How many coeffs of the result do we have to skip to be aligned.
|
||||
// Here we assume data are at least aligned on the base scalar type
|
||||
// if that's not the case then vectorization is discarded, see below.
|
||||
Index alignedStart = ei_first_aligned(rhs, size);
|
||||
Index alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0;
|
||||
Index alignedStart = ei_first_aligned(rhs, depth);
|
||||
Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
|
||||
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
|
||||
|
||||
const Index alignmentStep = PacketSize>1 ? (PacketSize - lhsStride % PacketSize) & PacketAlignedMask : 0;
|
||||
const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
|
||||
Index alignmentPattern = alignmentStep==0 ? AllAligned
|
||||
: alignmentStep==(PacketSize/2) ? EvenAligned
|
||||
: FirstAligned;
|
||||
: alignmentStep==(LhsPacketSize/2) ? EvenAligned
|
||||
: FirstAligned;
|
||||
|
||||
// we cannot assume the first element is aligned because of sub-matrices
|
||||
const Index lhsAlignmentOffset = ei_first_aligned(lhs,size);
|
||||
const Index lhsAlignmentOffset = ei_first_aligned(lhs,depth);
|
||||
|
||||
// find how many rows do we have to skip to be aligned with rhs (if possible)
|
||||
Index skipRows = 0;
|
||||
// if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
|
||||
if( (size_t(lhs)%sizeof(RealScalar)) || (size_t(rhs)%sizeof(RealScalar)) )
|
||||
if( (sizeof(LhsScalar)!=sizeof(RhsScalar)) || (size_t(lhs)%sizeof(LhsScalar)) || (size_t(rhs)%sizeof(RhsScalar)) )
|
||||
{
|
||||
alignedSize = 0;
|
||||
alignedStart = 0;
|
||||
}
|
||||
else if (PacketSize>1)
|
||||
else if (LhsPacketSize>1)
|
||||
{
|
||||
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || size<PacketSize);
|
||||
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize);
|
||||
|
||||
while (skipRows<PacketSize &&
|
||||
alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%PacketSize))
|
||||
while (skipRows<LhsPacketSize &&
|
||||
alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%LhsPacketSize))
|
||||
++skipRows;
|
||||
if (skipRows==PacketSize)
|
||||
if (skipRows==LhsPacketSize)
|
||||
{
|
||||
// nothing can be aligned, no need to skip any column
|
||||
alignmentPattern = NoneAligned;
|
||||
@@ -317,38 +384,46 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
|
||||
}
|
||||
else
|
||||
{
|
||||
skipRows = std::min(skipRows,Index(res.size()));
|
||||
skipRows = std::min(skipRows,Index(rows));
|
||||
// note that the skiped columns are processed later.
|
||||
}
|
||||
ei_internal_assert( alignmentPattern==NoneAligned
|
||||
|| PacketSize==1
|
||||
|| (skipRows + rowsAtOnce >= res.size())
|
||||
|| PacketSize > rhsSize
|
||||
|| (size_t(lhs+alignedStart+lhsStride*skipRows)%sizeof(Packet))==0);
|
||||
|| LhsPacketSize==1
|
||||
|| (skipRows + rowsAtOnce >= rows)
|
||||
|| LhsPacketSize > depth
|
||||
|| (size_t(lhs+alignedStart+lhsStride*skipRows)%sizeof(LhsPacket))==0);
|
||||
}
|
||||
else if(Vectorizable)
|
||||
{
|
||||
alignedStart = 0;
|
||||
alignedSize = depth;
|
||||
alignmentPattern = AllAligned;
|
||||
}
|
||||
|
||||
Index offset1 = (FirstAligned && alignmentStep==1?3:1);
|
||||
Index offset3 = (FirstAligned && alignmentStep==1?1:3);
|
||||
|
||||
Index rowBound = ((res.size()-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
|
||||
Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
|
||||
for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
|
||||
{
|
||||
Scalar tmp0 = Scalar(0), tmp1 = Scalar(0), tmp2 = Scalar(0), tmp3 = Scalar(0);
|
||||
EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0);
|
||||
ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
|
||||
|
||||
// this helps the compiler generating good binary code
|
||||
const Scalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
|
||||
*lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride;
|
||||
const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
|
||||
*lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride;
|
||||
|
||||
if (PacketSize>1)
|
||||
if (Vectorizable)
|
||||
{
|
||||
/* explicit vectorization */
|
||||
Packet ptmp0 = ei_pset1(Scalar(0)), ptmp1 = ei_pset1(Scalar(0)), ptmp2 = ei_pset1(Scalar(0)), ptmp3 = ei_pset1(Scalar(0));
|
||||
ResPacket ptmp0 = ei_pset1<ResPacket>(ResScalar(0)), ptmp1 = ei_pset1<ResPacket>(ResScalar(0)),
|
||||
ptmp2 = ei_pset1<ResPacket>(ResScalar(0)), ptmp3 = ei_pset1<ResPacket>(ResScalar(0));
|
||||
|
||||
// process initial unaligned coeffs
|
||||
// FIXME this loop get vectorized by the compiler !
|
||||
for (Index j=0; j<alignedStart; ++j)
|
||||
{
|
||||
Scalar b = rhs[j];
|
||||
RhsScalar b = rhs[j];
|
||||
tmp0 += cj.pmul(lhs0[j],b); tmp1 += cj.pmul(lhs1[j],b);
|
||||
tmp2 += cj.pmul(lhs2[j],b); tmp3 += cj.pmul(lhs3[j],b);
|
||||
}
|
||||
@@ -358,11 +433,11 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
|
||||
switch(alignmentPattern)
|
||||
{
|
||||
case AllAligned:
|
||||
for (Index j = alignedStart; j<alignedSize; j+=PacketSize)
|
||||
for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
|
||||
_EIGEN_ACCUMULATE_PACKETS(d,d,d);
|
||||
break;
|
||||
case EvenAligned:
|
||||
for (Index j = alignedStart; j<alignedSize; j+=PacketSize)
|
||||
for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
|
||||
_EIGEN_ACCUMULATE_PACKETS(d,du,d);
|
||||
break;
|
||||
case FirstAligned:
|
||||
@@ -374,38 +449,38 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
|
||||
* overlaping the desired unaligned packet. This is *much* more efficient
|
||||
* than basic unaligned loads.
|
||||
*/
|
||||
Packet A01, A02, A03, b, A11, A12, A13;
|
||||
A01 = ei_pload(&lhs1[alignedStart-1]);
|
||||
A02 = ei_pload(&lhs2[alignedStart-2]);
|
||||
A03 = ei_pload(&lhs3[alignedStart-3]);
|
||||
LhsPacket A01, A02, A03, A11, A12, A13;
|
||||
A01 = ei_pload<LhsPacket>(&lhs1[alignedStart-1]);
|
||||
A02 = ei_pload<LhsPacket>(&lhs2[alignedStart-2]);
|
||||
A03 = ei_pload<LhsPacket>(&lhs3[alignedStart-3]);
|
||||
|
||||
for (Index j = alignedStart; j<peeledSize; j+=peels*PacketSize)
|
||||
for (Index j = alignedStart; j<peeledSize; j+=peels*RhsPacketSize)
|
||||
{
|
||||
b = ei_pload(&rhs[j]);
|
||||
A11 = ei_pload(&lhs1[j-1+PacketSize]); ei_palign<1>(A01,A11);
|
||||
A12 = ei_pload(&lhs2[j-2+PacketSize]); ei_palign<2>(A02,A12);
|
||||
A13 = ei_pload(&lhs3[j-3+PacketSize]); ei_palign<3>(A03,A13);
|
||||
RhsPacket b = ei_pload<RhsPacket>(&rhs[j]);
|
||||
A11 = ei_pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); ei_palign<1>(A01,A11);
|
||||
A12 = ei_pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); ei_palign<2>(A02,A12);
|
||||
A13 = ei_pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); ei_palign<3>(A03,A13);
|
||||
|
||||
ptmp0 = cj.pmadd(ei_pload (&lhs0[j]), b, ptmp0);
|
||||
ptmp1 = cj.pmadd(A01, b, ptmp1);
|
||||
A01 = ei_pload(&lhs1[j-1+2*PacketSize]); ei_palign<1>(A11,A01);
|
||||
ptmp2 = cj.pmadd(A02, b, ptmp2);
|
||||
A02 = ei_pload(&lhs2[j-2+2*PacketSize]); ei_palign<2>(A12,A02);
|
||||
ptmp3 = cj.pmadd(A03, b, ptmp3);
|
||||
A03 = ei_pload(&lhs3[j-3+2*PacketSize]); ei_palign<3>(A13,A03);
|
||||
ptmp0 = pcj.pmadd(ei_pload<LhsPacket>(&lhs0[j]), b, ptmp0);
|
||||
ptmp1 = pcj.pmadd(A01, b, ptmp1);
|
||||
A01 = ei_pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); ei_palign<1>(A11,A01);
|
||||
ptmp2 = pcj.pmadd(A02, b, ptmp2);
|
||||
A02 = ei_pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); ei_palign<2>(A12,A02);
|
||||
ptmp3 = pcj.pmadd(A03, b, ptmp3);
|
||||
A03 = ei_pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); ei_palign<3>(A13,A03);
|
||||
|
||||
b = ei_pload(&rhs[j+PacketSize]);
|
||||
ptmp0 = cj.pmadd(ei_pload (&lhs0[j+PacketSize]), b, ptmp0);
|
||||
ptmp1 = cj.pmadd(A11, b, ptmp1);
|
||||
ptmp2 = cj.pmadd(A12, b, ptmp2);
|
||||
ptmp3 = cj.pmadd(A13, b, ptmp3);
|
||||
b = ei_pload<RhsPacket>(&rhs[j+RhsPacketSize]);
|
||||
ptmp0 = pcj.pmadd(ei_pload<LhsPacket>(&lhs0[j+LhsPacketSize]), b, ptmp0);
|
||||
ptmp1 = pcj.pmadd(A11, b, ptmp1);
|
||||
ptmp2 = pcj.pmadd(A12, b, ptmp2);
|
||||
ptmp3 = pcj.pmadd(A13, b, ptmp3);
|
||||
}
|
||||
}
|
||||
for (Index j = peeledSize; j<alignedSize; j+=PacketSize)
|
||||
for (Index j = peeledSize; j<alignedSize; j+=RhsPacketSize)
|
||||
_EIGEN_ACCUMULATE_PACKETS(d,du,du);
|
||||
break;
|
||||
default:
|
||||
for (Index j = alignedStart; j<alignedSize; j+=PacketSize)
|
||||
for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
|
||||
_EIGEN_ACCUMULATE_PACKETS(du,du,du);
|
||||
break;
|
||||
}
|
||||
@@ -418,25 +493,28 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
|
||||
|
||||
// process remaining coeffs (or all if no explicit vectorization)
|
||||
// FIXME this loop get vectorized by the compiler !
|
||||
for (Index j=alignedSize; j<size; ++j)
|
||||
for (Index j=alignedSize; j<depth; ++j)
|
||||
{
|
||||
Scalar b = rhs[j];
|
||||
RhsScalar b = rhs[j];
|
||||
tmp0 += cj.pmul(lhs0[j],b); tmp1 += cj.pmul(lhs1[j],b);
|
||||
tmp2 += cj.pmul(lhs2[j],b); tmp3 += cj.pmul(lhs3[j],b);
|
||||
}
|
||||
res[i] += alpha*tmp0; res[i+offset1] += alpha*tmp1; res[i+2] += alpha*tmp2; res[i+offset3] += alpha*tmp3;
|
||||
res[i*resIncr] += alpha*tmp0;
|
||||
res[(i+offset1)*resIncr] += alpha*tmp1;
|
||||
res[(i+2)*resIncr] += alpha*tmp2;
|
||||
res[(i+offset3)*resIncr] += alpha*tmp3;
|
||||
}
|
||||
|
||||
// process remaining first and last rows (at most columnsAtOnce-1)
|
||||
Index end = res.size();
|
||||
Index end = rows;
|
||||
Index start = rowBound;
|
||||
do
|
||||
{
|
||||
for (Index i=start; i<end; ++i)
|
||||
{
|
||||
Scalar tmp0 = Scalar(0);
|
||||
Packet ptmp0 = ei_pset1(tmp0);
|
||||
const Scalar* lhs0 = lhs + i*lhsStride;
|
||||
EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0);
|
||||
ResPacket ptmp0 = ei_pset1<ResPacket>(tmp0);
|
||||
const LhsScalar* lhs0 = lhs + i*lhsStride;
|
||||
// process first unaligned result's coeffs
|
||||
// FIXME this loop get vectorized by the compiler !
|
||||
for (Index j=0; j<alignedStart; ++j)
|
||||
@@ -445,20 +523,20 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
|
||||
if (alignedSize>alignedStart)
|
||||
{
|
||||
// process aligned rhs coeffs
|
||||
if ((size_t(lhs0+alignedStart)%sizeof(Packet))==0)
|
||||
for (Index j = alignedStart;j<alignedSize;j+=PacketSize)
|
||||
ptmp0 = cj.pmadd(ei_pload(&lhs0[j]), ei_pload(&rhs[j]), ptmp0);
|
||||
if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0)
|
||||
for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
|
||||
ptmp0 = pcj.pmadd(ei_pload<LhsPacket>(&lhs0[j]), ei_pload<RhsPacket>(&rhs[j]), ptmp0);
|
||||
else
|
||||
for (Index j = alignedStart;j<alignedSize;j+=PacketSize)
|
||||
ptmp0 = cj.pmadd(ei_ploadu(&lhs0[j]), ei_pload(&rhs[j]), ptmp0);
|
||||
for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
|
||||
ptmp0 = pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[j]), ei_pload<RhsPacket>(&rhs[j]), ptmp0);
|
||||
tmp0 += ei_predux(ptmp0);
|
||||
}
|
||||
|
||||
// process remaining scalars
|
||||
// FIXME this loop get vectorized by the compiler !
|
||||
for (Index j=alignedSize; j<size; ++j)
|
||||
for (Index j=alignedSize; j<depth; ++j)
|
||||
tmp0 += cj.pmul(lhs0[j], rhs[j]);
|
||||
res[i] += alpha*tmp0;
|
||||
res[i*resIncr] += alpha*tmp0;
|
||||
}
|
||||
if (skipRows)
|
||||
{
|
||||
@@ -468,9 +546,10 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
|
||||
}
|
||||
else
|
||||
break;
|
||||
} while(PacketSize>1);
|
||||
} while(Vectorizable);
|
||||
|
||||
#undef _EIGEN_ACCUMULATE_PACKETS
|
||||
}
|
||||
};
|
||||
|
||||
#endif // EIGEN_GENERAL_MATRIX_VECTOR_H
|
||||
|
||||
@@ -26,10 +26,9 @@
|
||||
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
|
||||
|
||||
// pack a selfadjoint block diagonal for use with the gebp_kernel
|
||||
template<typename Scalar, typename Index, int mr, int StorageOrder>
|
||||
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
|
||||
struct ei_symm_pack_lhs
|
||||
{
|
||||
enum { PacketSize = ei_packet_traits<Scalar>::size };
|
||||
template<int BlockRows> inline
|
||||
void pack(Scalar* blockA, const ei_const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
|
||||
{
|
||||
@@ -59,16 +58,16 @@ struct ei_symm_pack_lhs
|
||||
{
|
||||
ei_const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
|
||||
Index count = 0;
|
||||
Index peeled_mc = (rows/mr)*mr;
|
||||
for(Index i=0; i<peeled_mc; i+=mr)
|
||||
Index peeled_mc = (rows/Pack1)*Pack1;
|
||||
for(Index i=0; i<peeled_mc; i+=Pack1)
|
||||
{
|
||||
pack<mr>(blockA, lhs, cols, i, count);
|
||||
pack<Pack1>(blockA, lhs, cols, i, count);
|
||||
}
|
||||
|
||||
if(rows-peeled_mc>=PacketSize)
|
||||
if(rows-peeled_mc>=Pack2)
|
||||
{
|
||||
pack<PacketSize>(blockA, lhs, cols, peeled_mc, count);
|
||||
peeled_mc += PacketSize;
|
||||
pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
|
||||
peeled_mc += Pack2;
|
||||
}
|
||||
|
||||
// do the same with mr==1
|
||||
@@ -89,7 +88,7 @@ template<typename Scalar, typename Index, int nr, int StorageOrder>
|
||||
struct ei_symm_pack_rhs
|
||||
{
|
||||
enum { PacketSize = ei_packet_traits<Scalar>::size };
|
||||
void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Scalar alpha, Index rows, Index cols, Index k2)
|
||||
void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
|
||||
{
|
||||
Index end_k = k2 + rows;
|
||||
Index count = 0;
|
||||
@@ -101,12 +100,12 @@ struct ei_symm_pack_rhs
|
||||
{
|
||||
for(Index k=k2; k<end_k; k++)
|
||||
{
|
||||
blockB[count+0] = alpha*rhs(k,j2+0);
|
||||
blockB[count+1] = alpha*rhs(k,j2+1);
|
||||
blockB[count+0] = rhs(k,j2+0);
|
||||
blockB[count+1] = rhs(k,j2+1);
|
||||
if (nr==4)
|
||||
{
|
||||
blockB[count+2] = alpha*rhs(k,j2+2);
|
||||
blockB[count+3] = alpha*rhs(k,j2+3);
|
||||
blockB[count+2] = rhs(k,j2+2);
|
||||
blockB[count+3] = rhs(k,j2+3);
|
||||
}
|
||||
count += nr;
|
||||
}
|
||||
@@ -119,12 +118,12 @@ struct ei_symm_pack_rhs
|
||||
// transpose
|
||||
for(Index k=k2; k<j2; k++)
|
||||
{
|
||||
blockB[count+0] = alpha*ei_conj(rhs(j2+0,k));
|
||||
blockB[count+1] = alpha*ei_conj(rhs(j2+1,k));
|
||||
blockB[count+0] = ei_conj(rhs(j2+0,k));
|
||||
blockB[count+1] = ei_conj(rhs(j2+1,k));
|
||||
if (nr==4)
|
||||
{
|
||||
blockB[count+2] = alpha*ei_conj(rhs(j2+2,k));
|
||||
blockB[count+3] = alpha*ei_conj(rhs(j2+3,k));
|
||||
blockB[count+2] = ei_conj(rhs(j2+2,k));
|
||||
blockB[count+3] = ei_conj(rhs(j2+3,k));
|
||||
}
|
||||
count += nr;
|
||||
}
|
||||
@@ -134,25 +133,25 @@ struct ei_symm_pack_rhs
|
||||
{
|
||||
// normal
|
||||
for (Index w=0 ; w<h; ++w)
|
||||
blockB[count+w] = alpha*rhs(k,j2+w);
|
||||
blockB[count+w] = rhs(k,j2+w);
|
||||
|
||||
blockB[count+h] = alpha*rhs(k,k);
|
||||
blockB[count+h] = ei_real(rhs(k,k));
|
||||
|
||||
// transpose
|
||||
for (Index w=h+1 ; w<nr; ++w)
|
||||
blockB[count+w] = alpha*ei_conj(rhs(j2+w,k));
|
||||
blockB[count+w] = ei_conj(rhs(j2+w,k));
|
||||
count += nr;
|
||||
++h;
|
||||
}
|
||||
// normal
|
||||
for(Index k=j2+nr; k<end_k; k++)
|
||||
{
|
||||
blockB[count+0] = alpha*rhs(k,j2+0);
|
||||
blockB[count+1] = alpha*rhs(k,j2+1);
|
||||
blockB[count+0] = rhs(k,j2+0);
|
||||
blockB[count+1] = rhs(k,j2+1);
|
||||
if (nr==4)
|
||||
{
|
||||
blockB[count+2] = alpha*rhs(k,j2+2);
|
||||
blockB[count+3] = alpha*rhs(k,j2+3);
|
||||
blockB[count+2] = rhs(k,j2+2);
|
||||
blockB[count+3] = rhs(k,j2+3);
|
||||
}
|
||||
count += nr;
|
||||
}
|
||||
@@ -163,12 +162,12 @@ struct ei_symm_pack_rhs
|
||||
{
|
||||
for(Index k=k2; k<end_k; k++)
|
||||
{
|
||||
blockB[count+0] = alpha*ei_conj(rhs(j2+0,k));
|
||||
blockB[count+1] = alpha*ei_conj(rhs(j2+1,k));
|
||||
blockB[count+0] = ei_conj(rhs(j2+0,k));
|
||||
blockB[count+1] = ei_conj(rhs(j2+1,k));
|
||||
if (nr==4)
|
||||
{
|
||||
blockB[count+2] = alpha*ei_conj(rhs(j2+2,k));
|
||||
blockB[count+3] = alpha*ei_conj(rhs(j2+3,k));
|
||||
blockB[count+2] = ei_conj(rhs(j2+2,k));
|
||||
blockB[count+3] = ei_conj(rhs(j2+3,k));
|
||||
}
|
||||
count += nr;
|
||||
}
|
||||
@@ -181,13 +180,13 @@ struct ei_symm_pack_rhs
|
||||
Index half = std::min(end_k,j2);
|
||||
for(Index k=k2; k<half; k++)
|
||||
{
|
||||
blockB[count] = alpha*ei_conj(rhs(j2,k));
|
||||
blockB[count] = ei_conj(rhs(j2,k));
|
||||
count += 1;
|
||||
}
|
||||
|
||||
if(half==j2 && half<k2+rows)
|
||||
{
|
||||
blockB[count] = alpha*ei_real(rhs(j2,j2));
|
||||
blockB[count] = ei_real(rhs(j2,j2));
|
||||
count += 1;
|
||||
}
|
||||
else
|
||||
@@ -196,7 +195,7 @@ struct ei_symm_pack_rhs
|
||||
// normal
|
||||
for(Index k=half+1; k<k2+rows; k++)
|
||||
{
|
||||
blockB[count] = alpha*rhs(k,j2);
|
||||
blockB[count] = rhs(k,j2);
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
@@ -253,12 +252,9 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
|
||||
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
|
||||
ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
|
||||
|
||||
if (ConjugateRhs)
|
||||
alpha = ei_conj(alpha);
|
||||
typedef ei_gebp_traits<Scalar,Scalar> Traits;
|
||||
|
||||
typedef ei_product_blocking_traits<Scalar> Blocking;
|
||||
|
||||
Index kc = size; // cache block size along the K direction
|
||||
Index kc = size; // cache block size along the K direction
|
||||
Index mc = rows; // cache block size along the M direction
|
||||
Index nc = cols; // cache block size along the N direction
|
||||
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
|
||||
@@ -266,14 +262,15 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
|
||||
kc = std::min(kc,mc);
|
||||
|
||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = sizeW + kc*cols;
|
||||
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
|
||||
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
|
||||
Scalar* blockB = allocatedBlockB + sizeW;
|
||||
|
||||
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp_kernel;
|
||||
ei_symm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder> pack_lhs;
|
||||
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr,RhsStorageOrder> pack_rhs;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
|
||||
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
||||
ei_symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
||||
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
|
||||
|
||||
for(Index k2=0; k2<size; k2+=kc)
|
||||
{
|
||||
@@ -282,7 +279,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
|
||||
// we have selected one row panel of rhs and one column panel of lhs
|
||||
// pack rhs's panel into a sequential chunk of memory
|
||||
// and expand each coeff to a constant packet for further reuse
|
||||
pack_rhs(blockB, &rhs(k2,0), rhsStride, alpha, actual_kc, cols);
|
||||
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
|
||||
|
||||
// the select lhs's panel has to be split in three different parts:
|
||||
// 1 - the transposed panel above the diagonal block => transposed packed copy
|
||||
@@ -294,7 +291,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
|
||||
// transposed packed copy
|
||||
pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);
|
||||
|
||||
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols);
|
||||
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
||||
}
|
||||
// the block diagonal
|
||||
{
|
||||
@@ -302,16 +299,16 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
|
||||
// symmetric packed copy
|
||||
pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
|
||||
|
||||
gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols);
|
||||
gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
||||
}
|
||||
|
||||
for(Index i2=k2+kc; i2<size; i2+=mc)
|
||||
{
|
||||
const Index actual_mc = std::min(i2+mc,size)-i2;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder,false>()
|
||||
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
|
||||
(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
|
||||
|
||||
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols);
|
||||
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -338,10 +335,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
|
||||
|
||||
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
|
||||
|
||||
if (ConjugateRhs)
|
||||
alpha = ei_conj(alpha);
|
||||
|
||||
typedef ei_product_blocking_traits<Scalar> Blocking;
|
||||
typedef ei_gebp_traits<Scalar,Scalar> Traits;
|
||||
|
||||
Index kc = size; // cache block size along the K direction
|
||||
Index mc = rows; // cache block size along the M direction
|
||||
@@ -349,19 +343,20 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
|
||||
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
|
||||
|
||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = sizeW + kc*cols;
|
||||
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
|
||||
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
|
||||
Scalar* blockB = allocatedBlockB + sizeW;
|
||||
|
||||
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp_kernel;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder> pack_lhs;
|
||||
ei_symm_pack_rhs<Scalar, Index, Blocking::nr,RhsStorageOrder> pack_rhs;
|
||||
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
||||
ei_symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
|
||||
|
||||
for(Index k2=0; k2<size; k2+=kc)
|
||||
{
|
||||
const Index actual_kc = std::min(k2+kc,size)-k2;
|
||||
|
||||
pack_rhs(blockB, _rhs, rhsStride, alpha, actual_kc, cols, k2);
|
||||
pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
|
||||
|
||||
// => GEPP
|
||||
for(Index i2=0; i2<rows; i2+=mc)
|
||||
@@ -369,7 +364,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
|
||||
const Index actual_mc = std::min(i2+mc,rows)-i2;
|
||||
pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
|
||||
|
||||
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols);
|
||||
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -46,8 +46,11 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
|
||||
FirstTriangular = IsRowMajor == IsLower
|
||||
};
|
||||
|
||||
ei_conj_helper<NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0;
|
||||
ei_conj_helper<NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
|
||||
ei_conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0;
|
||||
ei_conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
|
||||
|
||||
ei_conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0;
|
||||
ei_conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;
|
||||
|
||||
Scalar cjAlpha = ConjugateRhs ? ei_conj(alpha) : alpha;
|
||||
|
||||
@@ -74,14 +77,14 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
|
||||
register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
|
||||
|
||||
Scalar t0 = cjAlpha * rhs[j];
|
||||
Packet ptmp0 = ei_pset1(t0);
|
||||
Packet ptmp0 = ei_pset1<Packet>(t0);
|
||||
Scalar t1 = cjAlpha * rhs[j+1];
|
||||
Packet ptmp1 = ei_pset1(t1);
|
||||
Packet ptmp1 = ei_pset1<Packet>(t1);
|
||||
|
||||
Scalar t2 = 0;
|
||||
Packet ptmp2 = ei_pset1(t2);
|
||||
Packet ptmp2 = ei_pset1<Packet>(t2);
|
||||
Scalar t3 = 0;
|
||||
Packet ptmp3 = ei_pset1(t3);
|
||||
Packet ptmp3 = ei_pset1<Packet>(t3);
|
||||
|
||||
size_t starti = FirstTriangular ? 0 : j+2;
|
||||
size_t endi = FirstTriangular ? j : size;
|
||||
@@ -116,14 +119,14 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
|
||||
Scalar* EIGEN_RESTRICT resIt = res + alignedStart;
|
||||
for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize)
|
||||
{
|
||||
Packet A0i = ei_ploadu(a0It); a0It += PacketSize;
|
||||
Packet A1i = ei_ploadu(a1It); a1It += PacketSize;
|
||||
Packet Bi = ei_ploadu(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases
|
||||
Packet Xi = ei_pload (resIt);
|
||||
Packet A0i = ei_ploadu<Packet>(a0It); a0It += PacketSize;
|
||||
Packet A1i = ei_ploadu<Packet>(a1It); a1It += PacketSize;
|
||||
Packet Bi = ei_ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases
|
||||
Packet Xi = ei_pload <Packet>(resIt);
|
||||
|
||||
Xi = cj0.pmadd(A0i,ptmp0, cj0.pmadd(A1i,ptmp1,Xi));
|
||||
ptmp2 = cj1.pmadd(A0i, Bi, ptmp2);
|
||||
ptmp3 = cj1.pmadd(A1i, Bi, ptmp3);
|
||||
Xi = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi));
|
||||
ptmp2 = pcj1.pmadd(A0i, Bi, ptmp2);
|
||||
ptmp3 = pcj1.pmadd(A1i, Bi, ptmp3);
|
||||
ei_pstore(resIt,Xi); resIt += PacketSize;
|
||||
}
|
||||
for (size_t i=alignedEnd; i<endi; i++)
|
||||
@@ -198,5 +201,47 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int RhsMode>
|
||||
struct ei_traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> >
|
||||
: ei_traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> >
|
||||
{};
|
||||
|
||||
template<typename Lhs, typename Rhs, int RhsMode>
|
||||
struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
|
||||
: public ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs >
|
||||
{
|
||||
EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
|
||||
|
||||
enum {
|
||||
RhsUpLo = RhsMode&(Upper|Lower)
|
||||
};
|
||||
|
||||
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
|
||||
|
||||
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
|
||||
{
|
||||
ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
|
||||
|
||||
const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
|
||||
const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
|
||||
|
||||
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
|
||||
* RhsBlasTraits::extractScalarFactor(m_rhs);
|
||||
|
||||
ei_assert(dst.innerStride()==1 && "not implemented yet");
|
||||
|
||||
// transpose the product
|
||||
ei_product_selfadjoint_vector<Scalar, Index, (ei_traits<_ActualRhsType>::Flags&RowMajorBit) ? ColMajor : RowMajor, int(RhsUpLo)==Upper ? Lower : Upper,
|
||||
bool(RhsBlasTraits::NeedToConjugate), bool(LhsBlasTraits::NeedToConjugate)>
|
||||
(
|
||||
rhs.rows(), // size
|
||||
&rhs.coeff(0,0), rhs.outerStride(), // lhs info
|
||||
&lhs.coeff(0), lhs.innerStride(), // rhs info
|
||||
&dst.coeffRef(0), // result info
|
||||
actualAlpha // scale factor
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
**********************************************************************/
|
||||
|
||||
// forward declarations (defined at the end of this file)
|
||||
template<typename Scalar, typename Index, int mr, int nr, typename Conj, int UpLo>
|
||||
template<typename Scalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
|
||||
struct ei_sybb_kernel;
|
||||
|
||||
/* Optimized selfadjoint product (_SYRK) */
|
||||
@@ -65,38 +65,42 @@ struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpL
|
||||
{
|
||||
ei_const_blas_data_mapper<Scalar, Index, MatStorageOrder> mat(_mat,matStride);
|
||||
|
||||
if(AAT)
|
||||
alpha = ei_conj(alpha);
|
||||
// if(AAT)
|
||||
// alpha = ei_conj(alpha);
|
||||
|
||||
typedef ei_product_blocking_traits<Scalar> Blocking;
|
||||
typedef ei_gebp_traits<Scalar,Scalar> Traits;
|
||||
|
||||
Index kc = depth; // cache block size along the K direction
|
||||
Index mc = size; // cache block size along the M direction
|
||||
Index nc = size; // cache block size along the N direction
|
||||
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
|
||||
// !!! mc must be a multiple of nr:
|
||||
if(mc>Blocking::nr)
|
||||
mc = (mc/Blocking::nr)*Blocking::nr;
|
||||
if(mc>Traits::nr)
|
||||
mc = (mc/Traits::nr)*Traits::nr;
|
||||
|
||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*size;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = sizeW + kc*size;
|
||||
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
|
||||
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
|
||||
Scalar* blockB = allocatedBlockB + sizeW;
|
||||
|
||||
// note that the actual rhs is the transpose/adjoint of mat
|
||||
typedef ei_conj_helper<NumTraits<Scalar>::IsComplex && !AAT, NumTraits<Scalar>::IsComplex && AAT> Conj;
|
||||
enum {
|
||||
ConjLhs = NumTraits<Scalar>::IsComplex && !AAT,
|
||||
ConjRhs = NumTraits<Scalar>::IsComplex && AAT
|
||||
};
|
||||
|
||||
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, Conj> gebp_kernel;
|
||||
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr,MatStorageOrder==RowMajor ? ColMajor : RowMajor> pack_rhs;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,MatStorageOrder, false> pack_lhs;
|
||||
ei_sybb_kernel<Scalar, Index, Blocking::mr, Blocking::nr, Conj, UpLo> sybb;
|
||||
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs> gebp_kernel;
|
||||
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,MatStorageOrder==RowMajor ? ColMajor : RowMajor> pack_rhs;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, MatStorageOrder, false> pack_lhs;
|
||||
ei_sybb_kernel<Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs, UpLo> sybb;
|
||||
|
||||
for(Index k2=0; k2<depth; k2+=kc)
|
||||
{
|
||||
const Index actual_kc = std::min(k2+kc,depth)-k2;
|
||||
|
||||
// note that the actual rhs is the transpose/adjoint of mat
|
||||
pack_rhs(blockB, &mat(0,k2), matStride, alpha, actual_kc, size);
|
||||
pack_rhs(blockB, &mat(0,k2), matStride, actual_kc, size);
|
||||
|
||||
for(Index i2=0; i2<size; i2+=mc)
|
||||
{
|
||||
@@ -109,15 +113,15 @@ struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpL
|
||||
// 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel
|
||||
// 3 - after the diagonal => processed with gebp or skipped
|
||||
if (UpLo==Lower)
|
||||
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, std::min(size,i2),
|
||||
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, std::min(size,i2), alpha,
|
||||
-1, -1, 0, 0, allocatedBlockB);
|
||||
|
||||
sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, allocatedBlockB);
|
||||
sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB);
|
||||
|
||||
if (UpLo==Upper)
|
||||
{
|
||||
Index j2 = i2+actual_mc;
|
||||
gebp_kernel(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, std::max(Index(0),size-j2),
|
||||
gebp_kernel(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, std::max(Index(0), size-j2), alpha,
|
||||
-1, -1, 0, 0, allocatedBlockB);
|
||||
}
|
||||
}
|
||||
@@ -163,16 +167,16 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
|
||||
// while the selfadjoint block overlapping the diagonal is evaluated into a
|
||||
// small temporary buffer which is then accumulated into the result using a
|
||||
// triangular traversal.
|
||||
template<typename Scalar, typename Index, int mr, int nr, typename Conj, int UpLo>
|
||||
template<typename Scalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
|
||||
struct ei_sybb_kernel
|
||||
{
|
||||
enum {
|
||||
PacketSize = ei_packet_traits<Scalar>::size,
|
||||
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
|
||||
};
|
||||
void operator()(Scalar* res, Index resStride, const Scalar* blockA, const Scalar* blockB, Index size, Index depth, Scalar* workspace)
|
||||
void operator()(Scalar* res, Index resStride, const Scalar* blockA, const Scalar* blockB, Index size, Index depth, Scalar alpha, Scalar* workspace)
|
||||
{
|
||||
ei_gebp_kernel<Scalar, Index, mr, nr, Conj> gebp_kernel;
|
||||
ei_gebp_kernel<Scalar, Scalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
|
||||
Matrix<Scalar,BlockSize,BlockSize,ColMajor> buffer;
|
||||
|
||||
// let's process the block per panel of actual_mc x BlockSize,
|
||||
@@ -183,14 +187,15 @@ struct ei_sybb_kernel
|
||||
const Scalar* actual_b = blockB+j*depth;
|
||||
|
||||
if(UpLo==Upper)
|
||||
gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, -1, -1, 0, 0, workspace);
|
||||
gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha,
|
||||
-1, -1, 0, 0, workspace);
|
||||
|
||||
// selfadjoint micro block
|
||||
{
|
||||
Index i = j;
|
||||
buffer.setZero();
|
||||
// 1 - apply the kernel on the temporary buffer
|
||||
gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize,
|
||||
gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha,
|
||||
-1, -1, 0, 0, workspace);
|
||||
// 2 - triangular accumulation
|
||||
for(Index j1=0; j1<actualBlockSize; ++j1)
|
||||
@@ -205,7 +210,7 @@ struct ei_sybb_kernel
|
||||
if(UpLo==Lower)
|
||||
{
|
||||
Index i = j+actualBlockSize;
|
||||
gebp_kernel(res+j*resStride+i, resStride, blockA+depth*i, actual_b, size-i, depth, actualBlockSize,
|
||||
gebp_kernel(res+j*resStride+i, resStride, blockA+depth*i, actual_b, size-i, depth, actualBlockSize, alpha,
|
||||
-1, -1, 0, 0, workspace);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,7 +75,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
|
||||
Scalar alpha)
|
||||
{
|
||||
ei_product_triangular_matrix_matrix<Scalar, Index,
|
||||
(Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),
|
||||
(Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
|
||||
(!LhsIsTriangular),
|
||||
RhsStorageOrder==RowMajor ? ColMajor : RowMajor,
|
||||
ConjugateRhs,
|
||||
@@ -105,13 +105,11 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
|
||||
ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
|
||||
|
||||
if (ConjugateRhs)
|
||||
alpha = ei_conj(alpha);
|
||||
|
||||
typedef ei_product_blocking_traits<Scalar> Blocking;
|
||||
typedef ei_gebp_traits<Scalar,Scalar> Traits;
|
||||
enum {
|
||||
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Blocking::mr,Blocking::nr),
|
||||
IsLower = (Mode&Lower) == Lower
|
||||
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
|
||||
IsLower = (Mode&Lower) == Lower,
|
||||
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
|
||||
};
|
||||
|
||||
Index kc = depth; // cache block size along the K direction
|
||||
@@ -120,18 +118,21 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
|
||||
|
||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = sizeW + kc*cols;
|
||||
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
|
||||
// Scalar* allocatedBlockB = new Scalar[sizeB];
|
||||
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
|
||||
Scalar* blockB = allocatedBlockB + sizeW;
|
||||
|
||||
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
|
||||
triangularBuffer.setZero();
|
||||
triangularBuffer.diagonal().setOnes();
|
||||
if((Mode&ZeroDiag)==ZeroDiag)
|
||||
triangularBuffer.diagonal().setZero();
|
||||
else
|
||||
triangularBuffer.diagonal().setOnes();
|
||||
|
||||
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp_kernel;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder> pack_lhs;
|
||||
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr,RhsStorageOrder> pack_rhs;
|
||||
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
||||
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
|
||||
|
||||
for(Index k2=IsLower ? depth : 0;
|
||||
IsLower ? k2>0 : k2<depth;
|
||||
@@ -147,7 +148,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
k2 = k2+actual_kc-kc;
|
||||
}
|
||||
|
||||
pack_rhs(blockB, &rhs(actual_k2,0), rhsStride, alpha, actual_kc, cols);
|
||||
pack_rhs(blockB, &rhs(actual_k2,0), rhsStride, actual_kc, cols);
|
||||
|
||||
// the selected lhs's panel has to be split in three different parts:
|
||||
// 1 - the part which is above the diagonal block => skip it
|
||||
@@ -169,14 +170,14 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
// To this end we do an extra triangular copy to a small temporary buffer
|
||||
for (Index k=0;k<actualPanelWidth;++k)
|
||||
{
|
||||
if (!(Mode&UnitDiag))
|
||||
if (SetDiag)
|
||||
triangularBuffer.coeffRef(k,k) = lhs(startBlock+k,startBlock+k);
|
||||
for (Index i=IsLower ? k+1 : 0; IsLower ? i<actualPanelWidth : i<k; ++i)
|
||||
triangularBuffer.coeffRef(i,k) = lhs(startBlock+i,startBlock+k);
|
||||
}
|
||||
pack_lhs(blockA, triangularBuffer.data(), triangularBuffer.outerStride(), actualPanelWidth, actualPanelWidth);
|
||||
|
||||
gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols,
|
||||
gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols, alpha,
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset);
|
||||
|
||||
// GEBP with remaining micro panel
|
||||
@@ -186,7 +187,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
|
||||
pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget);
|
||||
|
||||
gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols,
|
||||
gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha,
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset);
|
||||
}
|
||||
}
|
||||
@@ -198,10 +199,10 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
for(Index i2=start; i2<end; i2+=mc)
|
||||
{
|
||||
const Index actual_mc = std::min(i2+mc,end)-i2;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder,false>()
|
||||
ei_gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
|
||||
(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
|
||||
|
||||
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols);
|
||||
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -231,13 +232,11 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
|
||||
ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
|
||||
|
||||
if (ConjugateRhs)
|
||||
alpha = ei_conj(alpha);
|
||||
|
||||
typedef ei_product_blocking_traits<Scalar> Blocking;
|
||||
typedef ei_gebp_traits<Scalar,Scalar> Traits;
|
||||
enum {
|
||||
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Blocking::mr,Blocking::nr),
|
||||
IsLower = (Mode&Lower) == Lower
|
||||
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
|
||||
IsLower = (Mode&Lower) == Lower,
|
||||
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
|
||||
};
|
||||
|
||||
Index kc = depth; // cache block size along the K direction
|
||||
@@ -246,18 +245,22 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
|
||||
|
||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = sizeW + kc*cols;
|
||||
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar,sizeB);
|
||||
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
|
||||
Scalar* blockB = allocatedBlockB + sizeW;
|
||||
|
||||
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
|
||||
triangularBuffer.setZero();
|
||||
triangularBuffer.diagonal().setOnes();
|
||||
if((Mode&ZeroDiag)==ZeroDiag)
|
||||
triangularBuffer.diagonal().setZero();
|
||||
else
|
||||
triangularBuffer.diagonal().setOnes();
|
||||
|
||||
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp_kernel;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder> pack_lhs;
|
||||
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr,RhsStorageOrder> pack_rhs;
|
||||
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr,RhsStorageOrder,true> pack_rhs_panel;
|
||||
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
||||
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
|
||||
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
|
||||
|
||||
for(Index k2=IsLower ? 0 : depth;
|
||||
IsLower ? k2<depth : k2>0;
|
||||
@@ -280,7 +283,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
|
||||
Scalar* geb = blockB+ts*ts;
|
||||
|
||||
pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, alpha, actual_kc, rs);
|
||||
pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, actual_kc, rs);
|
||||
|
||||
// pack the triangular part of the rhs padding the unrolled blocks with zeros
|
||||
if(ts>0)
|
||||
@@ -293,21 +296,21 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
|
||||
// general part
|
||||
pack_rhs_panel(blockB+j2*actual_kc,
|
||||
&rhs(actual_k2+panelOffset, actual_j2), rhsStride, alpha,
|
||||
&rhs(actual_k2+panelOffset, actual_j2), rhsStride,
|
||||
panelLength, actualPanelWidth,
|
||||
actual_kc, panelOffset);
|
||||
|
||||
// append the triangular part via a temporary buffer
|
||||
for (Index j=0;j<actualPanelWidth;++j)
|
||||
{
|
||||
if (!(Mode&UnitDiag))
|
||||
if (SetDiag)
|
||||
triangularBuffer.coeffRef(j,j) = rhs(actual_j2+j,actual_j2+j);
|
||||
for (Index k=IsLower ? j+1 : 0; IsLower ? k<actualPanelWidth : k<j; ++k)
|
||||
triangularBuffer.coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j);
|
||||
}
|
||||
|
||||
pack_rhs_panel(blockB+j2*actual_kc,
|
||||
triangularBuffer.data(), triangularBuffer.outerStride(), alpha,
|
||||
triangularBuffer.data(), triangularBuffer.outerStride(),
|
||||
actualPanelWidth, actualPanelWidth,
|
||||
actual_kc, j2);
|
||||
}
|
||||
@@ -330,6 +333,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
gebp_kernel(res+i2+(actual_k2+j2)*resStride, resStride,
|
||||
blockA, blockB+j2*actual_kc,
|
||||
actual_mc, panelLength, actualPanelWidth,
|
||||
alpha,
|
||||
actual_kc, actual_kc, // strides
|
||||
blockOffset, blockOffset,// offsets
|
||||
allocatedBlockB); // workspace
|
||||
@@ -337,6 +341,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
}
|
||||
gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride,
|
||||
blockA, geb, actual_mc, actual_kc, rs,
|
||||
alpha,
|
||||
-1, -1, 0, 0, allocatedBlockB);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -76,12 +76,11 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co
|
||||
if (r>0)
|
||||
{
|
||||
Index s = IsLower ? pi+actualPanelWidth : 0;
|
||||
ei_cache_friendly_product_colmajor_times_vector<ConjLhs,ConjRhs>(
|
||||
r,
|
||||
ei_general_matrix_vector_product<Index,Scalar,ColMajor,ConjLhs,Scalar,ConjRhs>::run(
|
||||
r, actualPanelWidth,
|
||||
&(lhs.const_cast_derived().coeffRef(s,pi)), lhs.outerStride(),
|
||||
rhs.segment(pi, actualPanelWidth),
|
||||
&(res.coeffRef(s)),
|
||||
alpha);
|
||||
&rhs.coeff(pi), rhs.innerStride(),
|
||||
&res.coeffRef(s), res.innerStride(), alpha);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -119,11 +118,11 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co
|
||||
if (r>0)
|
||||
{
|
||||
Index s = IsLower ? 0 : pi + actualPanelWidth;
|
||||
Block<Result,Dynamic,1> target(res,pi,0,actualPanelWidth,1);
|
||||
ei_cache_friendly_product_rowmajor_times_vector<ConjLhs,ConjRhs>(
|
||||
ei_general_matrix_vector_product<Index,Scalar,RowMajor,ConjLhs,Scalar,ConjRhs>::run(
|
||||
actualPanelWidth, r,
|
||||
&(lhs.const_cast_derived().coeffRef(pi,s)), lhs.outerStride(),
|
||||
&(rhs.const_cast_derived().coeffRef(s)), r,
|
||||
target, alpha);
|
||||
&(rhs.const_cast_derived().coeffRef(s)), 1,
|
||||
&res.coeffRef(pi,0), res.innerStride(), alpha);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -57,9 +57,9 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora
|
||||
ei_const_blas_data_mapper<Scalar, Index, TriStorageOrder> tri(_tri,triStride);
|
||||
ei_blas_data_mapper<Scalar, Index, ColMajor> other(_other,otherStride);
|
||||
|
||||
typedef ei_product_blocking_traits<Scalar> Blocking;
|
||||
typedef ei_gebp_traits<Scalar,Scalar> Traits;
|
||||
enum {
|
||||
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Blocking::mr,Blocking::nr),
|
||||
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
|
||||
IsLower = (Mode&Lower) == Lower
|
||||
};
|
||||
|
||||
@@ -69,14 +69,15 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora
|
||||
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
|
||||
|
||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = sizeW + kc*cols;
|
||||
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
|
||||
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
|
||||
Scalar* blockB = allocatedBlockB + sizeW;
|
||||
|
||||
ei_conj_if<Conjugate> conj;
|
||||
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<Conjugate,false> > gebp_kernel;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,TriStorageOrder> pack_lhs;
|
||||
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr, ColMajor, true> pack_rhs;
|
||||
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
|
||||
ei_gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs;
|
||||
|
||||
for(Index k2=IsLower ? 0 : size;
|
||||
IsLower ? k2<size : k2>0;
|
||||
@@ -140,7 +141,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora
|
||||
Index blockBOffset = IsLower ? k1 : lengthTarget;
|
||||
|
||||
// update the respective rows of B from other
|
||||
pack_rhs(blockB, _other+startBlock, otherStride, -1, actualPanelWidth, cols, actual_kc, blockBOffset);
|
||||
pack_rhs(blockB, _other+startBlock, otherStride, actualPanelWidth, cols, actual_kc, blockBOffset);
|
||||
|
||||
// GEBP
|
||||
if (lengthTarget>0)
|
||||
@@ -149,7 +150,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora
|
||||
|
||||
pack_lhs(blockA, &tri(startTarget,startBlock), triStride, actualPanelWidth, lengthTarget);
|
||||
|
||||
gebp_kernel(_other+startTarget, otherStride, blockA, blockB, lengthTarget, actualPanelWidth, cols,
|
||||
gebp_kernel(_other+startTarget, otherStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, Scalar(-1),
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset);
|
||||
}
|
||||
}
|
||||
@@ -166,7 +167,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora
|
||||
{
|
||||
pack_lhs(blockA, &tri(i2, IsLower ? k2 : k2-kc), triStride, actual_kc, actual_mc);
|
||||
|
||||
gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols);
|
||||
gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -191,15 +192,15 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
|
||||
ei_const_blas_data_mapper<Scalar, Index, TriStorageOrder> rhs(_tri,triStride);
|
||||
ei_blas_data_mapper<Scalar, Index, ColMajor> lhs(_other,otherStride);
|
||||
|
||||
typedef ei_product_blocking_traits<Scalar> Blocking;
|
||||
typedef ei_gebp_traits<Scalar,Scalar> Traits;
|
||||
enum {
|
||||
RhsStorageOrder = TriStorageOrder,
|
||||
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Blocking::mr,Blocking::nr),
|
||||
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
|
||||
IsLower = (Mode&Lower) == Lower
|
||||
};
|
||||
|
||||
// Index kc = std::min<Index>(Blocking::Max_kc/4,size); // cache block size along the K direction
|
||||
// Index mc = std::min<Index>(Blocking::Max_mc,size); // cache block size along the M direction
|
||||
// Index kc = std::min<Index>(Traits::Max_kc/4,size); // cache block size along the K direction
|
||||
// Index mc = std::min<Index>(Traits::Max_mc,size); // cache block size along the M direction
|
||||
// check that !!!!
|
||||
Index kc = size; // cache block size along the K direction
|
||||
Index mc = size; // cache block size along the M direction
|
||||
@@ -207,15 +208,16 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
|
||||
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
|
||||
|
||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*size;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = sizeW + kc*size;
|
||||
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
|
||||
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
|
||||
Scalar* blockB = allocatedBlockB + sizeW;
|
||||
|
||||
ei_conj_if<Conjugate> conj;
|
||||
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<false,Conjugate> > gebp_kernel;
|
||||
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr,RhsStorageOrder> pack_rhs;
|
||||
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr,RhsStorageOrder,true> pack_rhs_panel;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr, ColMajor, false, true> pack_lhs_panel;
|
||||
ei_gebp_kernel<Scalar,Scalar, Index, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
|
||||
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
|
||||
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel;
|
||||
|
||||
for(Index k2=IsLower ? size : 0;
|
||||
IsLower ? k2>0 : k2<size;
|
||||
@@ -228,7 +230,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
|
||||
Index rs = IsLower ? actual_k2 : size - actual_k2 - actual_kc;
|
||||
Scalar* geb = blockB+actual_kc*actual_kc;
|
||||
|
||||
if (rs>0) pack_rhs(geb, &rhs(actual_k2,startPanel), triStride, -1, actual_kc, rs);
|
||||
if (rs>0) pack_rhs(geb, &rhs(actual_k2,startPanel), triStride, actual_kc, rs);
|
||||
|
||||
// triangular packing (we only pack the panels off the diagonal,
|
||||
// neglecting the blocks overlapping the diagonal)
|
||||
@@ -242,7 +244,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
|
||||
|
||||
if (panelLength>0)
|
||||
pack_rhs_panel(blockB+j2*actual_kc,
|
||||
&rhs(actual_k2+panelOffset, actual_j2), triStride, -1,
|
||||
&rhs(actual_k2+panelOffset, actual_j2), triStride,
|
||||
panelLength, actualPanelWidth,
|
||||
actual_kc, panelOffset);
|
||||
}
|
||||
@@ -273,6 +275,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
|
||||
gebp_kernel(&lhs(i2,absolute_j2), otherStride,
|
||||
blockA, blockB+j2*actual_kc,
|
||||
actual_mc, panelLength, actualPanelWidth,
|
||||
Scalar(-1),
|
||||
actual_kc, actual_kc, // strides
|
||||
panelOffset, panelOffset, // offsets
|
||||
allocatedBlockB); // workspace
|
||||
@@ -305,7 +308,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
|
||||
|
||||
if (rs>0)
|
||||
gebp_kernel(_other+i2+startPanel*otherStride, otherStride, blockA, geb,
|
||||
actual_mc, actual_kc, rs,
|
||||
actual_mc, actual_kc, rs, Scalar(-1),
|
||||
-1, -1, 0, 0, allocatedBlockB);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,70 +29,98 @@
|
||||
// implement and control fast level 2 and level 3 BLAS-like routines.
|
||||
|
||||
// forward declarations
|
||||
template<typename Scalar, typename Index, int mr, int nr, typename Conj>
|
||||
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false>
|
||||
struct ei_gebp_kernel;
|
||||
|
||||
template<typename Scalar, typename Index, int nr, int StorageOrder, bool PanelMode=false>
|
||||
template<typename Scalar, typename Index, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
|
||||
struct ei_gemm_pack_rhs;
|
||||
|
||||
template<typename Scalar, typename Index, int mr, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
|
||||
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
|
||||
struct ei_gemm_pack_lhs;
|
||||
|
||||
template<
|
||||
typename Scalar, typename Index,
|
||||
int LhsStorageOrder, bool ConjugateLhs,
|
||||
int RhsStorageOrder, bool ConjugateRhs,
|
||||
typename Index,
|
||||
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
|
||||
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
|
||||
int ResStorageOrder>
|
||||
struct ei_general_matrix_matrix_product;
|
||||
|
||||
template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index, typename RhsType>
|
||||
static void ei_cache_friendly_product_colmajor_times_vector(
|
||||
Index size, const Scalar* lhs, Index lhsStride, const RhsType& rhs, Scalar* res, Scalar alpha);
|
||||
template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
|
||||
struct ei_general_matrix_vector_product;
|
||||
|
||||
template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index, typename ResType>
|
||||
static void ei_cache_friendly_product_rowmajor_times_vector(
|
||||
const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsSize, ResType& res, Scalar alpha);
|
||||
|
||||
// Provides scalar/packet-wise product and product with accumulation
|
||||
// with optional conjugation of the arguments.
|
||||
template<bool ConjLhs, bool ConjRhs> struct ei_conj_helper;
|
||||
template<bool Conjugate> struct ei_conj_if;
|
||||
|
||||
template<> struct ei_conj_helper<false,false>
|
||||
{
|
||||
template<> struct ei_conj_if<true> {
|
||||
template<typename T>
|
||||
EIGEN_STRONG_INLINE T pmadd(const T& x, const T& y, const T& c) const { return ei_pmadd(x,y,c); }
|
||||
inline T operator()(const T& x) { return ei_conj(x); }
|
||||
};
|
||||
|
||||
template<> struct ei_conj_if<false> {
|
||||
template<typename T>
|
||||
EIGEN_STRONG_INLINE T pmul(const T& x, const T& y) const { return ei_pmul(x,y); }
|
||||
inline const T& operator()(const T& x) { return x; }
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<false,true>
|
||||
template<typename Scalar> struct ei_conj_helper<Scalar,Scalar,false,false>
|
||||
{
|
||||
template<typename T> std::complex<T>
|
||||
pmadd(const std::complex<T>& x, const std::complex<T>& y, const std::complex<T>& c) const
|
||||
{ return c + pmul(x,y); }
|
||||
|
||||
template<typename T> std::complex<T> pmul(const std::complex<T>& x, const std::complex<T>& y) const
|
||||
{ return std::complex<T>(ei_real(x)*ei_real(y) + ei_imag(x)*ei_imag(y), ei_imag(x)*ei_real(y) - ei_real(x)*ei_imag(y)); }
|
||||
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return ei_pmadd(x,y,c); }
|
||||
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return ei_pmul(x,y); }
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<true,false>
|
||||
template<typename RealScalar> struct ei_conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, false,true>
|
||||
{
|
||||
template<typename T> std::complex<T>
|
||||
pmadd(const std::complex<T>& x, const std::complex<T>& y, const std::complex<T>& c) const
|
||||
typedef std::complex<RealScalar> Scalar;
|
||||
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
|
||||
{ return c + pmul(x,y); }
|
||||
|
||||
template<typename T> std::complex<T> pmul(const std::complex<T>& x, const std::complex<T>& y) const
|
||||
{ return std::complex<T>(ei_real(x)*ei_real(y) + ei_imag(x)*ei_imag(y), ei_real(x)*ei_imag(y) - ei_imag(x)*ei_real(y)); }
|
||||
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
|
||||
{ return Scalar(ei_real(x)*ei_real(y) + ei_imag(x)*ei_imag(y), ei_imag(x)*ei_real(y) - ei_real(x)*ei_imag(y)); }
|
||||
};
|
||||
|
||||
template<> struct ei_conj_helper<true,true>
|
||||
template<typename RealScalar> struct ei_conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,false>
|
||||
{
|
||||
template<typename T> std::complex<T>
|
||||
pmadd(const std::complex<T>& x, const std::complex<T>& y, const std::complex<T>& c) const
|
||||
typedef std::complex<RealScalar> Scalar;
|
||||
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
|
||||
{ return c + pmul(x,y); }
|
||||
|
||||
template<typename T> std::complex<T> pmul(const std::complex<T>& x, const std::complex<T>& y) const
|
||||
{ return std::complex<T>(ei_real(x)*ei_real(y) - ei_imag(x)*ei_imag(y), - ei_real(x)*ei_imag(y) - ei_imag(x)*ei_real(y)); }
|
||||
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
|
||||
{ return Scalar(ei_real(x)*ei_real(y) + ei_imag(x)*ei_imag(y), ei_real(x)*ei_imag(y) - ei_imag(x)*ei_real(y)); }
|
||||
};
|
||||
|
||||
template<typename RealScalar> struct ei_conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,true>
|
||||
{
|
||||
typedef std::complex<RealScalar> Scalar;
|
||||
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
|
||||
{ return c + pmul(x,y); }
|
||||
|
||||
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
|
||||
{ return Scalar(ei_real(x)*ei_real(y) - ei_imag(x)*ei_imag(y), - ei_real(x)*ei_imag(y) - ei_imag(x)*ei_real(y)); }
|
||||
};
|
||||
|
||||
template<typename RealScalar,bool Conj> struct ei_conj_helper<std::complex<RealScalar>, RealScalar, Conj,false>
|
||||
{
|
||||
typedef std::complex<RealScalar> Scalar;
|
||||
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const RealScalar& y, const Scalar& c) const
|
||||
{ return ei_padd(c, pmul(x,y)); }
|
||||
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const RealScalar& y) const
|
||||
{ return ei_conj_if<Conj>()(x)*y; }
|
||||
};
|
||||
|
||||
template<typename RealScalar,bool Conj> struct ei_conj_helper<RealScalar, std::complex<RealScalar>, false,Conj>
|
||||
{
|
||||
typedef std::complex<RealScalar> Scalar;
|
||||
EIGEN_STRONG_INLINE Scalar pmadd(const RealScalar& x, const Scalar& y, const Scalar& c) const
|
||||
{ return ei_padd(c, pmul(x,y)); }
|
||||
EIGEN_STRONG_INLINE Scalar pmul(const RealScalar& x, const Scalar& y) const
|
||||
{ return x*ei_conj_if<Conj>()(y); }
|
||||
};
|
||||
|
||||
template<typename From,typename To> struct ei_get_factor {
|
||||
EIGEN_STRONG_INLINE static To run(const From& x) { return x; }
|
||||
};
|
||||
|
||||
template<typename Scalar> struct ei_get_factor<Scalar,typename NumTraits<Scalar>::Real> {
|
||||
EIGEN_STRONG_INLINE static typename NumTraits<Scalar>::Real run(const Scalar& x) { return ei_real(x); }
|
||||
};
|
||||
|
||||
// Lightweight helper class to access matrix coefficients.
|
||||
@@ -123,22 +151,6 @@ class ei_const_blas_data_mapper
|
||||
Index m_stride;
|
||||
};
|
||||
|
||||
// Defines various constant controlling register blocking for matrix-matrix algorithms.
|
||||
template<typename Scalar>
|
||||
struct ei_product_blocking_traits
|
||||
{
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketType;
|
||||
enum {
|
||||
PacketSize = sizeof(PacketType)/sizeof(Scalar),
|
||||
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
|
||||
|
||||
// register block size along the N direction (must be either 2 or 4)
|
||||
nr = NumberOfRegisters/4,
|
||||
|
||||
// register block size along the M direction (currently, this one cannot be modified)
|
||||
mr = 2 * PacketSize
|
||||
};
|
||||
};
|
||||
|
||||
/* Helper class to analyze the factors of a Product expression.
|
||||
* In particular it allows to pop out operator-, scalar multiples,
|
||||
|
||||
@@ -38,7 +38,7 @@ const int Dynamic = -1;
|
||||
*/
|
||||
const int Infinity = -1;
|
||||
|
||||
/** \defgroup flags flags
|
||||
/** \defgroup flags Flags
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* These are the possible bits which can be OR'ed to constitute the flags of a matrix or
|
||||
@@ -139,6 +139,14 @@ const unsigned int DirectAccessBit = 0x20;
|
||||
* means the first coefficient packet is guaranteed to be aligned */
|
||||
const unsigned int AlignedBit = 0x40;
|
||||
|
||||
/** \ingroup flags
|
||||
*
|
||||
* Means the expression is writable. Note that DirectAccessBit implies LvalueBit.
|
||||
* Internally, it is mainly used to enable the writable coeff accessors, and makes
|
||||
* the read-only coeff accessors return by const reference.
|
||||
*/
|
||||
const unsigned int LvalueBit = 0x80;
|
||||
|
||||
const unsigned int NestByRefBit = 0x100;
|
||||
|
||||
// list of flags that are inherited by default
|
||||
@@ -176,7 +184,9 @@ enum {
|
||||
LinearVectorizedTraversal,
|
||||
/** \internal Generic vectorization path using one vectorized loop per row/column with some
|
||||
* scalar loops to handle the unaligned boundaries */
|
||||
SliceVectorizedTraversal
|
||||
SliceVectorizedTraversal,
|
||||
/** \internal Special case to properly handle incompatible scalar types or other defective cases */
|
||||
InvalidTraversal
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -199,15 +209,6 @@ enum {
|
||||
OnTheRight = 2
|
||||
};
|
||||
|
||||
// options for SVD decomposition
|
||||
enum {
|
||||
SkipU = 0x1,
|
||||
SkipV = 0x2,
|
||||
AtLeastAsManyRowsAsCols = 0x4,
|
||||
AtLeastAsManyColsAsRows = 0x8,
|
||||
Square = AtLeastAsManyRowsAsCols | AtLeastAsManyColsAsRows
|
||||
};
|
||||
|
||||
/* the following could as well be written:
|
||||
* enum NoChange_t { NoChange };
|
||||
* but it feels dangerous to disambiguate overloaded functions on enum/integer types.
|
||||
@@ -234,13 +235,19 @@ enum {
|
||||
IsSparse
|
||||
};
|
||||
|
||||
enum AccessorLevels {
|
||||
ReadOnlyAccessors, WriteAccessors, DirectAccessors
|
||||
};
|
||||
|
||||
enum DecompositionOptions {
|
||||
Pivoting = 0x01, // LDLT,
|
||||
NoPivoting = 0x02, // LDLT,
|
||||
ComputeU = 0x10, // SVD,
|
||||
ComputeR = 0x20, // SVD,
|
||||
ComputeFullU = 0x04, // SVD,
|
||||
ComputeThinU = 0x08, // SVD,
|
||||
ComputeFullV = 0x10, // SVD,
|
||||
ComputeThinV = 0x20, // SVD,
|
||||
EigenvaluesOnly = 0x40, // all eigen solvers
|
||||
ComputeEigenvectors = 0x80, // all eigen solvers
|
||||
ComputeEigenvectors = 0x80, // all eigen solvers
|
||||
EigVecMask = EigenvaluesOnly | ComputeEigenvectors,
|
||||
Ax_lBx = 0x100,
|
||||
ABx_lx = 0x200,
|
||||
@@ -248,6 +255,13 @@ enum DecompositionOptions {
|
||||
GenEigMask = Ax_lBx | ABx_lx | BAx_lx
|
||||
};
|
||||
|
||||
enum QRPreconditioners {
|
||||
NoQRPreconditioner,
|
||||
HouseholderQRPreconditioner,
|
||||
ColPivHouseholderQRPreconditioner,
|
||||
FullPivHouseholderQRPreconditioner
|
||||
};
|
||||
|
||||
/** \brief Enum for reporting the status of a computation.
|
||||
*/
|
||||
enum ComputationInfo {
|
||||
|
||||
@@ -36,7 +36,10 @@ template<typename Derived> struct ei_has_direct_access
|
||||
|
||||
template<typename Derived> struct EigenBase;
|
||||
template<typename Derived> class DenseBase;
|
||||
template<typename Derived, bool EnableDirectAccessAPI = ei_has_direct_access<Derived>::ret>
|
||||
template<typename Derived,
|
||||
AccessorLevels Level = (ei_traits<Derived>::Flags & DirectAccessBit) ? DirectAccessors
|
||||
: (ei_traits<Derived>::Flags & LvalueBit) ? WriteAccessors
|
||||
: ReadOnlyAccessors>
|
||||
class DenseCoeffsBase;
|
||||
|
||||
template<typename _Scalar, int _Rows, int _Cols,
|
||||
@@ -57,7 +60,7 @@ template<typename ExpressionType> class NestByValue;
|
||||
template<typename ExpressionType> class ForceAlignedAccess;
|
||||
template<typename ExpressionType> class SwapWrapper;
|
||||
|
||||
template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic,
|
||||
template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false,
|
||||
bool HasDirectAccess = ei_has_direct_access<XprType>::ret> class Block;
|
||||
|
||||
template<typename MatrixType, int Size=Dynamic> class VectorBlock;
|
||||
@@ -67,7 +70,7 @@ template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
|
||||
template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
|
||||
template<typename ViewOp, typename MatrixType> class CwiseUnaryView;
|
||||
template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
|
||||
template<typename BinOp, typename MatrixType> class SelfCwiseBinaryOp;
|
||||
template<typename BinOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp;
|
||||
template<typename Derived, typename Lhs, typename Rhs> class ProductBase;
|
||||
template<typename Lhs, typename Rhs, int Mode> class GeneralProduct;
|
||||
template<typename Lhs, typename Rhs, int NestingFlags> class CoeffBasedProduct;
|
||||
@@ -106,9 +109,16 @@ template<typename Lhs, typename Rhs,
|
||||
int ProductType = ei_product_type<Lhs,Rhs>::value>
|
||||
struct ProductReturnType;
|
||||
|
||||
// this is a workaround for sun CC
|
||||
template<typename Lhs, typename Rhs> struct LazyProductReturnType;
|
||||
|
||||
// Provides scalar/packet-wise product and product with accumulation
|
||||
// with optional conjugation of the arguments.
|
||||
template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRhs=false> struct ei_conj_helper;
|
||||
|
||||
template<typename Scalar> struct ei_scalar_sum_op;
|
||||
template<typename Scalar> struct ei_scalar_difference_op;
|
||||
template<typename Scalar> struct ei_scalar_product_op;
|
||||
template<typename Scalar> struct ei_scalar_conj_product_op;
|
||||
template<typename Scalar> struct ei_scalar_quotient_op;
|
||||
template<typename Scalar> struct ei_scalar_opposite_op;
|
||||
template<typename Scalar> struct ei_scalar_conjugate_op;
|
||||
@@ -135,7 +145,8 @@ template<typename Scalar> struct ei_scalar_add_op;
|
||||
template<typename Scalar> struct ei_scalar_constant_op;
|
||||
template<typename Scalar> struct ei_scalar_identity_op;
|
||||
|
||||
template<typename Scalar1,typename Scalar2> struct ei_scalar_multiple2_op;
|
||||
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct ei_scalar_product_op;
|
||||
template<typename LhsScalar,typename RhsScalar> struct ei_scalar_multiple2_op;
|
||||
|
||||
struct IOFormat;
|
||||
|
||||
@@ -158,8 +169,7 @@ template<typename MatrixType> struct ei_inverse_impl;
|
||||
template<typename MatrixType> class HouseholderQR;
|
||||
template<typename MatrixType> class ColPivHouseholderQR;
|
||||
template<typename MatrixType> class FullPivHouseholderQR;
|
||||
template<typename MatrixType> class SVD;
|
||||
template<typename MatrixType, unsigned int Options = 0> class JacobiSVD;
|
||||
template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD;
|
||||
template<typename MatrixType, int UpLo = Lower> class LLT;
|
||||
template<typename MatrixType, int UpLo = Lower> class LDLT;
|
||||
template<typename VectorsType, typename CoeffsType, int Side=OnTheLeft> class HouseholderSequence;
|
||||
@@ -172,7 +182,7 @@ template<typename Derived> class QuaternionBase;
|
||||
template<typename Scalar> class Quaternion;
|
||||
template<typename Scalar> class Rotation2D;
|
||||
template<typename Scalar> class AngleAxis;
|
||||
template<typename Scalar,int Dim,int Mode=Affine> class Transform;
|
||||
template<typename Scalar,int Dim,int Mode> class Transform;
|
||||
template <typename _Scalar, int _AmbientDim> class ParametrizedLine;
|
||||
template <typename _Scalar, int _AmbientDim> class Hyperplane;
|
||||
template<typename Scalar,int Dim> class Translation;
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
#define EIGEN_MACROS_H
|
||||
|
||||
#define EIGEN_WORLD_VERSION 2
|
||||
#define EIGEN_MAJOR_VERSION 91
|
||||
#define EIGEN_MAJOR_VERSION 92
|
||||
#define EIGEN_MINOR_VERSION 0
|
||||
|
||||
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
|
||||
@@ -109,6 +109,19 @@
|
||||
|
||||
#define EIGEN_DEBUG_VAR(x) std::cerr << #x << " = " << x << std::endl;
|
||||
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \def EIGEN_NO_DEBUG
|
||||
* \ingroup Core_Module
|
||||
* \brief If defined, Eigen's assertions are disabled.
|
||||
* \details Disabling run-time assertions improves the performance, but it is dangerous because the
|
||||
* assertions guard against programming errors. By default, the EIGEN_NO_DEBUG macro is not defined and
|
||||
* Eigen's run-time assertions are thus enabled. However, if the NDEBUG macro is defined (this is a
|
||||
* standard C++ macro which disables all asserts), then the EIGEN_NO_DEBUG macro will also be defined, and
|
||||
* so Eigen's assertions will also be disabled.
|
||||
*/
|
||||
#define EIGEN_NO_DEBUG
|
||||
#endif
|
||||
|
||||
#ifdef NDEBUG
|
||||
# ifndef EIGEN_NO_DEBUG
|
||||
# define EIGEN_NO_DEBUG
|
||||
@@ -147,6 +160,12 @@
|
||||
#define EIGEN_ALWAYS_INLINE_ATTRIB
|
||||
#endif
|
||||
|
||||
#if EIGEN_GNUC_AT_LEAST(4,1)
|
||||
#define EIGEN_FLATTEN_ATTRIB __attribute__((flatten))
|
||||
#else
|
||||
#define EIGEN_FLATTEN_ATTRIB
|
||||
#endif
|
||||
|
||||
// EIGEN_STRONG_INLINE means "inline as much as possible"
|
||||
#if (defined _MSC_VER) || (defined __intel_compiler)
|
||||
#define EIGEN_STRONG_INLINE __forceinline
|
||||
@@ -343,7 +362,7 @@
|
||||
|
||||
#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,FUNCTOR) \
|
||||
template<typename OtherDerived> \
|
||||
inline const CwiseBinaryOp<FUNCTOR<Scalar>, Derived, OtherDerived> \
|
||||
EIGEN_STRONG_INLINE const CwiseBinaryOp<FUNCTOR<Scalar>, Derived, OtherDerived> \
|
||||
METHOD(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
|
||||
{ \
|
||||
return CwiseBinaryOp<FUNCTOR<Scalar>, Derived, OtherDerived>(derived(), other.derived()); \
|
||||
@@ -353,10 +372,8 @@
|
||||
#define EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS) \
|
||||
CwiseBinaryOp< \
|
||||
ei_scalar_product_op< \
|
||||
typename ei_scalar_product_traits< \
|
||||
typename ei_traits<LHS>::Scalar, \
|
||||
typename ei_traits<RHS>::Scalar \
|
||||
>::ReturnType \
|
||||
>, \
|
||||
LHS, \
|
||||
RHS \
|
||||
|
||||
@@ -146,7 +146,9 @@ inline void* ei_generic_aligned_realloc(void* ptr, size_t size, size_t old_size)
|
||||
void* newptr = ei_aligned_malloc(size);
|
||||
if (newptr == 0)
|
||||
{
|
||||
#ifdef EIGEN_HAS_ERRNO
|
||||
errno = ENOMEM; // according to the standard
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -315,7 +317,8 @@ template<typename T> inline T* ei_construct_elements_of_array(T *ptr, size_t siz
|
||||
template<typename T> inline void ei_destruct_elements_of_array(T *ptr, size_t size)
|
||||
{
|
||||
// always destruct an array starting from the end.
|
||||
while(size) ptr[--size].~T();
|
||||
if(ptr)
|
||||
while(size) ptr[--size].~T();
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
@@ -495,6 +498,7 @@ inline static Index ei_first_aligned(const Scalar* array, Index size)
|
||||
/****************************************************************************/
|
||||
|
||||
/** \class aligned_allocator
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief STL compatible allocator to use with 16 byte aligned types
|
||||
*
|
||||
@@ -588,17 +592,17 @@ public:
|
||||
//---------- Cache sizes ----------
|
||||
|
||||
#if defined(__GNUC__)
|
||||
# if defined(__PIC__) && defined(__i386__)
|
||||
# if defined(__PIC__) && defined(__i386__)
|
||||
# define EIGEN_CPUID(abcd,func,id) \
|
||||
__asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
|
||||
# elif !defined(__arm__) && !defined(__powerpc__)
|
||||
# elif !defined(__arm__) && !defined(__powerpc__)
|
||||
# define EIGEN_CPUID(abcd,func,id) \
|
||||
__asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
|
||||
# endif
|
||||
# endif
|
||||
#elif defined(_MSC_VER)
|
||||
#if (_MSC_VER > 1500) /* newer than MSVC++ 9.0 */ || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729) /* MSVC++ 9.0 with SP1*/
|
||||
# if (_MSC_VER > 1500) /* newer than MSVC++ 9.0 */ || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729) /* MSVC++ 9.0 with SP1*/
|
||||
# define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
|
||||
#endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_CPUID
|
||||
|
||||
@@ -205,10 +205,10 @@ template<typename T> struct ei_scalar_product_traits<std::complex<T>, T>
|
||||
};
|
||||
|
||||
// FIXME quick workaround around current limitation of ei_result_of
|
||||
template<typename Scalar, typename ArgType0, typename ArgType1>
|
||||
struct ei_result_of<ei_scalar_product_op<Scalar>(ArgType0,ArgType1)> {
|
||||
typedef typename ei_scalar_product_traits<typename ei_cleantype<ArgType0>::type, typename ei_cleantype<ArgType1>::type>::ReturnType type;
|
||||
};
|
||||
// template<typename Scalar, typename ArgType0, typename ArgType1>
|
||||
// struct ei_result_of<ei_scalar_product_op<Scalar>(ArgType0,ArgType1)> {
|
||||
// typedef typename ei_scalar_product_traits<typename ei_cleantype<ArgType0>::type, typename ei_cleantype<ArgType1>::type>::ReturnType type;
|
||||
// };
|
||||
|
||||
template<typename T> struct ei_is_diagonal
|
||||
{ enum { ret = false }; };
|
||||
@@ -222,16 +222,4 @@ template<typename T> struct ei_is_diagonal<DiagonalWrapper<T> >
|
||||
template<typename T, int S> struct ei_is_diagonal<DiagonalMatrix<T,S> >
|
||||
{ enum { ret = true }; };
|
||||
|
||||
template<bool Conjugate> struct ei_conj_if;
|
||||
|
||||
template<> struct ei_conj_if<true> {
|
||||
template<typename T>
|
||||
inline T operator()(const T& x) { return ei_conj(x); }
|
||||
};
|
||||
|
||||
template<> struct ei_conj_if<false> {
|
||||
template<typename T>
|
||||
inline const T& operator()(const T& x) { return x; }
|
||||
};
|
||||
|
||||
#endif // EIGEN_META_H
|
||||
|
||||
@@ -60,7 +60,9 @@
|
||||
YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES,
|
||||
THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE,
|
||||
THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE,
|
||||
THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE,
|
||||
YOU_MADE_A_PROGRAMMING_MISTAKE,
|
||||
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT,
|
||||
EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE,
|
||||
YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR,
|
||||
YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR,
|
||||
@@ -85,7 +87,10 @@
|
||||
YOU_ALREADY_SPECIFIED_THIS_STRIDE,
|
||||
INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION,
|
||||
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD,
|
||||
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1
|
||||
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1,
|
||||
THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS,
|
||||
YOU_CANNOT_MIX_ARRAYS_AND_MATRICES,
|
||||
YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION
|
||||
};
|
||||
};
|
||||
|
||||
@@ -95,12 +100,12 @@
|
||||
#ifdef _MSC_VER
|
||||
|
||||
#define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
|
||||
{Eigen::ei_static_assert<CONDITION ? true : false>::MSG;}
|
||||
{Eigen::ei_static_assert<(CONDITION)>::MSG;}
|
||||
|
||||
#else
|
||||
|
||||
#define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
|
||||
if (Eigen::ei_static_assert<CONDITION ? true : false>::MSG) {}
|
||||
if (Eigen::ei_static_assert<(CONDITION)>::MSG) {}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -119,21 +119,11 @@ class ei_compute_matrix_flags
|
||||
enum {
|
||||
row_major_bit = Options&RowMajor ? RowMajorBit : 0,
|
||||
is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,
|
||||
#if EIGEN_ALIGN_STATICALLY
|
||||
is_fixed_size_aligned
|
||||
= (!is_dynamic_size_storage) && (((MaxCols*MaxRows) % ei_packet_traits<Scalar>::size) == 0),
|
||||
#else
|
||||
is_fixed_size_aligned = 0,
|
||||
#endif
|
||||
#if EIGEN_ALIGN
|
||||
is_dynamic_size_aligned = is_dynamic_size_storage,
|
||||
#else
|
||||
is_dynamic_size_aligned = 0,
|
||||
#endif
|
||||
|
||||
aligned_bit =
|
||||
(
|
||||
((Options&DontAlign)==0)
|
||||
((Options&DontAlign)==0)
|
||||
&& ei_packet_traits<Scalar>::Vectorizable
|
||||
&& (
|
||||
#if EIGEN_ALIGN_STATICALLY
|
||||
((!is_dynamic_size_storage) && (((MaxCols*MaxRows) % ei_packet_traits<Scalar>::size) == 0))
|
||||
@@ -151,11 +141,11 @@ class ei_compute_matrix_flags
|
||||
|
||||
)
|
||||
) ? AlignedBit : 0,
|
||||
packet_access_bit = ei_packet_traits<Scalar>::size > 1 && aligned_bit ? PacketAccessBit : 0
|
||||
packet_access_bit = ei_packet_traits<Scalar>::Vectorizable && aligned_bit ? PacketAccessBit : 0
|
||||
};
|
||||
|
||||
public:
|
||||
enum { ret = LinearAccessBit | DirectAccessBit | NestByRefBit | packet_access_bit | row_major_bit | aligned_bit };
|
||||
enum { ret = LinearAccessBit | LvalueBit | DirectAccessBit | NestByRefBit | packet_access_bit | row_major_bit | aligned_bit };
|
||||
};
|
||||
|
||||
template<int _Rows, int _Cols> struct ei_size_at_compile_time
|
||||
@@ -355,7 +345,7 @@ template<typename T, int n=1, typename PlainObject = typename ei_eval<T>::type>
|
||||
|
||||
template<unsigned int Flags> struct ei_are_flags_consistent
|
||||
{
|
||||
enum { ret = true };
|
||||
enum { ret = EIGEN_IMPLIES(bool(Flags&DirectAccessBit), bool(Flags&LvalueBit)) };
|
||||
};
|
||||
|
||||
template<typename Derived, typename XprKind = typename ei_traits<Derived>::XprKind>
|
||||
|
||||
6
Eigen/src/Eigen2Support/CMakeLists.txt
Normal file
6
Eigen/src/Eigen2Support/CMakeLists.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
FILE(GLOB Eigen_Eigen2Support_SRCS "*.h")
|
||||
|
||||
INSTALL(FILES
|
||||
${Eigen_Eigen2Support_SRCS}
|
||||
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Eigen2Support COMPONENT Devel
|
||||
)
|
||||
@@ -291,8 +291,8 @@ void ComplexEigenSolver<MatrixType>::doComputeEigenvectors(RealScalar matrixnorm
|
||||
ComplexScalar z = m_schur.matrixT().coeff(i,i) - m_schur.matrixT().coeff(k,k);
|
||||
if(z==ComplexScalar(0))
|
||||
{
|
||||
// If the i-th and k-th eigenvalue are equal, then z equals 0.
|
||||
// Use a small value instead, to prevent division by zero.
|
||||
// If the i-th and k-th eigenvalue are equal, then z equals 0.
|
||||
// Use a small value instead, to prevent division by zero.
|
||||
ei_real_ref(z) = NumTraits<RealScalar>::epsilon() * matrixnorm;
|
||||
}
|
||||
m_matX.coeffRef(i,k) = m_matX.coeff(i,k) / z;
|
||||
|
||||
@@ -130,7 +130,7 @@ template<typename _MatrixType> class HessenbergDecomposition
|
||||
{
|
||||
if(matrix.rows()<2)
|
||||
{
|
||||
m_isInitialized = true;
|
||||
m_isInitialized = true;
|
||||
return;
|
||||
}
|
||||
m_hCoeffs.resize(matrix.rows()-1,1);
|
||||
@@ -160,7 +160,7 @@ template<typename _MatrixType> class HessenbergDecomposition
|
||||
m_matrix = matrix;
|
||||
if(matrix.rows()<2)
|
||||
{
|
||||
m_isInitialized = true;
|
||||
m_isInitialized = true;
|
||||
return *this;
|
||||
}
|
||||
m_hCoeffs.resize(matrix.rows()-1,1);
|
||||
@@ -360,7 +360,7 @@ template<typename MatrixType> struct HessenbergDecompositionMatrixHReturnType
|
||||
result = m_hess.packedMatrix();
|
||||
Index n = result.rows();
|
||||
if (n>2)
|
||||
result.bottomLeftCorner(n-2, n-2).template triangularView<Lower>().setZero();
|
||||
result.bottomLeftCorner(n-2, n-2).template triangularView<Lower>().setZero();
|
||||
}
|
||||
|
||||
Index rows() const { return m_hess.packedMatrix().rows(); }
|
||||
|
||||
@@ -384,7 +384,9 @@ void ei_tridiagonalization_inplace(MatrixType& matA, CoeffVectorType& hCoeffs)
|
||||
}
|
||||
|
||||
// forward declaration, implementation at the end of this file
|
||||
template<typename MatrixType, int Size=MatrixType::ColsAtCompileTime>
|
||||
template<typename MatrixType,
|
||||
int Size=MatrixType::ColsAtCompileTime,
|
||||
bool IsComplex=NumTraits<typename MatrixType::Scalar>::IsComplex>
|
||||
struct ei_tridiagonalization_inplace_selector;
|
||||
|
||||
/** \brief Performs a full tridiagonalization in place
|
||||
@@ -431,15 +433,15 @@ template<typename MatrixType, typename DiagonalType, typename SubDiagonalType>
|
||||
void ei_tridiagonalization_inplace(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, bool extractQ)
|
||||
{
|
||||
typedef typename MatrixType::Index Index;
|
||||
Index n = mat.rows();
|
||||
ei_assert(mat.cols()==n && diag.size()==n && subdiag.size()==n-1);
|
||||
//Index n = mat.rows();
|
||||
ei_assert(mat.cols()==mat.rows() && diag.size()==mat.rows() && subdiag.size()==mat.rows()-1);
|
||||
ei_tridiagonalization_inplace_selector<MatrixType>::run(mat, diag, subdiag, extractQ);
|
||||
}
|
||||
|
||||
/** \internal
|
||||
* General full tridiagonalization
|
||||
*/
|
||||
template<typename MatrixType, int Size>
|
||||
template<typename MatrixType, int Size, bool IsComplex>
|
||||
struct ei_tridiagonalization_inplace_selector
|
||||
{
|
||||
typedef typename Tridiagonalization<MatrixType>::CoeffVectorType CoeffVectorType;
|
||||
@@ -458,11 +460,11 @@ struct ei_tridiagonalization_inplace_selector
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* Specialization for 3x3 matrices.
|
||||
* Specialization for 3x3 real matrices.
|
||||
* Especially useful for plane fitting.
|
||||
*/
|
||||
template<typename MatrixType>
|
||||
struct ei_tridiagonalization_inplace_selector<MatrixType,3>
|
||||
struct ei_tridiagonalization_inplace_selector<MatrixType,3,false>
|
||||
{
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
@@ -470,14 +472,14 @@ struct ei_tridiagonalization_inplace_selector<MatrixType,3>
|
||||
template<typename DiagonalType, typename SubDiagonalType>
|
||||
static void run(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, bool extractQ)
|
||||
{
|
||||
diag[0] = ei_real(mat(0,0));
|
||||
diag[0] = mat(0,0);
|
||||
RealScalar v1norm2 = ei_abs2(mat(2,0));
|
||||
if (ei_isMuchSmallerThan(v1norm2, RealScalar(1)))
|
||||
if(v1norm2 == RealScalar(0))
|
||||
{
|
||||
diag[1] = ei_real(mat(1,1));
|
||||
diag[2] = ei_real(mat(2,2));
|
||||
subdiag[0] = ei_real(mat(1,0));
|
||||
subdiag[1] = ei_real(mat(2,1));
|
||||
diag[1] = mat(1,1);
|
||||
diag[2] = mat(2,2);
|
||||
subdiag[0] = mat(1,0);
|
||||
subdiag[1] = mat(2,1);
|
||||
if (extractQ)
|
||||
mat.setIdentity();
|
||||
}
|
||||
@@ -485,18 +487,18 @@ struct ei_tridiagonalization_inplace_selector<MatrixType,3>
|
||||
{
|
||||
RealScalar beta = ei_sqrt(ei_abs2(mat(1,0)) + v1norm2);
|
||||
RealScalar invBeta = RealScalar(1)/beta;
|
||||
Scalar m01 = ei_conj(mat(1,0)) * invBeta;
|
||||
Scalar m02 = ei_conj(mat(2,0)) * invBeta;
|
||||
Scalar q = RealScalar(2)*m01*ei_conj(mat(2,1)) + m02*(mat(2,2) - mat(1,1));
|
||||
diag[1] = ei_real(mat(1,1) + m02*q);
|
||||
diag[2] = ei_real(mat(2,2) - m02*q);
|
||||
Scalar m01 = mat(1,0) * invBeta;
|
||||
Scalar m02 = mat(2,0) * invBeta;
|
||||
Scalar q = RealScalar(2)*m01*mat(2,1) + m02*(mat(2,2) - mat(1,1));
|
||||
diag[1] = mat(1,1) + m02*q;
|
||||
diag[2] = mat(2,2) - m02*q;
|
||||
subdiag[0] = beta;
|
||||
subdiag[1] = ei_real(ei_conj(mat(2,1)) - m01 * q);
|
||||
subdiag[1] = mat(2,1) - m01 * q;
|
||||
if (extractQ)
|
||||
{
|
||||
mat << 1, 0, 0,
|
||||
0, m01, m02,
|
||||
0, m02, -m01;
|
||||
0, m01, m02,
|
||||
0, m02, -m01;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -505,8 +507,8 @@ struct ei_tridiagonalization_inplace_selector<MatrixType,3>
|
||||
/** \internal
|
||||
* Trivial specialization for 1x1 matrices
|
||||
*/
|
||||
template<typename MatrixType>
|
||||
struct ei_tridiagonalization_inplace_selector<MatrixType,1>
|
||||
template<typename MatrixType, bool IsComplex>
|
||||
struct ei_tridiagonalization_inplace_selector<MatrixType,1,IsComplex>
|
||||
{
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
|
||||
|
||||
@@ -142,6 +142,8 @@ public:
|
||||
m_angle = Scalar(other.angle());
|
||||
}
|
||||
|
||||
inline static const AngleAxis Identity() { return AngleAxis(0, Vector3::UnitX()); }
|
||||
|
||||
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
|
||||
* determined by \a prec.
|
||||
*
|
||||
|
||||
@@ -135,7 +135,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
|
||||
*
|
||||
* \return an expression of the equivalent homogeneous vector
|
||||
*
|
||||
* \vectoronly
|
||||
* \only_for_vectors
|
||||
*
|
||||
* Example: \include MatrixBase_homogeneous.cpp
|
||||
* Output: \verbinclude MatrixBase_homogeneous.out
|
||||
@@ -143,7 +143,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
|
||||
* \sa class Homogeneous
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const typename MatrixBase<Derived>::HomogeneousReturnType
|
||||
inline typename MatrixBase<Derived>::HomogeneousReturnType
|
||||
MatrixBase<Derived>::homogeneous() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
|
||||
@@ -159,7 +159,7 @@ MatrixBase<Derived>::homogeneous() const
|
||||
*
|
||||
* \sa MatrixBase::homogeneous() */
|
||||
template<typename ExpressionType, int Direction>
|
||||
inline const Homogeneous<ExpressionType,Direction>
|
||||
inline Homogeneous<ExpressionType,Direction>
|
||||
VectorwiseOp<ExpressionType,Direction>::homogeneous() const
|
||||
{
|
||||
return _expression();
|
||||
@@ -174,7 +174,7 @@ VectorwiseOp<ExpressionType,Direction>::homogeneous() const
|
||||
*
|
||||
* \sa VectorwiseOp::hnormalized() */
|
||||
template<typename Derived>
|
||||
inline const typename MatrixBase<Derived>::HNormalizedReturnType
|
||||
inline typename MatrixBase<Derived>::HNormalizedReturnType
|
||||
MatrixBase<Derived>::hnormalized() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
|
||||
@@ -192,7 +192,7 @@ MatrixBase<Derived>::hnormalized() const
|
||||
*
|
||||
* \sa MatrixBase::hnormalized() */
|
||||
template<typename ExpressionType, int Direction>
|
||||
inline const typename VectorwiseOp<ExpressionType,Direction>::HNormalizedReturnType
|
||||
inline typename VectorwiseOp<ExpressionType,Direction>::HNormalizedReturnType
|
||||
VectorwiseOp<ExpressionType,Direction>::hnormalized() const
|
||||
{
|
||||
return HNormalized_Block(_expression(),0,0,
|
||||
|
||||
@@ -228,7 +228,7 @@ public:
|
||||
* or a more generic Affine transformation. The default is Affine.
|
||||
* Other kind of transformations are not supported.
|
||||
*/
|
||||
inline Hyperplane& transform(const Transform<Scalar,AmbientDimAtCompileTime>& t,
|
||||
inline Hyperplane& transform(const Transform<Scalar,AmbientDimAtCompileTime,Affine>& t,
|
||||
TransformTraits traits = Affine)
|
||||
{
|
||||
transform(t.linear(), traits);
|
||||
|
||||
@@ -54,7 +54,7 @@ MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
|
||||
|
||||
template< int Arch,typename VectorLhs,typename VectorRhs,
|
||||
typename Scalar = typename VectorLhs::Scalar,
|
||||
int Vectorizable = (VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit>
|
||||
bool Vectorizable = (VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit>
|
||||
struct ei_cross3_impl {
|
||||
inline static typename ei_plain_matrix_type<VectorLhs>::type
|
||||
run(const VectorLhs& lhs, const VectorRhs& rhs)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2009 Mathieu Gautier <mathieu.gautier@cea.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
@@ -180,6 +180,10 @@ public:
|
||||
return typename ei_cast_return_type<Derived,Quaternion<NewScalarType> >::type(
|
||||
coeffs().template cast<NewScalarType>());
|
||||
}
|
||||
|
||||
#ifdef EIGEN_QUATERNIONBASE_PLUGIN
|
||||
# include EIGEN_QUATERNIONBASE_PLUGIN
|
||||
#endif
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
@@ -277,19 +281,6 @@ typedef Quaternion<double> Quaterniond;
|
||||
* Specialization of Map<Quaternion<Scalar>>
|
||||
***************************************************************************/
|
||||
|
||||
/** \class Map<Quaternion>
|
||||
*
|
||||
*
|
||||
* \brief Expression of a quaternion from a memory buffer
|
||||
*
|
||||
* \param _Scalar the type of the Quaternion coefficients
|
||||
* \param PacketAccess see class Map
|
||||
*
|
||||
* This is a specialization of class Map for Quaternion. This class allows to view
|
||||
* a 4 scalar memory buffer as an Eigen's Quaternion object.
|
||||
*
|
||||
* \sa class Map, class Quaternion, class QuaternionBase
|
||||
*/
|
||||
template<typename _Scalar, int _PacketAccess>
|
||||
struct ei_traits<Map<Quaternion<_Scalar>, _PacketAccess> >:
|
||||
ei_traits<Quaternion<_Scalar> >
|
||||
@@ -301,6 +292,16 @@ ei_traits<Quaternion<_Scalar> >
|
||||
};
|
||||
};
|
||||
|
||||
/** \brief Expression of a quaternion from a memory buffer
|
||||
*
|
||||
* \param _Scalar the type of the Quaternion coefficients
|
||||
* \param PacketAccess see class Map
|
||||
*
|
||||
* This is a specialization of class Map for Quaternion. This class allows to view
|
||||
* a 4 scalar memory buffer as an Eigen's Quaternion object.
|
||||
*
|
||||
* \sa class Map, class Quaternion, class QuaternionBase
|
||||
*/
|
||||
template<typename _Scalar, int PacketAccess>
|
||||
class Map<Quaternion<_Scalar>, PacketAccess >
|
||||
: public QuaternionBase<Map<Quaternion<_Scalar>, PacketAccess> >
|
||||
@@ -398,7 +399,8 @@ QuaternionBase<Derived>::_transformVector(Vector3 v) const
|
||||
// It appears to be much faster than the common algorithm found
|
||||
// in the literature (30 versus 39 flops). It also requires two
|
||||
// Vector3 as temporaries.
|
||||
Vector3 uv = Scalar(2) * this->vec().cross(v);
|
||||
Vector3 uv = this->vec().cross(v);
|
||||
uv += uv;
|
||||
return v + this->w() * uv + this->vec().cross(uv);
|
||||
}
|
||||
|
||||
@@ -513,7 +515,7 @@ inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Deri
|
||||
{
|
||||
c = std::max<Scalar>(c,-1);
|
||||
Matrix<Scalar,2,3> m; m << v0.transpose(), v1.transpose();
|
||||
JacobiSVD<Matrix<Scalar,2,3> > svd(m);
|
||||
JacobiSVD<Matrix<Scalar,2,3> > svd(m, ComputeFullV);
|
||||
Vector3 axis = svd.matrixV().col(2);
|
||||
|
||||
Scalar w2 = (Scalar(1)+c)*Scalar(0.5);
|
||||
|
||||
@@ -117,6 +117,8 @@ public:
|
||||
m_angle = Scalar(other.angle());
|
||||
}
|
||||
|
||||
inline static Rotation2D Identity() { return Rotation2D(0); }
|
||||
|
||||
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
|
||||
* determined by \a prec.
|
||||
*
|
||||
|
||||
@@ -55,12 +55,17 @@ class RotationBase
|
||||
/** \returns an equivalent rotation matrix */
|
||||
inline RotationMatrixType toRotationMatrix() const { return derived().toRotationMatrix(); }
|
||||
|
||||
/** \returns an equivalent rotation matrix
|
||||
* This function is provided to conform to the Transform class' naming scheme.
|
||||
*/
|
||||
inline RotationMatrixType matrix() const { return derived().toRotationMatrix(); }
|
||||
|
||||
/** \returns the inverse rotation */
|
||||
inline Derived inverse() const { return derived().inverse(); }
|
||||
|
||||
/** \returns the concatenation of the rotation \c *this with a translation \a t */
|
||||
inline Transform<Scalar,Dim> operator*(const Translation<Scalar,Dim>& t) const
|
||||
{ return toRotationMatrix() * t; }
|
||||
inline Transform<Scalar,Dim,Isometry> operator*(const Translation<Scalar,Dim>& t) const
|
||||
{ return Transform<Scalar,Dim,Isometry>(*this) * t; }
|
||||
|
||||
/** \returns the concatenation of the rotation \c *this with a uniform scaling \a s */
|
||||
inline RotationMatrixType operator*(const UniformScaling<Scalar>& s) const
|
||||
@@ -82,6 +87,14 @@ class RotationBase
|
||||
inline RotationMatrixType operator*(const EigenBase<OtherDerived>& l, const Derived& r)
|
||||
{ return l.derived() * r.toRotationMatrix(); }
|
||||
|
||||
/** \returns the concatenation of a scaling \a l with the rotation \a r */
|
||||
friend inline Transform<Scalar,Dim,Affine> operator*(const DiagonalMatrix<Scalar,Dim>& l, const Derived& r)
|
||||
{
|
||||
Transform<Scalar,Dim,Affine> res(r);
|
||||
res.linear().applyOnTheLeft(l);
|
||||
return res;
|
||||
}
|
||||
|
||||
/** \returns the concatenation of the rotation \c *this with a transformation \a t */
|
||||
template<int Mode>
|
||||
inline Transform<Scalar,Dim,Mode> operator*(const Transform<Scalar,Dim,Mode>& t) const
|
||||
@@ -102,6 +115,18 @@ struct ei_rotation_base_generic_product_selector<RotationDerived,MatrixType,fals
|
||||
{ return r.toRotationMatrix() * m; }
|
||||
};
|
||||
|
||||
template<typename RotationDerived, typename Scalar, int Dim, int MaxDim>
|
||||
struct ei_rotation_base_generic_product_selector< RotationDerived, DiagonalMatrix<Scalar,Dim,MaxDim>, false >
|
||||
{
|
||||
typedef Transform<Scalar,Dim,Affine> ReturnType;
|
||||
inline static ReturnType run(const RotationDerived& r, const DiagonalMatrix<Scalar,Dim,MaxDim>& m)
|
||||
{
|
||||
ReturnType res(r);
|
||||
res.linear() *= m;
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename RotationDerived,typename OtherVectorType>
|
||||
struct ei_rotation_base_generic_product_selector<RotationDerived,OtherVectorType,true>
|
||||
{
|
||||
|
||||
@@ -69,7 +69,7 @@ public:
|
||||
|
||||
/** Concatenates a uniform scaling and a translation */
|
||||
template<int Dim>
|
||||
inline Transform<Scalar,Dim> operator* (const Translation<Scalar,Dim>& t) const;
|
||||
inline Transform<Scalar,Dim,Affine> operator* (const Translation<Scalar,Dim>& t) const;
|
||||
|
||||
/** Concatenates a uniform scaling and an affine transformation */
|
||||
template<int Dim, int Mode>
|
||||
@@ -115,7 +115,7 @@ public:
|
||||
/** Concatenates a linear transformation matrix and a uniform scaling */
|
||||
// NOTE this operator is defined in MatrixBase and not as a friend function
|
||||
// of UniformScaling to fix an internal crash of Intel's ICC
|
||||
template<typename Derived> const typename MatrixBase<Derived>::ScalarMultipleReturnType
|
||||
template<typename Derived> typename MatrixBase<Derived>::ScalarMultipleReturnType
|
||||
MatrixBase<Derived>::operator*(const UniformScaling<Scalar>& s) const
|
||||
{ return derived() * s.factor(); }
|
||||
|
||||
@@ -158,10 +158,10 @@ typedef DiagonalMatrix<double,3> AlignedScaling3d;
|
||||
|
||||
template<typename Scalar>
|
||||
template<int Dim>
|
||||
inline Transform<Scalar,Dim>
|
||||
inline Transform<Scalar,Dim,Affine>
|
||||
UniformScaling<Scalar>::operator* (const Translation<Scalar,Dim>& t) const
|
||||
{
|
||||
Transform<Scalar,Dim> res;
|
||||
Transform<Scalar,Dim,Affine> res;
|
||||
res.matrix().setZero();
|
||||
res.linear().diagonal().fill(factor());
|
||||
res.translation() = factor() * t.vector();
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
@@ -26,18 +27,22 @@
|
||||
#ifndef EIGEN_TRANSFORM_H
|
||||
#define EIGEN_TRANSFORM_H
|
||||
|
||||
// Note that we have to pass Dim and HDim because it is not allowed to use a template
|
||||
// parameter to define a template specialization. To be more precise, in the following
|
||||
// specializations, it is not allowed to use Dim+1 instead of HDim.
|
||||
template< typename Other,
|
||||
int Mode,
|
||||
int Dim,
|
||||
int HDim,
|
||||
int OtherRows=Other::RowsAtCompileTime,
|
||||
int OtherCols=Other::ColsAtCompileTime>
|
||||
struct ei_transform_right_product_impl;
|
||||
template<typename Transform>
|
||||
struct ei_transform_traits
|
||||
{
|
||||
enum
|
||||
{
|
||||
Dim = Transform::Dim,
|
||||
HDim = Transform::HDim,
|
||||
Mode = Transform::Mode,
|
||||
IsProjective = (Mode==Projective)
|
||||
};
|
||||
};
|
||||
|
||||
template<typename TransformType> struct ei_transform_take_affine_part;
|
||||
template< typename TransformType,
|
||||
typename MatrixType,
|
||||
bool IsProjective = ei_transform_traits<TransformType>::IsProjective>
|
||||
struct ei_transform_right_product_impl;
|
||||
|
||||
template< typename Other,
|
||||
int Mode,
|
||||
@@ -47,7 +52,12 @@ template< typename Other,
|
||||
int OtherCols=Other::ColsAtCompileTime>
|
||||
struct ei_transform_left_product_impl;
|
||||
|
||||
template<typename Lhs,typename Rhs> struct ei_transform_transform_product_impl;
|
||||
template< typename Lhs,
|
||||
typename Rhs,
|
||||
bool AnyProjective =
|
||||
ei_transform_traits<Lhs>::IsProjective ||
|
||||
ei_transform_traits<Rhs>::IsProjective>
|
||||
struct ei_transform_transform_product_impl;
|
||||
|
||||
template< typename Other,
|
||||
int Mode,
|
||||
@@ -57,6 +67,8 @@ template< typename Other,
|
||||
int OtherCols=Other::ColsAtCompileTime>
|
||||
struct ei_transform_construct_from_matrix;
|
||||
|
||||
template<typename TransformType> struct ei_transform_take_affine_part;
|
||||
|
||||
/** \geometry_module \ingroup Geometry_Module
|
||||
*
|
||||
* \class Transform
|
||||
@@ -71,7 +83,7 @@ struct ei_transform_construct_from_matrix;
|
||||
* This is the default.
|
||||
* - AffineCompact: the transformation is stored as a (Dim)x(Dim+1) matrix.
|
||||
* - Projective: the transformation is stored as a (Dim+1)^2 matrix
|
||||
* whithout any assumption.
|
||||
* without any assumption.
|
||||
*
|
||||
* The homography is internally represented and stored by a matrix which
|
||||
* is available through the matrix() method. To understand the behavior of
|
||||
@@ -80,19 +92,19 @@ struct ei_transform_construct_from_matrix;
|
||||
*
|
||||
* \code v' = T * v \endcode
|
||||
*
|
||||
* Thefore, an affine transformation matrix M is shaped like this:
|
||||
* Therefore, an affine transformation matrix M is shaped like this:
|
||||
*
|
||||
* \f$ \left( \begin{array}{cc}
|
||||
* linear & translation\\
|
||||
* 0 ... 0 & 1
|
||||
* \end{array} \right) \f$
|
||||
*
|
||||
* Note that for a provective transformation the last row can be anything,
|
||||
* and then the interpretation of different parts might be sighlty different.
|
||||
* Note that for a projective transformation the last row can be anything,
|
||||
* and then the interpretation of different parts might be slightly different.
|
||||
*
|
||||
* However, unlike a plain matrix, the Transform class provides many features
|
||||
* simplifying both its assembly and usage. In particular, it can be composed
|
||||
* with any other transformations (Transform,Trnaslation,RotationBase,Matrix)
|
||||
* with any other transformations (Transform,Translation,RotationBase,Matrix)
|
||||
* and can be directly used to transform implicit homogeneous vectors. All these
|
||||
* operations are handled via the operator*. For the composition of transformations,
|
||||
* the principle is to first convert the right/left hand sides of the product
|
||||
@@ -139,17 +151,17 @@ struct ei_transform_construct_from_matrix;
|
||||
* 1 & ... & 1
|
||||
* \end{array} \right) \f$
|
||||
*
|
||||
* The concatenation of a Tranform object with any kind of other transformation
|
||||
* The concatenation of a Transform object with any kind of other transformation
|
||||
* always returns a Transform object.
|
||||
*
|
||||
* A little execption to the "as pure matrix product" rule is the case of the
|
||||
* A little exception to the "as pure matrix product" rule is the case of the
|
||||
* transformation of non homogeneous vectors by an affine transformation. In
|
||||
* that case the last matrix row can be ignored, and the product returns non
|
||||
* homogeneous vectors.
|
||||
*
|
||||
* Since, for instance, a Dim x Dim matrix is interpreted as a linear transformation,
|
||||
* it is not possible to directly transform Dim vectors stored in a Dim x Dim matrix.
|
||||
* The solution is either to use a Dim x Dynamic matrix or explicitely request a
|
||||
* The solution is either to use a Dim x Dynamic matrix or explicitly request a
|
||||
* vector transformation by making the vector homogeneous:
|
||||
* \code
|
||||
* m' = T * m.colwise().homogeneous();
|
||||
@@ -202,7 +214,7 @@ protected:
|
||||
|
||||
public:
|
||||
|
||||
/** Default constructor without initialization of the meaningfull coefficients.
|
||||
/** Default constructor without initialization of the meaningful coefficients.
|
||||
* If Mode==Affine, then the last row is set to [0 ... 0 1] */
|
||||
inline Transform()
|
||||
{
|
||||
@@ -243,9 +255,41 @@ public:
|
||||
template<int OtherMode>
|
||||
inline Transform(const Transform<Scalar,Dim,OtherMode>& other)
|
||||
{
|
||||
ei_assert(OtherMode!=Projective && "You cannot directly assign a projective transform to an affine one.");
|
||||
typedef typename Transform<Scalar,Dim,OtherMode>::MatrixType OtherMatrixType;
|
||||
ei_transform_construct_from_matrix<OtherMatrixType,Mode,Dim,HDim>::run(this, other.matrix());
|
||||
// prevent conversions as:
|
||||
// Affine | AffineCompact | Isometry = Projective
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(OtherMode==int(Projective), Mode==int(Projective)),
|
||||
YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION)
|
||||
|
||||
// prevent conversions as:
|
||||
// Isometry = Affine | AffineCompact
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(OtherMode==int(Affine)||OtherMode==int(AffineCompact), Mode!=int(Isometry)),
|
||||
YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION)
|
||||
|
||||
enum { ModeIsAffineCompact = Mode == int(AffineCompact),
|
||||
OtherModeIsAffineCompact = OtherMode == int(AffineCompact)
|
||||
};
|
||||
|
||||
if(ModeIsAffineCompact == OtherModeIsAffineCompact)
|
||||
{
|
||||
// We need the block expression because the code is compiled for all
|
||||
// combinations of transformations and will trigger a compile time error
|
||||
// if one tries to assign the matrices directly
|
||||
m_matrix.template block<Dim,Dim+1>(0,0) = other.matrix().template block<Dim,Dim+1>(0,0);
|
||||
makeAffine();
|
||||
}
|
||||
else if(OtherModeIsAffineCompact)
|
||||
{
|
||||
typedef typename Transform<Scalar,Dim,OtherMode>::MatrixType OtherMatrixType;
|
||||
ei_transform_construct_from_matrix<OtherMatrixType,Mode,Dim,HDim>::run(this, other.matrix());
|
||||
}
|
||||
else
|
||||
{
|
||||
// here we know that Mode == AffineCompact and OtherMode != AffineCompact.
|
||||
// if OtherMode were Projective, the static assert above would already have caught it.
|
||||
// So the only possibility is that OtherMode == Affine
|
||||
linear() = other.linear();
|
||||
translation() = other.translation();
|
||||
}
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
@@ -271,10 +315,10 @@ public:
|
||||
#endif
|
||||
|
||||
/** shortcut for m_matrix(row,col);
|
||||
* \sa MatrixBase::operaror(Index,Index) const */
|
||||
* \sa MatrixBase::operator(Index,Index) const */
|
||||
inline Scalar operator() (Index row, Index col) const { return m_matrix(row,col); }
|
||||
/** shortcut for m_matrix(row,col);
|
||||
* \sa MatrixBase::operaror(Index,Index) */
|
||||
* \sa MatrixBase::operator(Index,Index) */
|
||||
inline Scalar& operator() (Index row, Index col) { return m_matrix(row,col); }
|
||||
|
||||
/** \returns a read-only expression of the transformation matrix */
|
||||
@@ -310,9 +354,9 @@ public:
|
||||
*/
|
||||
// note: this function is defined here because some compilers cannot find the respective declaration
|
||||
template<typename OtherDerived>
|
||||
inline const typename ei_transform_right_product_impl<OtherDerived,Mode,_Dim,_Dim+1>::ResultType
|
||||
EIGEN_STRONG_INLINE const typename ei_transform_right_product_impl<Transform, OtherDerived>::ResultType
|
||||
operator * (const EigenBase<OtherDerived> &other) const
|
||||
{ return ei_transform_right_product_impl<OtherDerived,Mode,Dim,HDim>::run(*this,other.derived()); }
|
||||
{ return ei_transform_right_product_impl<Transform, OtherDerived>::run(*this,other.derived()); }
|
||||
|
||||
/** \returns the product expression of a transformation matrix \a a times a transform \a b
|
||||
*
|
||||
@@ -323,23 +367,54 @@ public:
|
||||
*/
|
||||
template<typename OtherDerived> friend
|
||||
inline const typename ei_transform_left_product_impl<OtherDerived,Mode,_Dim,_Dim+1>::ResultType
|
||||
operator * (const EigenBase<OtherDerived> &a, const Transform &b)
|
||||
operator * (const EigenBase<OtherDerived> &a, const Transform &b)
|
||||
{ return ei_transform_left_product_impl<OtherDerived,Mode,Dim,HDim>::run(a.derived(),b); }
|
||||
|
||||
/** \returns The product expression of a transform \a a times a diagonal matrix \a b
|
||||
*
|
||||
* The rhs diagonal matrix is interpreted as an affine scaling transformation. The
|
||||
* product results in a Transform of the same type (mode) as the lhs, unless the lhs
|
||||
* mode is Isometry, in which case the returned transform is an affinity (Affine mode).
|
||||
*/
|
||||
friend inline const Transform<Scalar,Dim,((Mode==(int)Isometry)?Affine:(int)Mode)>
|
||||
operator * (const Transform &a, const DiagonalMatrix<Scalar,Dim> &b)
|
||||
{
|
||||
Transform<Scalar,Dim,((Mode==(int)Isometry)?Affine:(int)Mode)> res(a);
|
||||
res.linear() *= b;
|
||||
return res;
|
||||
}
|
||||
|
||||
/** \returns The product expression of a diagonal matrix \a a times a transform \a b
|
||||
*
|
||||
* The lhs diagonal matrix is interpreted as an affine scaling transformation. The
|
||||
* product results in a Transform of the same type (mode) as the rhs, unless the rhs
|
||||
* mode is Isometry, in which case the returned transform is an affinity (Affine mode).
|
||||
*/
|
||||
friend inline const Transform<Scalar,Dim,((Mode==(int)Isometry)?Affine:(int)Mode)>
|
||||
operator * (const DiagonalMatrix<Scalar,Dim> &a, const Transform &b)
|
||||
{
|
||||
Transform<Scalar,Dim,((Mode==(int)Isometry)?Affine:(int)Mode)> res;
|
||||
res.linear().noalias() = a*b.linear();
|
||||
res.translation().noalias() = a*b.translation();
|
||||
if (Mode!=int(AffineCompact))
|
||||
res.matrix().row(Dim) = b.matrix().row(Dim);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
inline Transform& operator*=(const EigenBase<OtherDerived>& other) { return *this = *this * other; }
|
||||
|
||||
/** Contatenates two transformations */
|
||||
/** Concatenates two transformations */
|
||||
inline const Transform operator * (const Transform& other) const
|
||||
{
|
||||
return ei_transform_transform_product_impl<Transform,Transform>::run(*this,other);
|
||||
}
|
||||
|
||||
/** Contatenates two different transformations */
|
||||
/** Concatenates two different transformations */
|
||||
template<int OtherMode>
|
||||
inline const typename ei_transform_transform_product_impl<
|
||||
Transform,Transform<Scalar,Dim,OtherMode> >::ResultType
|
||||
operator * (const Transform<Scalar,Dim,OtherMode>& other) const
|
||||
Transform,Transform<Scalar,Dim,OtherMode> >::ResultType
|
||||
operator * (const Transform<Scalar,Dim,OtherMode>& other) const
|
||||
{
|
||||
return ei_transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode> >::run(*this,other);
|
||||
}
|
||||
@@ -388,6 +463,8 @@ public:
|
||||
inline Transform& operator*=(const UniformScaling<Scalar>& s) { return scale(s.factor()); }
|
||||
inline Transform operator*(const UniformScaling<Scalar>& s) const;
|
||||
|
||||
inline Transform& operator*=(const DiagonalMatrix<Scalar,Dim>& s) { linear() *= s; return *this; }
|
||||
|
||||
template<typename Derived>
|
||||
inline Transform& operator=(const RotationBase<Derived,Dim>& r);
|
||||
template<typename Derived>
|
||||
@@ -477,15 +554,6 @@ public:
|
||||
|
||||
};
|
||||
|
||||
/** \ingroup Geometry_Module */
|
||||
typedef Transform<float,2> Transform2f;
|
||||
/** \ingroup Geometry_Module */
|
||||
typedef Transform<float,3> Transform3f;
|
||||
/** \ingroup Geometry_Module */
|
||||
typedef Transform<double,2> Transform2d;
|
||||
/** \ingroup Geometry_Module */
|
||||
typedef Transform<double,3> Transform3d;
|
||||
|
||||
/** \ingroup Geometry_Module */
|
||||
typedef Transform<float,2,Isometry> Isometry2f;
|
||||
/** \ingroup Geometry_Module */
|
||||
@@ -496,13 +564,13 @@ typedef Transform<double,2,Isometry> Isometry2d;
|
||||
typedef Transform<double,3,Isometry> Isometry3d;
|
||||
|
||||
/** \ingroup Geometry_Module */
|
||||
typedef Transform<float,2> Affine2f;
|
||||
typedef Transform<float,2,Affine> Affine2f;
|
||||
/** \ingroup Geometry_Module */
|
||||
typedef Transform<float,3> Affine3f;
|
||||
typedef Transform<float,3,Affine> Affine3f;
|
||||
/** \ingroup Geometry_Module */
|
||||
typedef Transform<double,2> Affine2d;
|
||||
typedef Transform<double,2,Affine> Affine2d;
|
||||
/** \ingroup Geometry_Module */
|
||||
typedef Transform<double,3> Affine3d;
|
||||
typedef Transform<double,3,Affine> Affine3d;
|
||||
|
||||
/** \ingroup Geometry_Module */
|
||||
typedef Transform<float,2,AffineCompact> AffineCompact2f;
|
||||
@@ -548,7 +616,7 @@ Transform<Scalar,Dim,Mode>& Transform<Scalar,Dim,Mode>::operator=(const QMatrix&
|
||||
m_matrix << other.m11(), other.m21(), other.dx(),
|
||||
other.m12(), other.m22(), other.dy(),
|
||||
0, 0, 1;
|
||||
return *this;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** \returns a QMatrix from \c *this assuming the dimension is 2.
|
||||
@@ -587,7 +655,7 @@ Transform<Scalar,Dim,Mode>& Transform<Scalar,Dim,Mode>::operator=(const QTransfo
|
||||
m_matrix << other.m11(), other.m21(), other.dx(),
|
||||
other.m12(), other.m22(), other.dy(),
|
||||
other.m13(), other.m23(), other.m33();
|
||||
return *this;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** \returns a QTransform from \c *this assuming the dimension is 2.
|
||||
@@ -618,6 +686,7 @@ Transform<Scalar,Dim,Mode>&
|
||||
Transform<Scalar,Dim,Mode>::scale(const MatrixBase<OtherDerived> &other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
|
||||
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
|
||||
linearExt().noalias() = (linearExt() * other.asDiagonal());
|
||||
return *this;
|
||||
}
|
||||
@@ -629,6 +698,7 @@ Transform<Scalar,Dim,Mode>::scale(const MatrixBase<OtherDerived> &other)
|
||||
template<typename Scalar, int Dim, int Mode>
|
||||
inline Transform<Scalar,Dim,Mode>& Transform<Scalar,Dim,Mode>::scale(Scalar s)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
|
||||
linearExt() *= s;
|
||||
return *this;
|
||||
}
|
||||
@@ -643,6 +713,7 @@ Transform<Scalar,Dim,Mode>&
|
||||
Transform<Scalar,Dim,Mode>::prescale(const MatrixBase<OtherDerived> &other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
|
||||
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
|
||||
m_matrix.template block<Dim,HDim>(0,0).noalias() = (other.asDiagonal() * m_matrix.template block<Dim,HDim>(0,0));
|
||||
return *this;
|
||||
}
|
||||
@@ -654,6 +725,7 @@ Transform<Scalar,Dim,Mode>::prescale(const MatrixBase<OtherDerived> &other)
|
||||
template<typename Scalar, int Dim, int Mode>
|
||||
inline Transform<Scalar,Dim,Mode>& Transform<Scalar,Dim,Mode>::prescale(Scalar s)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
|
||||
m_matrix.template topRows<Dim>() *= s;
|
||||
return *this;
|
||||
}
|
||||
@@ -742,6 +814,7 @@ Transform<Scalar,Dim,Mode>&
|
||||
Transform<Scalar,Dim,Mode>::shear(Scalar sx, Scalar sy)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
|
||||
VectorType tmp = linear().col(0)*sy + linear().col(1);
|
||||
linear() << linear().col(0) + linear().col(1)*sx, tmp;
|
||||
return *this;
|
||||
@@ -757,6 +830,7 @@ Transform<Scalar,Dim,Mode>&
|
||||
Transform<Scalar,Dim,Mode>::preshear(Scalar sx, Scalar sy)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
|
||||
m_matrix.template block<Dim,HDim>(0,0) = LinearMatrixType(1, sx, sy, 1) * m_matrix.template block<Dim,HDim>(0,0);
|
||||
return *this;
|
||||
}
|
||||
@@ -854,7 +928,18 @@ template<typename Scalar, int Dim, int Mode>
|
||||
template<typename RotationMatrixType, typename ScalingMatrixType>
|
||||
void Transform<Scalar,Dim,Mode>::computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const
|
||||
{
|
||||
linear().svd().computeRotationScaling(rotation, scaling);
|
||||
JacobiSVD<LinearMatrixType> svd(linear(), ComputeFullU | ComputeFullV);
|
||||
|
||||
Scalar x = (svd.matrixU() * svd.matrixV().adjoint()).determinant(); // so x has absolute value 1
|
||||
VectorType sv(svd.singularValues());
|
||||
sv.coeffRef(0) *= x;
|
||||
if(scaling) scaling->lazyAssign(svd.matrixV() * sv.asDiagonal() * svd.matrixV().adjoint());
|
||||
if(rotation)
|
||||
{
|
||||
LinearMatrixType m(svd.matrixU());
|
||||
m.col(0) /= x;
|
||||
rotation->lazyAssign(m * svd.matrixV().adjoint());
|
||||
}
|
||||
}
|
||||
|
||||
/** decomposes the linear part of the transformation as a product rotation x scaling, the scaling being
|
||||
@@ -872,7 +957,18 @@ template<typename Scalar, int Dim, int Mode>
|
||||
template<typename ScalingMatrixType, typename RotationMatrixType>
|
||||
void Transform<Scalar,Dim,Mode>::computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const
|
||||
{
|
||||
linear().svd().computeScalingRotation(scaling, rotation);
|
||||
JacobiSVD<LinearMatrixType> svd(linear(), ComputeFullU | ComputeFullV);
|
||||
|
||||
Scalar x = (svd.matrixU() * svd.matrixV().adjoint()).determinant(); // so x has absolute value 1
|
||||
VectorType sv(svd.singularValues());
|
||||
sv.coeffRef(0) *= x;
|
||||
if(scaling) scaling->lazyAssign(svd.matrixU() * sv.asDiagonal() * svd.matrixU().adjoint());
|
||||
if(rotation)
|
||||
{
|
||||
LinearMatrixType m(svd.matrixU());
|
||||
m.col(0) /= x;
|
||||
rotation->lazyAssign(m * svd.matrixV().adjoint());
|
||||
}
|
||||
}
|
||||
|
||||
/** Convenient method to set \c *this from a position, orientation and scale
|
||||
@@ -914,7 +1010,7 @@ struct ei_projective_transform_inverse<TransformType, Projective>
|
||||
* \returns the inverse transformation according to some given knowledge
|
||||
* on \c *this.
|
||||
*
|
||||
* \param traits allows to optimize the inversion process when the transformation
|
||||
* \param hint allows to optimize the inversion process when the transformation
|
||||
* is known to be not a general transformation. The possible values are:
|
||||
* - Projective if the transformation is not necessarily affine, i.e., if the
|
||||
* last row is not guaranteed to be [0 ... 0 1]
|
||||
@@ -954,11 +1050,7 @@ Transform<Scalar,Dim,Mode>::inverse(TransformTraits hint) const
|
||||
// translation and remaining parts
|
||||
res.matrix().template topRightCorner<Dim,1>()
|
||||
= - res.matrix().template topLeftCorner<Dim,Dim>() * translation();
|
||||
if(int(Mode)!=int(AffineCompact))
|
||||
{
|
||||
res.matrix().template block<1,Dim>(Dim,0).setZero();
|
||||
res.matrix().coeffRef(Dim,Dim) = 1;
|
||||
}
|
||||
res.makeAffine(); // we do need this, because in the beginning res is uninitialized
|
||||
}
|
||||
return res;
|
||||
}
|
||||
@@ -1022,161 +1114,70 @@ struct ei_transform_construct_from_matrix<Other, AffineCompact,Dim,HDim, HDim,HD
|
||||
{ transform->matrix() = other.template block<Dim,HDim>(0,0); }
|
||||
};
|
||||
|
||||
/*********************************************************
|
||||
*** Specializations of operator* with a EigenBase ***
|
||||
*********************************************************/
|
||||
/**********************************************************
|
||||
*** Specializations of operator* with rhs EigenBase ***
|
||||
**********************************************************/
|
||||
|
||||
// ei_general_product_return_type is a generalization of ProductReturnType, for all types (including e.g. DiagonalBase...),
|
||||
// instead of being restricted to MatrixBase.
|
||||
template<typename Lhs, typename Rhs> struct ei_general_product_return_type;
|
||||
template<typename D1, typename D2> struct ei_general_product_return_type<MatrixBase<D1>, MatrixBase<D2> >
|
||||
: ProductReturnType<D1,D2> {};
|
||||
template<typename Lhs, typename D2> struct ei_general_product_return_type<Lhs, MatrixBase<D2> >
|
||||
{ typedef D2 Type; };
|
||||
template<typename D1, typename Rhs> struct ei_general_product_return_type<MatrixBase<D1>, Rhs >
|
||||
{ typedef D1 Type; };
|
||||
|
||||
|
||||
|
||||
// Projective * set of homogeneous column vectors
|
||||
template<typename Other, int Dim, int HDim>
|
||||
struct ei_transform_right_product_impl<Other,Projective, Dim,HDim, HDim, Dynamic>
|
||||
template<int LhsMode,int RhsMode>
|
||||
struct ei_transform_product_result
|
||||
{
|
||||
typedef Transform<typename Other::Scalar,Dim,Projective> TransformType;
|
||||
typedef typename TransformType::MatrixType MatrixType;
|
||||
typedef typename ProductReturnType<MatrixType,Other>::Type ResultType;
|
||||
static ResultType run(const TransformType& tr, const Other& other)
|
||||
{ return tr.matrix() * other; }
|
||||
enum
|
||||
{
|
||||
Mode =
|
||||
(LhsMode == (int)Projective || RhsMode == (int)Projective ) ? Projective :
|
||||
(LhsMode == (int)Affine || RhsMode == (int)Affine ) ? Affine :
|
||||
(LhsMode == (int)AffineCompact || RhsMode == (int)AffineCompact ) ? AffineCompact :
|
||||
(LhsMode == (int)Isometry || RhsMode == (int)Isometry ) ? Isometry : Projective
|
||||
};
|
||||
};
|
||||
|
||||
// Projective * homogeneous column vector
|
||||
template<typename Other, int Dim, int HDim>
|
||||
struct ei_transform_right_product_impl<Other,Projective, Dim,HDim, HDim, 1>
|
||||
template< typename TransformType, typename MatrixType >
|
||||
struct ei_transform_right_product_impl< TransformType, MatrixType, true >
|
||||
{
|
||||
typedef Transform<typename Other::Scalar,Dim,Projective> TransformType;
|
||||
typedef typename TransformType::MatrixType MatrixType;
|
||||
typedef typename ProductReturnType<MatrixType,Other>::Type ResultType;
|
||||
static ResultType run(const TransformType& tr, const Other& other)
|
||||
{ return tr.matrix() * other; }
|
||||
};
|
||||
typedef typename MatrixType::PlainObject ResultType;
|
||||
|
||||
// Projective * column vector
|
||||
template<typename Other, int Dim, int HDim>
|
||||
struct ei_transform_right_product_impl<Other,Projective, Dim,HDim, Dim, 1>
|
||||
{
|
||||
typedef Transform<typename Other::Scalar,Dim,Projective> TransformType;
|
||||
typedef Matrix<typename Other::Scalar,HDim,1> ResultType;
|
||||
static ResultType run(const TransformType& tr, const Other& other)
|
||||
{ return tr.matrix().template block<HDim,Dim>(0,0) * other + tr.matrix().col(Dim); }
|
||||
};
|
||||
|
||||
// Affine * column vector
|
||||
template<typename Other, int Mode, int Dim, int HDim>
|
||||
struct ei_transform_right_product_impl<Other,Mode, Dim,HDim, Dim,1>
|
||||
{
|
||||
typedef Transform<typename Other::Scalar,Dim,Mode> TransformType;
|
||||
typedef Matrix<typename Other::Scalar,Dim,1> ResultType;
|
||||
static ResultType run(const TransformType& tr, const Other& other)
|
||||
{ return tr.linear() * other + tr.translation(); }
|
||||
};
|
||||
|
||||
// Affine * set of column vectors
|
||||
// FIXME use a ReturnByValue to remove the temporary
|
||||
template<typename Other, int Mode, int Dim, int HDim>
|
||||
struct ei_transform_right_product_impl<Other,Mode, Dim,HDim, Dim,Dynamic>
|
||||
{
|
||||
typedef Transform<typename Other::Scalar,Dim,Mode> TransformType;
|
||||
typedef Matrix<typename Other::Scalar,Dim,Dynamic> ResultType;
|
||||
static ResultType run(const TransformType& tr, const Other& other)
|
||||
{ return (tr.linear() * other).colwise() + tr.translation(); }
|
||||
};
|
||||
|
||||
// Affine * homogeneous column vector
|
||||
// FIXME added for backward compatibility, but I'm not sure we should keep it
|
||||
template<typename Other, int Mode, int Dim, int HDim>
|
||||
struct ei_transform_right_product_impl<Other,Mode, Dim,HDim, HDim,1>
|
||||
{
|
||||
typedef Transform<typename Other::Scalar,Dim,Mode> TransformType;
|
||||
typedef Matrix<typename Other::Scalar,HDim,1> ResultType;
|
||||
static ResultType run(const TransformType& tr, const Other& other)
|
||||
{ return tr.matrix() * other; }
|
||||
};
|
||||
template<typename Other, int Dim, int HDim>
|
||||
struct ei_transform_right_product_impl<Other,AffineCompact, Dim,HDim, HDim,1>
|
||||
{
|
||||
typedef Transform<typename Other::Scalar,Dim,AffineCompact> TransformType;
|
||||
typedef Matrix<typename Other::Scalar,HDim,1> ResultType;
|
||||
static ResultType run(const TransformType& tr, const Other& other)
|
||||
EIGEN_STRONG_INLINE static ResultType run(const TransformType& T, const MatrixType& other)
|
||||
{
|
||||
ResultType res;
|
||||
res.template head<HDim>() = tr.matrix() * other;
|
||||
res.coeffRef(Dim) = other.coeff(Dim);
|
||||
return T.matrix() * other;
|
||||
}
|
||||
};
|
||||
|
||||
// T * linear matrix => T
|
||||
template<typename Other, int Mode, int Dim, int HDim>
|
||||
struct ei_transform_right_product_impl<Other,Mode, Dim,HDim, Dim,Dim>
|
||||
template< typename TransformType, typename MatrixType >
|
||||
struct ei_transform_right_product_impl< TransformType, MatrixType, false >
|
||||
{
|
||||
typedef Transform<typename Other::Scalar,Dim,Mode> TransformType;
|
||||
typedef typename TransformType::MatrixType MatrixType;
|
||||
typedef TransformType ResultType;
|
||||
static ResultType run(const TransformType& tr, const Other& other)
|
||||
enum {
|
||||
Dim = TransformType::Dim,
|
||||
HDim = TransformType::HDim,
|
||||
OtherRows = MatrixType::RowsAtCompileTime,
|
||||
OtherCols = MatrixType::ColsAtCompileTime
|
||||
};
|
||||
|
||||
typedef typename MatrixType::PlainObject ResultType;
|
||||
|
||||
EIGEN_STRONG_INLINE static ResultType run(const TransformType& T, const MatrixType& other)
|
||||
{
|
||||
TransformType res;
|
||||
res.matrix().col(Dim) = tr.matrix().col(Dim);
|
||||
res.linearExt().noalias() = (tr.linearExt() * other);
|
||||
if(Mode==Affine)
|
||||
res.matrix().row(Dim).template head<Dim>() = tr.matrix().row(Dim).template head<Dim>();
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
// T * affine matrix => T
|
||||
template<typename Other, int Mode, int Dim, int HDim>
|
||||
struct ei_transform_right_product_impl<Other,Mode, Dim,HDim, Dim,HDim>
|
||||
{
|
||||
typedef Transform<typename Other::Scalar,Dim,Mode> TransformType;
|
||||
typedef typename TransformType::MatrixType MatrixType;
|
||||
typedef TransformType ResultType;
|
||||
static ResultType run(const TransformType& tr, const Other& other)
|
||||
{
|
||||
TransformType res;
|
||||
enum { Rows = Mode==Projective ? HDim : Dim };
|
||||
res.matrix().template block<Rows,HDim>(0,0).noalias() = (tr.linearExt() * other);
|
||||
res.translationExt() += tr.translationExt();
|
||||
if(Mode!=Affine)
|
||||
res.makeAffine();
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
// T * generic matrix => Projective
|
||||
template<typename Other, int Mode, int Dim, int HDim>
|
||||
struct ei_transform_right_product_impl<Other,Mode, Dim,HDim, HDim,HDim>
|
||||
{
|
||||
typedef Transform<typename Other::Scalar,Dim,Mode> TransformType;
|
||||
typedef typename TransformType::MatrixType MatrixType;
|
||||
typedef Transform<typename Other::Scalar,Dim,Projective> ResultType;
|
||||
static ResultType run(const TransformType& tr, const Other& other)
|
||||
{ return ResultType(tr.matrix() * other); }
|
||||
};
|
||||
|
||||
// AffineCompact * generic matrix => Projective
|
||||
template<typename Other, int Dim, int HDim>
|
||||
struct ei_transform_right_product_impl<Other,AffineCompact, Dim,HDim, HDim,HDim>
|
||||
{
|
||||
typedef Transform<typename Other::Scalar,Dim,AffineCompact> TransformType;
|
||||
typedef Transform<typename Other::Scalar,Dim,Projective> ResultType;
|
||||
static ResultType run(const TransformType& tr, const Other& other)
|
||||
{
|
||||
ResultType res;
|
||||
res.affine().noalias() = tr.matrix() * other;
|
||||
res.makeAffine();
|
||||
EIGEN_STATIC_ASSERT(OtherRows==Dim || OtherRows==HDim, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES);
|
||||
|
||||
typedef Block<ResultType, Dim, OtherCols> TopLeftLhs;
|
||||
typedef Block<MatrixType, Dim, OtherCols> TopLeftRhs;
|
||||
|
||||
ResultType res(other.rows(),other.cols());
|
||||
|
||||
TopLeftLhs(res, 0, 0, Dim, other.cols()) =
|
||||
( T.linear() * TopLeftRhs(other, 0, 0, Dim, other.cols()) ).colwise() +
|
||||
T.translation();
|
||||
|
||||
// we need to take .rows() because OtherRows might be Dim or HDim
|
||||
if (OtherRows==HDim)
|
||||
res.row(other.rows()) = other.row(other.rows());
|
||||
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
/**********************************************************
|
||||
*** Specializations of operator* with lhs EigenBase ***
|
||||
**********************************************************/
|
||||
|
||||
// generic HDim x HDim matrix * T => Projective
|
||||
template<typename Other,int Mode, int Dim, int HDim>
|
||||
@@ -1247,7 +1248,7 @@ struct ei_transform_left_product_impl<Other,Mode,Dim,HDim, Dim,Dim>
|
||||
static ResultType run(const Other& other, const TransformType& tr)
|
||||
{
|
||||
TransformType res;
|
||||
if(Mode!=AffineCompact)
|
||||
if(Mode!=int(AffineCompact))
|
||||
res.matrix().row(Dim) = tr.matrix().row(Dim);
|
||||
res.matrix().template topRows<Dim>().noalias()
|
||||
= other * tr.matrix().template topRows<Dim>();
|
||||
@@ -1259,52 +1260,32 @@ struct ei_transform_left_product_impl<Other,Mode,Dim,HDim, Dim,Dim>
|
||||
*** Specializations of operator* with another Transform ***
|
||||
**********************************************************/
|
||||
|
||||
template<typename Scalar, int Dim, int Mode>
|
||||
struct ei_transform_transform_product_impl<Transform<Scalar,Dim,Mode>,Transform<Scalar,Dim,Mode> >
|
||||
template<typename Scalar, int Dim, int LhsMode, int RhsMode>
|
||||
struct ei_transform_transform_product_impl<Transform<Scalar,Dim,LhsMode>,Transform<Scalar,Dim,RhsMode>,false >
|
||||
{
|
||||
typedef Transform<Scalar,Dim,Mode> TransformType;
|
||||
typedef TransformType ResultType;
|
||||
static ResultType run(const TransformType& lhs, const TransformType& rhs)
|
||||
enum { ResultMode = ei_transform_product_result<LhsMode,RhsMode>::Mode };
|
||||
typedef Transform<Scalar,Dim,LhsMode> Lhs;
|
||||
typedef Transform<Scalar,Dim,RhsMode> Rhs;
|
||||
typedef Transform<Scalar,Dim,ResultMode> ResultType;
|
||||
static ResultType run(const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
return ResultType(lhs.matrix() * rhs.matrix());
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar, int Dim>
|
||||
struct ei_transform_transform_product_impl<Transform<Scalar,Dim,AffineCompact>,Transform<Scalar,Dim,AffineCompact> >
|
||||
{
|
||||
typedef Transform<Scalar,Dim,AffineCompact> TransformType;
|
||||
typedef TransformType ResultType;
|
||||
static ResultType run(const TransformType& lhs, const TransformType& rhs)
|
||||
{
|
||||
return ei_transform_right_product_impl<typename TransformType::MatrixType,
|
||||
AffineCompact,Dim,Dim+1>::run(lhs,rhs.matrix());
|
||||
ResultType res;
|
||||
res.linear() = lhs.linear() * rhs.linear();
|
||||
res.translation() = lhs.linear() * rhs.translation() + lhs.translation();
|
||||
res.makeAffine();
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar, int Dim, int LhsMode, int RhsMode>
|
||||
struct ei_transform_transform_product_impl<Transform<Scalar,Dim,LhsMode>,Transform<Scalar,Dim,RhsMode> >
|
||||
struct ei_transform_transform_product_impl<Transform<Scalar,Dim,LhsMode>,Transform<Scalar,Dim,RhsMode>,true >
|
||||
{
|
||||
typedef Transform<Scalar,Dim,LhsMode> Lhs;
|
||||
typedef Transform<Scalar,Dim,RhsMode> Rhs;
|
||||
typedef typename ei_transform_right_product_impl<typename Rhs::MatrixType,
|
||||
LhsMode,Dim,Dim+1>::ResultType ResultType;
|
||||
typedef Transform<Scalar,Dim,Projective> ResultType;
|
||||
static ResultType run(const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
return ei_transform_right_product_impl<typename Rhs::MatrixType,LhsMode,Dim,Dim+1>::run(lhs,rhs.matrix());
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar, int Dim>
|
||||
struct ei_transform_transform_product_impl<Transform<Scalar,Dim,AffineCompact>,
|
||||
Transform<Scalar,Dim,Affine> >
|
||||
{
|
||||
typedef Transform<Scalar,Dim,AffineCompact> Lhs;
|
||||
typedef Transform<Scalar,Dim,Affine> Rhs;
|
||||
typedef Transform<Scalar,Dim,AffineCompact> ResultType;
|
||||
static ResultType run(const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
return ResultType(lhs.matrix() * rhs.matrix());
|
||||
return ResultType( lhs.matrix() * rhs.matrix() );
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -53,7 +53,7 @@ public:
|
||||
/** corresponding linear transformation matrix type */
|
||||
typedef Matrix<Scalar,Dim,Dim> LinearMatrixType;
|
||||
/** corresponding affine transformation type */
|
||||
typedef Transform<Scalar,Dim> AffineTransformType;
|
||||
typedef Transform<Scalar,Dim,Affine> AffineTransformType;
|
||||
|
||||
protected:
|
||||
|
||||
@@ -98,6 +98,9 @@ public:
|
||||
const VectorType& vector() const { return m_coeffs; }
|
||||
VectorType& vector() { return m_coeffs; }
|
||||
|
||||
const VectorType& translation() const { return m_coeffs; }
|
||||
VectorType& translation() { return m_coeffs; }
|
||||
|
||||
/** Concatenates two translation */
|
||||
inline Translation operator* (const Translation& other) const
|
||||
{ return Translation(m_coeffs + other.m_coeffs); }
|
||||
@@ -128,7 +131,7 @@ public:
|
||||
return res;
|
||||
}
|
||||
|
||||
/** Concatenates a translation and an affine transformation */
|
||||
/** Concatenates a translation and a transformation */
|
||||
template<int Mode>
|
||||
inline Transform<Scalar,Dim,Mode> operator* (const Transform<Scalar,Dim,Mode>& t) const
|
||||
{
|
||||
@@ -150,6 +153,8 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
static const Translation Identity() { return Translation(VectorType::Zero()); }
|
||||
|
||||
/** \returns \c *this with scalar type casted to \a NewScalarType
|
||||
*
|
||||
* Note that if \a NewScalarType is equal to the current scalar type of \c *this
|
||||
|
||||
@@ -141,7 +141,7 @@ umeyama(const MatrixBase<Derived>& src, const MatrixBase<OtherDerived>& dst, boo
|
||||
// Eq. (38)
|
||||
const MatrixType sigma = one_over_n * dst_demean * src_demean.transpose();
|
||||
|
||||
SVD<MatrixType> svd(sigma);
|
||||
JacobiSVD<MatrixType> svd(sigma, ComputeFullU | ComputeFullV);
|
||||
|
||||
// Initialize the resulting transformation with an identity matrix...
|
||||
TransformationMatrixType Rt = TransformationMatrixType::Identity(m+1,m+1);
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com>
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
@@ -54,8 +54,8 @@ struct ei_cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
|
||||
inline static typename ei_plain_matrix_type<VectorLhs>::type
|
||||
run(const VectorLhs& lhs, const VectorRhs& rhs)
|
||||
{
|
||||
__m128 a = lhs.coeffs().packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
|
||||
__m128 b = rhs.coeffs().packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
|
||||
__m128 a = lhs.template packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
|
||||
__m128 b = rhs.template packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
|
||||
__m128 mul1=_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,3),ei_vec4f_swizzle1(b,2,0,1,3));
|
||||
__m128 mul2=_mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,3),ei_vec4f_swizzle1(b,1,2,0,3));
|
||||
typename ei_plain_matrix_type<VectorLhs>::type res;
|
||||
@@ -64,4 +64,60 @@ struct ei_cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
template<class Derived, class OtherDerived>
|
||||
struct ei_quat_product<Architecture::SSE, Derived, OtherDerived, double, Aligned>
|
||||
{
|
||||
inline static Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
|
||||
{
|
||||
const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
|
||||
|
||||
Quaternion<double> res;
|
||||
|
||||
const double* a = _a.coeffs().data();
|
||||
Packet2d b_xy = _b.coeffs().template packet<Aligned>(0);
|
||||
Packet2d b_zw = _b.coeffs().template packet<Aligned>(2);
|
||||
Packet2d a_xx = ei_pset1<Packet2d>(a[0]);
|
||||
Packet2d a_yy = ei_pset1<Packet2d>(a[1]);
|
||||
Packet2d a_zz = ei_pset1<Packet2d>(a[2]);
|
||||
Packet2d a_ww = ei_pset1<Packet2d>(a[3]);
|
||||
|
||||
// two temporaries:
|
||||
Packet2d t1, t2;
|
||||
|
||||
/*
|
||||
* t1 = ww*xy + yy*zw
|
||||
* t2 = zz*xy - xx*zw
|
||||
* res.xy = t1 +/- swap(t2)
|
||||
*/
|
||||
t1 = ei_padd(ei_pmul(a_ww, b_xy), ei_pmul(a_yy, b_zw));
|
||||
t2 = ei_psub(ei_pmul(a_zz, b_xy), ei_pmul(a_xx, b_zw));
|
||||
#ifdef __SSE3__
|
||||
EIGEN_UNUSED_VARIABLE(mask)
|
||||
ei_pstore(&res.x(), _mm_addsub_pd(t1, ei_preverse(t2)));
|
||||
#else
|
||||
ei_pstore(&res.x(), ei_padd(t1, ei_pxor(mask,ei_preverse(t2))));
|
||||
#endif
|
||||
|
||||
/*
|
||||
* t1 = ww*zw - yy*xy
|
||||
* t2 = zz*zw + xx*xy
|
||||
* res.zw = t1 -/+ swap(t2) = swap( swap(t1) +/- t2)
|
||||
*/
|
||||
t1 = ei_psub(ei_pmul(a_ww, b_zw), ei_pmul(a_yy, b_xy));
|
||||
t2 = ei_padd(ei_pmul(a_zz, b_zw), ei_pmul(a_xx, b_xy));
|
||||
#ifdef __SSE3__
|
||||
EIGEN_UNUSED_VARIABLE(mask)
|
||||
ei_pstore(&res.z(), ei_preverse(_mm_addsub_pd(ei_preverse(t1), t2)));
|
||||
#else
|
||||
ei_pstore(&res.z(), ei_psub(t1, ei_pxor(mask,ei_preverse(t2))));
|
||||
#endif
|
||||
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#endif // EIGEN_GEOMETRY_SSE_H
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user