Compare commits

921 Commits

Author SHA1 Message Date
Gael Guennebaud
b9827c495e bump to 3.2.7 2015-11-05 15:56:09 +01:00
Gael Guennebaud
6056f4404c fix unit test compilation 2015-11-05 15:36:48 +01:00
Gael Guennebaud
efd484546e bug #1063: nest AutoDiffScalar by value to avoid dead references
(grafted from 971cfbb480614229b5f48b040ef9d5dd18a4ab44)
2015-11-05 13:54:26 +01:00
Gael Guennebaud
a92681e0d2 Fix IterativeSolverBase for expressions as input 2015-11-05 12:05:31 +01:00
Gael Guennebaud
47592d31ea SPQR and UmfPack need to link to cholmod. 2015-11-05 12:05:02 +01:00
Gael Guennebaud
1a9dda6bfd Backport DartConfiguration.tcl tricks to make ctest -D Experimental work on recent cmake versions 2015-11-05 10:04:23 +01:00
Gael Guennebaud
4c1a2b5614 Add overloads for real times sparse<complex> operations.
This avoids real to complex conversions, and also fixes a compilation issue with MSVC.
2015-10-29 03:55:39 -07:00
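
For context, a minimal sketch of the kind of mixed-scalar expression these overloads enable (variable names are illustrative):

    #include <complex>
    #include <Eigen/SparseCore>

    int main() {
      Eigen::SparseMatrix<std::complex<double> > m(3, 3);
      m.insert(0, 0) = std::complex<double>(1.0, 2.0);
      double s = 2.0;
      // The real scalar multiplies the complex sparse matrix directly,
      // without first promoting s to std::complex<double>.
      Eigen::SparseMatrix<std::complex<double> > r = s * m;
      return r.nonZeros() == 1 ? 0 : 1;
    }
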
Gael Guennebaud
c308cb1b24 Backport DenseStorage::operator= implementations (fix regression with MSVC) 2015-11-04 18:41:44 +01:00
Gael Guennebaud
85e9e6e780 Fix compilation issue 2015-11-04 18:40:35 +01:00
Gael Guennebaud
c030925a66 Add support for dense.cwiseProduct(sparse)
This also fixes a regression regarding (dense*sparse).diagonal()
2015-11-04 17:42:07 +01:00
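
A short usage sketch of the mixed dense/sparse coefficient-wise product (names are illustrative); the result is sparse, since it vanishes wherever the sparse operand does:

    #include <Eigen/Dense>
    #include <Eigen/SparseCore>

    int main() {
      Eigen::MatrixXd d = Eigen::MatrixXd::Constant(3, 3, 2.0);
      Eigen::SparseMatrix<double> s(3, 3);
      s.insert(1, 1) = 5.0;
      // Coefficient-wise product of a dense and a sparse operand.
      Eigen::SparseMatrix<double> p = d.cwiseProduct(s);
      return p.coeff(1, 1) == 10.0 ? 0 : 1;
    }
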
Gael Guennebaud
fd074be1a0 bug #1101: typo
(grafted from ddaaa2d381)
2015-10-30 12:02:52 +01:00
Gael Guennebaud
e685bd7f46 Bug #1100: remove explicit CMAKE_INSTALL_PREFIX prefix to please cmake install's DESTINATION argument
(grafted from c8c8821038)
2015-10-30 12:00:34 +01:00
Gael Guennebaud
e82f507747 Fix several shortcomings in cost computation (the Dynamic case was ignored) 2015-10-28 11:52:28 +01:00
Gael Guennebaud
1eea595550 Fix computation of CwiseUnaryOp::CoeffReadCost when the cost of the nested expression is Dynamic 2015-10-27 22:22:02 +01:00
Gael Guennebaud
d0980c7706 bug #1092: fix iterative solver ctors for expressions as input 2015-10-26 16:16:24 +01:00
Abhijit Kundu
9055400f3d Added ArpackSupport to cmake install target
(grafted from 1127ca8586)
2015-10-16 16:41:33 -07:00
Gael Guennebaud
acb3c60295 Make the IterativeLinearSolvers module compatible with MPL2-only mode
by defaulting to COLAMDOrdering and NaturalOrdering for ILUT and ILLT respectively.
2015-10-26 15:17:52 +01:00
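
MPL2-only mode is requested by defining EIGEN_MPL2_ONLY before including any Eigen header; with the default orderings above, a sketch like the following should build in that mode (the solver and preconditioner choice is illustrative):

    #define EIGEN_MPL2_ONLY
    #include <Eigen/Dense>
    #include <Eigen/IterativeLinearSolvers>
    #include <Eigen/SparseCore>

    int main() {
      Eigen::SparseMatrix<double> A(10, 10);
      A.setIdentity();
      Eigen::VectorXd b = Eigen::VectorXd::Ones(10);
      // ILUT-preconditioned BiCGSTAB without LGPL-licensed code paths.
      Eigen::BiCGSTAB<Eigen::SparseMatrix<double>,
                      Eigen::IncompleteLUT<double> > solver(A);
      Eigen::VectorXd x = solver.solve(b);
      return x.size() == 10 ? 0 : 1;
    }
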
Gael Guennebaud
f8b88d21a6 bug #1088: fix setIdentity for non-compressed sparse matrix 2015-10-25 22:01:58 +01:00
Sergiu Dotenco
89a222ce50 use explicit Scalar types for AngleAxis initialization 2015-08-28 22:20:15 +02:00
Hauke Heibel
960ec7aef2 Switched to MPL2 license.
(grafted from 6f5f488a80)
2013-08-12 07:39:24 +02:00
Gael Guennebaud
e8bd2d49b3 bug #1090: fix a shortcoming in redux logic for which slice-vectorization plus unrolling might happen.
(grafted from e78bc111f1)
2015-10-21 20:58:33 +02:00
Gael Guennebaud
f444996a7a bug #266: backport changeset 7c99b38b7c
about support for C++11 move semantics
2015-10-21 09:21:07 +02:00
Gael Guennebaud
a7c2e62a52 Backport change of operator/=(Scalar) to perform a true division 2015-10-18 22:27:42 +02:00
Gael Guennebaud
9ff967199a Improve numerical accuracy in LLT and triangular solve by using true scalar divisions (instead of x * (1/y))
(grafted from fe630c9873)
2015-10-18 22:15:01 +02:00
Gael Guennebaud
dc0ef2cbed Fix misuse of hg resolve when backporting the previous changeset 2015-10-12 16:24:19 +02:00
Gael Guennebaud
7aa90a3b0f bug #1086: replace deprecated UF_long by SuiteSparse_long 2015-10-12 16:20:12 +02:00
Gael Guennebaud
56488ddc0f bug #1080: fix some warnings (already fixed in devel branch) 2015-10-12 10:23:53 +02:00
Gael Guennebaud
165b69ca74 Added tag 3.2.6 for changeset 7abf6d02db 2015-10-01 09:06:37 +02:00
Gael Guennebaud
7abf6d02db bump to 3.2.6 2015-10-01 09:06:10 +02:00
Gael Guennebaud
73cb54835c bug #1075: fix AlignedBox::sample for runtime dimension
(grafted from 75a60d3ac0)
2015-09-30 11:44:02 +02:00
Gael Guennebaud
cfe315476f Add PlainObjectBase copy ctor from PlainObjectBase and DenseBase objects. (manual backport from default branch; fixes a segfault when creating a PlainObjectBase object, though such a usage is not recommended at all) 2015-09-28 15:51:00 +02:00
Gael Guennebaud
f1583e86f6 bug #1073: backport common pitfalls page 2015-09-28 14:59:54 +02:00
Gael Guennebaud
4bd69750ed Add missing unit tests for vector-wise all/any 2015-09-19 21:45:48 +02:00
Gael Guennebaud
d40e32c94e Fix bug #1067: naming conflict 2015-09-19 21:45:11 +02:00
Christoph Hertzberg
a0bf1b4242 Removed documentation of removed method (as in fab96f2ff3)
2015-09-13 16:39:48 +02:00
Gael Guennebaud
cf645db95b MKL is now free of charge for open source
(grafted from 5bf971e5b8)
2015-09-07 11:23:55 +02:00
Gael Guennebaud
13135a82bd bug #1062: backport fix of SelfAdjointEigenSolver for RowMajor matrices from default branch 2015-09-04 18:26:26 +02:00
Gael Guennebaud
769cb99845 Fix sparselu unit test. 2015-09-03 13:56:02 +02:00
Thomas Capricelli
ba9add3c59 fix a conflict committed by mistake 2015-09-03 13:51:17 +02:00
Gael Guennebaud
ddfb72a92f bug #1053: fix SuperLU::solve with EIGEN_DEFAULT_TO_ROW_MAJOR
(grafted from 5a1cc5d24c)
2015-09-03 11:25:36 +02:00
Gael Guennebaud
8c7e281c9e Fix AMD ordering when a column has only one off-diagonal non-zero (also fix bug #1045) 2015-09-03 11:04:06 +02:00
Gael Guennebaud
66c092e44e bug #1057: fix a declaration mismatch with MSVC
(grafted from a75616887e)
2015-09-02 09:31:32 +02:00
Gael Guennebaud
3ec6d38f35 bug #1059: fix predux_max<Packet4i> for NEON (this was already fixed in the default branch) 2015-09-01 16:30:18 +02:00
Gael Guennebaud
96f64441f7 bug #1055: Fix incomplete backport in changeset 0ebce69424 2015-09-01 16:11:43 +02:00
Sergiu Dotenco
5af4d77511 fixed Quaternion identity initialization for non-implicitly convertible types 2015-08-20 20:55:37 +02:00
Christoph Hertzberg
88ac8ffad5 bug #1054: Use set(EIGEN_CXX_FLAG_VERSION "/version") only for Intel compilers on Windows.
Also removed code calling `head -n1`, always using the integrated REGEX functionality instead.
2015-08-14 15:32:15 +02:00
Christoph Hertzberg
edb0183e0c bug #1053: SparseLU failed with EIGEN_DEFAULT_TO_ROW_MAJOR 2015-08-07 23:07:29 +02:00
Gael Guennebaud
befa141699 Fix Jacobi preconditioner with zero diagonal entries
(grafted from c06ec0f464)
2014-06-17 23:47:30 +02:00
Gael Guennebaud
5c70b43abd bug #1048: fix unused variable warning
(grafted from 41e1f3498c)
2015-07-28 22:59:50 +02:00
Christoph Hertzberg
6a3797f46f bug #792: SparseLU::factorize failed for structurally rank deficient matrices 2015-07-26 20:39:32 +02:00
Christoph Hertzberg
c4432aad15 bug #1033: Add explicit type conversion from 0 to RealScalar 2015-07-17 13:19:55 +02:00
Christoph Hertzberg
ea0168c5a5 fix for MKL_BLAS not defined in MKL 11.2
(grafted from 4b678b96eb)
2014-09-08 17:37:58 +08:00
Christoph Hertzberg
05fad4959a bug #1039: Redefining EIGEN_DEFAULT_DENSE_INDEX_TYPE may lead to errors 2015-07-13 16:08:02 +02:00
Gael Guennebaud
98eedb0c9a bug #1000: MSVC 2013 does need the operator= workaround 2015-06-26 14:04:24 +02:00
Gael Guennebaud
71424c4bf8 bug #1026: fix infinite loop for an empty input
(grafted from e102ddbf1f)
2015-06-26 14:02:52 +02:00
Gael Guennebaud
e59b246b08 Backport changes in Ref/MapBase to fix MSVC 2013 confusion. 2015-06-23 16:22:46 +02:00
Gael Guennebaud
4aa7038074 Added tag 3.2.5 for changeset d9c80169e0 2015-06-16 11:53:12 +02:00
Gael Guennebaud
d9c80169e0 bump to 3.2.5 2015-06-16 11:53:07 +02:00
Gael Guennebaud
b514c943c7 Fix installation of some unsupported modules 2015-06-16 11:51:58 +02:00
Christoph Hertzberg
8ba643a903 bug #1014: More stable direct computation of eigenvalues and -vectors for 3x3 matrices 2015-05-17 21:54:32 +02:00
Gael Guennebaud
595c00157c Applied patch from Richard JW Roberts, resolving bug #704
(grafted from devel branch)
2015-06-15 22:02:57 +02:00
Gael Guennebaud
1c6b224fb3 Remove aligned-on-scalar assert and fall back to the non-vectorized path at runtime (first_aligned already had this runtime guard) 2015-06-14 15:04:07 +02:00
Gael Guennebaud
2361ec9c0e Fix a regression introduced in changeset 2461531e5a 2015-06-13 22:32:10 +02:00
Gael Guennebaud
fcd213a297 Fix use of uninitialized buffers.
(grafted from 2f2a441a4d)
2015-06-13 22:19:40 +02:00
Gael Guennebaud
37ed0d991a aligned-on-scalar assertion was still too aggressive: it now takes into account the sizes at runtime 2015-06-13 21:49:11 +02:00
Gael Guennebaud
62b08cf9f9 Limit aligned-on-scalar assert on Map 2015-06-12 08:59:26 +02:00
Gael Guennebaud
46f011466b Relax aligned-on-scalar assert for lvalue only 2015-06-12 08:50:15 +02:00
Gael Guennebaud
f600bdd76b Map: assert on unaligned on scalar only if the object might be vectorized 2015-06-11 22:17:56 +02:00
Gael Guennebaud
421aa4f358 typo 2015-06-09 18:34:13 +02:00
Gael Guennebaud
554356b034 bug #650: fix dense += sparse_row_major * dense 2015-06-09 18:03:38 +02:00
Gael Guennebaud
97119f854f bug #1003: assert in MapBase if the provided pointer is not aligned on scalar while it is expected to be. Also add an EIGEN_ALIGN8 macro. 2015-06-09 17:42:09 +02:00
Gael Guennebaud
51ab034f63 bug #872: remove usage of deprecated bind1st/bind2nd functions (manually backported from devel branch) 2015-06-09 11:06:39 +02:00
Gael Guennebaud
0ebce69424 Update approx. minimum ordering method to push and keep structural empty diagonal elements to the bottom-right part of the matrix 2015-03-20 16:33:48 +01:00
Gael Guennebaud
a748673bbb bug #1016: fix scalar conversion 2015-06-05 16:04:51 +02:00
Gael Guennebaud
8597ee502b bug #705: fix handling of Lapack potrf return code
(grafted from 0a9b5d1396)
2015-06-05 15:59:13 +02:00
Gael Guennebaud
ac66f1c73d Fix usage of EIGEN_NO_AUTOMATIC_RESIZING: resizing still has to be performed for a non-initialized object (was already fixed in devel branch) 2015-05-26 10:44:37 +02:00
Christoph Hertzberg
b392e6b21c Merged in mvdyck/eigen-3/3.2 (pull request PR-115)
[[DOC]] Topic Multithreading dox compile error in example code resolved as in default branch
2015-05-09 01:40:43 +02:00
Michiel Van Dyck
e88aaae5f4 Merged in mvdyck/doc-topicmultithreadingdox-resolved-comp-1431118452618 (pull request PR-1)
[[DOC]] TopicMultithreading.dox compile error in example code resolved as in default branch
2015-05-08 22:56:14 +02:00
Michiel Van Dyck
2d217a60a7 Close branch mvdyck/doc-topicmultithreadingdox-resolved-comp-1431118452618 2015-05-08 22:56:14 +02:00
Michiel Van Dyck
ef1439252c [[DOC]] TopicMultithreading.dox compile error in example code resolved as in default branch 2015-05-08 20:55:34 +00:00
Gael Guennebaud
847bb317cd bug #1013: fix 2x2 direct eigensolver for identical eigenvalues 2015-05-07 15:55:12 +02:00
Gael Guennebaud
62d334c7d3 Fix bug #1010: m_isInitialized was improperly updated
(grafted from ebf8ca4fa8)
2015-05-07 14:20:42 +02:00
Christoph Hertzberg
7713b29084 bug #1012: Enable alloca on Mac OS or if alloca is defined as macro 2015-05-06 13:24:48 +02:00
Christoph Hertzberg
a08df3ff34 Fix regression introduced by last merge 2015-05-06 11:03:00 +02:00
Christoph Hertzberg
5bb9459124 bug #999: clarify that behavior of empty AlignedBoxes is undefined, and further improvements in documentation 2015-04-30 19:29:47 +02:00
Christoph Hertzberg
80fd8fab87 Regression test for bug #302 2015-04-26 20:58:13 +02:00
Christoph Hertzberg
84eeabd223 Fix bug #1000: Manually inherit assignment operators for MSVC 2013 and later (as required by the standard). 2015-04-23 13:39:31 +02:00
Gael Guennebaud
058fa781d7 Fix bug #996: fix comparisons to 0 instead of Scalar(0)
(grafted from e0cff9ae0d)
2015-04-15 14:48:53 +02:00
Christoph Hertzberg
b03209a7a6 Make conversion from 0 to Scalar explicit (issue reported by Brad Bell) 2015-04-13 17:10:52 +02:00
Christoph Hertzberg
71590d0ac7 bug #993: Passing matrix.inverse() as MatrixBase lead to infinite recursion. 2015-04-09 20:29:41 +02:00
Christoph Hertzberg
1e1b4b6678 Cygwin compatibility issues (manually backported from main branch) 2015-04-09 20:26:47 +02:00
Gael Guennebaud
2e3353634f bug #986: add support for coefficient-based product with 0 depth. 2015-04-01 13:21:47 +02:00
Gael Guennebaud
2461531e5a Fix bug #987: wrong alignment guess in diagonal product. 2015-03-31 23:36:54 +02:00
Christoph Hertzberg
a68917594b Change CMake warning to simple message for old Metis versions
(transplanted from 7bd578d11d)
2015-03-31 00:50:04 +02:00
Christoph Hertzberg
3b93b1afb3 Addendum to last patch: k is Index and not int
(transplanted from 3238ca6abc)
2015-03-31 00:42:14 +02:00
Christoph Hertzberg
0fb74c1f8b bug #985: RealQZ failed when either matrix had zero rows or columns (report and patch by Ben Goodrich)
Also added a regression test
(transplanted from 1efae98fee)
2015-03-30 23:56:20 +02:00
Christoph Hertzberg
bf650a3686 bug #983: Pass Vector3 by const reference and not by value
(transplanted from 09a5361d1b)
2015-03-28 12:36:24 +01:00
Christoph Hertzberg
8fa951e31d Optionally build the documentation when building unit tests. 2015-03-27 16:41:28 +01:00
Deanna Hood
1b64edbfd4 Make html directory before generating output image there
(transplanted from 2ab4922431)
2015-03-18 07:24:13 +10:00
Gael Guennebaud
c74284ed81 bug #949: add static assertion for incompatible scalar types in dense end-user decompositions. 2015-03-13 21:06:20 +01:00
Gael Guennebaud
b09316fbea bug #980: fix taking a row (resp. column) of a column-major (resp. row-major) sparse matrix and add missing coeff/coeffRef members. 2015-03-13 15:13:58 +01:00
Gael Guennebaud
c5fc8e6bdc bug #969: workaround ambiguous calls to Ref using enable_if. 2015-03-06 17:51:31 +01:00
Gael Guennebaud
88c844ae2f bug #824: improve accuracy of Quaternion::angularDistance using atan2 instead of acos.
(grafted from 2dc968e453)
2015-03-04 17:03:13 +01:00
Gael Guennebaud
500c36de61 Merged in blechta/eigen/fix-cg-zero-guess (pull request PR-100)
Really use zero guess in ConjugateGradient::solve as documented
2015-03-04 11:42:25 +01:00
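
In API terms, the fix makes the two entry points behave as documented; a sketch (trivial identity system, names illustrative):

    #include <Eigen/Dense>
    #include <Eigen/IterativeLinearSolvers>
    #include <Eigen/SparseCore>

    int main() {
      Eigen::SparseMatrix<double> A(5, 5);
      A.setIdentity();
      Eigen::VectorXd b = Eigen::VectorXd::Ones(5);
      Eigen::ConjugateGradient<Eigen::SparseMatrix<double> > cg(A);
      Eigen::VectorXd x1 = cg.solve(b);  // now really starts from a zero guess
      Eigen::VectorXd x0 = Eigen::VectorXd::Zero(5);
      Eigen::VectorXd x2 = cg.solveWithGuess(b, x0);  // explicit initial guess
      return (x1 - x2).norm() < 1e-12 ? 0 : 1;
    }
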
Gael Guennebaud
26234720bd Fix bug #972: allow coeff-based products of depth 0 and remove a useless statement in coeff-based product. 2015-02-28 15:25:39 +01:00
Gael Guennebaud
0e38796e1c Fix bug #961: eigen-doc.tgz included part of itself.
(grafted from fc5c3e85e2)
2015-02-18 15:47:01 +01:00
Gael Guennebaud
a2d9a4806a Fix bug #714: the actual number of threads might be lower than the number of requested ones. 2015-02-18 15:24:05 +01:00
Jan Blechta
a72bf09e6d Really use zero guess in ConjugateGradient::solve as documented
and expected for consistency with other methods.
2015-02-18 14:26:10 +01:00
Gael Guennebaud
bb3e5c29cc Bug #957: workaround MSVC/ICC compilation issue 2015-02-18 11:24:32 +01:00
Gael Guennebaud
81b3d29b26 Fix SparseLU::signDeterminant() method, and add a SparseLU::determinant() method. 2015-02-16 19:16:21 +01:00
Gael Guennebaud
f0b1b1df9b Fix SparseLU::signDeterminant() method, and add a SparseLU::determinant() method. 2015-02-16 19:09:22 +01:00
Gael Guennebaud
e061b7a538 Add PermutationMatrix::determinant method.
(grafted from 8768ff3c31)
2015-02-16 19:08:25 +01:00
Gael Guennebaud
8768ff3c31 Add PermutationMatrix::determinant method. 2015-02-16 19:08:25 +01:00
Gael Guennebaud
77af14fb62 bug #914: fix compiler detection on windows 2015-02-16 16:26:47 +01:00
Martin Drozdik
64b29e06b9 bug #956: Fixed bug in move constructors of DenseStorage which caused "moved-from" objects to be in an invalid state. 2015-02-16 18:18:46 +09:00
Gael Guennebaud
1c0e8bcf09 Fix unused variable warning. 2015-02-16 17:21:30 +01:00
Gael Guennebaud
69fa405096 Update circulant custom expression example 2015-02-16 17:21:16 +01:00
Gael Guennebaud
0f464d9d87 bug #897: fix regression in BiCGSTAB(mat) ctor (and all other iterative solvers).
Add respective regression unit test.
2015-02-16 17:05:10 +01:00
Gael Guennebaud
470d26d580 Remove some useless typedefs 2015-02-16 16:48:21 +01:00
Gael Guennebaud
4dded73227 bug #914: fix compiler detection on windows
(grafted from 77af14fb62)
2015-02-16 16:26:47 +01:00
Gael Guennebaud
953d5ccfd5 Doc: explain how to free allocated memory in SparseMatrix 2015-02-16 15:56:11 +01:00
Gael Guennebaud
98604576d1 Merged in chtz/eigen-indexconversion (pull request PR-92)
bug #877, bug #572: Get rid of Index conversion warnings, summary of changes:
- Introduce a global typedef Eigen::Index making Eigen::DenseIndex and AnyExpr<>::Index deprecated (default is std::ptrdiff_t).
- Eigen::Index is used throughout the API to represent indices, offsets, and sizes.
- Classes storing an array of indices use the type StorageIndex to store them. This is a template parameter of the class. Default is int.
- Methods that *explicitly* set or return an element of such an array take or return a StorageIndex type. In all other cases, the Index type is used.
2015-02-16 15:29:00 +01:00
Gael Guennebaud
45cbb0bbb1 The usage of DenseIndex is deprecated, so let's replace DenseIndex by Index 2015-02-16 15:05:41 +01:00
Gael Guennebaud
cc641aabb7 Remove deprecated usage of expr::Index. 2015-02-16 14:46:51 +01:00
Gael Guennebaud
aa6c516ec1 Fix many long to int conversion warnings:
- fix usage of Index (API) versus StorageIndex (when multiple indexes are stored)
- use StorageIndex(val) when the input has already been checked
- use internal::convert_index<StorageIndex>(val) when val is potentially unsafe (directly comes from user input)
2015-02-16 13:19:05 +01:00
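
A sketch of the convention the two entries above describe, using the post-refactoring names (Eigen::Index for API-level sizes and offsets, StorageIndex for stored index arrays):

    #include <Eigen/SparseCore>

    int main() {
      Eigen::SparseMatrix<double> m(100, 100);  // StorageIndex defaults to int
      Eigen::Index rows = m.rows();             // API type, std::ptrdiff_t by default
      typedef Eigen::SparseMatrix<double>::StorageIndex StorageIndex;
      // Narrow from Index to StorageIndex explicitly once the value is known
      // to fit (cf. internal::convert_index for potentially unsafe input).
      StorageIndex si = static_cast<StorageIndex>(rows);
      return si == 100 ? 0 : 1;
    }
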
Christoph Hertzberg
b39413794e bug #952: Missing \endcode made doxygen fail to build ColPivHouseholderQR
(transplanted from bd511dde9d)
2015-02-15 06:08:25 +01:00
Christoph Hertzberg
bd511dde9d bug #952: Missing \endcode made doxygen fail to build ColPivHouseholderQR 2015-02-15 06:08:25 +01:00
Benoit Steiner
e2cfddf75f Pulled latest updates from trunk 2015-02-13 16:21:59 -08:00
Benoit Steiner
0927801a84 Optimized versions of the sin(), exp(), log() and sqrt() functions for AVX 2015-02-13 16:07:08 -08:00
Benoit Jacob
e972b55ec4 bug #953 - Fix prefetches in 3px4 product kernel
This gives a 10% speedup on Nexus 4 and on Nexus 5.
2015-02-13 14:52:36 -05:00
Gael Guennebaud
fc202bab39 Index refactoring: StorageIndex must be used for storage only (and locally when it makes sense). In all other cases use the global Index type. 2015-02-13 18:57:41 +01:00
Gael Guennebaud
fe51319980 Merge Index-refactoring branch with default, fix PastixSupport, remove some useless typedefs 2015-02-13 10:03:53 +01:00
Gael Guennebaud
0918c51e60 merge Tensor module within Eigen/unsupported and update gemv BLAS wrapper 2015-02-12 21:48:41 +01:00
Gael Guennebaud
409547a0c8 update EIGEN_FAST_MATH documentation 2015-02-12 21:04:31 +01:00
Benoit Steiner
4470c99975 Added a test to validate tensor casting on cuda devices 2015-02-10 14:40:18 -08:00
Benoit Steiner
6620aaa4b3 Silenced a few compilation warnings generated by nvcc 2015-02-10 14:34:42 -08:00
Benoit Steiner
f669f5656a Marked a few functions as EIGEN_DEVICE_FUNC to enable the use of tensors in cuda kernels. 2015-02-10 14:29:47 -08:00
Gael Guennebaud
029d236ceb merge 2015-02-10 23:12:47 +01:00
Gael Guennebaud
fe25f3b8e3 FMA has been wrongly disabled 2015-02-10 23:11:35 +01:00
Benoit Steiner
ceb4c9c10b Pulled latest changes from trunk 2015-02-10 14:03:17 -08:00
Benoit Steiner
cc5d7ff523 Added vectorized implementation of the exponential function for ARM/NEON 2015-02-10 14:02:38 -08:00
Gael Guennebaud
d771295554 remove useless include 2015-02-10 22:59:27 +01:00
Benoit Steiner
fefec723aa Fixed compilation error triggered when trying to vectorize a non-vectorizable CUDA kernel. 2015-02-10 13:16:22 -08:00
Benoit Steiner
780b2422e2 Silenced the last batch of compilation warnings triggered by gcc 4.8 2015-02-10 12:43:55 -08:00
Benoit Steiner
c21e45fbc5 Fixed a few more compilation warnings 2015-02-10 12:36:26 -08:00
Benoit Steiner
057cfd2f02 Silenced more compilation warnings 2015-02-10 12:25:02 -08:00
Benoit Steiner
114e863f08 Silenced a few compilation warnings 2015-02-10 12:20:24 -08:00
Benoit Steiner
410895a7e4 Silenced several compilation warnings 2015-02-10 12:13:19 -08:00
Benoit Steiner
4716c2c666 Fixed compilation error 2015-02-10 12:06:19 -08:00
Benoit Steiner
91fe3a3004 Removed a debug printf statement. 2015-02-10 10:29:28 -08:00
Jan Blechta
84bba80916 Fix bug #733: step by step solving is not a good example for solveWithGuess 2015-02-10 14:24:39 +01:00
Gael Guennebaud
91953d2d37 Backport MINRES fixes to 3.2 2015-02-10 19:21:41 +01:00
Gael Guennebaud
7b35b4cacc Allows Lower|Upper as a template argument of CG and MINRES: in this case the full matrix will be considered. 2015-02-10 18:57:41 +01:00
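
Usage-wise (a sketch): passing Lower|Upper as the UpLo template argument makes the solver read the full stored matrix rather than one triangular half:

    #include <Eigen/Dense>
    #include <Eigen/IterativeLinearSolvers>
    #include <Eigen/SparseCore>

    int main() {
      Eigen::SparseMatrix<double> A(5, 5);
      A.setIdentity();
      Eigen::VectorXd b = Eigen::VectorXd::Ones(5);
      // The default UpLo is Lower; Lower|Upper means "use the whole matrix".
      Eigen::ConjugateGradient<Eigen::SparseMatrix<double>,
                               Eigen::Lower | Eigen::Upper> cg(A);
      Eigen::VectorXd x = cg.solve(b);
      return x.size() == 5 ? 0 : 1;
    }
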
Jan Blechta
c3f3580b8f Fix bug #733: step by step solving is not a good example for solveWithGuess 2015-02-10 14:24:39 +01:00
Gael Guennebaud
deecff97ed typo 2015-02-10 19:22:05 +01:00
Gael Guennebaud
c6e8caf090 Allows Lower|Upper as a template argument of CG and MINRES: in this case the full matrix will be considered. 2015-02-10 18:57:41 +01:00
Gael Guennebaud
d10d6a40dd bug #897: Update unsupported iterative solvers based on IterativeSolverBased. 2015-02-10 13:02:59 +01:00
Gael Guennebaud
87629cd639 bug #897: makes iterative sparse solvers use a Ref<SparseMatrix> instead of a SparseMatrix pointer. This fixes usage of iterative solvers with a Map<SparseMatrix>. 2015-02-09 11:41:25 +01:00
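
The usage this unlocks, roughly (a sketch assuming the Map<SparseMatrix> specialization added below; the buffer pointers come from an owning SparseMatrix purely for illustration):

    #include <Eigen/Dense>
    #include <Eigen/IterativeLinearSolvers>
    #include <Eigen/SparseCore>

    int main() {
      Eigen::SparseMatrix<double> A(5, 5);
      A.setIdentity();
      A.makeCompressed();
      // A non-owning view over external CSC buffers...
      Eigen::Map<Eigen::SparseMatrix<double> > mapA(
          A.rows(), A.cols(), A.nonZeros(),
          A.outerIndexPtr(), A.innerIndexPtr(), A.valuePtr());
      // ...which the solver now accepts through Ref<SparseMatrix>.
      Eigen::BiCGSTAB<Eigen::SparseMatrix<double> > solver(mapA);
      Eigen::VectorXd b = Eigen::VectorXd::Ones(5);
      Eigen::VectorXd x = solver.solve(b);
      return x.size() == 5 ? 0 : 1;
    }
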
Gael Guennebaud
bde98df03f merge 2015-02-09 11:15:37 +01:00
Gael Guennebaud
d4ec48575e Make Block<SparseMatrix> inherit SparseCompressedBase in the case of inner panels, and fix valuePtr()/innerIndexPtr() 2015-02-09 11:14:36 +01:00
Gael Guennebaud
554aa9b31d Add failtests for Ref<SparseMatrix> 2015-02-09 10:24:07 +01:00
Gael Guennebaud
3af29caae8 Cleaning and add more unit tests for Ref<SparseMatrix> and Map<SparseMatrix> 2015-02-09 10:23:45 +01:00
Gael Guennebaud
f2ff8c091e Add a Ref<SparseMatrix> specialization. 2015-02-07 22:04:18 +01:00
Gael Guennebaud
f3be317614 Add a Map<SparseMatrix> specialization. 2015-02-07 22:03:25 +01:00
Gael Guennebaud
08081f8293 Make SparseTranspose inherit SparseCompressedBase when possible 2015-02-07 22:02:14 +01:00
Gael Guennebaud
7838fda82c Add a SparseCompressedBase class providing (un)compressed accessors (like data()/*Stride() for dense matrices),
and a CompressedAccessBit flag (similar to DirectAccessBit for dense matrices).
2015-02-07 22:00:46 +01:00
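
Concretely, the accessors in question look like this on a SparseMatrix (a sketch):

    #include <Eigen/SparseCore>
    #include <iostream>

    int main() {
      Eigen::SparseMatrix<double> m(4, 4);
      m.insert(0, 0) = 1.0;
      m.insert(2, 1) = 3.0;
      m.makeCompressed();  // switch from uncompressed to compressed (CSC) mode
      std::cout << m.isCompressed() << "\n";     // 1 after makeCompressed()
      std::cout << m.valuePtr()[0] << "\n";      // raw coefficient array
      std::cout << m.innerIndexPtr()[0] << "\n"; // row indices (column-major)
      std::cout << m.outerIndexPtr()[1] << "\n"; // start of column 1
      return 0;
    }
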
Benoit Steiner
3ba6647398 Fixed the cxx11_meta test 2015-02-06 06:00:59 -08:00
Benoit Steiner
01f7918788 Pulled latest fixes 2015-02-06 05:30:20 -08:00
Gael Guennebaud
b50ffaddf2 merge 2015-02-06 14:27:12 +01:00
Gael Guennebaud
74e460b995 Fix symmetric product 2015-02-06 14:26:24 +01:00
Gael Guennebaud
c03c73c9b7 Fix clang compilation 2015-02-06 14:26:12 +01:00
Gael Guennebaud
668518aed6 Fix uninitialized entries and comparison of very small numbers 2015-02-06 14:25:41 +01:00
Benoit Steiner
c739102ef9 Pulled the latest changes from the trunk 2015-02-06 05:25:03 -08:00
Benoit Steiner
2559fa9b0f Fixed compilation error in the tensor broadcasting test 2015-02-06 02:55:18 -08:00
Benoit Steiner
dcb2a8b184 Added the EIGEN_HAS_CONSTEXPR define
Gate the tensor index list code based on the value of EIGEN_HAS_CONSTEXPR
2015-02-06 02:51:59 -08:00
Filippo Basso
a8f2c6eec7 Using numext::pow instead of std::pow in poly_eval function. 2015-02-04 18:37:51 +00:00
Gael Guennebaud
b1eca55328 Use Ref<> to ensure that both x and b in Ax=b are compatible with Umfpack/SuperLU expectations 2015-02-03 23:46:05 +01:00
Gael Guennebaud
f9931a0392 SPQR: fix default threshold value 2015-02-03 22:32:34 +01:00
Gael Guennebaud
ebdf6a2dbb SPQR: fix default threshold value 2015-02-03 22:32:34 +01:00
Benoit Steiner
f64045a060 Silenced a few more compilation warnings 2015-01-30 19:52:01 -08:00
Benoit Steiner
590f4b0aa3 Silenced some compilation warnings 2015-01-30 19:46:30 -08:00
Benoit Jacob
5ef95fabee bug #936, patch 3/3: Properly detect FMA support on ARM (requires VFPv4)
and use it instead of MLA when available, because it's both more accurate
and faster.
2015-01-30 17:45:03 -05:00
Benoit Jacob
0f21613698 bug #936, patch 2/3: Remove EIGEN_VECTORIZE_FMA, was redundant with EIGEN_HAS_SINGLE_INSTRUCTION_MADD 2015-01-30 17:44:26 -05:00
Benoit Jacob
340b8afb14 bug #936, patch 1.5/3: rename _FUSED_ macros to _SINGLE_INSTRUCTION_,
because this is what they are about. "Fused" means "no intermediate rounding
between the mul and the add, only one rounding at the end". Instead,
what we are concerned about here is whether a temporary register is needed,
i.e. whether the MUL and ADD are separate instructions.
Concretely, on ARM NEON, a single-instruction mul-add is always available: VMLA.
But a true fused mul-add is only available on VFPv4: VFMA.
2015-01-31 14:15:57 -05:00
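
The rounding distinction is easy to observe in scalar code (a sketch, with values chosen so the two roundings differ):

    #include <cmath>
    #include <cstdio>

    int main() {
      double a = 1.0 + 1e-8, b = 1.0 - 1e-8, c = -1.0;
      double separate = a * b + c;       // MUL rounds, then ADD rounds again
      double fused = std::fma(a, b, c);  // a single rounding at the very end
      // The two results typically differ in the last bits.
      std::printf("%.17g vs %.17g\n", separate, fused);
      return 0;
    }
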
Benoit Jacob
9f99f61e69 bug #936, patch 1/3: some cleanup and renaming for consistency. 2015-01-30 17:43:56 -05:00
Benoit Jacob
759bd92a85 bug #935: Add asm comments in GEBP kernels to work around a bug
in both GCC and Clang on ARM/NEON, whereby they spill registers,
severely harming performance. The reason why the asm comments
make a difference is that they prevent the compiler from
reordering code across these boundaries, which has the effect
of extending the lifetime of local variables and increasing
register pressure on this register-tight code.
2015-01-30 17:27:56 -05:00
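
The gist of the workaround (a minimal sketch, not Eigen's exact code): an inline-asm statement that emits only a comment, yet acts as a boundary the compiler will not reorder across:

    static inline void kernel_boundary() {
      // volatile + "memory" clobber: GCC/Clang may not move memory accesses
      // across this point, which bounds live ranges and register pressure.
      __asm__ volatile("# gebp kernel boundary" ::: "memory");
    }

    int main() {
      kernel_boundary();
      return 0;
    }
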
Gael Guennebaud
f89ba2a58b bug #941: fix accuracy issue in ColPivHouseholderQR, do not stop decomposition on a small pivot
(grafted from f1092d2f73)
2015-01-30 19:04:04 +01:00
Gael Guennebaud
f1092d2f73 bug #941: fix accuracy issue in ColPivHouseholderQR, do not stop decomposition on a small pivot 2015-01-30 19:04:04 +01:00
Gael Guennebaud
8296c4aaed Supernodes were disabled.
(grafted from 9d82f7e30d)
2015-01-30 17:24:40 +01:00
Gael Guennebaud
9d82f7e30d Supernodes were disabled. 2015-01-30 17:24:40 +01:00
Benoit Steiner
e896c0ade7 Marked the contraction operation as read only, since its result can't be assigned. 2015-01-29 10:29:47 -08:00
Benoit Steiner
5a6ea4edf6 Added more tests to cover tensor reductions 2015-01-28 10:02:47 -08:00
Gael Guennebaud
b613173350 bug #933: RealSchur, do not consider the input matrix norm to check negligible sub-diag entries. This also makes this test consistent with the complex and self-adjoint cases.
(grafted from a727a2c4ed)
2015-01-28 16:07:51 +01:00
Gael Guennebaud
a727a2c4ed bug #933: RealSchur, do not consider the input matrix norm to check negligible sub-diag entries. This also makes this test consistent with the complex and self-adjoint cases. 2015-01-28 16:07:51 +01:00
Benoit Steiner
9dfdbd7e56 Improved the performance of tensor reductions that preserve the innermost dimension(s). 2015-01-27 14:15:31 -08:00
Benoit Steiner
46fc881e4a Added a few benchmarks for the tensor code 2015-01-26 17:46:40 -08:00
Gael Guennebaud
c6eb84aabc Enable vectorization of transposeInPlace for PacketSize x PacketSize matrices 2015-01-26 17:09:01 +01:00
Gael Guennebaud
e1f1091fde Add support for dense ?= diagonal 2015-01-24 10:32:49 +01:00
Gael Guennebaud
638c6948d7 Added tag 3.2.4 for changeset e6952a51ba 2015-01-21 17:26:53 +01:00
Gael Guennebaud
e6952a51ba bump to 3.2.4 2015-01-21 17:26:41 +01:00
Gael Guennebaud
b9d314ae19 bug #329: fix typo 2015-01-17 21:55:33 +01:00
Gael Guennebaud
0039cd9cf9 bug #329: fix typo
(grafted from b9d314ae19)
2015-01-17 21:55:33 +01:00
Benoit Steiner
14f537c296 gcc doesn't consider that
template<typename OtherDerived> TensorStridingOp& operator = (const OtherDerived& other)
provides a valid assignment operator for the striding operation, and therefore refuses to compile code like:
result.stride(foo) = source.stride(bar);

Added the explicit
   TensorStridingOp& operator = (const TensorStridingOp& other)

as a workaround to get the code to compile, and did the same in all the operations that can be used as lvalues.
2015-01-16 09:09:23 -08:00
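
The underlying C++ rule: a member template is never a copy-assignment operator, so the implicitly generated one still exists and is preferred for same-type assignment. A minimal sketch of the workaround with a hypothetical Expr class:

    struct Expr {
      int value;

      template <typename Other>
      Expr& assignFrom(const Other& other) {
        value = other.value;  // the intended, expression-aware path
        return *this;
      }

      template <typename OtherDerived>
      Expr& operator=(const OtherDerived& other) { return assignFrom(other); }

      // The workaround: an explicit same-type overload, so Expr = Expr also
      // routes through assignFrom instead of the implicit member-wise copy.
      Expr& operator=(const Expr& other) { return assignFrom(other); }
    };

    int main() {
      Expr a = {1}, b = {2};
      a = b;
      return a.value == 2 ? 0 : 1;
    }
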
Benoit Steiner
641e824c56 Added cube() operation 2015-01-15 11:11:48 -08:00
Benoit Steiner
b5124e7cfd Created many additional tests 2015-01-14 15:46:04 -08:00
Benoit Steiner
54e3633b43 Updated the list of include files 2015-01-14 15:43:38 -08:00
Benoit Steiner
f697df7237 Improved support for RowMajor tensors
Misc fixes and API cleanups.
2015-01-14 15:38:48 -08:00
Benoit Steiner
6559d09c60 Ensured that each thread has its own copy of the TensorEvaluator: this avoids race conditions when the evaluator calls a non-thread-safe functor, e.g. when generating random numbers. 2015-01-14 15:34:50 -08:00
Benoit Steiner
8a382aa119 Improved the resizing of tensors 2015-01-14 15:33:11 -08:00
Benoit Steiner
703c526355 Misc improvements 2015-01-14 15:31:52 -08:00
Benoit Steiner
4cdf3fe427 Misc fixes 2015-01-14 15:30:47 -08:00
Benoit Steiner
0feff6e987 Expanded the functionality of index lists 2015-01-14 15:29:48 -08:00
Gael Guennebaud
cd679f2c47 Fix doc: setConstant does not exist for SparseMatrix. 2015-01-14 22:06:09 +01:00
Gael Guennebaud
f074d43f4b Fix doc: setConstant does not exist for SparseMatrix.
(grafted from cd679f2c47)
2015-01-14 22:06:09 +01:00
Benoit Steiner
1ac8600126 Fixed the return type of coefficient wise operations. For example, the abs function returns a floating point value when called on a complex input. 2015-01-14 12:47:46 -08:00
Benoit Steiner
378bdfb7f0 Added missing apis to the TensorMap class 2015-01-14 12:45:20 -08:00
Benoit Steiner
0526dc1bb4 Added missing apis to the tensor class 2015-01-14 12:44:08 -08:00
Benoit Steiner
1a36590e84 Fixed the printing of RowMajor tensors 2015-01-14 12:43:20 -08:00
Benoit Steiner
7e0b6c56b4 Added ability to initialize a tensor using an initializer list 2015-01-14 12:41:30 -08:00
Benoit Steiner
b12dd1ae3c Misc improvements for fixed size tensors 2015-01-14 12:39:34 -08:00
Benoit Steiner
71676eaddd Added support for RowMajor inputs to the contraction code. 2015-01-14 12:36:57 -08:00
Benoit Steiner
0a0ab6dd15 Increased the functionality of the tensor devices 2015-01-14 11:45:17 -08:00
Benoit Steiner
5692723c58 Improved the performance of the contraction code on CUDA 2015-01-14 11:42:52 -08:00
Benoit Steiner
8f4b8d204b Improved the performance of tensor reductions
Added the ability to generate random numbers following a normal distribution
Created a test to validate the ability to generate random numbers.
2015-01-14 10:19:33 -08:00
Benoit Steiner
3bd2b41b2e Created a test for tensor type casting 2015-01-14 10:17:02 -08:00
Benoit Steiner
4928ea1212 Added ability to reverse the order of the coefficients in a tensor 2015-01-14 10:15:58 -08:00
Benoit Steiner
b00fe1590d Added ability to swap the layout of a tensor 2015-01-14 10:14:46 -08:00
Benoit Steiner
c94174b4fe Improved tensor references 2015-01-14 10:13:08 -08:00
Benoit Steiner
91dd53e54d Created some documentation 2015-01-13 16:07:51 -08:00
Gael Guennebaud
279786e987 Fix missing evaluator in outer-product 2015-01-13 10:25:50 +01:00
Gael Guennebaud
699c80e404 bug #927: backport some unit tests for Rotation2D 2015-01-13 10:11:44 +01:00
Gael Guennebaud
ae4644cc68 bug #907, ARM64: workaround ICE in xcode/clang 2015-01-13 10:03:00 +01:00
Gael Guennebaud
36f7c1337f bug #907, ARM64: workaround vreinterpretq_u64_* not defined in xcode/clang 2015-01-13 09:57:37 +01:00
Gael Guennebaud
5023afc0af Fix NEON compilation: use EIGEN_ARM_PREFETCH instead of __pld 2015-01-13 09:25:24 +01:00
Gael Guennebaud
63974bcb88 Bug #907: workaround some missing intrinsics in current NDK's gcc version (ARM64) 2015-01-07 09:44:25 +01:00
Gael Guennebaud
79f4a59ed9 bug #907: fix compilation with ARM64 2015-01-07 09:41:56 +01:00
Benoit Steiner
9f98650d0a Ensured that contractions that can be reduced to a matrix vector product work correctly even when the input coefficients aren't aligned. 2015-01-06 09:29:13 -08:00
Gael Guennebaud
8638dbb809 Fix bug #925: typo in MatLab versions of middleRows
(grafted from db5b0741b5)
2015-01-04 21:39:50 +01:00
Gael Guennebaud
db5b0741b5 Fix bug #925: typo in MatLab versions of middleRows 2015-01-04 21:39:50 +01:00
Gael Guennebaud
8efa5bb439 bug #921: fix utilization of bitwise operation on enums in first_aligned
(grafted from f5f6e2c6f4)
2014-12-19 14:41:59 +01:00
Gael Guennebaud
f5f6e2c6f4 bug #921: fix utilization of bitwise operation on enums in first_aligned 2014-12-19 14:41:59 +01:00
Gael Guennebaud
a5a3a994c8 bug #920: fix MSVC 2015 compilation issues 2014-12-18 22:58:15 +01:00
Gael Guennebaud
25c7d9164f bug #920: fix MSVC 2015 compilation issues 2014-12-18 22:58:15 +01:00
Gael Guennebaud
ba44761435 bug #920: fix compilation issue with MSVC 2015 2014-12-18 22:47:48 +01:00
Gael Guennebaud
1a96594607 rm explicit keyword introduced by backporting another change 2014-12-18 14:53:40 +01:00
Gael Guennebaud
61db9a0e89 Added tag 3.2.3 for changeset bc129ad79c 2014-12-16 18:31:04 +01:00
Gael Guennebaud
bc129ad79c bump to 3.2.3 2014-12-16 18:30:52 +01:00
Gael Guennebaud
f5328be65a SparseQR is really for rows>=columns, so let's only check such cases 2014-12-16 18:23:13 +01:00
Gael Guennebaud
735f1fda39 Fix false negatives in geo_transformations unit tests 2014-12-16 16:50:30 +01:00
Gael Guennebaud
57ab550a17 Fix false negative in nullary unit test when extended precision is used (FPU). 2014-12-16 16:23:47 +01:00
Gael Guennebaud
e887c61b3d bug #821: workaround MSVC 2013 issue with using Base::Base::operator= 2014-12-16 13:33:43 +01:00
Gael Guennebaud
b8d9eaa19b Use true compile time "if" for Transform::makeAffine 2014-12-13 22:16:39 +01:00
Gael Guennebaud
f806c23012 Fix false negatives in geo_transformations unit tests 2014-12-16 16:50:30 +01:00
Gael Guennebaud
99501a2c4c Fix false negative in nullary unit test when extended precision is used (FPU). 2014-12-16 16:23:47 +01:00
Gael Guennebaud
7dad5f797e bug #821: workaround MSVC 2013 issue with using Base::Base::operator= 2014-12-16 13:33:43 +01:00
Christoph Hertzberg
dcad508986 At least CMAKE 2.8.4 is required for WORKING_DIRECTORY option in add_test 2014-12-15 12:45:29 +01:00
Gael Guennebaud
26977e281e Use true compile time "if" for Transform::makeAffine 2014-12-13 22:16:39 +01:00
Gael Guennebaud
1e109e1757 fix signed to unsigned conversion warning 2014-12-13 21:48:48 +01:00
Christoph Hertzberg
e469ac55c3 BVH appears to compile well with clang (re-enabled unit test) 2014-12-12 17:36:22 +01:00
Christoph Hertzberg
874f345562 Removed unused typedef 2014-12-12 12:03:50 +01:00
Christoph Hertzberg
608733415a Free functions should only be declared as static in separate compilation units
(grafted from d85abc89c5)
2014-12-12 12:01:03 +01:00
Gael Guennebaud
57ec399ec9 Remove unused fortran files 2014-12-13 21:41:25 +01:00
Christoph Hertzberg
d85abc89c5 Free functions should only be declared as static in separate compilation units 2014-12-12 12:01:03 +01:00
Christoph Hertzberg
309620ee1f Make absolutely sure that tau is initialized (this change suppresses a gcc warning) 2014-12-12 11:53:24 +01:00
Gael Guennebaud
56ca44ad1a Use f2c generated code instead of the original fortran code, except for dotc/dotu. 2014-12-11 17:03:41 +01:00
Christoph Hertzberg
e8cdbedefb bug #877, bug #572: Introduce a global Index typedef. Rename Sparse*::Index to StorageIndex, make Dense*::StorageIndex an alias to DenseIndex. Overall this commit gets rid of all Index conversion warnings. 2014-12-04 22:48:53 +01:00
Gael Guennebaud
6ccf97f3e6 Fix GL support wrt evaluators 2014-12-04 22:05:28 +01:00
Gael Guennebaud
433bce5c3a UmfPack support: fix redundant evaluation/copies when calling compute() and support generic expressions as input 2014-12-02 17:30:57 +01:00
Gael Guennebaud
775f7e5fbb bug #697: make sure empty classes are at the end in case of multiple inheritance 2014-12-02 14:40:19 +01:00
Gael Guennebaud
a819fa148d Fix MSVC compilation issue 2014-12-02 14:35:31 +01:00
Gael Guennebaud
1a8dc85142 bug #897: fix UmfPack usage with mapped sparse matrices 2014-12-02 13:57:13 +01:00
Gael Guennebaud
4974d1d2b4 Fix bug #911: m_extractedDataAreDirty was not initialized in UmfPackLU 2014-12-02 13:54:06 +01:00
Gael Guennebaud
e2f3e4e4aa Document non-const SparseMatrix::diagonal() method. 2014-12-01 14:45:15 +01:00
Gael Guennebaud
b26e697182 Make SparseMatrix::coeff() returns a const reference and add a non const version of SparseMatrix::diagonal() 2014-12-01 14:41:39 +01:00
Gael Guennebaud
b1f9f603a0 Simplify return type of diagonal(Index) (and ease compiler job) 2014-11-28 14:39:47 +01:00
Gael Guennebaud
5384e89147 Disable MatrixBase::bdcSvd with CUDA (just like MatrixBase::jacobiSvd) 2014-11-26 22:29:29 +01:00
Gael Guennebaud
8518ba0bbc Fix Hyperplane::Through(a,b,c) when points are aligned or identical. We use the same strategy as in Quaternion::setFromTwoVectors. 2014-11-26 15:01:53 +01:00
Tim Murray
80cae358b0 Adds a modified f2c-generated C implementation for BLAS.
This adds an optional implementation for the BLAS library that does
not require the use of a FORTRAN compiler. It can be enabled with
EIGEN_USE_F2C_BLAS.

The C implementation uses the standard gfortran calling convention
and does not require the use of -ff2c when compiled with gfortran.
2014-11-24 10:56:30 -08:00
Gael Guennebaud
0efaff9b3b Fix out-of-bounds write 2014-12-11 16:15:20 +01:00
Gael Guennebaud
41a20994cc In simplicial cholesky: avoid a deep copy of the input matrix if the latter can be used readily 2014-12-08 17:56:33 +01:00
Gael Guennebaud
a910a7466e Fix inner iterator type 2014-12-08 17:55:31 +01:00
Gael Guennebaud
4371911861 Remove useless and non standard numext::atanh2 function. 2014-12-08 16:44:34 +01:00
Gael Guennebaud
5fc4ce6449 bug #876: remove usage of atanh2 in matrix power 2014-12-08 16:44:05 +01:00
Gael Guennebaud
77294047d6 bug #876, matrix_log_compute_2x2: directly use log1p instead of atanh2 2014-12-08 16:28:06 +01:00
Gael Guennebaud
bea36925db bug #876: implement a portable log1p function 2014-12-08 16:26:53 +01:00
Gael Guennebaud
7f7a712062 Optimize Simplicial Cholesky when NaturalOrdering is used. 2014-12-08 15:02:25 +01:00
Gael Guennebaud
30c849669d Fix dynamic allocation in JacobiSVD (regression) 2014-12-08 14:45:04 +01:00
Gael Guennebaud
e0a8615b94 Merged in infinitei/eigen (pull request PR-91)
Added cmake uninstall target
2014-12-05 15:04:19 +01:00
Gael Guennebaud
8efd9142b3 Merged in infinitei/eigen-opengl-fixes (pull request PR-90)
Adding missing OPENGL_LIBRARIES for openglsupport test.
2014-12-05 12:54:57 +01:00
Gael Guennebaud
80ed5bd90c Workaround various "returning reference to temporary" warnings. 2014-12-05 12:49:30 +01:00
Abhijit Kundu
eb3695d2fc Added cmake uninstall target.
This adds a cmake command, make uninstall.
Running make uninstall removes the files installed by running make install.
2014-12-04 02:57:03 -05:00
Abhijit Kundu
48db34a7b9 Adding missing OPENGL_LIBRARIES for openglsupport test. Also adding OpenGL include directories as a better practice even though these are system include directories on most systems. 2014-12-04 01:18:47 -05:00
Gael Guennebaud
da584912b6 Fix memory pre-allocation when permuting inner vectors of a sparse matrix. 2014-11-24 17:31:59 +01:00
Benoit Steiner
509e4ddc02 Added reduction packet primitives for CUDA 2014-11-19 10:34:11 -08:00
Benoit Steiner
b33cf92878 Fixed the evaluation of expressions involving tensors of 2 or 3 elements on CUDA devices. 2014-11-18 14:32:41 -08:00
Benoit Steiner
1d3c8306f8 Fixed compilation errors with clang.
2014-11-13 19:13:17 -08:00
Benoit Steiner
ec785b0180 Added support for extraction of patches from images 2014-11-13 09:28:54 -08:00
Benoit Steiner
eeabf7975e Optimized broadcasting 2014-11-12 22:35:44 -08:00
Benoit Steiner
c2d1074932 Added support for static list of indices 2014-11-12 22:25:38 -08:00
Gael Guennebaud
722916e19d bug #903: clean swap API regarding extra enable_if parameters, and add failtests for swap 2014-11-06 09:25:26 +01:00
Benoit Steiner
cb37f818ca Fixed a compilation error triggered by some operations on fixed sized tensors 2014-11-05 23:25:11 -08:00
Benoit Steiner
9a06a71627 Fixed a test 2014-11-05 07:49:51 -08:00
Gael Guennebaud
4577bafb91 Bug #853: replace enable_if in Ref<> ctor by static assertions and add failtests for Ref<> 2014-11-05 16:15:17 +01:00
Christoph Hertzberg
739ed32222 Disable yet another Eigen2 deprecated warning 2014-12-11 16:49:07 +01:00
Christoph Hertzberg
58f0647f96 Disable another Eigen2 deprecated warning 2014-12-11 16:17:29 +01:00
Gael Guennebaud
d0c3fcd382 Fix out-of-bounds write 2014-12-11 16:12:15 +01:00
Gael Guennebaud
19e16fe15f Workaround warning when EIGEN_STACK_ALLOCATION_LIMIT==0 2014-12-11 14:38:35 +01:00
Gael Guennebaud
8f87be9e03 Remove unused typedefs and variables 2014-12-11 14:35:22 +01:00
Gael Guennebaud
58725ff08c Remove unused variables in eigen2support. 2014-12-11 14:26:19 +01:00
Gael Guennebaud
15bff016d1 Define EIGEN_NO_EIGEN2_DEPRECATED_WARNING in eigen2support unit tests 2014-12-11 14:25:38 +01:00
Gael Guennebaud
c6fefe5d8e Bug #853: replace enable_if in Ref<> ctor by static assertions and add failtests for Ref<> 2014-11-05 16:15:17 +01:00
Gael Guennebaud
ee06f78679 Introduce unified macros to identify compiler, OS, and architecture. They are all defined in util/Macros.h and prefixed with EIGEN_COMP_, EIGEN_OS_, and EIGEN_ARCH_ respectively. 2014-11-04 21:58:52 +01:00
Benoit Steiner
9ea09179b5 Fixed the return type of the coefficient-wise tensor operations. 2014-11-04 10:24:42 -08:00
Benoit Steiner
b1789c112b Improved handling of 1d tensors 2014-11-03 08:51:33 -08:00
Benoit Steiner
2dde63499c Generalized the matrix vector product code. 2014-10-31 16:33:51 -07:00
Benoit Steiner
7f2c6ed2fa Fixed a compilation warning 2014-10-31 11:45:21 -07:00
Christoph Hertzberg
c5a3777666 Regression test for (invalid) bug #900. We should make it possible somehow to increase the problem size depending on the available RAM. 2014-10-31 17:19:05 +01:00
Christoph Hertzberg
0833b82efd Run sparse_basic unit tests also for rectangular matrices.
TriangularView with UnitDiag does not work properly yet (bug #901)
2014-10-31 17:12:13 +01:00
Benoit Steiner
85c3389b28 Fixed a test 2014-10-31 00:04:13 -07:00
Benoit Steiner
67fcf47ecb Merged from trunk 2014-10-30 21:59:22 -07:00
Benoit Steiner
fcecafde3a Fixed a compilation error with clang 2014-10-30 21:58:14 -07:00
Benoit Steiner
d62bfe73a9 Use the proper index type in the padding code 2014-10-30 18:15:05 -07:00
Benoit Steiner
bc99c5f7db fixed some potential alignment issues. 2014-10-30 18:09:53 -07:00
Benoit Steiner
1946cc4478 Added missing packet primitives for CUDA. 2014-10-30 17:52:32 -07:00
Benoit Steiner
5e62427e22 Use the proper index type 2014-10-30 17:49:39 -07:00
Christoph Hertzberg
4ec2f07a5b Fixed bug in SparseBlock which caused a segfault in sparse_extra_3 test 2014-10-30 21:34:10 +01:00
Christoph Hertzberg
883168ed94 Make select CUDA compatible (comparison operators aren't yet, so no test case yet) 2014-10-30 20:16:16 +01:00
Christoph Hertzberg
e5f134006b EIGEN_UNUSED_VARIABLE works better than casting to void. Make this also usable from CUDA code 2014-10-30 19:59:09 +01:00
Christoph Hertzberg
d2fc597d5b Removed deprecated header (unsupported/Eigen/BDCSVD is included in Eigen/SVD now) 2014-10-29 17:51:14 +01:00
Christoph Hertzberg
3d25b1f5b8 Split up some test cases 2014-10-29 17:46:54 +01:00
Christoph Hertzberg
acecb7b09f Fixed include in bdcsvd.cpp 2014-10-29 17:46:33 +01:00
Gael Guennebaud
21c0a2ce0c Move D&C SVD to official SVD module. 2014-10-29 11:29:33 +01:00
Benoit Steiner
debc97821c Added support for tensor references 2014-10-28 23:10:13 -07:00
Christoph Hertzberg
e2e7ba9f85 bug #898: add inline hint to const_cast_ptr 2014-10-28 14:49:44 +01:00
Christoph Hertzberg
bd2d330b25 Temporary workaround for bug #875:
Let TriangularView<Sparse>::nonZeros() return nonZeros() of the nested expression
2014-10-28 13:31:00 +01:00
Konstantinos Margaritis
79225db0b6 Merged in kmargar/eigen (pull request PR-87)
Extend NEON to add ARMv8 64-bit double support
2014-10-28 13:08:53 +02:00
Benjamin Chrétien
c426054767 BDCSVD: fix CMake install (missing separator). 2014-10-24 15:10:56 +02:00
Christoph Hertzberg
1fa793cb97 Removed weird self assignment. 2014-10-24 13:19:19 +02:00
Christoph Hertzberg
04ffb9956e Replace TEST_SET_BUT_UNUSED_VARIABLE by already defined EIGEN_UNUSED_VARIABLE 2014-10-24 13:18:23 +02:00
Konstantinos Margaritis
94ed7c81e6 Bug #896: Swap order of checking __VSX__/__ALTIVEC__ 2014-10-22 06:15:18 -04:00
Konstantinos Margaritis
fcb3573d17 Merged eigen/eigen into default 2014-10-22 10:42:18 +03:00
Konstantinos Margaritis
fae4fd7a26 Added ARMv8 support 2014-10-22 07:39:49 +00:00
Christoph Hertzberg
cf09c5f687 Prevent the CUDA "calling a __host__ function from a __host__ __device__ function is not allowed" error. 2014-10-21 20:40:09 +02:00
Konstantinos Margaritis
b508619392 working 64-bit support in PacketMath.h, Complex.h needed 2014-10-21 18:10:33 +00:00
Konstantinos Margaritis
0f65f2762d add EIGEN_TEST_NEON64, but it's a dummy: AArch64 implies NEON support, so no extra CXXFLAGS are needed 2014-10-21 18:10:01 +00:00
Konstantinos Margaritis
87524922dc check for __ARM_NEON instead as it's defined in arm64 as well 2014-10-21 18:08:50 +00:00
Gael Guennebaud
a303b6a733 bug #670: add unit test for mapped input in sparse solver. 2014-10-20 16:46:47 +02:00
Gael Guennebaud
fe57b2f963 bug #701: workaround (min) and (max) blocking ADL by introducing numext::mini and numext::maxi internal functions and a EIGEN_NOT_A_MACRO macro. 2014-10-20 15:55:32 +02:00
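
The problem being worked around: <windows.h> famously defines min and max as function-like macros, which mangles calls spelled std::min(a,b), and wrapping a call in parentheses to dodge the macro also disables argument-dependent lookup. A helper in the spirit of numext::mini (a sketch, not the actual implementation) hides the trick behind one name:

    #include <algorithm>

    template <typename T>
    T mini(const T& a, const T& b) {
      // The parentheses keep the `min` token from being followed by '(',
      // so a function-like `min` macro cannot expand here.
      return (std::min)(a, b);
    }

    int main() {
      return mini(2, 1) == 1 ? 0 : 1;
    }
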
Christoph Hertzberg
c12b7896d0 bug #766: Check minimum CUDA version 2014-10-20 14:23:11 +02:00
Gael Guennebaud
973e6a035f bug #718: Introduce a compilation error when using the wrong InnerIterator type with a SparseVector 2014-10-20 14:07:08 +02:00
Christoph Hertzberg
84aaa03182 Addendum to bug #859: pexp(NaN) for double did not return NaN; also, plog(NaN) did not return NaN.
psqrt(NaN) and psqrt(-1) shall return NaN if EIGEN_FAST_MATH==0
2014-10-20 13:13:43 +02:00
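
For reference, the scalar semantics the packet ops are being aligned with (a quick standalone check):

    #include <cassert>
    #include <cmath>

    int main() {
      double nan = std::nan("");
      assert(std::isnan(std::exp(nan)));    // exp(NaN)  -> NaN
      assert(std::isnan(std::log(nan)));    // log(NaN)  -> NaN
      assert(std::isnan(std::sqrt(nan)));   // sqrt(NaN) -> NaN
      assert(std::isnan(std::sqrt(-1.0)));  // sqrt(-1)  -> NaN
      return 0;
    }
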
Gael Guennebaud
aa5f79206f Fix bug #859: pexp(NaN) returned Inf instead of NaN 2014-10-20 11:38:51 +02:00
Gael Guennebaud
b4a9b3f496 Add unit tests for Rotation2D's inverse(), operator*, slerp, and fix regression wrt explicit ctor change 2014-10-20 11:04:32 +02:00
Gael Guennebaud
d04f23260d Fix bug #894: the sign of LDLT was not re-initialized at each call of compute() 2014-10-20 10:48:40 +02:00
Gael Guennebaud
8838b0a1ff Fix SparseQR::rank for a completely empty matrix. 2014-10-19 22:42:20 +02:00
Benoit Steiner
f786897e4b Added access to the underlying raw data of a tensor slice/chip whenever possible 2014-10-17 15:33:27 -07:00
Benoit Steiner
7acd38d19e Created some benchmarks for the tensor code 2014-10-17 09:49:03 -07:00
Gael Guennebaud
b50e5bc816 merge 2014-10-17 16:53:18 +02:00
Gael Guennebaud
a370b1f2e2 Fix SparseLU::absDeterminant and add respective unit test 2014-10-17 16:52:56 +02:00
Gael Guennebaud
a13bc22204 Ignore automatically imported lapack source files 2014-10-17 15:34:39 +02:00
Gael Guennebaud
4b7c3abbea Fix D&C SVD wrt zero matrices 2014-10-17 15:32:55 +02:00
Gael Guennebaud
feacfa5f83 Fix JacobiSVD wrt under/overflow by doing scaling prior to QR preconditioning 2014-10-17 15:32:06 +02:00
Gael Guennebaud
8472e697ca Add lapack interface to JacobiSVD and BDCSVD 2014-10-17 15:31:11 +02:00
Benoit Steiner
65af852b54 Silenced one last warning 2014-10-16 15:02:30 -07:00
Benoit Steiner
ae697b471c Silenced a few compilation warnings
Generalized a TensorMap constructor
2014-10-16 14:52:50 -07:00
Benoit Steiner
94e47798f4 Fixed the return types of unary and binary expressions to properly handle the case where it is different from the input type (e.g. abs(complex<float>)) 2014-10-16 10:41:07 -07:00
Benoit Steiner
d853adffdb Avoid calling get_future() more than once on a given promise. 2014-10-16 10:10:04 -07:00
Mark Borgerding
880e72c130 quieted more g++ warnings of the form: warning: typedef XXX locally defined but not used [-Wunused-local-typedefs] 2014-10-16 09:19:32 -04:00
Benoit Steiner
bfdd9f3ac9 Made the blocking computation aware of the l3 cache
Also optimized the blocking parameters to take into account the number of threads used for a computation
2014-10-15 15:32:59 -07:00
Gael Guennebaud
c566cfe2ba Make the SVD unit test even tougher 2014-10-15 23:37:47 +02:00
Benoit Steiner
dba55041ab Added support for promises
Started to improve multithreaded contractions
2014-10-15 11:20:36 -07:00
Gael Guennebaud
fd1aaf4772 merge 2014-10-15 16:33:14 +02:00
Gael Guennebaud
c806009453 Extend svd unit tests to stress problems with duplicated singular values. 2014-10-15 16:32:16 +02:00
Gael Guennebaud
2cc41dbe83 D&C SVD: fix some numerical issues by truly skipping deflated singular values when computing them 2014-10-15 15:21:12 +02:00
Gael Guennebaud
c26e8a1af3 D&C SVD: fix deflation of repeated singular values, fix sorting of singular values, fix case of complete deflation 2014-10-15 11:59:21 +02:00
Christoph Hertzberg
0ec1fc9e11 bug #891: Determine sizeof(void*) via CMAKE variable instead of test program 2014-10-14 14:14:25 +02:00
Benoit Steiner
99d75235a9 Misc improvements and cleanups 2014-10-13 17:02:09 -07:00
Benoit Steiner
4c70b0a762 Added support for patch extraction 2014-10-13 10:04:04 -07:00
Christoph Hertzberg
d3f52debc6 Make cuda_basic test compile again by adding lots of EIGEN_DEVICE_FUNC.
Although the test passes now, there might still be some missing.
2014-10-13 17:18:26 +02:00
Benoit Steiner
0219f8aed4 Added ability to print a tensor using an iostream. 2014-10-10 16:17:26 -07:00
Benoit Steiner
2ed1838aeb Added support for tensor chips 2014-10-10 16:11:27 -07:00
Benoit Steiner
4b36c3591f Fixed the tensor shuffling test 2014-10-10 15:43:21 -07:00
Benoit Steiner
a991f94c0e Fixed the thread pool test 2014-10-10 15:20:37 -07:00
Benoit Steiner
498b7eed25 Rewrote the TensorBase::random method to support the generation of random number on gpu. 2014-10-09 15:39:13 -07:00
Benoit Steiner
767424af18 Improved the functors defined for standard reductions
Added a functor to encapsulate the generation of random numbers on cpu and gpu.
2014-10-09 15:36:23 -07:00
Gael Guennebaud
a80e17cfe8 Remove unused and dangerous CompressedStorage::Map function 2014-10-09 23:42:33 +02:00
Gael Guennebaud
349c2c9235 bug #367: fix double copies in atWithInsertion, and add respective unit-test 2014-10-09 23:35:49 +02:00
Gael Guennebaud
48d537f59f Fix indentation 2014-10-09 23:35:26 +02:00
Gael Guennebaud
538c059aa4 bug #887: fix CompressedStorage::reallocate wrt memory leaks 2014-10-09 23:35:05 +02:00
Gael Guennebaud
a48b82eece Add a scoped_array helper class to handle locally allocated/used arrays 2014-10-09 23:34:05 +02:00
Gael Guennebaud
ccd70ba123 Various numerical fixes in D&C SVD: I cannot make it fail with double, but still need to tune for single precision, and carefully test with duplicated singular values 2014-10-09 23:29:01 +02:00
Benoit Steiner
44beee9d68 Removed dead code 2014-10-08 14:14:20 -07:00
Benoit Steiner
0a07ac574e Added support for the *= and /= operators to TensorBase 2014-10-08 13:32:41 -07:00
Benoit Steiner
6c047d398d Fixed a comment 2014-10-08 13:29:36 -07:00
Gael Guennebaud
4b886e6b39 bug #889: fix protected typedef 2014-10-08 07:48:30 +02:00
Gael Guennebaud
5741349294 bug #882: fix various const-correctness issues with *View classes. 2014-10-07 18:29:28 +02:00
Gael Guennebaud
118b1113d9 Workaround MSVC issue. 2014-10-07 09:53:39 +02:00
Gael Guennebaud
503c176d8e Fix missing outer() member in DynamicSparseMatrix 2014-10-07 09:53:27 +02:00
Gael Guennebaud
dbdd8b0883 D&C SVD: add scaling to avoid overflow, fix handling of fixed size matrices 2014-10-06 19:35:57 +02:00
Gael Guennebaud
d44d432baa Re-enable products with triangular views of sparse matrices: we simply have to treat them as a sparse matrix. 2014-10-06 16:11:26 +02:00
Gael Guennebaud
893bfcf95f bug #887: use ei_declare_aligned_stack_constructed_variable instead of manual new[]/delete[] pairs in AMD and Parallelizer 2014-10-06 11:54:30 +02:00
Gael Guennebaud
fb53ff1eda Fix SparseLU regarding uncompressed inputs and avoid manual new/delete calls. 2014-10-06 11:42:31 +02:00
Gael Guennebaud
7a17639953 Extend unit tests to check uncompressed sparse inputs in sparse solvers 2014-10-06 11:41:50 +02:00
Benoit Steiner
bbce6fa65d define EIGEN_VECTORIZE_CUDA when compiling with nvcc 2014-10-03 19:55:35 -07:00
Benoit Steiner
95a430a2ca Vector primitives for CUDA 2014-10-03 19:45:19 -07:00
Benoit Steiner
152f3218ac Improved contraction test 2014-10-03 19:33:44 -07:00
Benoit Steiner
af2e5995e2 Improved support for CUDA devices.
Improved contractions on GPU
2014-10-03 19:18:07 -07:00
Benoit Steiner
1269392822 Created the IndexPair type to store pair of tensor indices. CUDA doesn't support std::pair so we can't use them when targeting GPUs.
Improved the performance on tensor contractions
2014-10-03 10:16:59 -07:00
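
Roughly the shape of such a type (a sketch; Eigen's actual definition may differ in details such as device annotations):

    template <typename Index>
    struct IndexPair {
      IndexPair() : first(0), second(0) {}
      IndexPair(Index f, Index s) : first(f), second(s) {}
      Index first;   // dimension of the left operand to contract
      Index second;  // dimension of the right operand to contract
    };

    int main() {
      IndexPair<int> dims(1, 0);
      return dims.first == 1 && dims.second == 0 ? 0 : 1;
    }
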
Benoit Steiner
b7271dffb5 Generalized the gebp apis 2014-10-02 16:51:57 -07:00
Benoit Steiner
8b2afe33a1 Fixes for the forced evaluation of tensor expressions
More tests
2014-10-02 10:39:36 -07:00
Benoit Steiner
5cc23199be More tests to validate the const-correctness of the tensor code. 2014-10-02 10:30:44 -07:00
Benoit Steiner
7caaf6453b Added support for tensor reductions and concatenations 2014-10-01 20:38:22 -07:00
Benoit Steiner
1c236f4c9a Added tests for tensors of const values and tensors of strings 2014-10-01 20:21:42 -07:00
Christoph Hertzberg
1fa6fe2abd template keyword not allowed before non-template function call 2014-10-01 14:33:55 +02:00
Konstantinos Margaritis
9d3c69952b fixed to make big-endian VSX work as well 2014-10-01 09:43:56 +00:00
Gael Guennebaud
5180bb5e47 Add missing default ctor in Rotation2D 2014-09-30 16:59:28 +02:00
Christoph Hertzberg
0187504912 Avoid `unneeded-internal-declaration' warning 2014-09-30 16:43:52 +02:00
Christoph Hertzberg
6d26deb894 Missing outerStride in AlignedVector3 resulted in infinite recursion 2014-09-30 16:43:19 +02:00
Christoph Hertzberg
81517eebc1 Missing explicit 2014-09-30 16:42:04 +02:00
Christoph Hertzberg
12d59465cb bug #884: Copy constructor of Ref shall never malloc, constructing from other RefBase shall only malloc if the memory layout is incompatible. 2014-09-30 14:57:54 +02:00
Christoph Hertzberg
e404841235 make sure that regex does not match cmake 2014-09-29 19:28:10 +00:00
Christoph Hertzberg
15c946338f Related to bug #880: Accept make as well as gmake when searching the MakeCommand. And don't include \n in match expression 2014-09-29 19:20:01 +02:00
Gael Guennebaud
56a0bbbbee Fix compilation with GCC 2014-09-29 18:28:18 +02:00
Gael Guennebaud
842e31cf5c Let KroneckerProduct exploit the recently introduced generic InnerIterator class. 2014-09-29 13:37:49 +02:00
Gael Guennebaud
abd3502e9e Introduce a generic InnerIterator class compatible with evaluators. 2014-09-29 13:36:57 +02:00
Gael Guennebaud
76c3cf6949 Re-enable -Wshorten-64-to-32 compilation flag. 2014-09-29 10:33:16 +02:00
Georg Drenkhahn
bc34ee3365 Using Index type instead of hard coded int type to prevent potential implicit integer conversion. 2014-09-22 18:56:36 +02:00
Georg Drenkhahn
9a04cd307c Avoided implicit integer conversion by using explicit integer type conversion. Added assert to catch overflow. 2014-09-22 18:47:33 +02:00
Gael Guennebaud
f0a62c90bc Avoid comparisons between different index types. 2014-09-29 10:27:51 +02:00
Georg Drenkhahn
2946992ad4 Using StorageIndexType for loop assigning initial permutation. Adding assert for index overflow. 2014-09-22 17:59:02 +02:00
Georg Drenkhahn
821ff0ecfb Using Index instead of hard coded int type to prevent potential implicit integer conversion 2014-09-22 16:12:35 +02:00
Georg Drenkhahn
2c4cace56c Using Index instead of hard coded int type to prevent potential implicit integer conversion 2014-09-22 15:54:34 +02:00
Georg Drenkhahn
8a502233d8 Correcting the ReturnType in traits<KroneckerProduct<>> to include the correct Index type.
Fixed mixup of types Rhs::Index and Lhs::Index in various loop variables.
Added explicit type conversion for arithmetic expressions which may return a wider type.
2014-09-21 23:19:29 +02:00
Georg Drenkhahn
b2755edcdd Replaced hard coded int types with Index types preventing implicit integer conversions. 2014-09-21 23:15:35 +02:00
Georg Drenkhahn
d1ef3c3546 Changed Diagonal::index() to return an Index type instead of int to prevent possible implicit conversion from long to int.
Added inline keyword to member methods.
2014-09-21 10:21:20 +02:00
Georg Drenkhahn
edaefeb978 Using Kernel::Index type instead of int to prevent possible implicit conversion from long to int. 2014-09-21 10:01:12 +02:00
Georg Drenkhahn
3bd31e21b5 Fixed compiler warning on implicit integer conversion by separating index type for matrix and permutation matrix which may not be equal. 2014-09-20 15:00:36 +02:00
Georg Drenkhahn
75e269c77b Fixed warning on implicit integer conversion in test case code by using type VectorXd::Index instead of int. 2014-09-20 14:57:42 +02:00
Gael Guennebaud
74cde0c925 Add missing return derived() in ArrayBase::operator= 2014-09-28 09:16:13 +02:00
Jitse Niesen
ce2035af86 New doc page on implementing a new expression class. 2014-09-27 23:25:58 +01:00
Konstantinos Margaritis
6d0f0b8cec add VSX identifier 2014-09-25 16:06:16 +00:00
Christoph Hertzberg
4ba8aa1482 Fix bug #884: No malloc for zero-sized matrices or for Ref without temporaries 2014-09-25 16:05:17 +02:00
Christoph Hertzberg
27d6b4daf9 Tridiagonalization::diagonal() and ::subDiagonal() did not work. Added unit-test 2014-09-24 14:37:13 +02:00
Gael Guennebaud
446001ef51 Fix nested_eval<Product<> > which wrongly returned a Product<> expression 2014-09-24 09:39:09 +02:00
Gael Guennebaud
13cbc751c9 bug #880: automatically preserves buildtool flags when modifying DartConfiguration.tcl file. 2014-09-23 22:10:32 +02:00
Christoph Hertzberg
421feea3b2 member_redux constructor is explicit too. Renamed some typedefs for more consistency. 2014-09-23 18:55:42 +02:00
Christoph Hertzberg
7817bc19a4 Removed FIXME, as it is actually necessary. 2014-09-23 17:23:34 +02:00
Christoph Hertzberg
eb13ada3aa Renamed CwiseInverseReturnType to InverseReturnType for ArrayBase::inverse() 2014-09-23 17:21:27 +02:00
Christoph Hertzberg
36448c9e28 Make constructors explicit if they could lead to unintended implicit conversion 2014-09-23 14:28:23 +02:00
Christoph Hertzberg
de0d8a010e Suppress stupid gcc-4.4 warning 2014-09-23 12:58:14 +02:00
Gael Guennebaud
72569f17ec bug #882: add const-correctness failtests for CwiseUnaryView, TriangularView, and SelfAdjointView. 2014-09-23 10:26:02 +02:00
Gael Guennebaud
3878e6f170 Add a true ctest unit test for failtests 2014-09-23 10:25:12 +02:00
Gael Guennebaud
ff46ec0f24 bug #881: make SparseMatrixBase::isApprox(SparseMatrixBase) exploit sparse computations instead of converting the operands to dense matrices. 2014-09-22 23:33:28 +02:00
Gael Guennebaud
ae514ddfe5 bug #880: manually edit the DartConfiguration.tcl file to get it working with cmake 3.0.x 2014-09-22 22:49:20 +02:00
Gael Guennebaud
f9d6d3780f bug #879: fix compilation of tri1=mat*tri2 by copying tri2 into a full temporary. 2014-09-22 17:34:17 +02:00
Gael Guennebaud
abba11bdcf Many improvements in Divide&Conquer SVD:
- Fix many numerical issues, in particular regarding deflation.
- Add heavy debugging output to help track numerical issues (there are still a few)
- Make use of Eigen's apply-inplane-rotation feature.
2014-09-22 15:22:52 +02:00
Christoph Hertzberg
d9e0336a78 Merged in kmargar/eigen (pull request PR-84)
Add VSX support
2014-09-22 12:57:06 +02:00
Jitse Niesen
333905b0c2 Fix typos in docs for IterativeLinearSolvers module 2014-09-21 14:20:08 +01:00
Jitse Niesen
5fa69422a2 Fix copy-and-paste typo in SolveWithGuess assignment
This fixes compilation of code snippets in BiCGSTAB docs.
2014-09-21 14:19:23 +01:00
Konstantinos Margaritis
de38ff2499 prefetches are no-ops on VSX, actually disable the prefetch trait 2014-09-21 11:56:07 +00:00
Konstantinos Margaritis
60e093a9dc Merged eigen/eigen into default 2014-09-21 14:02:51 +03:00
Konstantinos Margaritis
56408504e4 fix compile error on big endian altivec 2014-09-21 13:59:30 +03:00
Konstantinos Margaritis
974fe38ca3 prefetches are no-ops on VSX 2014-09-21 11:24:30 +00:00
Konstantinos Margaritis
c0205ca4af VSX supports vec_div, implement where appropriate (float/doubles) 2014-09-21 08:12:22 +00:00
Konstantinos Margaritis
10f8aabb61 VSX port passes packetmath_[1-5] tests! 2014-09-20 22:31:31 +00:00
Jitse Niesen
80de35b6c5 Remove double return statement in PlainObjectBase::_set() 2014-09-19 22:05:18 +01:00
Konstantinos Margaritis
60663a510a 32-bit floats/ints, 64-bit doubles pass packetmath tests, complex 32/64-bit remaining 2014-09-19 21:05:01 +00:00
Gael Guennebaud
03dd4dd91a Unify unit test for BDC and Jacobi SVD. This reveals some numerical issues in BDCSVD. 2014-09-19 15:25:48 +02:00
Gael Guennebaud
0a18eecab3 bug #100: add support for explicit scalar to Array conversion (as enabling implicit conversion is much trickier) 2014-09-19 13:25:28 +02:00
Gael Guennebaud
7b044c0ead Added tag before-evaluators for changeset 9452eb38f8 2014-09-19 10:10:29 +02:00
Gael Guennebaud
755e77266f Fix SparseQR for row-major inputs. 2014-09-19 09:58:56 +02:00
Gael Guennebaud
07c5500d70 Introduce a compilation error when using the wrong InnerIterator type. 2014-09-19 09:58:20 +02:00
Gael Guennebaud
e70506dd8f Fix inner-stride of AlignedVector3 2014-09-18 22:46:46 +02:00
Gael Guennebaud
2ae20d558b Update KroneckerProduct wrt evaluator changes 2014-09-18 22:08:49 +02:00
Gael Guennebaud
62bce6e5e6 Make MatrixFunction use nested_eval instead of nested 2014-09-18 17:31:17 +02:00
Gael Guennebaud
060e835ee9 Add evaluator for the experimental AlignedVector3 2014-09-18 17:30:21 +02:00
Gael Guennebaud
0ca43f7e9a Remove deprecated code not used by evaluators 2014-09-18 15:15:27 +02:00
Gael Guennebaud
8b3be4907d log2(int) must be inlined. 2014-09-18 10:53:53 +02:00
Gael Guennebaud
0bf5894861 workaround one more shadowing issue with MSVC 2014-09-16 18:21:39 -07:00
Gael Guennebaud
e44d78dab3 workaround ambiguous call 2014-09-16 17:10:25 -07:00
Gael Guennebaud
c2f66c65aa workaround MSVC compilation issue (shadow issue) 2014-09-16 16:23:45 -07:00
Gael Guennebaud
125619146b workaround weird MSVC compilation issue: a typedef in a base class shadows a template parameter of a derived class 2014-09-16 16:06:32 -07:00
Gael Guennebaud
341ae8665d avoid division by 0 2014-09-16 16:05:06 -07:00
Gael Guennebaud
fc23e93707 Add a portable log2 function for integers 2014-09-17 09:56:07 +02:00
Gael Guennebaud
0f0580b97c Remove unneeded template keyword. 2014-09-17 09:55:44 +02:00
Gael Guennebaud
486ca277a0 Workaround MSVC ICE 2014-09-16 10:29:29 -07:00
Benoit Steiner
10a79ca3a3 Merged latest updates from the Eigen trunk. 2014-09-15 09:18:16 -07:00
Gael Guennebaud
466d6d41c6 Avoid a potential risk of recursive definition using traits to get the scalar type 2014-09-15 17:40:17 +02:00
Gael Guennebaud
8514179aa3 Fix traits<Quaternion>::IsAligned when using evaluators 2014-09-15 13:53:52 +02:00
Gael Guennebaud
0403d49006 Fix inverse unit test making sure we try to invert an invertible matrix 2014-09-14 20:12:07 +02:00
Gael Guennebaud
c83e01f2d6 Favor column major storage for inner products 2014-09-14 19:38:49 +02:00
Gael Guennebaud
26db954776 Re-enable aliasing checks when using evaluators 2014-09-14 19:06:08 +02:00
Gael Guennebaud
fda680f9cf Adapt changeset 51b3f558bb
to evaluators:
(Fix bug #822: outer products needed linear access, and add respective unit tests)
2014-09-14 18:31:29 +02:00
Gael Guennebaud
dfc54e1bbf Fix /= when using evaluator as in changeset 2d90484450 2014-09-14 18:27:48 +02:00
Gael Guennebaud
749b56f6af merge with default branch 2014-09-14 17:34:54 +02:00
Gael Guennebaud
af9c9f7706 Fix comparison to block size 2014-09-14 17:33:39 +02:00
Konstantinos Margaritis
470aa15c35 First time it compiles, but fails to pass the tests. 2014-09-09 16:58:48 +00:00
Gael Guennebaud
188a13f9fe Fix compilation of coeff(Index) on sub-inner-panels 2014-09-08 09:50:03 +02:00
Benoit Steiner
efdff15749 Fixed a typo in the contraction code 2014-09-06 13:28:24 -07:00
Gael Guennebaud
dacd39ea76 Exploit sparse structure in naiveU and naiveV when updating them. 2014-09-05 17:51:46 +02:00
Benoit Steiner
74db22455a Misc fixes. 2014-09-05 07:47:43 -07:00
Gael Guennebaud
b23556bbbd Oops, a block size of 1 is not very useful, set it to 48 as in HouseholderQR 2014-09-05 08:50:50 +02:00
Benoit Steiner
1abe4ed14c Created more regression tests 2014-09-04 20:27:28 -07:00
Benoit Steiner
d43f737b4a Added support for evaluation of tensor shuffling operations as lvalues 2014-09-04 20:02:28 -07:00
Benoit Steiner
f50548e86a Added missing tensor copy constructors. As a result it is now possible to declare and initialize a tensor on the same line, as in:
Tensor<bla> T = A + B;  or
  Tensor<bla> T(A.reshape(new_shape));
2014-09-04 19:50:27 -07:00
Gael Guennebaud
15bad3670b Apply Householder U and V in-place. 2014-09-04 09:17:01 +02:00
Gael Guennebaud
8846aa6d1b Optimization: enable cache-efficient application of HouseholderSequence. 2014-09-04 09:15:59 +02:00
Gael Guennebaud
80993b95d3 Disable a test which had never worked without evaluators 2014-09-03 22:56:39 +02:00
Benoit Steiner
b24fe22b1a Improved the performance of the tensor convolution code by a factor of about 4. 2014-09-03 11:38:13 -07:00
Gael Guennebaud
c82dc227f1 Cleaning in BDCSVD (formatting, handling of transpose case, remove some for loops) 2014-09-03 10:15:24 +02:00
Gael Guennebaud
a96f3d629c Clean bdcsvd 2014-09-02 22:30:23 +02:00
Gael Guennebaud
47829e2d16 Disable the solve_ret_val-like mechanism with evaluators enabled 2014-09-01 18:32:59 +02:00
Gael Guennebaud
1f398dfc82 Factorize *SVD::solve to SVDBase 2014-09-01 18:31:54 +02:00
Gael Guennebaud
b3a0365429 merge with default branch 2014-09-01 18:21:01 +02:00
Gael Guennebaud
72c4f8ca8f Disable a few unit tests in unsupported 2014-09-01 17:35:58 +02:00
Gael Guennebaud
8754341848 Fix remaining garbage during a merge. 2014-09-01 17:25:13 +02:00
Gael Guennebaud
daad9585a3 Fix Kronecker product in legacy mode. 2014-09-01 17:24:07 +02:00
Gael Guennebaud
b051bbd64f Make unsupported sparse solvers use SparseSolverBase 2014-09-01 17:21:47 +02:00
Gael Guennebaud
b3d63b4db2 Add evaluator for DynamicSparseMatrix 2014-09-01 17:21:05 +02:00
Gael Guennebaud
1c4b69c5fb Factorize solveWithGuess in IterativeSolverBase 2014-09-01 17:19:51 +02:00
Gael Guennebaud
8a74ce922c Make IncompleteLUT use SparseSolverBase. 2014-09-01 17:19:16 +02:00
Gael Guennebaud
863b7362bc Fix usage of m_isInitialized in SparseLU and Pastix support. 2014-09-01 17:16:32 +02:00
Gael Guennebaud
1bf3b34849 Fix regression in sparse-sparse product 2014-09-01 17:15:08 +02:00
Gael Guennebaud
f9580a3473 Fix Cholmod support without evaluators 2014-09-01 17:14:30 +02:00
Gael Guennebaud
fbb53b6cbb Fix sparse matrix times sparse vector. 2014-09-01 16:53:52 +02:00
Gael Guennebaud
85c7659574 Refactoring of sparse solvers through a SparseSolverBase class and usage of the Solve<> expression. Introduce a SolveWithGuess expression on top of Solve. 2014-09-01 15:00:19 +02:00
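
A minimal sketch of what the SolveWithGuess expression enables at the API level, assuming the 3.2-era iterative-solver interface (the matrix and right-hand side below are made up for illustration):

    #include <Eigen/Dense>
    #include <Eigen/Sparse>
    #include <iostream>

    int main() {
      typedef Eigen::SparseMatrix<double> SpMat;
      SpMat A(3, 3);
      A.insert(0, 0) = 4.0; A.insert(1, 1) = 3.0; A.insert(2, 2) = 2.0;
      A.makeCompressed();
      Eigen::VectorXd b(3), x0(3);
      b << 1, 2, 3;
      x0.setZero();  // initial guess, e.g. the solution of a previous step
      Eigen::BiCGSTAB<SpMat> solver(A);
      // solveWithGuess returns a SolveWithGuess expression; it is only
      // evaluated when assigned to a concrete vector.
      Eigen::VectorXd x = solver.solveWithGuess(b, x0);
      std::cout << x.transpose() << std::endl;
    }
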
Gael Guennebaud
bc065c75d2 Implement the missing bits to make Solve compatible with sparse rhs 2014-09-01 14:50:59 +02:00
Gael Guennebaud
e6cc24cbd6 Fix compilation in legacy mode 2014-09-01 14:20:11 +02:00
Konstantinos Margaritis
7ff266e3ce Initial VSX commit 2014-08-29 20:03:49 +00:00
Gael Guennebaud
b4a709520d merge 2014-08-29 15:31:54 +02:00
Gael Guennebaud
c1d0f15bde Enable evaluators by default 2014-08-29 15:31:32 +02:00
Gael Guennebaud
124d12a915 merge default branch 2014-08-29 15:20:31 +02:00
Gael Guennebaud
f29dbec321 undef Unsable macro 2014-08-29 15:12:03 +02:00
Benoit Steiner
2959045f2f Optimized the tensor padding code. 2014-08-26 09:47:18 -07:00
Benoit Steiner
36fffe48f7 Misc API improvements and cleanups 2014-08-23 14:35:41 -07:00
Benoit Steiner
fb5c1e9097 Optimized and cleaned up the tensor morphing code 2014-08-23 13:18:30 -07:00
Benoit Steiner
3d298da269 Added support for broadcasting 2014-08-20 17:00:50 -07:00
Benoit Steiner
9ac3c821ea Improved the speed of convolutions when running on cuda devices 2014-08-19 16:57:10 -07:00
Benoit Steiner
33c702c79f Added support for fast integer divisions by a constant
Sped up tensor slicing by a factor of 3 by using these fast integer divisions.
2014-08-14 22:13:21 -07:00
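
For context, the classic trick behind such fast integer division is to replace n / d by a multiplication with a precomputed "magic" number followed by a shift. The sketch below shows the general round-up method (Granlund-Montgomery style); it is illustrative only and differs in detail from Eigen's actual TensorIntDiv implementation. It requires C++11 and unsigned __int128, a GCC/Clang extension.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    struct FastDivisor {
      unsigned __int128 m;  // magic multiplier: floor(2^(32+s) / d) + 1
      int s;                // shift amount: ceil(log2(d))
      explicit FastDivisor(uint32_t d) {
        assert(d > 0);
        s = 0;
        while ((uint64_t(1) << s) < d) ++s;
        m = ((unsigned __int128)1 << (32 + s)) / d + 1;
      }
      uint32_t divide(uint32_t n) const {
        // one multiply and one shift instead of a hardware divide
        return (uint32_t)((m * n) >> (32 + s));
      }
    };

    int main() {
      const uint32_t divisors[] = {1, 3, 7, 10, 1000};
      for (uint32_t d : divisors) {
        FastDivisor fd(d);
        for (uint32_t n = 0; n < 100000; ++n) assert(fd.divide(n) == n / d);
      }
      std::puts("fast division matches exact division");
    }
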
Benoit Steiner
756292f8aa Fixed compilation errors 2014-08-14 00:32:59 -07:00
Benoit Steiner
8c8db49331 Added a few regression tests 2014-08-14 00:25:22 -07:00
Benoit Steiner
eeb43f9e2b Added support for padding, striding, and shuffling 2014-08-14 00:22:47 -07:00
Benoit Steiner
16047c8d4a Pulled in the latest changes from the Eigen trunk 2014-08-13 22:25:29 -07:00
Benoit Steiner
916ef48846 Added ability to get the nth element from an abstract array type. 2014-08-13 08:44:47 -07:00
Benoit Steiner
f1d8c13dbc Fixed misc typos. 2014-08-13 08:40:26 -07:00
Benoit Steiner
9faad2932f Added missing APIs. 2014-08-13 08:36:33 -07:00
Benoit Steiner
f8fad09301 Updated the convolution and contraction evaluators to follow the new EvalSubExprsIfNeeded API. 2014-08-13 08:33:18 -07:00
Benoit Steiner
72e7529708 Fixed a typo. 2014-08-13 08:29:40 -07:00
Benoit Steiner
1aa2bf8274 Support for in place evaluation of expressions containing slicing and reshaping operations 2014-08-13 08:27:58 -07:00
Benoit Steiner
b1892ab14d Added support for in-place evaluation to simple tensor expressions.
Use memcpy to speed up tensor copies whenever possible.
2014-08-13 08:26:44 -07:00
Benoit Steiner
439feca139 Reworked the TensorExecutor code to support in place evaluation. 2014-08-13 08:22:05 -07:00
Gael Guennebaud
4dd55a2958 Optimize reductions for Homogeneous 2014-08-01 17:00:20 +02:00
Gael Guennebaud
f25338f4d7 Fix nesting of Homogeneous evaluator 2014-08-01 16:49:44 +02:00
Gael Guennebaud
51357a6622 Fix geo_orthomethods unit test for complexes 2014-08-01 16:26:23 +02:00
Gael Guennebaud
107bb308c3 Fix various small issues detected by gcc 2014-08-01 16:24:23 +02:00
Gael Guennebaud
c2ff44cbf3 Make assignment from general EigenBase object call evaluator, and support dense X= sparse 2014-08-01 16:23:30 +02:00
Gael Guennebaud
2a3c3c49a1 Fix numerous nested versus nested_eval shortcomings 2014-08-01 14:48:22 +02:00
Gael Guennebaud
fc13b37c55 Make cross product uses nested/nested_eval 2014-08-01 14:47:33 +02:00
Benoit Steiner
647622281e The tensor assignment code now resizes the destination tensor as needed. 2014-07-31 17:39:04 -07:00
Gael Guennebaud
26d2cdefd4 Fix 4x4 inverse via SSE for submatrices 2014-07-31 16:24:29 +02:00
Gael Guennebaud
db183ca7b3 Make minimal changes to make Homogeneous compatible with evaluators 2014-07-31 14:54:54 +02:00
Gael Guennebaud
702a3c17db Make Transform expose sizes: Dim+1 x Dim+1 for projective transform, and Dim x Dim+1 for all others 2014-07-31 14:54:00 +02:00
Gael Guennebaud
5f5a8d97c0 Re-enable main unit tests which are now compiling and running fine with evaluators 2014-07-31 13:43:19 +02:00
Gael Guennebaud
bae2e3327b Call product_generic_impl by default, and remove a lot of boilerplate code 2014-07-31 13:35:49 +02:00
Gael Guennebaud
cd0ff253ec Make permutation compatible with sparse matrices 2014-07-30 15:22:50 +02:00
Gael Guennebaud
929e77192c Various minor fixes 2014-07-30 11:39:52 +02:00
Benoit Steiner
2116e261fb Made sure that the data stored in fixed sized tensor is aligned. 2014-07-25 09:47:59 -07:00
Benoit Steiner
1f371e78e6 Added a few tests to validate the behavior of the assignment operator. 2014-07-22 10:32:40 -07:00
Benoit Steiner
f7bb7ee3f3 Fixed the assignment operator of the Tensor and TensorMap classes. 2014-07-22 10:31:21 -07:00
Gael Guennebaud
baa77ffe38 Fix max sizes at compile time of DiagonalWrapper 2014-07-22 16:13:56 +02:00
Gael Guennebaud
4aac87251f Re-enable a couple of unit tests with evaluators. 2014-07-22 12:54:03 +02:00
Gael Guennebaud
6daa6a0d16 Refactor TriangularView to handle both dense and sparse objects. Introduce a glu_shape<S1,S2> helper to assemble sparse/dense shapes with triangular/selfadjoint views. 2014-07-22 11:35:56 +02:00
Gael Guennebaud
2a251ffab0 Implement evaluator for sparse-selfadjoint products 2014-07-22 09:32:40 +02:00
Gael Guennebaud
9b729f93a1 Resizing is done by call_assignment_noalias, so no need to perform it when dealing with aliasing. 2014-07-21 11:46:47 +02:00
Gael Guennebaud
946b99dd5c Extend qr unit test 2014-07-21 11:45:54 +02:00
Gael Guennebaud
50eef6dfc3 Compilation fixes 2014-07-20 15:16:34 +02:00
Gael Guennebaud
62f332fc04 Make sure we evaluate into temporaries matching evaluator storage order requirements 2014-07-19 15:19:10 +02:00
Gael Guennebaud
3eba5e1101 Implement evaluator for sparse outer products 2014-07-19 14:55:56 +02:00
Gael Guennebaud
36e6c9064f bug #770: fix out of bounds access 2014-07-18 14:19:18 +02:00
Gael Guennebaud
a325d1cb1e merge with default branch 2014-07-18 11:02:22 +02:00
Gael Guennebaud
2bdb3b1afd Extend dense*sparse product unit tests 2014-07-15 11:00:16 +02:00
Gael Guennebaud
3c7686630d merge with default branch 2014-07-15 10:55:03 +02:00
Gael Guennebaud
296cb40161 merge with default branch 2014-07-10 22:04:45 +02:00
Benoit Steiner
40bb98e76a Added primitives to compare tensor dimensions 2014-07-10 11:29:51 -07:00
Benoit Steiner
9b7a6f0122 Added tests for tensor slicing 2014-07-10 11:27:27 -07:00
Benoit Steiner
ffd3654f67 Vectorized the evaluation of expressions involving tensor slices. 2014-07-10 11:09:46 -07:00
Benoit Steiner
25b2f6624d Improved the speed of slicing operations. 2014-07-09 12:48:34 -07:00
Christoph Hertzberg
547d660f1d Determine version of Metis library. Apparently, at least version 5.x is needed for Eigen/MetisSupport.
Marked some internal variables as advanced
2014-07-09 16:54:15 +02:00
Abhijit Kundu
5633cde9ad Adding missing OPENGL_LIBRARIES for openglsupport test. Also adding OpenGL include directories as a better practice even though these are system include directories in most systems.
(grafted from 48db34a7b9
)
2014-12-04 01:18:47 -05:00
Gael Guennebaud
fe8757a576 Update mpreal version. 2014-12-11 11:51:00 +01:00
Gael Guennebaud
ff29221951 Fix MSVC compilation 2014-12-10 21:55:11 +01:00
Gael Guennebaud
7fbc9d8409 Introduce a ReplicateReturnType as a possible workaround of a compilation issue with MSVC+ICC 2014-12-10 14:26:25 +01:00
Gael Guennebaud
79c3cfabe3 Fix nomalloc_3 and binding reference to temporary issue 2014-12-09 19:01:25 +01:00
Gael Guennebaud
e0f390793c Fix dynamic allocation in JacobiSVD (regression)
(grafted from 30c849669d
)
2014-12-08 14:45:04 +01:00
Gael Guennebaud
97812ad0d3 UmfPack support: fix redundant evaluation/copies when calling compute() and support generic expressions as input 2014-12-02 17:30:57 +01:00
Gael Guennebaud
d66b5a1d91 Fix MSVC compilation issue
(grafted from a819fa148d
)
2014-12-02 14:35:31 +01:00
Gael Guennebaud
b0152fdb1d Fix bicgstab example 2014-12-02 14:32:55 +01:00
Gael Guennebaud
e9c5418249 bug #897: fix UmfPack usage with mapped sparse matrices
(grafted from 1a8dc85142
)
2014-12-02 13:57:13 +01:00
Gael Guennebaud
b25b517817 Fix bug #911: m_extractedDataAreDirty was not initialized in UmfPackLU
(grafted from 4974d1d2b4
)
2014-12-02 13:54:06 +01:00
Gael Guennebaud
ce0fb1bca1 Simplify return type of diagonal(Index) (and ease compiler job) 2014-11-28 14:39:47 +01:00
Christoph Hertzberg
92fce631ed added std:: scope to abs function call 2014-11-28 02:24:51 +00:00
Christoph Hertzberg
238308e0f7 bug #909: Removed unreachable return statement 2014-11-26 15:45:11 +01:00
Gael Guennebaud
719ac0d6b0 Fix Hyperplane::Through(a,b,c) when points are aligned or identical. We use the same strategy as in Quaternion::setFromTwoVectors.
(grafted from 8518ba0bbc
)
2014-11-26 15:01:53 +01:00
Gael Guennebaud
8e61a7aab6 Fix a case where 0-1 leads to Dynamic instead of 0. 2014-11-26 15:03:22 +01:00
Gael Guennebaud
09e992ce9f Add missing specialization of Block<const SparseMatrix> 2014-11-24 18:40:44 +01:00
Gael Guennebaud
cdd401f743 Enable Mx0 * 0xN matrix product. 2014-11-24 18:07:50 +01:00
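
A tiny illustration of the newly allowed empty product: the sum over an empty inner dimension is empty, so the result is an M-by-N zero matrix (sketch, sizes made up):

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::MatrixXd A(3, 0), B(0, 4);
      Eigen::MatrixXd C = A * B;  // 3x4, all coefficients zero
      std::cout << C.rows() << "x" << C.cols() << ", norm = " << C.norm() << "\n";
    }
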
Gael Guennebaud
59b7615d31 Fix memory pre-allocation when permuting inner vectors of a sparse matrix.
(grafted from da584912b6
)
2014-11-24 17:31:59 +01:00
Gael Guennebaud
a8cb0dfcf5 re-enable usage of ProductBase::m_result and workaround a compilation failure when m_result is too large but unused 2014-11-14 13:38:12 +01:00
Christoph Hertzberg
0e7a26c19f bug #898: add inline hint to const_cast_ptr 2014-10-28 14:51:05 +01:00
Christoph Hertzberg
13c636d864 Addendum to bug #859: pexp(NaN) for double did not return NaN; also, plog(NaN) did not return NaN.
psqrt(NaN) and psqrt(-1) shall return NaN if EIGEN_FAST_MATH==0
2014-10-20 13:35:03 +02:00
Gael Guennebaud
00ec1629ca Fix bug #859: pexp(NaN) returned Inf instead of NaN 2014-10-20 11:38:51 +02:00
Gael Guennebaud
a72eabec9b Fix bug #894: the sign of LDLT was not re-initialized at each call of compute()
(grafted from d04f23260d
)
2014-10-20 10:48:40 +02:00
Gael Guennebaud
235c97ba92 Fix SparseQR::rank for a completely empty matrix.
(grafted from 8838b0a1ff
)
2014-10-19 22:42:20 +02:00
Gael Guennebaud
4126cb6369 Fix SparseLU::absDeterminant and add respective unit test
(grafted from a370b1f2e2
)
2014-10-17 16:52:56 +02:00
Gael Guennebaud
8ea2ab4829 Fix JacobiSVD wrt under/overflow by doing scaling prior to QR preconditioning
(grafted from feacfa5f83
)
2014-10-17 15:32:06 +02:00
Christoph Hertzberg
9b79607579 bug #891: Determine sizeof(void*) via CMAKE variable instead of test program
(transplanted from 0ec1fc9e11
)
2014-10-14 14:14:25 +02:00
Gael Guennebaud
aadbfe78c2 bug #890: extract_data might return 0x0, thus breaking aliasing detection 2014-10-10 16:42:32 +02:00
Gael Guennebaud
7d5e16c733 Add missing default ctor in Rotation2D 2014-09-30 16:59:28 +02:00
Christoph Hertzberg
e395a8042a Fix bug #884: No malloc for zero-sized matrices or for Ref without temporaries
manually ported from 4ba8aa1482
2014-09-25 16:25:31 +02:00
Gael Guennebaud
91f1a161ca bug #879: tri1 = mat * tri2 was compiling and running incorrectly if tri2 was not numerically triangular. Workaround the issue by evaluating mat*tri2 into a temporary. 2014-09-22 17:20:42 +02:00
Gael Guennebaud
16bca3bfe2 Fix SparseQR for row-major inputs.
(grafted from 755e77266f
)
2014-09-19 09:58:56 +02:00
Gael Guennebaud
e0ab58d815 Fix bug #791: infinite loop in JacobiSVD in the presence of NaN.
(grafted from d6236d3b26
)
2014-09-10 11:54:20 +02:00
Gael Guennebaud
c67a7148c4 ArrayWrapper and MatrixWrapper classes should not be nested by reference.
(grafted from 921a645481
)
2014-09-10 10:33:19 +02:00
Gael Guennebaud
38dc683901 Fix bug #822: outer products needed linear access, and add respective unit tests
(grafted from 51b3f558bb
)
2014-09-08 10:21:22 +02:00
Jitse Niesen
cad0fa5d77 Replace asm by __asm__ (bug #873).
Thanks to Markus Eisenmann for report and initial patch.
2014-09-06 11:54:47 +01:00
Gael Guennebaud
5daebe0a27 bug #871: fix compilation on ARM/Neon regarding __has_builtin usage (backport) 2014-09-01 10:58:07 +02:00
Georg Drenkhahn
05fb735d1d Added missing STL include of <list> in main.h
Removed duplicated include of <sstream>
Added comments on the background of min/max macro definitions and STL header includes
(grafted from e49e84d979
)
2014-08-29 10:41:05 +02:00
Gael Guennebaud
7443d8b4e9 bug #867: forward the cmake generator when testing support for fortran. (was already fixed in the default branch) 2014-08-28 09:15:33 +02:00
Georg Drenkhahn
36506511a1 Fixed CMakeLists.txt files to prevent CMake 3.0.0 warnings about deprecated LOCATION target property.
Small whitespace cleanup in CMakeLists.txt.
2014-08-22 12:13:07 +02:00
Gael Guennebaud
3afdc6d95a In SparseQR, calling factorize() without analyzePattern() was broken. 2014-08-26 23:32:32 +02:00
Gael Guennebaud
c14c03490f merge 2014-08-26 13:00:11 +02:00
Gael Guennebaud
c880590d27 bug #861: enable posix_memalign with PGI
(grafted from 2e50289ba3
)
2014-08-26 12:54:19 +02:00
Gael Guennebaud
54294e2293 bug #857: workaround MSVC compilation issue. 2014-08-26 12:52:29 +02:00
Gael Guennebaud
c7331ebb06 Do not apply the preconditioner before starting the iterations as this might destroy a very good initial guess.
(grafted from b49ef99617
)
2014-08-21 22:14:25 +02:00
Gael Guennebaud
0321449944 bug #854: fix numerical issue in SelfAdjointEigenSolver::computeDirect for 3x3 matrices. The tolerance to detect stable cross products was too optimistic.
Add respective unit tests.
(grafted from 9c0aa81fbf
)
2014-08-21 10:49:09 +02:00
Gael Guennebaud
44c390a370 Added tag 3.2.2 for changeset bbaf01712c 2014-08-04 12:52:31 +02:00
Gael Guennebaud
bbaf01712c bump to 3.2.2 2014-08-04 12:51:54 +02:00
Gael Guennebaud
8e875d3c38 Memory allocated on the stack is freed at the function exit, so reduce iteration count to avoid stack overflow
(grafted from e51da9c3a8
)
2014-08-04 12:46:00 +02:00
Gael Guennebaud
8d69b87c53 Make the ordering method of SimplicialL[D]LT user configurable.
(grafted from d4cc1bdc7f
)
2014-07-20 14:22:58 +02:00
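
A sketch of the user-configurable ordering: the third template argument of SimplicialLLT/SimplicialLDLT selects the fill-in reducing ordering (AMDOrdering being the default); the tiny system below is made up for illustration:

    #include <Eigen/Dense>
    #include <Eigen/Sparse>

    int main() {
      typedef Eigen::SparseMatrix<double> SpMat;
      SpMat A(2, 2);
      A.insert(0, 0) = 2.0; A.insert(1, 1) = 3.0;
      Eigen::VectorXd b(2); b << 1, 1;
      // Pick the natural (identity) ordering instead of the default AMD one.
      Eigen::SimplicialLDLT<SpMat, Eigen::Lower, Eigen::NaturalOrdering<int> > solver;
      Eigen::VectorXd x = solver.compute(A).solve(b);
      return x.size() == 2 ? 0 : 1;
    }
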
Christoph Hertzberg
49cbaf3856 Add a note on EIGEN_DONT_PARALLELIZE to the preprocessor documentation page (requested in IRC)
(transplanted from 68eafc10b1
)
2014-07-18 15:42:12 +02:00
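
The macro in question is a compile-time switch; defining it before including any Eigen header disables Eigen's own OpenMP parallelization (sketch of typical usage):

    #define EIGEN_DONT_PARALLELIZE  // must come before the first Eigen include
    #include <Eigen/Dense>

    int main() {
      Eigen::MatrixXd a = Eigen::MatrixXd::Random(256, 256);
      Eigen::MatrixXd b = a * a;  // runs single-threaded even if built with -fopenmp
      return b.size() > 0 ? 0 : 1;
    }
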
Gael Guennebaud
9b00035438 bug #843: fix JacobiSVD for complexes and extend respective unit test to check with random tricky matrices,
and backport other JacobiSVD fixes
2014-07-17 17:09:15 +02:00
Gael Guennebaud
e215740e8e Fix bug #838: detect outer products from either the lhs or rhs 2014-07-11 17:17:17 +02:00
Gael Guennebaud
0cc67589d3 Fix bug #838: fix dense * sparse and sparse * dense outer products 2014-07-11 16:31:41 +02:00
Christoph Hertzberg
51e2e93019 Backed out of changeset 6091:9d3e0da38576dddc4df25c0e61ad6685193eb630
Unfortunately this breaks things at other places
2014-07-10 16:12:33 +02:00
Christoph Hertzberg
9d3e0da385 Make MatrixBase::makeHouseholder resize its output vector if it is zero
(transplanted from f27f55bee3
)
2014-07-10 14:59:18 +02:00
Kolja Brix
6ff72f40cf Fix GMRES: Initialize essential Householder vector with correct dimension. Add check if initial guess is already a sufficient approximation.
(transplanted from e955725ff1
)
2014-07-10 08:20:55 +02:00
Benoit Steiner
ea0906dfd8 Improved evaluation of tensor expressions when used as rvalues 2014-07-08 16:43:28 -07:00
Benoit Steiner
cc1bacea5b Improved the efficiency of the tensor evaluation code on thread pools and gpus. 2014-07-08 16:39:28 -07:00
Benoit Steiner
c285fda7f4 Extended the functionality of the TensorDeviceType classes 2014-07-08 16:30:48 -07:00
Chen-Pang He
160034bba1 Fix bug #839 2014-07-09 03:32:32 +08:00
Gael Guennebaud
6eb16aae2d bug #808: fix set_from_triplets temporary matrix type (already fixed in the devel branch) 2014-07-08 19:10:26 +02:00
Gael Guennebaud
4777ca1afb bug #808: fix implicit conversions from int/longint to float/double 2014-07-08 18:59:18 +02:00
Gael Guennebaud
0e0ae40084 bug #808: use double instead of float for the increasing size ratio in CompressedStorage::resize 2014-07-08 18:58:41 +02:00
Gael Guennebaud
b73908000c Fix bug #809: unused variable warning
(grafted from 5c4733f6e4
)
2014-07-08 18:38:34 +02:00
Gael Guennebaud
08b0c08e5e Fix LDLT with semi-definite complex matrices: owing to round-off errors, the diagonal was not real. Also exploit the fact that the diagonal is real in the rest of LDLT 2014-07-08 10:04:27 +02:00
Benoit Steiner
7d53633e05 Added support for tensor slicing 2014-07-07 14:10:36 -07:00
Benoit Steiner
bc072c5cba Added support for tensor slicing 2014-07-07 14:08:45 -07:00
Benoit Steiner
47981c5925 Added support for tensor slicing 2014-07-07 14:07:57 -07:00
Gael Guennebaud
bbe9e22d60 LDLT is not rank-revealing, so we should not attempt to use the biggest diagonal elements as thresholds. 2014-07-02 23:04:46 +02:00
Gael Guennebaud
b18a7ff6be Do not attempt to include <intrin.h> on Windows CE 2014-07-02 16:13:05 +02:00
Gael Guennebaud
61b88d2feb merge with default branch 2014-07-02 09:35:37 +02:00
Gael Guennebaud
e84bdbb445 Fix regression in bicgstab: the threshold used to detect the need for a restart was much too large.
(grafted from bf334b8ae5
)
2014-07-01 22:29:04 +02:00
Gael Guennebaud
8f4cdbbc8f Fix typo in dense * diagonal evaluator. 2014-07-01 18:04:30 +02:00
Gael Guennebaud
7390af91b6 Implement evaluators for sparse*dense products 2014-07-01 17:53:18 +02:00
Gael Guennebaud
1e6f53e070 Use DiagonalShape as the storage kind of DiagonalBase<>. 2014-07-01 17:52:58 +02:00
Gael Guennebaud
6f846ef9c6 Split StorageKind promotion into two helpers: one for products, and one for coefficient-wise operations. 2014-07-01 17:51:53 +02:00
Gael Guennebaud
3c63446507 Update copyright dates 2014-07-01 13:27:35 +02:00
Gael Guennebaud
746d2db6ed Implement evaluators for sparse * sparse with auto pruning. 2014-07-01 13:18:56 +02:00
Gael Guennebaud
441f97b2df Implement evaluators for sparse * sparse products 2014-07-01 11:50:20 +02:00
Gael Guennebaud
0ad7a644df Implement nonZeros() for Transpose<sparse> 2014-07-01 11:49:46 +02:00
Gael Guennebaud
7ffd55c980 Do not bypass aliasing in sparse '=' assignments 2014-07-01 11:48:49 +02:00
Gael Guennebaud
065344a06b Fix bug #836: extend SparseQR to support more columns than rows. 2014-07-01 10:24:46 +02:00
Gael Guennebaud
c401167712 Fix double constructions of the nested CwiseBinaryOp evaluator in sparse*diagonal product iterator. 2014-06-27 16:41:45 +02:00
Gael Guennebaud
73e686c6a4 Implement evaluators for sparse times diagonal products. 2014-06-27 15:54:44 +02:00
Gael Guennebaud
ae039dde13 Add a NoPreferredStorageOrderBit flag for expressions having no preferred storage order.
It is currently only used in Product.
2014-06-27 15:53:51 +02:00
Gael Guennebaud
e1f1f66a52 Fix some ICEs with VC11. 2014-06-27 15:11:38 +02:00
Gael Guennebaud
f0648f8860 Implement evaluator for sparse views. 2014-06-26 13:52:19 +02:00
Gael Guennebaud
54607665ab Fix inverse evaluator 2014-06-25 23:44:59 +02:00
Gael Guennebaud
a7bd4c455a Update sparse reductions and sparse-vectors to evaluators. 2014-06-25 17:24:43 +02:00
Gael Guennebaud
b868bfb84a Make operator=(EigenBase<>) use the new assignment mechanism and introduce a generic EigenBase to EigenBase assignment kind based on the previous evalTo mechanism. 2014-06-25 17:23:52 +02:00
Gael Guennebaud
3b19b466a7 Generalize static assertions on matching sizes to avoid the need for SizeAtCompileTime 2014-06-25 17:22:12 +02:00
Gael Guennebaud
199ac3f2e7 Implement evaluators for sparse coeff-wise views 2014-06-25 17:21:04 +02:00
Gael Guennebaud
e3ba5329ff Implement evaluators for sparse Block. 2014-06-25 09:58:26 +02:00
Gael Guennebaud
17f119689e implement evaluator for SparseVector 2014-06-25 09:58:03 +02:00
Gael Guennebaud
3849cc65ee Implement binaryop and transpose evaluators for sparse matrices 2014-06-23 10:40:03 +02:00
Gael Guennebaud
ec0a8b2e6d rm conflict 2014-06-20 16:30:34 +02:00
Gael Guennebaud
7fa87a8b12 Backport changes from old to new expression engines 2014-06-20 16:17:57 +02:00
Gael Guennebaud
b29b81a1f4 merge with default branch 2014-06-20 15:55:44 +02:00
Gael Guennebaud
47585c8ab2 merge 2014-06-20 15:49:07 +02:00
Gael Guennebaud
c415b627a7 Started to move the SparseCore module to evaluators: implemented assignment and cwise-unary evaluator 2014-06-20 15:42:13 +02:00
Gael Guennebaud
78bb808337 1- Introduce sub-evaluator types for unary, binary, product, and map expressions to ease specializing them.
2- Remove a lot of code which should not be there with evaluators, in particular coeff/packet methods implemented in the expressions.
2014-06-20 15:39:38 +02:00
Gael Guennebaud
caf4936661 Add assertion and warning on the requirements of SparseQR and COLAMDOrdering
(grafted from 98ef44fe55
)
2014-06-20 14:43:47 +02:00
Benoit Steiner
774c3c1e0a Created additional unit tests for the tensor code and improved existing ones. 2014-06-13 10:20:28 -07:00
Benoit Steiner
f80c8e17eb Silenced a compilation warning 2014-06-13 10:12:12 -07:00
Benoit Steiner
38ab7e6ed0 Reworked the expression evaluation mechanism in order to make it possible to efficiently compute convolutions and contractions in the future:
* The scheduling of computation is moved out of the assignment code and into a new TensorExecutor class
 * The assignment itself is now a regular node on the expression tree
 * The expression evaluators start by recursively evaluating all their subexpressions if needed
2014-06-13 09:56:51 -07:00
Benoit Steiner
aa664eabb9 Fixed a few compilation errors. 2014-06-10 10:31:29 -07:00
Benoit Steiner
4304c73542 Pulled latest updates from the Eigen main trunk. 2014-06-10 10:23:32 -07:00
Benoit Steiner
925fb6b937 TensorEval are now typed on the device: this will make it possible to use partial template specialization to optimize the strategy of each evaluator for each device type.
Started work on partial evaluations.
2014-06-10 09:14:44 -07:00
Benoit Steiner
a77458a8ff Fixes compilation errors triggered when compiling the tensor contraction code with cxx11 enabled. 2014-06-09 10:06:57 -07:00
Benoit Steiner
a669052f12 Improved support for rvalues in tensor expressions. 2014-06-09 09:45:30 -07:00
Benoit Steiner
36a2b2e9dc Prevent the generation of unlaunchable cuda kernels when compiling in debug mode. 2014-06-09 09:43:51 -07:00
Benoit Steiner
2859a31ac8 Fixed compilation error 2014-06-09 09:42:34 -07:00
Benoit Steiner
d13711a363 Pulled latest changes from the main branch 2014-06-09 09:35:04 -07:00
Benoit Steiner
fe102248ac Fixed the threadpool test 2014-06-09 09:19:21 -07:00
Benoit Steiner
8c8ae2d819 Fixed a typo 2014-06-07 11:24:38 -07:00
Benoit Steiner
29aebf96e6 Created the pblend packet primitive and implemented it using SSE and AVX instructions. 2014-06-06 20:18:44 -07:00
Benoit Steiner
79085e08e9 Fixed a typo 2014-06-06 20:16:13 -07:00
Benoit Steiner
a961d72e65 Added support for convolution and reshaping of tensors. 2014-06-06 16:25:16 -07:00
Benoit Steiner
8998f4099e Created additional tests for the tensor code. 2014-06-05 10:49:34 -07:00
Benoit Steiner
6fa6cdd2b9 Added support for tensor contractions
Updated expression evaluation mechanism to also compute the size of the tensor result
Misc fixes and improvements.
2014-06-04 09:21:48 -07:00
Benoit Steiner
736267cf6b Added support for additional tensor operations:
* comparison (<, <=, ==, !=, ...)
  * selection
  * nullary ops such as random or constant generation
  * misc unary ops such as log(), exp(), or a user defined unaryExpr()
Cleaned up the code a little.
2014-05-22 16:22:35 -07:00
Benoit Steiner
7402fea0a8 Vectorized the evaluation of tensor expression (using SSE, AVX, NEON, ...)
Added the ability to parallelize the evaluation of a tensor expression over multiple cpu cores.
Added the ability to offload the evaluation of a tensor expression to a GPU.
2014-05-16 15:08:05 -07:00
Benoit Steiner
0320f7e3a7 Added support for fixed sized tensors.
Improved support for tensor expressions.
2014-05-06 11:18:37 -07:00
Benoit Steiner
c0f2cb016e Extended support for Tensors:
* Added ability to map a region of the memory to a tensor
  * Added basic support for unary and binary coefficient wise expressions, such as addition or square root
  * Provided an emulation layer to make it possible to compile the code with compilers (such as nvcc) that don't support cxx11.
2014-04-28 10:32:27 -07:00
Jitse Niesen
ffc995c9e4 Implement evaluator<ReturnByValue>.
All supported tests pass apart from Sparse and Geometry, except the adjoint_4 test, where a = a.transpose() raises an assert.
2014-04-16 18:16:36 +01:00
Jitse Niesen
b30706bd5c Fix typo in Inverse.h 2014-04-15 22:51:46 +01:00
Jitse Niesen
59f5f155c2 Port products with permutation matrices to evaluators. 2014-04-15 15:21:38 +01:00
Gael Guennebaud
0a6c472335 A bit of cleaning 2014-03-13 15:44:20 +01:00
Gael Guennebaud
aceae8314b Resurrect EvalBeforeNestingBit to control nested_eval 2014-03-12 20:25:36 +01:00
Gael Guennebaud
16d4c7a5e8 Conditionally disable unit tests that are not supported by evaluators yet 2014-03-12 20:23:44 +01:00
Gael Guennebaud
a395024d44 More debug info and use lazyProd instead of operator* to query the right flags 2014-03-12 18:14:58 +01:00
Gael Guennebaud
f74ed34539 Fix regressions in redux_evaluator flags and evaluator<Block> flags 2014-03-12 18:14:08 +01:00
Gael Guennebaud
5e26b7cf9d Extend evaluation traits debugging info 2014-03-12 18:13:18 +01:00
Gael Guennebaud
74b1d79d77 merge default and evaluator branches 2014-03-12 16:24:25 +01:00
Gael Guennebaud
0b362e0c9a This file is not needed anymore 2014-03-12 16:18:54 +01:00
Gael Guennebaud
a6be1952f4 Fix a few regression when moving the flags 2014-03-12 16:18:34 +01:00
Gael Guennebaud
0bd5671b9e Fix Eigenvalues module 2014-03-12 13:35:44 +01:00
Gael Guennebaud
8dd3b716e3 Move evaluation related flags from traits to evaluator and fix evaluators of MapBase and Replicate 2014-03-12 13:34:11 +01:00
Gael Guennebaud
7eefdb948c Migrate JacobiSVD to Solver 2014-03-11 13:43:46 +01:00
Gael Guennebaud
082f7ddc37 Port Cholesky module to evaluators 2014-03-11 13:33:44 +01:00
Gael Guennebaud
9be72cda2a Port QR module to Solve/Inverse 2014-03-11 11:47:32 +01:00
Gael Guennebaud
ae40583965 Fix CoeffReadCost issues 2014-03-11 11:47:14 +01:00
Gael Guennebaud
5806e73800 It is not clear what XprType::Nested should be, so let's use nested<Xpr>::type as much as possible 2014-03-11 11:44:11 +01:00
Gael Guennebaud
2bf63c6b4a Even ReturnByValue should not evaluate when assembling the expression 2014-03-11 11:42:07 +01:00
Gael Guennebaud
da6ec81282 Move CoeffReadCost mechanism to evaluators 2014-03-10 23:24:40 +01:00
Gael Guennebaud
354bd8a428 Hide legacy dense assignment routines with EIGEN_TEST_EVALUATORS 2014-03-10 09:30:58 +01:00
Gael Guennebaud
5c0f294098 Fix evaluators unit test (i.e., when only EIGEN_ENABLE_EVALUATORS is defined) 2014-03-10 09:28:00 +01:00
Gael Guennebaud
cbc572caf7 Split LU/Inverse.h to Core/Inverse.h for the generic Inverse expression, and LU/InverseImpl.h for the dense implementation of dense.inverse() 2014-02-24 11:49:30 +01:00
Gael Guennebaud
1e0c2f6ddb Hide some deprecated classes. 2014-02-24 11:41:19 +01:00
Gael Guennebaud
c98881e130 By-pass ProductBase for triangular and selfadjoint products and get rid of ProductBase 2014-02-23 22:51:13 +01:00
Gael Guennebaud
d67548f345 Get rid of GeneralProduct<> for GemvProduct 2014-02-21 17:13:28 +01:00
Gael Guennebaud
6c7ab50811 Get rid of GeneralProduct<> for GemmProduct 2014-02-21 16:43:03 +01:00
Gael Guennebaud
728c3d2cb9 Get rid of GeneralProduct for outer-products, and get rid of ScaledProduct 2014-02-21 16:27:24 +01:00
Gael Guennebaud
af31b6c37a Generalize evaluator<Inverse<>> such that there is no need to specialize it 2014-02-21 15:22:08 +01:00
Gael Guennebaud
93125e372d Port LU module to evaluators (except image() and kernel()) 2014-02-20 15:26:15 +01:00
Gael Guennebaud
b2e1453e1e Some bit flags and internal structures are deprecated 2014-02-20 15:25:06 +01:00
Gael Guennebaud
9621333545 Fix dimension of Solve expression 2014-02-20 15:24:21 +01:00
Gael Guennebaud
5f6ec95291 Propagate LvalueBit flag to TriangularView 2014-02-20 15:24:00 +01:00
Gael Guennebaud
ecd2c8f37b Add general Inverse<> expression with evaluator 2014-02-20 14:18:24 +01:00
Gael Guennebaud
2eee6eaf3c Fix mixing scalar types with evaluators 2014-02-19 16:30:17 +01:00
Gael Guennebaud
8af02d19b2 ExprType::Nested has a new meaning now... 2014-02-19 15:16:11 +01:00
Gael Guennebaud
95b0a6707b evaluator<Replicate> must evaluate its argument to avoid redundant evaluations 2014-02-19 14:51:46 +01:00
Gael Guennebaud
b1ab6a8e0b Add missing assertion in swap() 2014-02-19 14:06:35 +01:00
Gael Guennebaud
61cff28618 Disable Flagged and ForceAlignedAccess 2014-02-19 14:05:56 +01:00
Gael Guennebaud
68e8ddaf94 Fix vectorization logic wrt assignment functors 2014-02-19 13:26:07 +01:00
Gael Guennebaud
3a735a6cf1 Fix lazy evaluation in Ref 2014-02-19 13:17:41 +01:00
Gael Guennebaud
ccc41128fb Add a Solve expression for uniform treatment of solve() methods. 2014-02-19 11:33:29 +01:00
Gael Guennebaud
b3a07eecc5 Fix CoeffReadCost of products to handle Dynamic costs 2014-02-19 11:32:04 +01:00
Gael Guennebaud
c16b80746a isApprox must honor nested_eval 2014-02-19 11:30:58 +01:00
Gael Guennebaud
5b78780def Add evaluator shortcut for triangular ?= product 2014-02-18 17:43:16 +01:00
Gael Guennebaud
8169c6ac59 Simplify implementation of coeff-based products to fully exploit our reduction mechanisms.
If this results in performance regressions, then we should optimize reduction rather than
somehow duplicate the code.
2014-02-18 16:57:25 +01:00
Gael Guennebaud
463554c254 Merge with default branch 2014-02-18 15:45:39 +01:00
Gael Guennebaud
82c066b3c4 Cleaning 2014-02-18 15:44:32 +01:00
Gael Guennebaud
0543cb51b5 Product::coeff method are also OK for lazy products (including diagonal products) 2014-02-18 14:51:41 +01:00
Gael Guennebaud
99e27916cf Fix all()/any() for evaluators 2014-02-18 14:26:25 +01:00
Gael Guennebaud
06545058bb Temporary workaround for permutations 2014-02-18 13:33:04 +01:00
Gael Guennebaud
7002aa858f Support Product::coeff(0,0) even for dynamic matrices 2014-02-18 13:32:30 +01:00
Gael Guennebaud
8cfb138e73 Finally, the simplest option remains to defer resizing as late as possible 2014-02-18 13:31:44 +01:00
Gael Guennebaud
1b5de5a37b Add evaluator for Ref 2014-02-18 13:30:16 +01:00
Gael Guennebaud
a08cba6b5f Move is_diagonal to XprHelper, forward declare Ref 2014-02-18 11:03:59 +01:00
Gael Guennebaud
573c587e3d New design for handling automatic transposition 2014-02-18 10:53:14 +01:00
Gael Guennebaud
551bf5c66a Get rid of DiagonalProduct 2014-02-18 10:52:26 +01:00
Gael Guennebaud
2d136d3d7f Get rid of SelfCwiseBinaryOp 2014-02-18 10:52:00 +01:00
Gael Guennebaud
873401032b Fix scalar * product optimization when 'product' includes a selfadjoint matrix 2014-02-17 19:00:45 +01:00
Gael Guennebaud
d595fd31f5 Deal with automatic transposition in call_assignment, fix a few shortcomings 2014-02-17 16:11:55 +01:00
Gael Guennebaud
bffa15142c Add evaluator support for diagonal products 2014-02-17 16:10:55 +01:00
Gael Guennebaud
94acccc126 Fix Random().normalized() by introducing a nested_eval helper (recall that the old nested<> class is deprecated) 2014-01-26 15:35:44 +01:00
Gael Guennebaud
34694d8828 Fix evaluator<Replicate> for fixed size objects 2014-01-26 15:34:26 +01:00
Gael Guennebaud
ee1c55f923 Add missing template keyword 2014-01-26 14:55:25 +01:00
Gael Guennebaud
f54e62e4a9 Port evaluation from selfadjoint to full to evaluators 2014-01-26 12:18:36 +01:00
Gael Guennebaud
5fa7262e4c Refactor triangular assignment 2014-01-25 23:02:14 +01:00
Gael Guennebaud
fef534f52e fix scalar * prod in evaluators unit test 2014-01-25 19:06:07 +01:00
Gael Guennebaud
d357bbd9c0 Fix a few regressions regarding temporaries and products 2013-12-14 22:53:47 +01:00
Gael Guennebaud
27c068e9d6 Make selfadjoint products use evaluators 2013-12-13 18:09:07 +01:00
Gael Guennebaud
e94fe4cc3e fix resizing in noalias for blocks, and make -=/+= use evaluators 2013-12-13 18:06:58 +01:00
Gael Guennebaud
2ca0ccd2f2 Add support for triangular products with evaluators 2013-12-07 17:17:47 +01:00
Gael Guennebaud
8d8acc3ab4 Move inner product special functions to a base class to avoid ambiguous calls 2013-12-04 22:58:19 +01:00
Gael Guennebaud
6c5e915e9a Enable use of evaluators for noalias and lazyProduct, add conversion to scalar for inner products 2013-12-03 17:17:53 +01:00
Gael Guennebaud
f0b82c3ab9 Make reductions compatible with evaluators 2013-12-02 17:54:38 +01:00
Gael Guennebaud
6f1a0479b3 fix a typo in triangular assignment 2013-12-02 17:54:15 +01:00
Gael Guennebaud
b5fd774775 Fix flags of Product<> 2013-12-02 17:53:26 +01:00
Gael Guennebaud
34ca81b1bf Add direct assignment of products 2013-12-02 16:37:58 +01:00
Gael Guennebaud
7f917807c6 Fix product evaluator when TEST_EVALUATOR is not ON 2013-12-02 16:19:14 +01:00
Gael Guennebaud
8af1ba5346 Make swap unit test work with evaluators 2013-12-02 15:07:45 +01:00
Gael Guennebaud
c6f7337032 Get rid of call_dense_swap_loop 2013-12-02 14:44:13 +01:00
Gael Guennebaud
626821b0e3 Add evaluator/assignment to TriangularView expressions 2013-12-02 14:06:17 +01:00
Gael Guennebaud
27ca9437a1 Fix usage of Dense versus DenseShape 2013-12-02 14:05:34 +01:00
Gael Guennebaud
d0261bd26c Fix swap in DenseBase 2013-11-30 10:42:23 +01:00
Gael Guennebaud
c15c65990f First step toward the generalization of evaluators to triangular, sparse and other fanciness.
Remove the product_tag template parameter from Product.
2013-11-29 17:50:59 +01:00
Gael Guennebaud
fb6e32a62f Get rid of evaluator_impl 2013-11-29 16:45:47 +01:00
Gael Guennebaud
d331def6cc add definition of product_tag 2013-11-29 16:18:22 +01:00
Gael Guennebaud
5584275325 Remove HasEvalTo and all at once eval mode 2013-11-29 13:38:59 +01:00
Gael Guennebaud
cc6dd878ee Refactor dense product evaluators 2013-11-27 17:32:57 +01:00
Gael Guennebaud
fc6ecebc69 Simplify evaluator of EvalToTemp 2013-11-27 11:32:07 +01:00
Gael Guennebaud
230f5c3aa9 Evaluator: introduce the main Assignment class, add call_assignment to bypass NoAlias and AssumeAliasing, and some bits of cleaning 2013-11-25 15:20:31 +01:00
Gael Guennebaud
0c4fc69d62 JacobiSVD: move from Lapack to Matlab strategy for the default threshold
(grafted from 019dcfc21d
)
2013-11-03 13:18:56 +01:00
Gael Guennebaud
e16e52d493 Add a rank method with threshold control to JacobiSVD, and make solve uses it to return the minimal norm solution for rank-deficient problems
(grafted from bbd49d194a
)
2013-11-01 18:21:46 +01:00
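
An illustration of the threshold-controlled rank and the minimal-norm solve on a deliberately rank-deficient system (values made up; setThreshold and rank are the interfaces named in the commit above):

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::MatrixXd A(3, 3);
      A << 1, 2, 3,
           2, 4, 6,   // second row is twice the first: rank <= 2
           1, 0, 1;
      Eigen::JacobiSVD<Eigen::MatrixXd> svd(A, Eigen::ComputeThinU | Eigen::ComputeThinV);
      svd.setThreshold(1e-10);  // singular values below this relative level count as zero
      std::cout << "rank = " << svd.rank() << "\n";
      Eigen::Vector3d b(1, 2, 1);
      Eigen::Vector3d x = svd.solve(b);  // minimal-norm least-squares solution
      std::cout << "x = " << x.transpose() << "\n";
    }
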
Gael Guennebaud
c49421a82b The BLAS interface is complete.
(grafted from abc1ca0af1
)
2014-06-06 11:21:19 +02:00
Gael Guennebaud
ccd7beba90 Fix bug #738: use the "current" version of cmake project directories to ease the inclusion of Eigen within other projects. 2014-06-06 11:06:44 +02:00
Gael Guennebaud
84a99f3a93 Enable LinearAccessBit in Block expression for inner-panels 2014-06-06 11:02:20 +02:00
Gael Guennebaud
43c2747e92 Allow EIGEN_STACK_ALLOCATION_LIMIT to be 0 for no limit
(transplanted from d9381598bc
)
2013-08-21 14:29:00 +02:00
Gael Guennebaud
3c5e82ee0b Make the static assertions on maximal fixed-size objects use EIGEN_STACK_ALLOCATION_LIMIT, and raise its default value to 128KB
(transplanted from 7bca2910c7
)
2013-08-20 13:59:33 +02:00
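
A sketch of how the limit is meant to be used: it is a byte count checked by a static assertion on fixed-size objects, 0 disables the check entirely, and per the commits above the default is 128KB (the 512x512 size below is an arbitrary example):

    #define EIGEN_STACK_ALLOCATION_LIMIT 0  // 0 = no limit; must precede Eigen includes
    #include <Eigen/Dense>

    int main() {
      // 512*512 doubles = 2MB on the stack: would trip the default 128KB
      // static assertion, but compiles with the limit disabled. Beware of
      // real stack overflows when doing this.
      Eigen::Matrix<double, 512, 512> big;
      big.setZero();
      return 0;
    }
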
Gael Guennebaud
d132159ba3 Fix bug #819: include path of details.h
(grafted from 0f1e321dd4
)
2014-06-04 11:58:01 +02:00
Jitse Niesen
075b1168b4 Fix documentation of FullPivLU regarding permutation matrices (bug #815).
(transplanted from 64be8659f606970211ef83f12ebd401648c9685c)
2014-05-31 23:05:18 +01:00
Pavel Holoborodko
be027bede8 Fixed bug #647 by using smart_copy instead of bitwise memcpy.
(transplanted from 1472f4bc61
)
2013-08-25 18:02:07 +09:00
Mark Borgerding
f1ed1b7d11 added conjugate 2014-05-26 08:08:28 -04:00
Gael Guennebaud
20b0747bdb Document how to reproduce matlab's rot90
(transplanted from 5d1291a4de
)
2013-11-19 11:51:16 +01:00
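
The documented recipe boils down to a transpose followed by an up-down flip; a small sketch (Matlab's rot90 rotates 90 degrees counter-clockwise):

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::MatrixXi M(2, 3);
      M << 1, 2, 3,
           4, 5, 6;
      Eigen::MatrixXi R = M.transpose().colwise().reverse();  // rot90(M)
      std::cout << R << "\n";  // rows: 3 6 / 2 5 / 1 4
    }
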
Mark Borgerding
11462c1a29 AsciiQuickReference: added .real(), .imag() 2014-05-16 13:45:35 -04:00
Mark Borgerding
e667819055 fixed AsciiQuickReference typo: LinSpace -> LinSpaced 2014-05-08 15:14:12 -04:00
Christoph Hertzberg
35c9f8779d Fix bug #807: Missing scalar type cast in umeyama()
(transplanted from b4beba72a2
)
2014-05-05 14:23:52 +02:00
Christoph Hertzberg
da81e863e2 Fixed bug #806: Missing scalar type cast in Quaternion::setFromTwoVectors()
(transplanted from b5e3d76aa5
)
2014-05-05 14:22:27 +02:00
Gael Guennebaud
c5c4269961 Fix bug #803: avoid char* to int* conversion
(grafted from 07986189b7
)
2014-05-01 23:03:54 +02:00
Mark Borgerding
b734863536 Check IMKL version for compatibility with Eigen (applying changeset e0dbb68c2f
to 3.2 branch)
2014-04-25 12:44:47 -04:00
Jitse Niesen
1046ea7a89 doc: Note that dm2 = sm1 + dm1 is not possible (see bug #632). 2014-04-07 13:49:51 +01:00
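
What the note warns about, together with the workaround the documentation suggested at the time (sketch; matrices made up):

    #include <Eigen/Dense>
    #include <Eigen/Sparse>

    int main() {
      Eigen::SparseMatrix<double> sm1(2, 2);
      sm1.insert(0, 0) = 1.0;
      Eigen::MatrixXd dm1 = Eigen::MatrixXd::Ones(2, 2), dm2;
      // dm2 = sm1 + dm1;  // does not compile in the 3.2 series (bug #632)
      dm2 = dm1;
      dm2 += sm1;          // accumulating a sparse matrix into a dense one works
      return 0;
    }
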
Christoph Hertzberg
8b10081dea Make some actual verifications inside the autodiff unit test
(transplanted from 1cb8de1250
)
2014-04-01 17:44:48 +02:00
Mark Borgerding
042bd9cbe2 immintrin.h did not come until Intel version 11 2014-03-26 22:23:08 -04:00
Christoph Hertzberg
93e867b63c Fix bug #222. Make temporary matrix column-major independently of EIGEN_DEFAULT_TO_ROW_MAJOR
(transplanted from 60cd361ebe
)
2014-03-26 17:48:30 +01:00
Mark Borgerding
e702934dfa fixed ColPivHouseholderQR<>::rank (part of bbd49d194a
)
2014-03-20 14:25:50 -04:00
Gael Guennebaud
eef44fb2a5 Relax Ref such that Ref<MatrixXf> accepts a RowVectorXf which can be seen as a degenerate MatrixXf(1,N)
(grafted from bb4b67cf39
)
2014-03-13 18:04:19 +01:00
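
A sketch of what the relaxation permits: a row vector now binds to a Ref<MatrixXf> parameter as a degenerate 1-by-N matrix, without a copy (the helper function name is made up):

    #include <Eigen/Dense>
    #include <iostream>

    void scaleInPlace(Eigen::Ref<Eigen::MatrixXf> m) { m *= 2.f; }  // hypothetical helper

    int main() {
      Eigen::RowVectorXf r = Eigen::RowVectorXf::Ones(4);
      scaleInPlace(r);         // accepted after this change
      std::cout << r << "\n";  // 2 2 2 2
    }
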
Christoph Hertzberg
eb9c8cffd6 bug #755: CommaInitializer produced wrong assertions in absence of ReturnValueOptimization. 2014-03-12 14:00:18 +01:00
Christoph Hertzberg
240e2f4162 bug #759: Removed hard-coded double-math from Quaternion::angularDistance.
Some documentation improvements
(transplanted from 88aa18df64
)
2014-03-12 13:43:19 +01:00
Christoph Hertzberg
b0702dca05 Fixed bug #754. Only inserted (!defined(_WIN32_WCE)) analogous to the alloc and free implementation (not tested, but should be correct).
(transplanted from d5cc083782
)
2014-03-05 14:50:00 +01:00
Gael Guennebaud
7191f31961 swap 3.2 <-> default CTestConfig.cmake file 2014-03-05 10:07:54 +01:00
Christoph Hertzberg
6d7bd066e0 Regression test for bug #752
(transplanted from 41e89c73c7
)
2014-02-27 12:57:24 +01:00
Jitse Niesen
66078fbd58 Added tag 3.2.1 for changeset 4e80704c53 2014-02-26 15:35:39 +00:00
Jitse Niesen
4e80704c53 Bump version number to 3.2.1 2014-02-26 15:35:18 +00:00
Christoph Hertzberg
043ece9730 Make pivoting HouseholderQR compatible with custom scalar types
(transplanted from 6b6071866b
)
2014-02-25 18:55:16 +01:00
Gael Guennebaud
48db2b8799 Implement bug #317: use a template function call to suppress unused variable warnings. 2014-02-24 18:18:52 +01:00
Jitse Niesen
593a82202f Fix bug #748 - array_5 test fails for seed 1392781168.
(grafted from 6fecb6f1b6
)
2014-02-24 14:10:17 +00:00
Christoph Hertzberg
f24ba33c2d Specify what non-resizeable objects are in transposeInPlace and adjointInPlace (cf bug #749)
(transplanted from 3e439889e0
)
2014-02-24 13:12:10 +01:00
Gael Guennebaud
ef807ea020 Mark Eigen2 support deprecated 2014-02-20 09:35:50 +01:00
Gael Guennebaud
da19c48d61 Fix typo 2014-02-20 09:06:06 +01:00
Gael Guennebaud
cef49d21f0 More int versus Index fixes
(grafted from 5960befc20
)
2014-02-19 21:42:29 +01:00
Christoph Hertzberg
53726663c7 Relaxed umeyama test. Problem was ill-posed if the linear part was scaled with a very small number. This should fix bug #744.
(transplanted from b14a4628af
)
2014-02-17 13:48:00 +01:00
Gael Guennebaud
2ad3dac422 Fix sparse_product/sparse_extra unit tests
(grafted from ed461ba9bc
)
2014-02-17 09:57:47 +01:00
Gael Guennebaud
e3d34064bf Fix FFTW unit test with clang
(grafted from 3bb57e21a8
)
2014-02-17 09:56:46 +01:00
Gael Guennebaud
3f5591981f Fix a few buggy Index-to-int conversions
(grafted from 4b6b3f310f
)
2014-02-15 09:35:23 +01:00
Gael Guennebaud
6def9fd52b Fix propagation of index type
(grafted from 0b1430ae10
)
2014-02-13 23:58:28 +01:00
Gael Guennebaud
76ee39485f Fix infinite loop in SparseLU
(grafted from cd606bbc94
)
2014-02-14 23:10:16 +01:00
Gael Guennebaud
0c6b931cbc Fix enumeral mismatch warning 2014-02-14 22:10:39 +01:00
Gael Guennebaud
fd96ff166d alloca is not necessarily aligned on Windows
(grafted from 97965dde9b
)
2014-02-14 00:04:38 +01:00
Gael Guennebaud
9a09b75df3 Fix stable_norm unit test for complexes
(grafted from 0715d49908
)
2014-02-13 15:49:54 +01:00
Gael Guennebaud
52dc1d7ffd Fix bug #740: overflow issue in stableNorm
(grafted from 3291580630
)
2014-02-13 15:44:01 +01:00
Gael Guennebaud
24e33a0d86 Fix Fortran compiler detection
(grafted from 14422decc2
)
2014-02-13 09:21:13 +01:00
Jitse Niesen
b5333b6760 Fix documentation of MatrixBase::applyOnTheLeft (bug #739)
Add examples; move methods from EigenBase.h to MatrixBase.h
(grafted from 7ea6ef8969
)
2014-02-12 14:03:39 +00:00
Gael Guennebaud
6a4489c523 fix compilation of Transform * UniformScaling
(grafted from 31c63ef0b4
)
2014-02-12 13:37:23 +01:00
Christoph Hertzberg
7958d92c23 Added examples for casting, made better examples for Maps
(transplanted from e170e7070b
)
2014-02-11 17:27:14 +01:00
Jitse Niesen
044f27546f Fix bug #736: LDLT isPositive returns false for a positive semidefinite matrix
Add unit test covering this case.
(grafted from ff8d81762d
)
2014-02-06 11:06:06 +00:00
Christoph Hertzberg
cd4ea5151f Fix bug #730: Path of OpenGL headers is different on MacOS
(transplanted from febfc7b9b4
)
2014-01-29 22:05:39 +01:00
Gael Guennebaud
f9276f9f90 Remove useless register keyword 2014-01-25 16:57:49 +01:00
Anton Gladky
88ec3fdef4 Port unsupported constrained CG to Eigen3
(grafted from 4cd4be97a7
)
2014-01-15 17:49:52 +01:00
Gael Guennebaud
5b93c59198 QuaternionBase::slerp was documented twice and one explanation was ambiguous.
(grafted from 548216b7ca
)
2014-01-12 11:09:06 +01:00
Christoph Hertzberg
fd5be2f9cc Merge with 598776b088 2013-12-21 21:27:10 +01:00
Christoph Hertzberg
598776b088 Fixed typos in comments
(transplanted from 8a49dd5626
)
2013-12-19 11:55:17 +01:00
Márton Danóczy
cdedc9e90d Added optional run-time size parameters to fixed-size block methods 2013-12-17 01:05:05 +01:00
Christoph Hertzberg
7c1fc0ee7c Fixed and simplified Matlab code and added further block-related examples
(transplanted from 276801b25a
)
2013-11-29 19:54:01 +01:00
Christoph Hertzberg
baf2b13589 Fix bug #609: Euler angles are in Range [0:pi]x[-pi:pi]x[-pi:pi].
Now the unit test verifies this (also that it is bijective in this range).
2013-11-29 19:42:11 +01:00
Gael Guennebaud
12504a79d1 Fix bug #708: add placement new/delete for array
(transplanted from 49034d1570
)
2013-11-27 09:46:59 +01:00
Gael Guennebaud
ae360a9ec0 Fix FullPivHouseholderQR ctors for non-square fixed-size matrix types
(grafted from 28b2abdbea
)
2013-11-19 12:53:46 +01:00
Gael Guennebaud
516304cd90 Workaround fixing aliasing issue in x = SparseLU::solve(x)
(transplanted from 46dd1bb1be
)
2013-11-15 11:19:19 +01:00
Gael Guennebaud
4c5da3b03a fix overflow and ambiguity in SparseLU memory allocation
(transplanted from 6b471f205e
)
2013-11-15 10:59:19 +01:00
Christoph Hertzberg
b8020d11de Implement boolean reductions for zero-sized objects
(grafted from e59b38abef
)
2013-11-13 16:47:02 +01:00
Gael Guennebaud
6b931b3e47 JacobiSVD: fix a 0/0 issue for complexes
(transplanted from a236e15048
)
2013-11-04 23:58:18 +01:00
Gael Guennebaud
d21708172a SparseLU: fix estimated non-zeros in U
(transplanted from 7c9cdd6030
)
2013-11-05 00:12:14 +01:00
Gael Guennebaud
8946e0cb80 Fix changeset 2702788da7
for fixed size matrices
(transplanted from 8f496cd3a3
)
2013-11-01 18:17:55 +01:00
Gael Guennebaud
bf9747b9ff Fix bug #678: vectors of row and column transpositions were not properly resized in FullPivQR
(grafted from 2702788da7
)
2013-10-29 18:02:18 +01:00
Christoph Hertzberg
a5522a1381 Use aligned loads in Matrix-Vector product where possible. Fixes bug #689 2013-10-29 12:42:46 +01:00
Martinho Fernandes
d646cc95ad Fix bug #503
C++11 support on simple allocators comes for free. `aligned_allocator` does not
need to add any `construct` overloads to work with C++11 compilers.
(grafted from a1f056cf2a
)
2013-09-10 17:08:04 +02:00
Gael Guennebaud
8ea9e762d6 Fix bug #672: use exceptions in SuperLU only if they are enabled
(grafted from 90b5d303db
)
2013-10-29 11:26:52 +01:00
vanhoucke
0a44b5249c Silence unused variable warning.
(grafted from 3736e00ae7
)
2013-10-04 00:21:03 +00:00
Thomas Capricelli
fbc5beadc8 simplify/uniformize eigen_gen_docs 2013-10-18 12:56:44 +02:00
Christoph Hertzberg
b2368b3408 Copy all format flags (not only precision) from actual output stream when calculating the maximal width 2013-10-17 14:30:09 +02:00
Christoph Hertzberg
965ee4e853 consider all columns for aligned output (fixes bug #616) 2013-10-17 14:14:06 +02:00
Christoph Hertzberg
d51c9f1e93 Fixes bug #681
Also fixed some spelling issues in the documentation
2013-10-17 00:03:00 +02:00
Christoph Hertzberg
56f4144035 Use != instead of < to check for emptiness of iterator range (fixes bug #664) 2013-10-16 13:10:15 +02:00
Christoph Hertzberg
609ef90213 Make index type of Triplet default to SparseMatrix::Index as suggested by Kolja Brix. Fixes bug #665. 2013-10-16 13:08:09 +02:00
Gael Guennebaud
f407a86a3f Allow .conservativeResize(rows,cols) on vectors
(grafted from b433fb2857
)
2013-10-16 12:07:33 +02:00
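
An illustration of the newly allowed calls: both the one- and two-argument forms now work on vectors, preserving existing coefficients (newly created ones are left uninitialized):

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::VectorXd v(3);
      v << 1, 2, 3;
      v.conservativeResize(5);     // keeps 1,2,3; the two new entries are uninitialized
      v.conservativeResize(5, 1);  // the (rows,cols) form is now legal on vectors too
      std::cout << v.head(3).transpose() << "\n";  // 1 2 3
    }
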
Gael Guennebaud
0257cf1cef bug #679: add respective unit test
(transplanted from 2c0303c89e
)
2013-10-15 23:51:01 +02:00
Christoph Hertzberg
941319a198 Fix bug #679 2013-10-15 19:09:09 +02:00
Thomas Capricelli
273a952099 uniformize piwik code among branches 2013-10-11 20:45:21 +02:00
Desire NUENTSA
551d20a824 Fix SPQR Solve() when assigning to a Map object
(grafted from 54e576c88a
)
2013-09-26 15:00:22 +02:00
Desire NUENTSA
f5ed3421e9 Fix leaked memory for successive calls to SPQR
(grafted from fe19f972e1
)
2013-09-24 15:56:56 +02:00
Gael Guennebaud
945b0802c9 Reduce explicit zeros when applying SparseQR's matrix Q
(grafted from 00dc45d0f9
)
2013-09-20 23:28:10 +02:00
Desire NUENTSA
2a0ca0131d Fix assert bug in sparseQR
(grafted from bd21c82a94
)
2013-09-20 18:49:32 +02:00
Pavel Holoborodko
6f7f0ab6c2 Removed unnecessary parentheses 2013-08-20 16:06:13 +09:00
Pavel Holoborodko
68069af969 Added support for custom scalars 2013-08-20 15:00:28 +09:00
Hauke Heibel
af74b16b0f Removed non-standard conforming (17.4.3.1.2/1) leading underscore.
(grafted from b1f4601bf9
)
2013-07-30 08:05:10 +02:00
Gael Guennebaud
f707f15842 Fix elimination tree and SparseQR with rows<cols
(grafted from 1b4623e713
)
2013-09-12 22:16:35 +02:00
Gael Guennebaud
a443b3d98d Fix bug #654: allow implicit transposition in Array to Matrix and Matrix to Array constructors
(grafted from 07417bd03f
)
2013-09-07 00:01:04 +02:00
Gael Guennebaud
811ec5bfcb Another compilation fix with ICC/MSVC combo
(grafted from eda2f8948a
)
2013-09-03 21:42:59 +02:00
Gael Guennebaud
31d40ebc9d Fix compilation with ICC/MSVC combo
(grafted from 1b8394f71f
)
2013-08-21 15:28:53 +02:00
Gael Guennebaud
0c5f4fd8da Make FullPivHouseholderQR::solve return the least-squares solution instead of aborting if no exact solution exists
(grafted from 150c9fe536)
2013-08-20 11:52:48 +02:00
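Sketch of the new behavior (the random 4x2 system is purely illustrative): an overdetermined system no longer triggers an assert, and solve() returns the least-squares solution:

#include <iostream>
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 2); // overdetermined: no exact solution in general
  Eigen::VectorXd b = Eigen::VectorXd::Random(4);
  Eigen::VectorXd x = A.fullPivHouseholderQr().solve(b); // least-squares solution instead of aborting
  std::cout << "residual norm: " << (A * x - b).norm() << std::endl;
  return 0;
}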
Gael Guennebaud
2b50ade6ca Fix bug #642: add vectorization of sqrt for doubles, and make sqrt really safe if EIGEN_FAST_MATH is disabled
(grafted from d4dd6aaed2 and c47010e3d2)
2013-08-19 16:02:27 +02:00
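For illustration (array contents arbitrary), the coefficient-wise sqrt paths this commit touches:

#include <Eigen/Dense>

int main()
{
  Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(8, 0.0, 7.0);
  Eigen::ArrayXd r = a.sqrt();                // now vectorized for double as well
  Eigen::VectorXd v = r.matrix().cwiseSqrt(); // same kernel through the matrix API
  return v.size() == 8 ? 0 : 1;
}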
Gael Guennebaud
f9149f9ba0 Fix broken link on transforming normals
(transplanted from ace2ed7b87)
2013-08-12 13:38:25 +02:00
Gael Guennebaud
76d05e8236 bug #638: fix typos in sparse tutorial
(transplanted from 956251b738)
2013-08-12 13:37:47 +02:00
Gael Guennebaud
fa81676d64 Fix cost evaluation of partial reductions -> improve performance of vectorwise/replicate expressions involving partial reductions
(transplanted from bffdc491b3)
2013-08-11 19:21:43 +02:00
Gael Guennebaud
b56348046f Ref<> objects must be nested by reference because they potentially store a temporary object
(transplanted from 6719e56b5b)
2013-08-11 17:52:43 +02:00
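A sketch of why the nesting matters (the helper function and sizes are hypothetical): Ref<const ...> may evaluate a compound expression into a temporary it owns, so it must be nested by reference to keep that temporary alive:

#include <Eigen/Dense>

// Hypothetical helper accepting any vector-like argument without copying when possible.
double firstCoeff(const Eigen::Ref<const Eigen::VectorXd>& v)
{
  return v(0);
}

int main()
{
  Eigen::MatrixXd m = Eigen::MatrixXd::Random(4, 4);
  // The sum expression is evaluated into a temporary stored inside the Ref;
  // nesting Ref by value could leave a dead reference to that temporary.
  double c = firstCoeff(m.col(1) + m.col(2));
  return c == c ? 0 : 1;
}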
Jitse Niesen
47a7de7b53 QuickReference.dox: std::tan(array) --> tan(array), same for other functions.
(transplanted from c13e9bbabf)
2013-08-11 10:17:23 +01:00
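What the corrected QuickReference entries look like in code (values arbitrary): the coefficient-wise free functions live in namespace Eigen, not std:

#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  ArrayXd a = ArrayXd::LinSpaced(5, 0.0, 1.0);
  ArrayXd t = tan(a);  // Eigen::tan, found unqualified; std::tan(a) does not exist
  ArrayXd s = a.sin(); // member form, equivalent to sin(a)
  return (t.size() == s.size()) ? 0 : 1;
}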
Jitse Niesen
8607779757 Remove LinearLeastSquares.dox, which should not have been added.
Accidentally included in changeset e37ff98bbb.
(transplanted from 2f0faf117e)
2013-08-06 08:03:39 +01:00
Gael Guennebaud
be71c46a3c Fix bug #635: add isCompressed to MappedSparseMatrix for compatibility
(transplanted from b72a686830)
2013-08-02 11:11:21 +02:00
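Sketch of the predicate on the plain SparseMatrix side (bug #635 makes a MappedSparseMatrix expose the same method for API compatibility):

#include <Eigen/Sparse>

int main()
{
  Eigen::SparseMatrix<double> A(3, 3);
  A.insert(0, 0) = 1.0;           // direct insertion leaves the matrix uncompressed
  bool before = A.isCompressed(); // false
  A.makeCompressed();
  bool after = A.isCompressed();  // true
  return (!before && after) ? 0 : 1;
}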
Gael Guennebaud
4219db123e reduce cancellation probability
(transplanted from e90229a429)
2013-08-02 00:36:06 +02:00
Gael Guennebaud
f003a6df38 Added tag 3.2.0 for changeset 56f9b810ab 2013-07-23 18:49:47 -07:00
Gael Guennebaud
56f9b810ab bump to 3.2 2013-07-23 18:48:35 -07:00
Gael Guennebaud
12815309a6 Added tag 3.2-rc2 for changeset 207747a518 2013-07-19 16:59:01 +02:00
Gael Guennebaud
207747a518 Bump to 3.2-rc2 2013-07-19 16:58:51 +02:00
Gael Guennebaud
5ecfdf2c00 Fix ICE with ICC 11
(transplanted from 660b905e12)
2013-07-19 11:46:54 +02:00
Gael Guennebaud
e788869cf5 Previous isFinite->hasNonFinite change was broken. After discussion let's rename it to allFinite
(transplanted from 4f0bd557a4)
2013-07-18 11:27:04 +02:00
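The renamed predicate in use (a 2x2 example for illustration):

#include <limits>
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd m = Eigen::MatrixXd::Ones(2, 2);
  bool ok = m.allFinite();                 // true: every coefficient is finite
  m(0, 0) = std::numeric_limits<double>::quiet_NaN();
  bool bad = !m.allFinite() && m.hasNaN(); // true after injecting a NaN
  return (ok && bad) ? 0 : 1;
}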
Gael Guennebaud
9df04bcede Rename isFinite to hasNonFinite to avoid future naming collisions.
(transplanted from 6fab4012a3)
2013-07-17 21:13:45 +02:00
Gael Guennebaud
c31606c88a Added tag 3.2-rc1 for changeset 2872d964f4 2013-07-17 10:00:51 +02:00
Gael Guennebaud
2872d964f4 Remove Evaluators in 3.2 branch. 2013-07-17 10:00:36 +02:00
Gael Guennebaud
2c288b3949 Bump to 3.2-rc1 2013-07-17 09:37:52 +02:00
502 changed files with 21691 additions and 21792 deletions

3 .hgeol

@@ -1,9 +1,6 @@
[patterns]
*.sh = LF
*.MINPACK = CRLF
scripts/*.in = LF
debug/msvc/*.dat = CRLF
debug/msvc/*.natvis = CRLF
unsupported/test/mpreal/*.* = CRLF
** = native


@@ -108,8 +108,7 @@ endif()
set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320")
macro(ei_add_cxx_compiler_flag FLAG)
string(REGEX REPLACE "-" "" SFLAG1 ${FLAG})
string(REGEX REPLACE "\\+" "p" SFLAG ${SFLAG1})
string(REGEX REPLACE "-" "" SFLAG ${FLAG})
check_cxx_compiler_flag(${FLAG} COMPILER_SUPPORT_${SFLAG})
if(COMPILER_SUPPORT_${SFLAG})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
@@ -119,7 +118,7 @@ endmacro(ei_add_cxx_compiler_flag)
if(NOT MSVC)
# We assume that other compilers are partly compatible with GNUCC
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
set(CMAKE_CXX_FLAGS_DEBUG "-g3")
set(CMAKE_CXX_FLAGS_RELEASE "-g0 -O2")
@@ -143,9 +142,6 @@ if(NOT MSVC)
ei_add_cxx_compiler_flag("-Wpointer-arith")
ei_add_cxx_compiler_flag("-Wwrite-strings")
ei_add_cxx_compiler_flag("-Wformat-security")
ei_add_cxx_compiler_flag("-Wshorten-64-to-32")
ei_add_cxx_compiler_flag("-Wenum-conversion")
ei_add_cxx_compiler_flag("-Wc++11-extensions")
ei_add_cxx_compiler_flag("-Wno-psabi")
ei_add_cxx_compiler_flag("-Wno-variadic-macros")
@@ -157,7 +153,6 @@ if(NOT MSVC)
ei_add_cxx_compiler_flag("-wd981") # disable ICC's "operands are evaluated in unspecified order" remark
ei_add_cxx_compiler_flag("-wd2304") # disbale ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor
# The -ansi flag must be added last, otherwise it is also used as a linker flag by check_cxx_compiler_flag making it fails
# Moreover we should not set both -strict-ansi and -ansi
check_cxx_compiler_flag("-strict-ansi" COMPILER_SUPPORT_STRICTANSI)
@@ -201,18 +196,6 @@ if(NOT MSVC)
message(STATUS "Enabling SSE4.2 in tests/examples")
endif()
option(EIGEN_TEST_AVX "Enable/Disable AVX in tests/examples" OFF)
if(EIGEN_TEST_AVX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
message(STATUS "Enabling AVX in tests/examples")
endif()
option(EIGEN_TEST_FMA "Enable/Disable FMA in tests/examples" OFF)
if(EIGEN_TEST_FMA)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma")
message(STATUS "Enabling FMA in tests/examples")
endif()
option(EIGEN_TEST_ALTIVEC "Enable/Disable AltiVec in tests/examples" OFF)
if(EIGEN_TEST_ALTIVEC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec")
@@ -301,12 +284,6 @@ if(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT)
message(STATUS "Disabling alignment in tests/examples")
endif()
option(EIGEN_TEST_NO_EXCEPTIONS "Disables C++ exceptions" OFF)
if(EIGEN_TEST_NO_EXCEPTIONS)
ei_add_cxx_compiler_flag("-fno-exceptions")
message(STATUS "Disabling exceptions in tests/examples")
endif()
option(EIGEN_TEST_C++0x "Enables all C++0x features." OFF)
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
@@ -324,7 +301,7 @@ if(EIGEN_INCLUDE_INSTALL_DIR)
)
else()
set(INCLUDE_INSTALL_DIR
"${CMAKE_INSTALL_PREFIX}/include/eigen3"
"include/eigen3"
CACHE INTERNAL
"The directory where we install the header files (internal)"
)
@@ -427,7 +404,7 @@ if(cmake_generator_tolower MATCHES "makefile")
message(STATUS "make install | Install to ${CMAKE_INSTALL_PREFIX}. To change that:")
message(STATUS " | cmake . -DCMAKE_INSTALL_PREFIX=yourpath")
message(STATUS " | Eigen headers will then be installed to:")
message(STATUS " | ${INCLUDE_INSTALL_DIR}")
message(STATUS " | ${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}")
message(STATUS " | To install Eigen headers to a separate location, do:")
message(STATUS " | cmake . -DEIGEN_INCLUDE_INSTALL_DIR=yourpath")
message(STATUS "make doc | Generate the API documentation, requires Doxygen & LaTeX")
@@ -441,31 +418,3 @@ else()
endif()
message(STATUS "")
set ( EIGEN_CONFIG_CMAKE_PATH
lib${LIB_SUFFIX}/cmake/eigen3
CACHE PATH "The directory where the CMake files are installed"
)
if ( NOT IS_ABSOLUTE EIGEN_CONFIG_CMAKE_PATH )
set ( EIGEN_CONFIG_CMAKE_PATH ${CMAKE_INSTALL_PREFIX}/${EIGEN_CONFIG_CMAKE_PATH} )
endif ()
set ( EIGEN_USE_FILE ${EIGEN_CONFIG_CMAKE_PATH}/UseEigen3.cmake )
set ( EIGEN_VERSION_STRING ${EIGEN_VERSION_NUMBER} )
set ( EIGEN_VERSION_MAJOR ${EIGEN_WORLD_VERSION} )
set ( EIGEN_VERSION_MINOR ${EIGEN_MAJOR_VERSION} )
set ( EIGEN_VERSION_PATCH ${EIGEN_MINOR_VERSION} )
set ( EIGEN_DEFINITIONS "")
set ( EIGEN_INCLUDE_DIR ${INCLUDE_INSTALL_DIR} )
set ( EIGEN_INCLUDE_DIRS ${EIGEN_INCLUDE_DIR} )
set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} )
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
@ONLY ESCAPE_QUOTES
)
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
DESTINATION ${EIGEN_CONFIG_CMAKE_PATH}
)


@@ -4,14 +4,10 @@
## # The following are required to use Dart and the Cdash dashboard
## ENABLE_TESTING()
## INCLUDE(CTest)
set(CTEST_PROJECT_NAME "Eigen")
set(CTEST_PROJECT_NAME "Eigen3.2")
set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC")
set(CTEST_DROP_METHOD "http")
set(CTEST_DROP_SITE "manao.inria.fr")
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen")
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen3.2")
set(CTEST_DROP_SITE_CDASH TRUE)
set(CTEST_PROJECT_SUBPROJECTS
Official
Unsupported
)

11 Eigen/Array Normal file

@@ -0,0 +1,11 @@
#ifndef EIGEN_ARRAY_MODULE_H
#define EIGEN_ARRAY_MODULE_H
// include Core first to handle Eigen2 support macros
#include "Core"
#ifndef EIGEN2_SUPPORT
#error The Eigen/Array header no longer exists in Eigen3. All that functionality has moved to Eigen/Core.
#endif
#endif // EIGEN_ARRAY_MODULE_H


@@ -10,11 +10,9 @@
*
*
* This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices.
* Those decompositions are also accessible via the following methods:
* - MatrixBase::llt()
* Those decompositions are accessible via the following MatrixBase methods:
* - MatrixBase::llt(),
* - MatrixBase::ldlt()
* - SelfAdjointView::llt()
* - SelfAdjointView::ldlt()
*
* \code
* #include <Eigen/Cholesky>


@@ -14,42 +14,6 @@
// first thing Eigen does: stop the compiler from committing suicide
#include "src/Core/util/DisableStupidWarnings.h"
// Handle NVCC/CUDA
#ifdef __CUDACC__
// Do not try asserts on CUDA!
#ifndef EIGEN_NO_DEBUG
#define EIGEN_NO_DEBUG
#endif
#ifdef EIGEN_INTERNAL_DEBUGGING
#undef EIGEN_INTERNAL_DEBUGGING
#endif
// Do not try to vectorize on CUDA!
#define EIGEN_DONT_VECTORIZE
// All functions callable from CUDA code must be qualified with __device__
#define EIGEN_DEVICE_FUNC __host__ __device__
#else
#define EIGEN_DEVICE_FUNC
#endif
#if defined(__CUDA_ARCH__)
#define EIGEN_USING_STD_MATH(FUNC) using ::FUNC;
#else
#define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
#endif
#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS)
#define EIGEN_EXCEPTIONS
#endif
#ifdef EIGEN_EXCEPTIONS
#include <new>
#endif
// then include this file where all our macros are defined. It's really important to do it first because
// it's where we do all the alignment settings (platform detection and honoring the user's will if he
// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
@@ -118,16 +82,7 @@
#ifdef __SSE4_2__
#define EIGEN_VECTORIZE_SSE4_2
#endif
#ifdef __AVX__
#define EIGEN_VECTORIZE_AVX
#define EIGEN_VECTORIZE_SSE3
#define EIGEN_VECTORIZE_SSSE3
#define EIGEN_VECTORIZE_SSE4_1
#define EIGEN_VECTORIZE_SSE4_2
#endif
#ifdef __FMA__
#define EIGEN_VECTORIZE_FMA
#endif
// include files
// This extern "C" works around a MINGW-w64 compilation issue
@@ -157,9 +112,6 @@
#ifdef EIGEN_VECTORIZE_SSE4_2
#include <nmmintrin.h>
#endif
#ifdef EIGEN_VECTORIZE_AVX
#include <immintrin.h>
#endif
#endif
} // end extern "C"
#elif defined __ALTIVEC__
@@ -171,7 +123,7 @@
#undef bool
#undef vector
#undef pixel
#elif defined __ARM_NEON__
#elif defined __ARM_NEON
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_NEON
#include <arm_neon.h>
@@ -217,13 +169,19 @@
#include <intrin.h>
#endif
#if defined(_CPPUNWIND) || defined(__EXCEPTIONS)
#define EIGEN_EXCEPTIONS
#endif
#ifdef EIGEN_EXCEPTIONS
#include <new>
#endif
/** \brief Namespace containing all symbols from the %Eigen library. */
namespace Eigen {
inline static const char *SimdInstructionSetsInUse(void) {
#if defined(EIGEN_VECTORIZE_AVX)
return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_2)
#if defined(EIGEN_VECTORIZE_SSE4_2)
return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_1)
return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
@@ -244,9 +202,34 @@ inline static const char *SimdInstructionSetsInUse(void) {
} // end namespace Eigen
#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT
// This will generate an error message:
#error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information
#define STAGE10_FULL_EIGEN2_API 10
#define STAGE20_RESOLVE_API_CONFLICTS 20
#define STAGE30_FULL_EIGEN3_API 30
#define STAGE40_FULL_EIGEN3_STRICTNESS 40
#define STAGE99_NO_EIGEN2_SUPPORT 99
#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS
#define EIGEN2_SUPPORT
#define EIGEN2_SUPPORT_STAGE STAGE40_FULL_EIGEN3_STRICTNESS
#elif defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API
#define EIGEN2_SUPPORT
#define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API
#elif defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS
#define EIGEN2_SUPPORT
#define EIGEN2_SUPPORT_STAGE STAGE20_RESOLVE_API_CONFLICTS
#elif defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API
#define EIGEN2_SUPPORT
#define EIGEN2_SUPPORT_STAGE STAGE10_FULL_EIGEN2_API
#elif defined EIGEN2_SUPPORT
// default to stage 3, that's what it has always meant
#define EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API
#define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API
#else
#define EIGEN2_SUPPORT_STAGE STAGE99_NO_EIGEN2_SUPPORT
#endif
#ifdef EIGEN2_SUPPORT
#undef minor
#endif
// we use size_t frequently and we'll never remember to prepend it with std:: every time just to
@@ -276,13 +259,7 @@ using std::ptrdiff_t;
#include "src/Core/MathFunctions.h"
#include "src/Core/GenericPacketMath.h"
#if defined EIGEN_VECTORIZE_AVX
// Use AVX for floats and doubles, SSE for integers
#include "src/Core/arch/SSE/PacketMath.h"
#include "src/Core/arch/SSE/Complex.h"
#include "src/Core/arch/AVX/PacketMath.h"
#include "src/Core/arch/AVX/Complex.h"
#elif defined EIGEN_VECTORIZE_SSE
#if defined EIGEN_VECTORIZE_SSE
#include "src/Core/arch/SSE/PacketMath.h"
#include "src/Core/arch/SSE/MathFunctions.h"
#include "src/Core/arch/SSE/Complex.h"
@@ -296,30 +273,17 @@ using std::ptrdiff_t;
#include "src/Core/arch/Default/Settings.h"
#include "src/Core/functors/BinaryFunctors.h"
#include "src/Core/functors/UnaryFunctors.h"
#include "src/Core/functors/NullaryFunctors.h"
#include "src/Core/functors/StlFunctors.h"
#include "src/Core/Functors.h"
#include "src/Core/DenseCoeffsBase.h"
#include "src/Core/DenseBase.h"
#include "src/Core/MatrixBase.h"
#include "src/Core/EigenBase.h"
#ifdef EIGEN_ENABLE_EVALUATORS
#include "src/Core/functors/AssignmentFunctors.h"
#include "src/Core/Product.h"
#include "src/Core/CoreEvaluators.h"
#include "src/Core/AssignEvaluator.h"
#include "src/Core/ProductEvaluators.h"
#endif
#ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874
// at least confirmed with Doxygen 1.5.5 and 1.5.6
#include "src/Core/Assign.h"
#endif
#include "src/Core/ArrayBase.h"
#include "src/Core/util/BlasUtil.h"
#include "src/Core/DenseStorage.h"
#include "src/Core/NestByValue.h"
@@ -383,6 +347,7 @@ using std::ptrdiff_t;
#include "src/Core/Random.h"
#include "src/Core/Replicate.h"
#include "src/Core/Reverse.h"
#include "src/Core/ArrayBase.h"
#include "src/Core/ArrayWrapper.h"
#ifdef EIGEN_USE_BLAS
@@ -404,4 +369,8 @@ using std::ptrdiff_t;
#include "src/Core/util/ReenableStupidWarnings.h"
#ifdef EIGEN2_SUPPORT
#include "Eigen2Support"
#endif
#endif // EIGEN_CORE_H


@@ -1,2 +1,2 @@
#include "Dense"
#include "Sparse"
//#include "Sparse"

95 Eigen/Eigen2Support Normal file

@@ -0,0 +1,95 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN2SUPPORT_H
#define EIGEN2SUPPORT_H
#if (!defined(EIGEN2_SUPPORT)) || (!defined(EIGEN_CORE_H))
#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header
#endif
#ifndef EIGEN_NO_EIGEN2_DEPRECATED_WARNING
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
#warning "Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. (Define EIGEN_NO_EIGEN2_DEPRECATED_WARNING to disable this warning)"
#else
#pragma message ("Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. (Define EIGEN_NO_EIGEN2_DEPRECATED_WARNING to disable this warning)")
#endif
#endif // EIGEN_NO_EIGEN2_DEPRECATED_WARNING
#include "src/Core/util/DisableStupidWarnings.h"
/** \ingroup Support_modules
* \defgroup Eigen2Support_Module Eigen2 support module
*
* \warning Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3.
*
* This module provides a couple of deprecated functions improving the compatibility with Eigen2.
*
* To use it, define EIGEN2_SUPPORT before including any Eigen header
* \code
* #define EIGEN2_SUPPORT
* \endcode
*
*/
#include "src/Eigen2Support/Macros.h"
#include "src/Eigen2Support/Memory.h"
#include "src/Eigen2Support/Meta.h"
#include "src/Eigen2Support/Lazy.h"
#include "src/Eigen2Support/Cwise.h"
#include "src/Eigen2Support/CwiseOperators.h"
#include "src/Eigen2Support/TriangularSolver.h"
#include "src/Eigen2Support/Block.h"
#include "src/Eigen2Support/VectorBlock.h"
#include "src/Eigen2Support/Minor.h"
#include "src/Eigen2Support/MathFunctions.h"
#include "src/Core/util/ReenableStupidWarnings.h"
// Eigen2 used to include iostream
#include<iostream>
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
using Eigen::Matrix##SizeSuffix##TypeSuffix; \
using Eigen::Vector##SizeSuffix##TypeSuffix; \
using Eigen::RowVector##SizeSuffix##TypeSuffix;
#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
#define EIGEN_USING_MATRIX_TYPEDEFS \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \
EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd)
#define USING_PART_OF_NAMESPACE_EIGEN \
EIGEN_USING_MATRIX_TYPEDEFS \
using Eigen::Matrix; \
using Eigen::MatrixBase; \
using Eigen::ei_random; \
using Eigen::ei_real; \
using Eigen::ei_imag; \
using Eigen::ei_conj; \
using Eigen::ei_abs; \
using Eigen::ei_abs2; \
using Eigen::ei_sqrt; \
using Eigen::ei_exp; \
using Eigen::ei_log; \
using Eigen::ei_sin; \
using Eigen::ei_cos;
#endif // EIGEN2SUPPORT_H


@@ -33,23 +33,27 @@
#include "src/Geometry/OrthoMethods.h"
#include "src/Geometry/EulerAngles.h"
#include "src/Geometry/Homogeneous.h"
#include "src/Geometry/RotationBase.h"
#include "src/Geometry/Rotation2D.h"
#include "src/Geometry/Quaternion.h"
#include "src/Geometry/AngleAxis.h"
#include "src/Geometry/Transform.h"
#include "src/Geometry/Translation.h"
#include "src/Geometry/Scaling.h"
#include "src/Geometry/Hyperplane.h"
#include "src/Geometry/ParametrizedLine.h"
#include "src/Geometry/AlignedBox.h"
#include "src/Geometry/Umeyama.h"
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
#include "src/Geometry/Homogeneous.h"
#include "src/Geometry/RotationBase.h"
#include "src/Geometry/Rotation2D.h"
#include "src/Geometry/Quaternion.h"
#include "src/Geometry/AngleAxis.h"
#include "src/Geometry/Transform.h"
#include "src/Geometry/Translation.h"
#include "src/Geometry/Scaling.h"
#include "src/Geometry/Hyperplane.h"
#include "src/Geometry/ParametrizedLine.h"
#include "src/Geometry/AlignedBox.h"
#include "src/Geometry/Umeyama.h"
// Use the SSE optimized version whenever possible. At the moment the
// SSE version doesn't compile when AVX is enabled
#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX
#include "src/Geometry/arch/Geometry_SSE.h"
#if defined EIGEN_VECTORIZE_SSE
#include "src/Geometry/arch/Geometry_SSE.h"
#endif
#endif
#ifdef EIGEN2_SUPPORT
#include "src/Eigen2Support/Geometry/All.h"
#endif
#include "src/Core/util/ReenableStupidWarnings.h"


@@ -27,12 +27,14 @@
#include "src/LU/Determinant.h"
#include "src/LU/Inverse.h"
// Use the SSE optimized version whenever possible. At the moment the
// SSE version doesn't compile when AVX is enabled
#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX
#if defined EIGEN_VECTORIZE_SSE
#include "src/LU/arch/Inverse_SSE.h"
#endif
#ifdef EIGEN2_SUPPORT
#include "src/Eigen2Support/LU.h"
#endif
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_LU_MODULE_H

32 Eigen/LeastSquares Normal file

@@ -0,0 +1,32 @@
#ifndef EIGEN_REGRESSION_MODULE_H
#define EIGEN_REGRESSION_MODULE_H
#ifndef EIGEN2_SUPPORT
#error LeastSquares is only available in Eigen2 support mode (define EIGEN2_SUPPORT)
#endif
// exclude from normal eigen3-only documentation
#ifdef EIGEN2_SUPPORT
#include "Core"
#include "src/Core/util/DisableStupidWarnings.h"
#include "Eigenvalues"
#include "Geometry"
/** \defgroup LeastSquares_Module LeastSquares module
* This module provides linear regression and related features.
*
* \code
* #include <Eigen/LeastSquares>
* \endcode
*/
#include "src/Eigen2Support/LeastSquares.h"
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN2_SUPPORT
#endif // EIGEN_REGRESSION_MODULE_H


@@ -15,9 +15,7 @@
*
* This module provides various QR decompositions
* This module also provides some MatrixBase methods, including:
* - MatrixBase::householderQr()
* - MatrixBase::colPivHouseholderQr()
* - MatrixBase::fullPivHouseholderQr()
* - MatrixBase::qr(),
*
* \code
* #include <Eigen/QR>
@@ -33,7 +31,15 @@
#include "src/QR/ColPivHouseholderQR_MKL.h"
#endif
#ifdef EIGEN2_SUPPORT
#include "src/Eigen2Support/QR.h"
#endif
#include "src/Core/util/ReenableStupidWarnings.h"
#ifdef EIGEN2_SUPPORT
#include "Eigenvalues"
#endif
#endif // EIGEN_QR_MODULE_H
/* vim: set filetype=cpp et sw=2 ts=2 ai: */


@@ -21,13 +21,16 @@
*/
#include "src/misc/Solve.h"
#include "src/SVD/SVDBase.h"
#include "src/SVD/JacobiSVD.h"
#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
#include "src/SVD/JacobiSVD_MKL.h"
#endif
#include "src/SVD/UpperBidiagonalization.h"
#ifdef EIGEN2_SUPPORT
#include "src/Eigen2Support/SVD.h"
#endif
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SVD_MODULE_H


@@ -14,7 +14,7 @@
/**
* \defgroup SparseCore_Module SparseCore module
*
* This module provides a sparse matrix representation, and basic associatd matrix manipulations
* This module provides a sparse matrix representation, and basic associated matrix manipulations
* and operations.
*
* See the \ref TutorialSparse "Sparse tutorial"


@@ -43,7 +43,7 @@ namespace internal {
* Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky
* decomposition to determine whether a system of equations has a solution.
*
* \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT
* \sa MatrixBase::ldlt(), class LLT
*/
template<typename _MatrixType, int _UpLo> class LDLT
{
@@ -151,6 +151,13 @@ template<typename _MatrixType, int _UpLo> class LDLT
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign;
}
#ifdef EIGEN2_SUPPORT
inline bool isPositiveDefinite() const
{
return isPositive();
}
#endif
/** \returns true if the matrix is negative (semidefinite) */
inline bool isNegative(void) const
@@ -172,7 +179,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
* least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function
* computes the least-square solution of \f$ A x = b \f$ if \f$ A \f$ is singular.
*
* \sa MatrixBase::ldlt(), SelfAdjointView::ldlt()
* \sa MatrixBase::ldlt()
*/
template<typename Rhs>
inline const internal::solve_retval<LDLT, Rhs>
@@ -184,6 +191,15 @@ template<typename _MatrixType, int _UpLo> class LDLT
return internal::solve_retval<LDLT, Rhs>(*this, b.derived());
}
#ifdef EIGEN2_SUPPORT
template<typename OtherDerived, typename ResultType>
bool solve(const MatrixBase<OtherDerived>& b, ResultType *result) const
{
*result = this->solve(b);
return true;
}
#endif
template<typename Derived>
bool solveInPlace(MatrixBase<Derived> &bAndX) const;
@@ -219,6 +235,11 @@ template<typename _MatrixType, int _UpLo> class LDLT
}
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
/** \internal
* Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U.
@@ -246,7 +267,6 @@ template<> struct ldlt_inplace<Lower>
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
typedef typename MatrixType::Index Index;
typedef typename TranspositionType::StorageIndexType IndexType;
eigen_assert(mat.rows()==mat.cols());
const Index size = mat.rows();
@@ -266,7 +286,7 @@ template<> struct ldlt_inplace<Lower>
mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner);
index_of_biggest_in_corner += k;
transpositions.coeffRef(k) = IndexType(index_of_biggest_in_corner);
transpositions.coeffRef(k) = index_of_biggest_in_corner;
if(k != index_of_biggest_in_corner)
{
// apply the transposition while taking care to consider only
@@ -275,7 +295,7 @@ template<> struct ldlt_inplace<Lower>
mat.row(k).head(k).swap(mat.row(index_of_biggest_in_corner).head(k));
mat.col(k).tail(s).swap(mat.col(index_of_biggest_in_corner).tail(s));
std::swap(mat.coeffRef(k,k),mat.coeffRef(index_of_biggest_in_corner,index_of_biggest_in_corner));
for(Index i=k+1;i<index_of_biggest_in_corner;++i)
for(int i=k+1;i<index_of_biggest_in_corner;++i)
{
Scalar tmp = mat.coeffRef(i,k);
mat.coeffRef(i,k) = numext::conj(mat.coeffRef(index_of_biggest_in_corner,i));
@@ -419,6 +439,8 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
template<typename MatrixType, int _UpLo>
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
{
check_template_parameters();
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
@@ -427,6 +449,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
m_transpositions.resize(size);
m_isInitialized = false;
m_temporary.resize(size);
m_sign = internal::ZeroSign;
internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign);
@@ -441,9 +464,8 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
*/
template<typename MatrixType, int _UpLo>
template<typename Derived>
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename NumTraits<typename MatrixType::Scalar>::Real& sigma)
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename LDLT<MatrixType,_UpLo>::RealScalar& sigma)
{
typedef typename TranspositionType::StorageIndexType IndexType;
const Index size = w.rows();
if (m_isInitialized)
{
@@ -455,7 +477,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Deri
m_matrix.setZero();
m_transpositions.resize(size);
for (Index i = 0; i < size; i++)
m_transpositions.coeffRef(i) = IndexType(i);
m_transpositions.coeffRef(i) = i;
m_temporary.resize(size);
m_sign = sigma>=0 ? internal::PositiveSemiDef : internal::NegativeSemiDef;
m_isInitialized = true;
@@ -486,7 +508,7 @@ struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
// dst = D^-1 (L^-1 P b)
// more precisely, use pseudo-inverse of D (see bug 241)
using std::abs;
EIGEN_USING_STD_MATH(max);
using std::max;
typedef typename LDLTType::MatrixType MatrixType;
typedef typename LDLTType::RealScalar RealScalar;
const typename Diagonal<const MatrixType>::RealReturnType vectorD(dec().vectorD());
@@ -497,6 +519,7 @@ struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
// diagonal element is not well justified and to numerical issues in some cases.
// Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
for (Index i = 0; i < vectorD.size(); ++i) {
if(abs(vectorD(i)) > tolerance)
dst.row(i) /= vectorD(i);
@@ -563,10 +586,8 @@ MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
return res;
}
#ifndef __CUDACC__
/** \cholesky_module
* \returns the Cholesky decomposition with full pivoting without square root of \c *this
* \sa MatrixBase::ldlt()
*/
template<typename MatrixType, unsigned int UpLo>
inline const LDLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>
@@ -577,7 +598,6 @@ SelfAdjointView<MatrixType, UpLo>::ldlt() const
/** \cholesky_module
* \returns the Cholesky decomposition with full pivoting without square root of \c *this
* \sa SelfAdjointView::ldlt()
*/
template<typename Derived>
inline const LDLT<typename MatrixBase<Derived>::PlainObject>
@@ -585,7 +605,6 @@ MatrixBase<Derived>::ldlt() const
{
return LDLT<PlainObject>(derived());
}
#endif // __CUDACC__
} // end namespace Eigen


@@ -41,7 +41,7 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
* Example: \include LLT_example.cpp
* Output: \verbinclude LLT_example.out
*
* \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
* \sa MatrixBase::llt(), class LDLT
*/
/* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
* Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
@@ -115,7 +115,7 @@ template<typename _MatrixType, int _UpLo> class LLT
* Example: \include LLT_solve.cpp
* Output: \verbinclude LLT_solve.out
*
* \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt()
* \sa solveInPlace(), MatrixBase::llt()
*/
template<typename Rhs>
inline const internal::solve_retval<LLT, Rhs>
@@ -127,6 +127,17 @@ template<typename _MatrixType, int _UpLo> class LLT
return internal::solve_retval<LLT, Rhs>(*this, b.derived());
}
#ifdef EIGEN2_SUPPORT
template<typename OtherDerived, typename ResultType>
bool solve(const MatrixBase<OtherDerived>& b, ResultType *result) const
{
*result = this->solve(b);
return true;
}
bool isPositiveDefinite() const { return true; }
#endif
template<typename Derived>
void solveInPlace(MatrixBase<Derived> &bAndX) const;
@@ -163,6 +174,12 @@ template<typename _MatrixType, int _UpLo> class LLT
LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
/** \internal
* Used to compute and store L
* The strict upper part is not used and even not initialized.
@@ -272,7 +289,7 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
return k;
mat.coeffRef(k,k) = x = sqrt(x);
if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint();
if (rs>0) A21 *= RealScalar(1)/x;
if (rs>0) A21 /= x;
}
return -1;
}
@@ -373,6 +390,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
template<typename MatrixType, int _UpLo>
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
{
check_template_parameters();
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
m_matrix.resize(size, size);
@@ -454,10 +473,8 @@ MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
return matrixL() * matrixL().adjoint().toDenseMatrix();
}
#ifndef __CUDACC__
/** \cholesky_module
* \returns the LLT decomposition of \c *this
* \sa SelfAdjointView::llt()
*/
template<typename Derived>
inline const LLT<typename MatrixBase<Derived>::PlainObject>
@@ -468,7 +485,6 @@ MatrixBase<Derived>::llt() const
/** \cholesky_module
* \returns the LLT decomposition of \c *this
* \sa SelfAdjointView::llt()
*/
template<typename MatrixType, unsigned int UpLo>
inline const LLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>
@@ -476,8 +492,7 @@ SelfAdjointView<MatrixType, UpLo>::llt() const
{
return LLT<PlainObject,UpLo>(m_matrix);
}
#endif // __CUDACC__
} // end namespace Eigen
#endif // EIGEN_LLT_H


@@ -60,7 +60,7 @@ template<> struct mkl_llt<EIGTYPE> \
lda = m.outerStride(); \
\
info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \
info = (info==0) ? Success : NumericalIssue; \
info = (info==0) ? -1 : info>0 ? info-1 : size; \
return info; \
} \
}; \


@@ -78,7 +78,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
{
res.itype = CHOLMOD_INT;
}
else if (internal::is_same<_Index,UF_long>::value)
else if (internal::is_same<_Index,SuiteSparse_long>::value)
{
res.itype = CHOLMOD_LONG;
}
@@ -395,7 +395,7 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
CholmodSimplicialLLT(const MatrixType& matrix) : Base()
{
init();
compute(matrix);
Base::compute(matrix);
}
~CholmodSimplicialLLT() {}
@@ -442,7 +442,7 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
CholmodSimplicialLDLT(const MatrixType& matrix) : Base()
{
init();
compute(matrix);
Base::compute(matrix);
}
~CholmodSimplicialLDLT() {}
@@ -487,7 +487,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
CholmodSupernodalLLT(const MatrixType& matrix) : Base()
{
init();
compute(matrix);
Base::compute(matrix);
}
~CholmodSupernodalLLT() {}
@@ -534,7 +534,7 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom
CholmodDecomposition(const MatrixType& matrix) : Base()
{
init();
compute(matrix);
Base::compute(matrix);
}
~CholmodDecomposition() {}


@@ -69,7 +69,6 @@ class Array
* the usage of 'using'. This should be done only for operator=.
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array& operator=(const EigenBase<OtherDerived> &other)
{
return Base::operator=(other);
@@ -85,7 +84,6 @@ class Array
* remain row-vectors and vectors remain vectors.
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array& operator=(const ArrayBase<OtherDerived>& other)
{
return Base::_set(other);
@@ -94,7 +92,6 @@ class Array
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array& operator=(const Array& other)
{
return Base::_set(other);
@@ -110,7 +107,6 @@ class Array
*
* \sa resize(Index,Index)
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array() : Base()
{
Base::_check_template_params();
@@ -120,7 +116,6 @@ class Array
#ifndef EIGEN_PARSED_BY_DOXYGEN
// FIXME is it still needed ??
/** \internal */
EIGEN_DEVICE_FUNC
Array(internal::constructor_without_unaligned_array_assert)
: Base(internal::constructor_without_unaligned_array_assert())
{
@@ -144,48 +139,41 @@ class Array
}
#endif
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE explicit Array(const T& x)
/** Constructs a vector or row-vector with given dimension. \only_for_vectors
*
* Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
* it is redundant to pass the dimension here, so it makes more sense to use the default
* constructor Matrix() instead.
*/
EIGEN_STRONG_INLINE explicit Array(Index dim)
: Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim)
{
Base::_check_template_params();
Base::template _init1<T>(x);
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Array)
eigen_assert(dim >= 0);
eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename T0, typename T1>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1)
{
Base::_check_template_params();
this->template _init2<T0,T1>(val0, val1);
}
#else
/** \brief Constructs a fixed-sized array initialized with coefficients starting at \a data */
EIGEN_DEVICE_FUNC explicit Array(const Scalar *data);
/** Constructs a vector or row-vector with given dimension. \only_for_vectors
/** constructs an uninitialized matrix with \a rows rows and \a cols columns.
*
* Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
* it is redundant to pass the dimension here, so it makes more sense to use the default
* constructor Array() instead.
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE explicit Array(Index dim);
/** constructs an initialized 1x1 Array with the given coefficient */
Array(const Scalar& value);
/** constructs an uninitialized array with \a rows rows and \a cols columns.
*
* This is useful for dynamic-size arrays. For fixed-size arrays,
* This is useful for dynamic-size matrices. For fixed-size matrices,
* it is redundant to pass these parameters, so one should use the default constructor
* Array() instead. */
* Matrix() instead. */
Array(Index rows, Index cols);
/** constructs an initialized 2D vector with given coefficients */
Array(const Scalar& val0, const Scalar& val1);
#endif
/** constructs an initialized 3D vector with given coefficients */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2)
{
Base::_check_template_params();
@@ -195,7 +183,6 @@ class Array
m_storage.data()[2] = val2;
}
/** constructs an initialized 4D vector with given coefficients */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3)
{
Base::_check_template_params();
@@ -206,9 +193,10 @@ class Array
m_storage.data()[3] = val3;
}
explicit Array(const Scalar *data);
/** Constructor copying the value of the expression \a other */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const ArrayBase<OtherDerived>& other)
: Base(other.rows() * other.cols(), other.rows(), other.cols())
{
@@ -216,7 +204,6 @@ class Array
Base::_set_noalias(other);
}
/** Copy constructor */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const Array& other)
: Base(other.rows() * other.cols(), other.rows(), other.cols())
{
@@ -225,7 +212,6 @@ class Array
}
/** Copy constructor with in-place evaluation */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const ReturnByValue<OtherDerived>& other)
{
Base::_check_template_params();
@@ -235,7 +221,6 @@ class Array
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other)
: Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
{
@@ -251,8 +236,8 @@ class Array
void swap(ArrayBase<OtherDerived> const & other)
{ this->_swap(other.derived()); }
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
inline Index innerStride() const { return 1; }
inline Index outerStride() const { return this->innerSize(); }
#ifdef EIGEN_ARRAY_PLUGIN
#include EIGEN_ARRAY_PLUGIN


@@ -46,9 +46,6 @@ template<typename Derived> class ArrayBase
typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl;
using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename internal::traits<Derived>::Index Index;
typedef typename internal::traits<Derived>::Scalar Scalar;
@@ -56,6 +53,7 @@ template<typename Derived> class ArrayBase
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef DenseBase<Derived> Base;
using Base::operator*;
using Base::RowsAtCompileTime;
using Base::ColsAtCompileTime;
using Base::SizeAtCompileTime;
@@ -118,50 +116,40 @@ template<typename Derived> class ArrayBase
/** Special case of the template operator=, in order to prevent the compiler
* from generating a default operator= (issue hit with g++ 4.1)
*/
EIGEN_DEVICE_FUNC
Derived& operator=(const ArrayBase& other)
{
return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
}
EIGEN_DEVICE_FUNC
Derived& operator+=(const Scalar& scalar);
EIGEN_DEVICE_FUNC
Derived& operator-=(const Scalar& scalar);
Derived& operator+=(const Scalar& scalar)
{ return *this = derived() + scalar; }
Derived& operator-=(const Scalar& scalar)
{ return *this = derived() - scalar; }
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& operator+=(const ArrayBase<OtherDerived>& other);
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& operator-=(const ArrayBase<OtherDerived>& other);
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& operator*=(const ArrayBase<OtherDerived>& other);
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& operator/=(const ArrayBase<OtherDerived>& other);
public:
EIGEN_DEVICE_FUNC
ArrayBase<Derived>& array() { return *this; }
EIGEN_DEVICE_FUNC
const ArrayBase<Derived>& array() const { return *this; }
/** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array
* \sa MatrixBase::array() */
EIGEN_DEVICE_FUNC
MatrixWrapper<Derived> matrix() { return derived(); }
EIGEN_DEVICE_FUNC
const MatrixWrapper<const Derived> matrix() const { return derived(); }
// template<typename Dest>
// inline void evalTo(Dest& dst) const { dst = matrix(); }
protected:
EIGEN_DEVICE_FUNC
ArrayBase() : Base() {}
private:


@@ -53,54 +53,41 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
inline ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
EIGEN_DEVICE_FUNC
inline Index rows() const { return m_expression.rows(); }
EIGEN_DEVICE_FUNC
inline Index cols() const { return m_expression.cols(); }
EIGEN_DEVICE_FUNC
inline Index outerStride() const { return m_expression.outerStride(); }
EIGEN_DEVICE_FUNC
inline Index innerStride() const { return m_expression.innerStride(); }
EIGEN_DEVICE_FUNC
inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
EIGEN_DEVICE_FUNC
inline const Scalar* data() const { return m_expression.data(); }
EIGEN_DEVICE_FUNC
inline CoeffReturnType coeff(Index rowId, Index colId) const
{
return m_expression.coeff(rowId, colId);
}
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index rowId, Index colId)
{
return m_expression.const_cast_derived().coeffRef(rowId, colId);
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const
{
return m_expression.const_cast_derived().coeffRef(rowId, colId);
}
EIGEN_DEVICE_FUNC
inline CoeffReturnType coeff(Index index) const
{
return m_expression.coeff(index);
}
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index index)
{
return m_expression.const_cast_derived().coeffRef(index);
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const
{
return m_expression.const_cast_derived().coeffRef(index);
@@ -131,11 +118,9 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
}
template<typename Dest>
EIGEN_DEVICE_FUNC
inline void evalTo(Dest& dst) const { dst = m_expression; }
const typename internal::remove_all<NestedExpressionType>::type&
EIGEN_DEVICE_FUNC
nestedExpression() const
{
return m_expression;
@@ -143,11 +128,9 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
/** Forwards the resizing request to the nested expression
* \sa DenseBase::resize(Index) */
EIGEN_DEVICE_FUNC
void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
/** Forwards the resizing request to the nested expression
* \sa DenseBase::resize(Index,Index)*/
EIGEN_DEVICE_FUNC
void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }
protected:
@@ -195,54 +178,41 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
EIGEN_DEVICE_FUNC
inline MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {}
EIGEN_DEVICE_FUNC
inline Index rows() const { return m_expression.rows(); }
EIGEN_DEVICE_FUNC
inline Index cols() const { return m_expression.cols(); }
EIGEN_DEVICE_FUNC
inline Index outerStride() const { return m_expression.outerStride(); }
EIGEN_DEVICE_FUNC
inline Index innerStride() const { return m_expression.innerStride(); }
EIGEN_DEVICE_FUNC
inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
EIGEN_DEVICE_FUNC
inline const Scalar* data() const { return m_expression.data(); }
EIGEN_DEVICE_FUNC
inline CoeffReturnType coeff(Index rowId, Index colId) const
{
return m_expression.coeff(rowId, colId);
}
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index rowId, Index colId)
{
return m_expression.const_cast_derived().coeffRef(rowId, colId);
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const
{
return m_expression.derived().coeffRef(rowId, colId);
}
EIGEN_DEVICE_FUNC
inline CoeffReturnType coeff(Index index) const
{
return m_expression.coeff(index);
}
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index index)
{
return m_expression.const_cast_derived().coeffRef(index);
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const
{
return m_expression.const_cast_derived().coeffRef(index);
@@ -272,7 +242,6 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
}
EIGEN_DEVICE_FUNC
const typename internal::remove_all<NestedExpressionType>::type&
nestedExpression() const
{
@@ -281,11 +250,9 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
/** Forwards the resizing request to the nested expression
* \sa DenseBase::resize(Index) */
EIGEN_DEVICE_FUNC
void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
/** Forwards the resizing request to the nested expression
* \sa DenseBase::resize(Index,Index)*/
EIGEN_DEVICE_FUNC
void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }
protected:


@@ -105,8 +105,6 @@ public:
EIGEN_DEBUG_VAR(DstIsAligned)
EIGEN_DEBUG_VAR(SrcIsAligned)
EIGEN_DEBUG_VAR(JointAlignment)
EIGEN_DEBUG_VAR(Derived::SizeAtCompileTime)
EIGEN_DEBUG_VAR(OtherDerived::CoeffReadCost)
EIGEN_DEBUG_VAR(InnerSize)
EIGEN_DEBUG_VAR(InnerMaxSize)
EIGEN_DEBUG_VAR(PacketSize)
@@ -141,7 +139,6 @@ struct assign_DefaultTraversal_CompleteUnrolling
inner = Index % Derived1::InnerSizeAtCompileTime
};
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
dst.copyCoeffByOuterInner(outer, inner, src);
@@ -152,14 +149,12 @@ struct assign_DefaultTraversal_CompleteUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_DefaultTraversal_InnerUnrolling
{
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
{
dst.copyCoeffByOuterInner(outer, Index, src);
@@ -170,7 +165,6 @@ struct assign_DefaultTraversal_InnerUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
};
@@ -181,7 +175,6 @@ struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_LinearTraversal_CompleteUnrolling
{
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
dst.copyCoeff(Index, src);
@@ -192,7 +185,6 @@ struct assign_LinearTraversal_CompleteUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
};
@@ -257,7 +249,6 @@ struct assign_impl;
template<typename Derived1, typename Derived2, int Unrolling, int Version>
struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version>
{
EIGEN_DEVICE_FUNC
static inline void run(Derived1 &, const Derived2 &) { }
};
@@ -265,7 +256,6 @@ template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
{
typedef typename Derived1::Index Index;
EIGEN_DEVICE_FUNC
static inline void run(Derived1 &dst, const Derived2 &src)
{
const Index innerSize = dst.innerSize();
@@ -279,7 +269,6 @@ struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version>
{
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
@@ -291,7 +280,6 @@ template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version>
{
typedef typename Derived1::Index Index;
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
const Index outerSize = dst.outerSize();
@@ -309,7 +297,6 @@ template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
{
typedef typename Derived1::Index Index;
EIGEN_DEVICE_FUNC
static inline void run(Derived1 &dst, const Derived2 &src)
{
const Index size = dst.size();
@@ -321,7 +308,6 @@ struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version>
{
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
@@ -453,19 +439,26 @@ struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Ve
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
{
typedef packet_traits<typename Derived1::Scalar> PacketTraits;
typedef typename Derived1::Scalar Scalar;
typedef packet_traits<Scalar> PacketTraits;
enum {
packetSize = PacketTraits::size,
alignable = PacketTraits::AlignedOnScalar,
dstAlignment = alignable ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
dstIsAligned = assign_traits<Derived1,Derived2>::DstIsAligned,
dstAlignment = alignable ? Aligned : int(dstIsAligned),
srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
};
const Scalar *dst_ptr = &dst.coeffRef(0,0);
if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0)
{
// the pointer is not aligned on scalar, so alignment is not possible
return assign_impl<Derived1,Derived2,DefaultTraversal,NoUnrolling>::run(dst, src);
}
const Index packetAlignedMask = packetSize - 1;
const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize();
const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
Index alignedStart = ((!alignable) || assign_traits<Derived1,Derived2>::DstIsAligned) ? 0
: internal::first_aligned(&dst.coeffRef(0,0), innerSize);
Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned(dst_ptr, innerSize);
for(Index outer = 0; outer < outerSize; ++outer)
{
@@ -506,25 +499,12 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
#ifdef EIGEN_TEST_EVALUATORS
#ifdef EIGEN_DEBUG_ASSIGN
internal::copy_using_evaluator_traits<Derived, OtherDerived>::debug();
#endif
eigen_assert(rows() == other.rows() && cols() == other.cols());
internal::call_dense_assignment_loop(derived(),other.derived());
#else // EIGEN_TEST_EVALUATORS
#ifdef EIGEN_DEBUG_ASSIGN
internal::assign_traits<Derived, OtherDerived>::debug();
#endif
eigen_assert(rows() == other.rows() && cols() == other.cols());
internal::assign_impl<Derived, OtherDerived, int(SameType) ? int(internal::assign_traits<Derived, OtherDerived>::Traversal)
: int(InvalidTraversal)>::run(derived(),other.derived());
#endif // EIGEN_TEST_EVALUATORS
: int(InvalidTraversal)>::run(derived(),other.derived());
#ifndef EIGEN_NO_DEBUG
checkTransposeAliasing(other.derived());
#endif
@@ -544,28 +524,22 @@ struct assign_selector;
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,false,false> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
template<typename ActualDerived, typename ActualOtherDerived>
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { other.evalTo(dst); return dst; }
};
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,true,false> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
};
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,false,true> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
template<typename ActualDerived, typename ActualOtherDerived>
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { Transpose<ActualDerived> dstTrans(dst); other.evalTo(dstTrans); return dst; }
};
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,true,true> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
};
@@ -573,21 +547,18 @@ struct assign_selector<Derived,OtherDerived,true,true> {
template<typename Derived>
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
{
return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
}
template<typename Derived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
{
return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
}
template<typename Derived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
{
return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
@@ -595,7 +566,6 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& ot
template<typename Derived>
template <typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
{
return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
@@ -603,7 +573,6 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<Othe
template<typename Derived>
template <typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)
{
return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());
@@ -611,7 +580,6 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<Othe
template<typename Derived>
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
{
return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());


@@ -1,842 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2011-2013 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_ASSIGN_EVALUATOR_H
#define EIGEN_ASSIGN_EVALUATOR_H
namespace Eigen {
// This implementation is based on Assign.h
namespace internal {
/***************************************************************************
* Part 1 : the logic deciding a strategy for traversal and unrolling *
***************************************************************************/
// copy_using_evaluator_traits is based on assign_traits
template <typename Derived, typename OtherDerived>
struct copy_using_evaluator_traits
{
public:
enum {
DstIsAligned = Derived::Flags & AlignedBit,
DstHasDirectAccess = Derived::Flags & DirectAccessBit,
SrcIsAligned = OtherDerived::Flags & AlignedBit,
JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned,
SrcEvalBeforeAssign = (evaluator_traits<OtherDerived>::HasEvalTo == 1)
};
private:
enum {
InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
: int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
: int(Derived::RowsAtCompileTime),
InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
: int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
: int(Derived::MaxRowsAtCompileTime),
MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
PacketSize = packet_traits<typename Derived::Scalar>::size
};
enum {
StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
MightVectorize = StorageOrdersAgree
&& (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
&& int(DstIsAligned) && int(SrcIsAligned),
MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
&& (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */
MaySliceVectorize = MightVectorize && DstHasDirectAccess
&& (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
/* slice vectorization can be slow, so we only want it if the slices are big, which is
   indicated by InnerMaxSize rather than InnerSize; think of the case of a dynamic block
   in a fixed-size matrix */
};
public:
enum {
Traversal = int(SrcEvalBeforeAssign) ? int(AllAtOnceTraversal)
: int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
: int(MayLinearize) ? int(LinearTraversal)
: int(DefaultTraversal),
Vectorized = int(Traversal) == InnerVectorizedTraversal
|| int(Traversal) == LinearVectorizedTraversal
|| int(Traversal) == SliceVectorizedTraversal
};
private:
enum {
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic
&& int(OtherDerived::CoeffReadCost) != Dynamic
&& int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
MayUnrollInner = int(InnerSize) != Dynamic
&& int(OtherDerived::CoeffReadCost) != Dynamic
&& int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
};
public:
enum {
Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
? (
int(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(MayUnrollInner) ? int(InnerUnrolling)
: int(NoUnrolling)
)
: int(Traversal) == int(LinearVectorizedTraversal)
? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling)
: int(NoUnrolling) )
: int(Traversal) == int(LinearTraversal)
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(NoUnrolling) )
: int(NoUnrolling)
};
#ifdef EIGEN_DEBUG_ASSIGN
static void debug()
{
EIGEN_DEBUG_VAR(DstIsAligned)
EIGEN_DEBUG_VAR(SrcIsAligned)
EIGEN_DEBUG_VAR(JointAlignment)
EIGEN_DEBUG_VAR(InnerSize)
EIGEN_DEBUG_VAR(InnerMaxSize)
EIGEN_DEBUG_VAR(PacketSize)
EIGEN_DEBUG_VAR(StorageOrdersAgree)
EIGEN_DEBUG_VAR(MightVectorize)
EIGEN_DEBUG_VAR(MayLinearize)
EIGEN_DEBUG_VAR(MayInnerVectorize)
EIGEN_DEBUG_VAR(MayLinearVectorize)
EIGEN_DEBUG_VAR(MaySliceVectorize)
EIGEN_DEBUG_VAR(Traversal)
EIGEN_DEBUG_VAR(UnrollingLimit)
EIGEN_DEBUG_VAR(MayUnrollCompletely)
EIGEN_DEBUG_VAR(MayUnrollInner)
EIGEN_DEBUG_VAR(Unrolling)
}
#endif
};
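The debug() hook above makes the chosen strategy observable. A minimal sketch, assuming EIGEN_DEBUG_ASSIGN is defined before any Eigen header (the legacy assign path has an analogous hook, so plain assignments print their decisions too):

#define EIGEN_DEBUG_ASSIGN
#include <iostream>
#include <Eigen/Core>

int main()
{
  Eigen::Matrix4f a, b;
  b.setRandom();
  a = b; // dumps DstIsAligned, Traversal, Unrolling, ... to stderr via EIGEN_DEBUG_VAR
}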
/***************************************************************************
* Part 2 : meta-unrollers
***************************************************************************/
/************************
*** Default traversal ***
************************/
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
{
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
typedef typename DstEvaluatorType::XprType DstXprType;
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime
};
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
kernel.assignCoeffByOuterInner(outer, inner);
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
}
};
template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
static EIGEN_STRONG_INLINE void run(Kernel&) { }
};
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
{
static EIGEN_STRONG_INLINE void run(Kernel &kernel, int outer)
{
kernel.assignCoeffByOuterInner(outer, Index);
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index+1, Stop>::run(kernel, outer);
}
};
template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
{
static EIGEN_STRONG_INLINE void run(Kernel&, int) { }
};
/***********************
*** Linear traversal ***
***********************/
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
{
static EIGEN_STRONG_INLINE void run(Kernel& kernel)
{
kernel.assignCoeff(Index);
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
}
};
template<typename Kernel, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
static EIGEN_STRONG_INLINE void run(Kernel&) { }
};
/**************************
*** Inner vectorization ***
**************************/
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling
{
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
typedef typename DstEvaluatorType::XprType DstXprType;
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime,
JointAlignment = Kernel::AssignmentTraits::JointAlignment
};
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
kernel.template assignPacketByOuterInner<Aligned, JointAlignment>(outer, inner);
enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
}
};
template<typename Kernel, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
{
static EIGEN_STRONG_INLINE void run(Kernel&) { }
};
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling
{
static EIGEN_STRONG_INLINE void run(Kernel &kernel, int outer)
{
kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, Index);
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
}
};
template<typename Kernel, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop>
{
static EIGEN_STRONG_INLINE void run(Kernel &, int) { }
};
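All of these meta-unrollers follow the same shape. A reduced, self-contained sketch of the pattern (illustrative names, not Eigen API): recursion on a compile-time index, terminated by the Index == Stop partial specialization, so the compiler expands the loop body with constant indices.

template<int Index, int Stop>
struct unroll
{
  template<typename F>
  static inline void run(F& f) { f(Index); unroll<Index+1, Stop>::run(f); }
};
template<int Stop>
struct unroll<Stop, Stop> // termination: empty body
{
  template<typename F>
  static inline void run(F&) {}
};
// unroll<0, 4>::run(f) expands to f(0); f(1); f(2); f(3);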
/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/
// dense_assignment_loop is based on assign_impl
template<typename Kernel,
int Traversal = Kernel::AssignmentTraits::Traversal,
int Unrolling = Kernel::AssignmentTraits::Unrolling>
struct dense_assignment_loop;
/************************
*** Default traversal ***
************************/
template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
{
static void run(Kernel &kernel)
{
typedef typename Kernel::Index Index;
for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
kernel.assignCoeffByOuterInner(outer, inner);
}
}
}
};
template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
{
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
}
};
template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
{
typedef typename Kernel::Index Index;
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
const Index outerSize = kernel.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
}
};
/***************************
*** Linear vectorization ***
***************************/
// The goal of unaligned_dense_assignment_loop is simply to factor out the handling
// of the non-vectorizable beginning and ending parts
template <bool IsAligned = false>
struct unaligned_dense_assignment_loop
{
// if IsAligned = true, then do nothing
template <typename Kernel>
static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index, typename Kernel::Index) {}
};
template <>
struct unaligned_dense_assignment_loop<false>
{
// MSVC must not inline these functions. If it does, it fails to optimize the
// packet access path.
// FIXME check which version exhibits this issue
#ifdef _MSC_VER
template <typename Kernel>
static EIGEN_DONT_INLINE void run(Kernel &kernel,
typename Kernel::Index start,
typename Kernel::Index end)
#else
template <typename Kernel>
static EIGEN_STRONG_INLINE void run(Kernel &kernel,
typename Kernel::Index start,
typename Kernel::Index end)
#endif
{
for (typename Kernel::Index index = start; index < end; ++index)
kernel.assignCoeff(index);
}
};
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
{
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::Index Index;
const Index size = kernel.size();
typedef packet_traits<typename Kernel::Scalar> PacketTraits;
enum {
packetSize = PacketTraits::size,
dstIsAligned = int(Kernel::AssignmentTraits::DstIsAligned),
dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned,
srcAlignment = Kernel::AssignmentTraits::JointAlignment
};
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&kernel.dstEvaluator().coeffRef(0), size);
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
for(Index index = alignedStart; index < alignedEnd; index += packetSize)
kernel.template assignPacket<dstAlignment, srcAlignment>(index);
unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
}
};
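The alignedStart/alignedEnd arithmetic above splits the 1-D range into a scalar prologue, an aligned packet body, and a scalar epilogue. A standalone sketch of the same bookkeeping (hypothetical helper; plain inner loops stand in for the packet stores):

#include <cstddef>
#include <cstdint>

void assign_linear(float* dst, const float* src, std::size_t size)
{
  const std::size_t packetSize = 4; // e.g. one SSE packet of floats
  std::size_t offset = (reinterpret_cast<std::uintptr_t>(dst) / sizeof(float)) % packetSize;
  std::size_t alignedStart = offset ? packetSize - offset : 0;
  if (alignedStart > size) alignedStart = size;
  const std::size_t alignedEnd = alignedStart + ((size - alignedStart) / packetSize) * packetSize;
  for (std::size_t i = 0; i < alignedStart; ++i) dst[i] = src[i];         // scalar prologue
  for (std::size_t i = alignedStart; i < alignedEnd; i += packetSize)     // packet body
    for (std::size_t k = 0; k < packetSize; ++k) dst[i + k] = src[i + k]; //   (stand-in for assignPacket)
  for (std::size_t i = alignedEnd; i < size; ++i) dst[i] = src[i];        // scalar epilogue
}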
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
{
typedef typename Kernel::Index Index;
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
enum { size = DstXprType::SizeAtCompileTime,
packetSize = packet_traits<typename Kernel::Scalar>::size,
alignedSize = (size/packetSize)*packetSize };
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
}
};
/**************************
*** Inner vectorization ***
**************************/
template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
{
static inline void run(Kernel &kernel)
{
typedef typename Kernel::Index Index;
const Index innerSize = kernel.innerSize();
const Index outerSize = kernel.outerSize();
const Index packetSize = packet_traits<typename Kernel::Scalar>::size;
for(Index outer = 0; outer < outerSize; ++outer)
for(Index inner = 0; inner < innerSize; inner+=packetSize)
kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, inner);
}
};
template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
{
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
}
};
template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
{
typedef typename Kernel::Index Index;
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
const Index outerSize = kernel.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
}
};
/***********************
*** Linear traversal ***
***********************/
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
{
static inline void run(Kernel &kernel)
{
typedef typename Kernel::Index Index;
const Index size = kernel.size();
for(Index i = 0; i < size; ++i)
kernel.assignCoeff(i);
}
};
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
{
static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
}
};
/**************************
*** Slice vectorization ***
***************************/
template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
{
static inline void run(Kernel &kernel)
{
typedef typename Kernel::Index Index;
typedef packet_traits<typename Kernel::Scalar> PacketTraits;
enum {
packetSize = PacketTraits::size,
alignable = PacketTraits::AlignedOnScalar,
dstAlignment = alignable ? Aligned : int(Kernel::AssignmentTraits::DstIsAligned)
};
const Index packetAlignedMask = packetSize - 1;
const Index innerSize = kernel.innerSize();
const Index outerSize = kernel.outerSize();
const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
Index alignedStart = ((!alignable) || Kernel::AssignmentTraits::DstIsAligned) ? 0
: internal::first_aligned(&kernel.dstEvaluator().coeffRef(0,0), innerSize);
for(Index outer = 0; outer < outerSize; ++outer)
{
const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
// do the non-vectorizable part of the assignment
for(Index inner = 0; inner<alignedStart ; ++inner)
kernel.assignCoeffByOuterInner(outer, inner);
// do the vectorizable part of the assignment
for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
kernel.template assignPacketByOuterInner<dstAlignment, Unaligned>(outer, inner);
// do the non-vectorizable part of the assignment
for(Index inner = alignedEnd; inner<innerSize ; ++inner)
kernel.assignCoeffByOuterInner(outer, inner);
alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
}
}
};
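A worked micro-example of the per-outer alignment shift above (illustrative numbers): for float data with packetSize = 4, innerSize = 5 and outerStride = 5, alignedStep = (4 - 5 % 4) & 3 = 3, so the first aligned inner index cycles 0 -> 3 -> 2 -> 1 -> 0 as outer advances; this matches the fact that each new column starts 5 floats (one packet plus one scalar) after the previous one.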
/****************************
*** All-at-once traversal ***
****************************/
// TODO: this 'AllAtOnceTraversal' should be dropped or caught earlier (Gael)
// Indeed, what to do with the kernel's functor??
template<typename Kernel>
struct dense_assignment_loop<Kernel, AllAtOnceTraversal, NoUnrolling>
{
static inline void run(Kernel & kernel)
{
// Evaluate rhs in temporary to prevent aliasing problems in a = a * a;
// TODO: Do not pass the xpr object to evalTo() (Jitse)
kernel.srcEvaluator().evalTo(kernel.dstEvaluator(), kernel.dstExpression());
}
};
/***************************************************************************
* Part 4 : Generic Assignment routine
***************************************************************************/
// This class generalizes the assignment of a coefficient (or packet) from one dense evaluator
// to another dense writable evaluator.
// It is parametrized by the two evaluators and the actual assignment functor.
// This abstraction level keeps the evaluation loops as simple and as generic as possible.
// One can customize the assignment using this generic dense_assignment_kernel with different
// functors, or by overloading it completely, bypassing the functor.
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
class generic_dense_assignment_kernel
{
protected:
typedef typename DstEvaluatorTypeT::XprType DstXprType;
typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
public:
typedef DstEvaluatorTypeT DstEvaluatorType;
typedef SrcEvaluatorTypeT SrcEvaluatorType;
typedef typename DstEvaluatorType::Scalar Scalar;
typedef typename DstEvaluatorType::Index Index;
typedef copy_using_evaluator_traits<DstXprType, SrcXprType> AssignmentTraits;
generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
: m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
{}
Index size() const { return m_dstExpr.size(); }
Index innerSize() const { return m_dstExpr.innerSize(); }
Index outerSize() const { return m_dstExpr.outerSize(); }
Index outerStride() const { return m_dstExpr.outerStride(); }
// TODO get rid of this one:
DstXprType& dstExpression() const { return m_dstExpr; }
DstEvaluatorType& dstEvaluator() { return m_dst; }
const SrcEvaluatorType& srcEvaluator() const { return m_src; }
void assignCoeff(Index row, Index col)
{
m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
}
void assignCoeff(Index index)
{
m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
}
void assignCoeffByOuterInner(Index outer, Index inner)
{
Index row = rowIndexByOuterInner(outer, inner);
Index col = colIndexByOuterInner(outer, inner);
assignCoeff(row, col);
}
template<int StoreMode, int LoadMode>
void assignPacket(Index row, Index col)
{
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode>(row,col));
}
template<int StoreMode, int LoadMode>
void assignPacket(Index index)
{
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode>(index));
}
template<int StoreMode, int LoadMode>
void assignPacketByOuterInner(Index outer, Index inner)
{
Index row = rowIndexByOuterInner(outer, inner);
Index col = colIndexByOuterInner(outer, inner);
assignPacket<StoreMode,LoadMode>(row, col);
}
static Index rowIndexByOuterInner(Index outer, Index inner)
{
typedef typename DstEvaluatorType::ExpressionTraits Traits;
return int(Traits::RowsAtCompileTime) == 1 ? 0
: int(Traits::ColsAtCompileTime) == 1 ? inner
: int(Traits::Flags)&RowMajorBit ? outer
: inner;
}
static Index colIndexByOuterInner(Index outer, Index inner)
{
typedef typename DstEvaluatorType::ExpressionTraits Traits;
return int(Traits::ColsAtCompileTime) == 1 ? 0
: int(Traits::RowsAtCompileTime) == 1 ? inner
: int(Traits::Flags)&RowMajorBit ? inner
: outer;
}
protected:
DstEvaluatorType& m_dst;
const SrcEvaluatorType& m_src;
const Functor &m_functor;
// TODO find a way to avoid the need for the original expression
DstXprType& m_dstExpr;
};
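The outer/inner-to-row/col mapping above, restated for the general (non-vector) case as a tiny sketch (illustrative, not Eigen API; the real code also special-cases compile-time vectors): outer walks the major direction of the destination's storage order and inner the minor one.

struct IndexPair { long row, col; };

IndexPair byOuterInner(long outer, long inner, bool rowMajor)
{
  // row-major: outer indexes rows; column-major: outer indexes columns
  return rowMajor ? IndexPair{outer, inner} : IndexPair{inner, outer};
}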
template<typename DstXprType, typename SrcXprType, typename Functor>
void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
{
#ifdef EIGEN_DEBUG_ASSIGN
// TODO these traits should be computed from information provided by the evaluators
internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
#endif
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
DstEvaluatorType dstEvaluator(dst);
SrcEvaluatorType srcEvaluator(src);
typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
dense_assignment_loop<Kernel>::run(kernel);
}
template<typename DstXprType, typename SrcXprType>
void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
{
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>());
}
/***************************************************************************
* Part 5 : Entry points
***************************************************************************/
// Based on DenseBase::lazyAssign()
// The following functions are just for testing; they are meant to be moved to operator= and the like.
template<typename DstXprType, template <typename> class StorageBase, typename SrcXprType>
EIGEN_STRONG_INLINE
const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst,
const EigenBase<SrcXprType>& src)
{
return noalias_copy_using_evaluator(dst.expression(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
}
template<typename XprType, int AssumeAliasing = evaluator_traits<XprType>::AssumeAliasing>
struct AddEvalIfAssumingAliasing;
template<typename XprType>
struct AddEvalIfAssumingAliasing<XprType, 0>
{
static const XprType& run(const XprType& xpr)
{
return xpr;
}
};
template<typename XprType>
struct AddEvalIfAssumingAliasing<XprType, 1>
{
static const EvalToTemp<XprType> run(const XprType& xpr)
{
return EvalToTemp<XprType>(xpr);
}
};
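This 0/1 dispatch is what shields user code from aliasing. For example (standard Eigen usage, shown for context): matrix products assume aliasing, so the right-hand side is first evaluated into a temporary.

#include <Eigen/Dense>

int main()
{
  Eigen::Matrix2d a;
  a << 1, 2,
       3, 4;
  a = a * a;             // safe: the product assumes aliasing, so the rhs is
                         // evaluated into a temporary before being copied into a
  // a.noalias() = a * a; // would skip the temporary -- wrong when dst aliases the rhs
}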
template<typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_STRONG_INLINE
const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
{
return noalias_copy_using_evaluator(dst.const_cast_derived(),
AddEvalIfAssumingAliasing<SrcXprType>::run(src.derived()),
func
);
}
// this mimics operator=
template<typename DstXprType, typename SrcXprType>
EIGEN_STRONG_INLINE
const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src)
{
return copy_using_evaluator(dst.const_cast_derived(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
}
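A minimal sketch of how these test entry points are exercised (internal API, mirroring Eigen's evaluator unit tests; this only compiles against a tree where this header is present):

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd a(2, 2), b(2, 2);
  b.setRandom();
  Eigen::internal::copy_using_evaluator(a, b.transpose() + b); // a = b.transpose() + b
}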
template<typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_STRONG_INLINE
const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
{
#ifdef EIGEN_DEBUG_ASSIGN
internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
#endif
#ifdef EIGEN_NO_AUTOMATIC_RESIZING
eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size())
: (dst.rows() == src.rows() && dst.cols() == src.cols())))
&& "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
#else
dst.const_cast_derived().resizeLike(src.derived());
#endif
call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func);
return dst.derived();
}
template<typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_STRONG_INLINE
const DstXprType& noalias_copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
{
call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func);
return dst.derived();
}
// Based on DenseBase::swap()
// TODO: Check whether we need to do something special for swapping two
// Arrays or Matrices. (Jitse)
// Overload default assignPacket behavior for swapping them
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT>
class swap_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar> >
{
typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar> > Base;
typedef typename DstEvaluatorTypeT::PacketScalar PacketScalar;
using Base::m_dst;
using Base::m_src;
using Base::m_functor;
public:
typedef typename Base::Scalar Scalar;
typedef typename Base::Index Index;
typedef typename Base::DstXprType DstXprType;
swap_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, DstXprType& dstExpr)
: Base(dst, src, swap_assign_op<Scalar>(), dstExpr)
{}
template<int StoreMode, int LoadMode>
void assignPacket(Index row, Index col)
{
m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(row,col), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(row,col));
}
template<int StoreMode, int LoadMode>
void assignPacket(Index index)
{
m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(index), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(index));
}
// TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel; by simple I mean no CRTP (Gael)
template<int StoreMode, int LoadMode>
void assignPacketByOuterInner(Index outer, Index inner)
{
Index row = Base::rowIndexByOuterInner(outer, inner);
Index col = Base::colIndexByOuterInner(outer, inner);
assignPacket<StoreMode,LoadMode>(row, col);
}
};
template<typename DstXprType, typename SrcXprType>
void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src)
{
// TODO there is too much redundancy with call_dense_assignment_loop
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
DstEvaluatorType dstEvaluator(dst);
SrcEvaluatorType srcEvaluator(src);
typedef swap_kernel<DstEvaluatorType,SrcEvaluatorType> Kernel;
Kernel kernel(dstEvaluator, srcEvaluator, dst.const_cast_derived());
dense_assignment_loop<Kernel>::run(kernel);
}
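Usage sketch for the swap path (again internal, test-only, and assuming a tree where this header is present):

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXf a = Eigen::MatrixXf::Zero(2, 2);
  Eigen::MatrixXf b = Eigen::MatrixXf::Ones(2, 2);
  Eigen::internal::swap_using_evaluator(a, b); // afterwards a is all ones, b all zeros
}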
// Based on MatrixBase::operator+= (in CwiseBinaryOp.h)
template<typename DstXprType, typename SrcXprType>
void add_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
{
typedef typename DstXprType::Scalar Scalar;
copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>());
}
// Based on ArrayBase::operator+=
template<typename DstXprType, typename SrcXprType>
void add_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
{
typedef typename DstXprType::Scalar Scalar;
copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>());
}
// TODO: Add add_assign_using_evaluator for EigenBase? (Jitse)
template<typename DstXprType, typename SrcXprType>
void subtract_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
{
typedef typename DstXprType::Scalar Scalar;
copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>());
}
template<typename DstXprType, typename SrcXprType>
void subtract_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
{
typedef typename DstXprType::Scalar Scalar;
copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>());
}
template<typename DstXprType, typename SrcXprType>
void multiply_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
{
typedef typename DstXprType::Scalar Scalar;
copy_using_evaluator(dst.derived(), src.derived(), mul_assign_op<Scalar>());
}
template<typename DstXprType, typename SrcXprType>
void divide_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
{
typedef typename DstXprType::Scalar Scalar;
copy_using_evaluator(dst.derived(), src.derived(), div_assign_op<Scalar>());
}
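And a sketch of the compound-assignment helpers, each of which routes the corresponding operator through copy_using_evaluator with the matching functor (same caveats as above):

#include <Eigen/Dense>

int main()
{
  Eigen::ArrayXXd x = Eigen::ArrayXXd::Constant(2, 2, 3.0);
  Eigen::ArrayXXd y = Eigen::ArrayXXd::Constant(2, 2, 2.0);
  Eigen::internal::add_assign_using_evaluator(x, y);      // x += y : every coefficient becomes 5
  Eigen::internal::multiply_assign_using_evaluator(x, y); // x *= y : every coefficient becomes 10
}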
} // namespace internal
} // end namespace Eigen
#endif // EIGEN_ASSIGN_EVALUATOR_H


@@ -202,7 +202,6 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(atan, Atan)
//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln)


@@ -21,9 +21,6 @@ namespace Eigen {
* \param XprType the type of the expression in which we are taking a block
* \param BlockRows the number of rows of the block we are taking at compile time (optional)
* \param BlockCols the number of columns of the block we are taking at compile time (optional)
* \param InnerPanel is true if the block maps to a set of rows of a row-major matrix or
* to a set of columns of a column-major matrix (optional). This parameter makes it possible
* to determine at compile time whether aligned access is possible on the block expression.
*
* This class represents an expression of either a fixed-size or dynamic-size block. It is the return
* type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
@@ -69,8 +66,9 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
: ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
: int(traits<XprType>::MaxColsAtCompileTime),
XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
IsDense = is_same<StorageKind,Dense>::value,
IsRowMajor = (IsDense&&MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
: (IsDense&&MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
: XprTypeIsRowMajor,
HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),
InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
@@ -83,7 +81,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0,
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (traits<XprType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
@@ -114,7 +112,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
/** Column or Row constructor
*/
EIGEN_DEVICE_FUNC
inline Block(XprType& xpr, Index i) : Impl(xpr,i)
{
eigen_assert( (i>=0) && (
@@ -124,7 +121,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
/** Fixed-size constructor
*/
EIGEN_DEVICE_FUNC
inline Block(XprType& xpr, Index a_startRow, Index a_startCol)
: Impl(xpr, a_startRow, a_startCol)
{
@@ -135,7 +131,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
/** Dynamic-size constructor
*/
EIGEN_DEVICE_FUNC
inline Block(XprType& xpr,
Index a_startRow, Index a_startCol,
Index blockRows, Index blockCols)
@@ -159,9 +154,8 @@ class BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, Dense>
public:
typedef Impl Base;
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol) : Impl(xpr, a_startRow, a_startCol) {}
EIGEN_DEVICE_FUNC
inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol) : Impl(xpr, a_startRow, a_startCol) {}
inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol, Index blockRows, Index blockCols)
: Impl(xpr, a_startRow, a_startCol, blockRows, blockCols) {}
};
@@ -183,7 +177,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
/** Column or Row constructor
*/
EIGEN_DEVICE_FUNC
inline BlockImpl_dense(XprType& xpr, Index i)
: m_xpr(xpr),
// It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime,
@@ -198,7 +191,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
/** Fixed-size constructor
*/
EIGEN_DEVICE_FUNC
inline BlockImpl_dense(XprType& xpr, Index a_startRow, Index a_startCol)
: m_xpr(xpr), m_startRow(a_startRow), m_startCol(a_startCol),
m_blockRows(BlockRows), m_blockCols(BlockCols)
@@ -206,7 +198,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
/** Dynamic-size constructor
*/
EIGEN_DEVICE_FUNC
inline BlockImpl_dense(XprType& xpr,
Index a_startRow, Index a_startCol,
Index blockRows, Index blockCols)
@@ -214,10 +205,9 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
m_blockRows(blockRows), m_blockCols(blockCols)
{}
EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); }
EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); }
inline Index rows() const { return m_blockRows.value(); }
inline Index cols() const { return m_blockCols.value(); }
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index rowId, Index colId)
{
EIGEN_STATIC_ASSERT_LVALUE(XprType)
@@ -225,20 +215,17 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const
{
return m_xpr.derived()
.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const
{
return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value());
}
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index index)
{
EIGEN_STATIC_ASSERT_LVALUE(XprType)
@@ -247,7 +234,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const
{
return m_xpr.const_cast_derived()
@@ -255,7 +241,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
}
EIGEN_DEVICE_FUNC
inline const CoeffReturnType coeff(Index index) const
{
return m_xpr
@@ -295,24 +280,21 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
#ifdef EIGEN_PARSED_BY_DOXYGEN
/** \sa MapBase::data() */
EIGEN_DEVICE_FUNC inline const Scalar* data() const;
EIGEN_DEVICE_FUNC inline Index innerStride() const;
EIGEN_DEVICE_FUNC inline Index outerStride() const;
inline const Scalar* data() const;
inline Index innerStride() const;
inline Index outerStride() const;
#endif
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
{
return m_xpr;
}
EIGEN_DEVICE_FUNC
Index startRow() const
Index startRow() const
{
return m_startRow.value();
}
EIGEN_DEVICE_FUNC
Index startCol() const
{
return m_startCol.value();
@@ -341,7 +323,6 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
/** Column or Row constructor
*/
EIGEN_DEVICE_FUNC
inline BlockImpl_dense(XprType& xpr, Index i)
: Base(internal::const_cast_ptr(&xpr.coeffRef(
(BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0,
@@ -355,7 +336,6 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
/** Fixed-size constructor
*/
EIGEN_DEVICE_FUNC
inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
: Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol))), m_xpr(xpr)
{
@@ -364,7 +344,6 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
/** Dynamic-size constructor
*/
EIGEN_DEVICE_FUNC
inline BlockImpl_dense(XprType& xpr,
Index startRow, Index startCol,
Index blockRows, Index blockCols)
@@ -374,14 +353,12 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
init();
}
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
{
return m_xpr;
}
/** \sa MapBase::innerStride() */
EIGEN_DEVICE_FUNC
inline Index innerStride() const
{
return internal::traits<BlockType>::HasSameStorageOrderAsXprType
@@ -390,7 +367,6 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
}
/** \sa MapBase::outerStride() */
EIGEN_DEVICE_FUNC
inline Index outerStride() const
{
return m_outerStride;
@@ -404,7 +380,6 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal used by allowAligned() */
EIGEN_DEVICE_FUNC
inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
: Base(data, blockRows, blockCols), m_xpr(xpr)
{
@@ -413,7 +388,6 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
#endif
protected:
EIGEN_DEVICE_FUNC
void init()
{
m_outerStride = internal::traits<BlockType>::HasSameStorageOrderAsXprType


@@ -8,4 +8,3 @@ INSTALL(FILES
ADD_SUBDIRECTORY(products)
ADD_SUBDIRECTORY(util)
ADD_SUBDIRECTORY(arch)
ADD_SUBDIRECTORY(functors)


@@ -30,7 +30,6 @@ struct CommaInitializer
typedef typename XprType::Scalar Scalar;
typedef typename XprType::Index Index;
EIGEN_DEVICE_FUNC
inline CommaInitializer(XprType& xpr, const Scalar& s)
: m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1)
{
@@ -38,7 +37,6 @@ struct CommaInitializer
}
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
inline CommaInitializer(XprType& xpr, const DenseBase<OtherDerived>& other)
: m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows())
{
@@ -48,7 +46,6 @@ struct CommaInitializer
/* Copy/Move constructor which transfers ownership. This is crucial in the
* absence of return value optimization to avoid assertions during destruction. */
// FIXME in C++11 mode this could be replaced by a proper RValue constructor
EIGEN_DEVICE_FUNC
inline CommaInitializer(const CommaInitializer& o)
: m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) {
// Mark original object as finished. In absence of R-value references we need to const_cast:
@@ -58,7 +55,6 @@ struct CommaInitializer
}
/* inserts a scalar value in the target matrix */
EIGEN_DEVICE_FUNC
CommaInitializer& operator,(const Scalar& s)
{
if (m_col==m_xpr.cols())
@@ -78,7 +74,6 @@ struct CommaInitializer
/* inserts a matrix expression in the target matrix */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
CommaInitializer& operator,(const DenseBase<OtherDerived>& other)
{
if(other.cols()==0 || other.rows()==0)
@@ -104,7 +99,6 @@ struct CommaInitializer
return *this;
}
EIGEN_DEVICE_FUNC
inline ~CommaInitializer()
{
eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows()
@@ -119,10 +113,9 @@ struct CommaInitializer
* quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished());
* \endcode
*/
EIGEN_DEVICE_FUNC
inline XprType& finished() { return m_xpr; }
XprType& m_xpr; // target expression
XprType& m_xpr; // target expression
Index m_row; // current row id
Index m_col; // current col id
Index m_currentBlockRows; // current block height

File diff suppressed because it is too large


@@ -81,7 +81,8 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
)
),
Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits<BinaryOp>::Cost
Cost0 = EIGEN_ADD_COST(LhsCoeffReadCost,RhsCoeffReadCost),
CoeffReadCost = EIGEN_ADD_COST(Cost0,functor_traits<BinaryOp>::Cost)
};
};
} // end namespace internal
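The switch from a plain '+' to EIGEN_ADD_COST in this hunk is the point of the backported cost-computation fix: Dynamic must be absorbing, otherwise Dynamic (internally -1) silently combines with a finite cost into a meaningless finite value. A sketch of the idea (illustrative restatement, not the verbatim Eigen macro):

#define SKETCH_ADD_COST(a, b) \
  ((int(a) == Eigen::Dynamic || int(b) == Eigen::Dynamic) ? int(Eigen::Dynamic) \
                                                          : int(a) + int(b))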
@@ -122,7 +123,6 @@ class CwiseBinaryOp : internal::no_assignment_operator,
typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp())
: m_lhs(aLhs), m_rhs(aRhs), m_functor(func)
{
@@ -132,7 +132,6 @@ class CwiseBinaryOp : internal::no_assignment_operator,
eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols());
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index rows() const {
// return the fixed size type if available to enable compile time optimizations
if (internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic)
@@ -140,7 +139,6 @@ class CwiseBinaryOp : internal::no_assignment_operator,
else
return m_lhs.rows();
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index cols() const {
// return the fixed size type if available to enable compile time optimizations
if (internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic)
@@ -150,13 +148,10 @@ class CwiseBinaryOp : internal::no_assignment_operator,
}
/** \returns the left hand side nested expression */
EIGEN_DEVICE_FUNC
const _LhsNested& lhs() const { return m_lhs; }
/** \returns the right hand side nested expression */
EIGEN_DEVICE_FUNC
const _RhsNested& rhs() const { return m_rhs; }
/** \returns the functor representing the binary operation */
EIGEN_DEVICE_FUNC
const BinaryOp& functor() const { return m_functor; }
protected:
@@ -175,7 +170,6 @@ class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Dense>
typedef typename internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE( Derived )
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
{
return derived().functor()(derived().lhs().coeff(rowId, colId),
@@ -189,7 +183,6 @@ class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Dense>
derived().rhs().template packet<LoadMode>(rowId, colId));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
{
return derived().functor()(derived().lhs().coeff(index),
@@ -235,4 +228,3 @@ MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
} // end namespace Eigen
#endif // EIGEN_CWISE_BINARY_OP_H


@@ -54,7 +54,6 @@ class CwiseNullaryOp : internal::no_assignment_operator,
typedef typename internal::dense_xpr_base<CwiseNullaryOp>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)
EIGEN_DEVICE_FUNC
CwiseNullaryOp(Index nbRows, Index nbCols, const NullaryOp& func = NullaryOp())
: m_rows(nbRows), m_cols(nbCols), m_functor(func)
{
@@ -64,12 +63,9 @@ class CwiseNullaryOp : internal::no_assignment_operator,
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
{
return m_functor(rowId, colId);
@@ -81,7 +77,6 @@ class CwiseNullaryOp : internal::no_assignment_operator,
return m_functor.packetOp(rowId, colId);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
{
return m_functor(index);
@@ -94,7 +89,6 @@ class CwiseNullaryOp : internal::no_assignment_operator,
}
/** \returns the functor representing the nullary operation */
EIGEN_DEVICE_FUNC
const NullaryOp& functor() const { return m_functor; }
protected:
@@ -138,9 +132,6 @@ DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f
*
* The template parameter \a CustomNullaryOp is the type of the functor.
*
* Here is an example with C++11 random generators: \include random_cpp11.cpp
* Output: \verbinclude random_cpp11.out
*
* \sa class CwiseNullaryOp
*/
template<typename Derived>
@@ -749,7 +740,6 @@ namespace internal {
template<typename Derived, bool Big = (Derived::SizeAtCompileTime>=16)>
struct setIdentity_impl
{
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Derived& run(Derived& m)
{
return m = Derived::Identity(m.rows(), m.cols());
@@ -760,7 +750,6 @@ template<typename Derived>
struct setIdentity_impl<Derived, true>
{
typedef typename Derived::Index Index;
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Derived& run(Derived& m)
{
m.setZero();


@@ -47,7 +47,7 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> >
Flags = _XprTypeNested::Flags & (
HereditaryBits | LinearAccessBit | AlignedBit
| (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits<UnaryOp>::Cost
CoeffReadCost = EIGEN_ADD_COST(_XprTypeNested::CoeffReadCost, functor_traits<UnaryOp>::Cost)
};
};
}
@@ -64,26 +64,20 @@ class CwiseUnaryOp : internal::no_assignment_operator,
typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
EIGEN_DEVICE_FUNC
inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
: m_xpr(xpr), m_functor(func) {}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); }
/** \returns the functor representing the unary operation */
EIGEN_DEVICE_FUNC
const UnaryOp& functor() const { return m_functor; }
/** \returns the nested expression */
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
nestedExpression() const { return m_xpr; }
/** \returns the nested expression */
EIGEN_DEVICE_FUNC
typename internal::remove_all<typename XprType::Nested>::type&
nestedExpression() { return m_xpr.const_cast_derived(); }
@@ -104,7 +98,6 @@ class CwiseUnaryOpImpl<UnaryOp,XprType,Dense>
typedef typename internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
{
return derived().functor()(derived().nestedExpression().coeff(rowId, colId));
@@ -116,14 +109,12 @@ class CwiseUnaryOpImpl<UnaryOp,XprType,Dense>
return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(rowId, colId));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
{
return derived().functor()(derived().nestedExpression().coeff(index));
}
template<int LoadMode>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
{
return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(index));


@@ -40,15 +40,14 @@ static inline void check_DenseIndex_is_signed() {
*/
template<typename Derived> class DenseBase
#ifndef EIGEN_PARSED_BY_DOXYGEN
: public internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>
: public internal::special_scalar_op_base<Derived, typename internal::traits<Derived>::Scalar,
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real,
DenseCoeffsBase<Derived> >
#else
: public DenseCoeffsBase<Derived>
#endif // not EIGEN_PARSED_BY_DOXYGEN
{
public:
using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
class InnerIterator;
@@ -63,8 +62,9 @@ template<typename Derived> class DenseBase
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef internal::special_scalar_op_base<Derived,Scalar,RealScalar, DenseCoeffsBase<Derived> > Base;
typedef DenseCoeffsBase<Derived> Base;
using Base::operator*;
using Base::derived;
using Base::const_cast_derived;
using Base::rows;
@@ -182,19 +182,13 @@ template<typename Derived> class DenseBase
/** \returns the number of nonzero coefficients which is in practice the number
* of stored coefficients. */
EIGEN_DEVICE_FUNC
inline Index nonZeros() const { return size(); }
/** \returns true if either the number of rows or the number of columns is equal to 1.
* In other words, this function returns
* \code rows()==1 || cols()==1 \endcode
* \sa rows(), cols(), IsVectorAtCompileTime. */
/** \returns the outer size.
*
* \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension
* with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a
* column-major matrix, and the number of rows for a row-major matrix. */
EIGEN_DEVICE_FUNC
Index outerSize() const
{
return IsVectorAtCompileTime ? 1
@@ -206,7 +200,6 @@ template<typename Derived> class DenseBase
* \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension
* with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a
* column-major matrix, and the number of columns for a row-major matrix. */
EIGEN_DEVICE_FUNC
Index innerSize() const
{
return IsVectorAtCompileTime ? this->size()
@@ -217,7 +210,6 @@ template<typename Derived> class DenseBase
* Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
* nothing else.
*/
EIGEN_DEVICE_FUNC
void resize(Index newSize)
{
EIGEN_ONLY_USED_FOR_DEBUG(newSize);
@@ -228,7 +220,6 @@ template<typename Derived> class DenseBase
* Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
* nothing else.
*/
EIGEN_DEVICE_FUNC
void resize(Index nbRows, Index nbCols)
{
EIGEN_ONLY_USED_FOR_DEBUG(nbRows);
@@ -252,54 +243,44 @@ template<typename Derived> class DenseBase
/** Copies \a other into *this. \returns a reference to *this. */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& operator=(const DenseBase<OtherDerived>& other);
/** Special case of the template operator=, in order to prevent the compiler
* from generating a default operator= (issue hit with g++ 4.1)
*/
EIGEN_DEVICE_FUNC
Derived& operator=(const DenseBase& other);
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& operator=(const EigenBase<OtherDerived> &other);
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& operator+=(const EigenBase<OtherDerived> &other);
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& operator-=(const EigenBase<OtherDerived> &other);
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& operator=(const ReturnByValue<OtherDerived>& func);
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** Copies \a other into *this without evaluating other. \returns a reference to *this. */
/** \internal Copies \a other into *this without evaluating other. \returns a reference to *this. */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& lazyAssign(const DenseBase<OtherDerived>& other);
#endif // not EIGEN_PARSED_BY_DOXYGEN
EIGEN_DEVICE_FUNC
/** \internal Evaluates \a other into *this. \returns a reference to *this. */
template<typename OtherDerived>
Derived& lazyAssign(const ReturnByValue<OtherDerived>& other);
CommaInitializer<Derived> operator<< (const Scalar& s);
template<unsigned int Added,unsigned int Removed>
const Flagged<Derived, Added, Removed> flagged() const;
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);
EIGEN_DEVICE_FUNC
Eigen::Transpose<Derived> transpose();
typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
EIGEN_DEVICE_FUNC
typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
ConstTransposeReturnType transpose() const;
EIGEN_DEVICE_FUNC
void transposeInPlace();
#ifndef EIGEN_NO_DEBUG
protected:
@@ -309,68 +290,65 @@ template<typename Derived> class DenseBase
#endif
EIGEN_DEVICE_FUNC static const ConstantReturnType
static const ConstantReturnType
Constant(Index rows, Index cols, const Scalar& value);
EIGEN_DEVICE_FUNC static const ConstantReturnType
static const ConstantReturnType
Constant(Index size, const Scalar& value);
EIGEN_DEVICE_FUNC static const ConstantReturnType
static const ConstantReturnType
Constant(const Scalar& value);
EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
static const SequentialLinSpacedReturnType
LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high);
EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
static const RandomAccessLinSpacedReturnType
LinSpaced(Index size, const Scalar& low, const Scalar& high);
EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
static const SequentialLinSpacedReturnType
LinSpaced(Sequential_t, const Scalar& low, const Scalar& high);
- EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
+ static const RandomAccessLinSpacedReturnType
  LinSpaced(const Scalar& low, const Scalar& high);
- template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+ template<typename CustomNullaryOp>
  static const CwiseNullaryOp<CustomNullaryOp, Derived>
  NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
- template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+ template<typename CustomNullaryOp>
  static const CwiseNullaryOp<CustomNullaryOp, Derived>
  NullaryExpr(Index size, const CustomNullaryOp& func);
- template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+ template<typename CustomNullaryOp>
  static const CwiseNullaryOp<CustomNullaryOp, Derived>
  NullaryExpr(const CustomNullaryOp& func);
- EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
- EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size);
- EIGEN_DEVICE_FUNC static const ConstantReturnType Zero();
- EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols);
- EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size);
- EIGEN_DEVICE_FUNC static const ConstantReturnType Ones();
+ static const ConstantReturnType Zero(Index rows, Index cols);
+ static const ConstantReturnType Zero(Index size);
+ static const ConstantReturnType Zero();
+ static const ConstantReturnType Ones(Index rows, Index cols);
+ static const ConstantReturnType Ones(Index size);
+ static const ConstantReturnType Ones();
- EIGEN_DEVICE_FUNC void fill(const Scalar& value);
- EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value);
- EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
- EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high);
- EIGEN_DEVICE_FUNC Derived& setZero();
- EIGEN_DEVICE_FUNC Derived& setOnes();
- EIGEN_DEVICE_FUNC Derived& setRandom();
+ void fill(const Scalar& value);
+ Derived& setConstant(const Scalar& value);
+ Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
+ Derived& setLinSpaced(const Scalar& low, const Scalar& high);
+ Derived& setZero();
+ Derived& setOnes();
+ Derived& setRandom();
- template<typename OtherDerived> EIGEN_DEVICE_FUNC
+ template<typename OtherDerived>
  bool isApprox(const DenseBase<OtherDerived>& other,
                const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
- EIGEN_DEVICE_FUNC
  bool isMuchSmallerThan(const RealScalar& other,
                         const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
- template<typename OtherDerived> EIGEN_DEVICE_FUNC
+ template<typename OtherDerived>
  bool isMuchSmallerThan(const DenseBase<OtherDerived>& other,
                         const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
- EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
- EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
- EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
- EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
  inline bool hasNaN() const;
  inline bool allFinite() const;
- EIGEN_DEVICE_FUNC
  inline Derived& operator*=(const Scalar& other);
- EIGEN_DEVICE_FUNC
  inline Derived& operator/=(const Scalar& other);
typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
@@ -379,7 +357,6 @@ template<typename Derived> class DenseBase
* Notice that in the case of a plain matrix or vector (not an expression) this function just returns
* a const reference, in order to avoid a useless copy.
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE EvalReturnType eval() const
{
// Even though MSVC does not honor strong inlining when the return type
@@ -392,7 +369,6 @@ template<typename Derived> class DenseBase
*
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void swap(const DenseBase<OtherDerived>& other,
int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase)
{
@@ -403,52 +379,46 @@ template<typename Derived> class DenseBase
*
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void swap(PlainObjectBase<OtherDerived>& other)
{
SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
}
- EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const;
- EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
- EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();
- template<bool Enable> EIGEN_DEVICE_FUNC
- inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
- template<bool Enable> EIGEN_DEVICE_FUNC
- inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
+ inline const NestByValue<Derived> nestByValue() const;
+ inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
+ inline ForceAlignedAccess<Derived> forceAlignedAccess();
+ template<bool Enable> inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
+ template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
- EIGEN_DEVICE_FUNC Scalar sum() const;
- EIGEN_DEVICE_FUNC Scalar mean() const;
- EIGEN_DEVICE_FUNC Scalar trace() const;
+ Scalar sum() const;
+ Scalar mean() const;
+ Scalar trace() const;
- EIGEN_DEVICE_FUNC Scalar prod() const;
+ Scalar prod() const;
- EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;
- EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
+ typename internal::traits<Derived>::Scalar minCoeff() const;
+ typename internal::traits<Derived>::Scalar maxCoeff() const;
- template<typename IndexType> EIGEN_DEVICE_FUNC
+ template<typename IndexType>
  typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
- template<typename IndexType> EIGEN_DEVICE_FUNC
+ template<typename IndexType>
  typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;
- template<typename IndexType> EIGEN_DEVICE_FUNC
+ template<typename IndexType>
  typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;
- template<typename IndexType> EIGEN_DEVICE_FUNC
+ template<typename IndexType>
  typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;
  template<typename BinaryOp>
- EIGEN_DEVICE_FUNC
  typename internal::result_of<BinaryOp(typename internal::traits<Derived>::Scalar)>::type
  redux(const BinaryOp& func) const;
  template<typename Visitor>
- EIGEN_DEVICE_FUNC
  void visit(Visitor& func) const;
  inline const WithFormat<Derived> format(const IOFormat& fmt) const;
  /** \returns the unique coefficient of a 1x1 expression */
- EIGEN_DEVICE_FUNC
CoeffReturnType value() const
{
EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
@@ -456,8 +426,8 @@ template<typename Derived> class DenseBase
return derived().coeff(0,0);
}
- bool all() const;
- bool any() const;
+ bool all(void) const;
+ bool any(void) const;
Index count() const;
typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
@@ -490,8 +460,10 @@ template<typename Derived> class DenseBase
template<int p> RealScalar lpNorm() const;
template<int RowFactor, int ColFactor>
- const Replicate<Derived,RowFactor,ColFactor> replicate() const;
- const Replicate<Derived,Dynamic,Dynamic> replicate(Index rowFactor,Index colFactor) const;
+ inline const Replicate<Derived,RowFactor,ColFactor> replicate() const;
+ typedef Replicate<Derived,Dynamic,Dynamic> ReplicateReturnType;
+ inline const ReplicateReturnType replicate(Index rowFactor,Index colFactor) const;
typedef Reverse<Derived, BothDirections> ReverseReturnType;
typedef const Reverse<const Derived, BothDirections> ConstReverseReturnType;
@@ -506,18 +478,27 @@ template<typename Derived> class DenseBase
# endif
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
#ifdef EIGEN2_SUPPORT
Block<Derived> corner(CornerType type, Index cRows, Index cCols);
const Block<Derived> corner(CornerType type, Index cRows, Index cCols) const;
template<int CRows, int CCols>
Block<Derived, CRows, CCols> corner(CornerType type);
template<int CRows, int CCols>
const Block<Derived, CRows, CCols> corner(CornerType type) const;
#endif // EIGEN2_SUPPORT
// disable the use of evalTo for dense objects with a nice compilation error
- template<typename Dest>
- EIGEN_DEVICE_FUNC
- inline void evalTo(Dest& ) const
+ template<typename Dest> inline void evalTo(Dest& ) const
{
EIGEN_STATIC_ASSERT((internal::is_same<Dest,void>::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
}
protected:
/** Default constructor. Do nothing. */
- EIGEN_DEVICE_FUNC DenseBase()
+ DenseBase()
{
/* Just checks for self-consistency of the flags.
* Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down
@@ -530,9 +511,9 @@ template<typename Derived> class DenseBase
}
private:
- EIGEN_DEVICE_FUNC explicit DenseBase(int);
- EIGEN_DEVICE_FUNC DenseBase(int,int);
- template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase<OtherDerived>&);
+ explicit DenseBase(int);
+ DenseBase(int,int);
+ template<typename OtherDerived> explicit DenseBase(const DenseBase<OtherDerived>&);
};
} // end namespace Eigen
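For orientation, a minimal usage sketch of the factory and initializer API declared above (LinSpaced, Zero, Ones, the set* methods, isApprox); it assumes nothing beyond the public Eigen headers of this era:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  // Factories return lightweight expressions; assignment evaluates them.
  Eigen::VectorXf v = Eigen::VectorXf::LinSpaced(5, 0.f, 1.f); // 0 0.25 0.5 0.75 1
  Eigen::MatrixXf m = Eigen::MatrixXf::Zero(2, 3);

  // The set* members initialize in place and return *this for chaining.
  m.setOnes();
  v.setLinSpaced(5, 1.f, 2.f);

  // isApprox is a fuzzy comparison scaled by the operand norms (see Fuzzy.h below).
  std::cout << (m.isApprox(Eigen::MatrixXf::Ones(2, 3)) ? "equal" : "different") << "\n";
  return 0;
}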

Eigen/src/Core/DenseCoeffsBase.h

@@ -61,7 +61,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
using Base::size;
using Base::derived;
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const
{
return int(Derived::RowsAtCompileTime) == 1 ? 0
@@ -70,7 +69,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
: inner;
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const
{
return int(Derived::ColsAtCompileTime) == 1 ? 0
@@ -93,7 +91,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
*
* \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
{
eigen_internal_assert(row >= 0 && row < rows()
@@ -101,7 +98,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
return derived().coeff(row, col);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
{
return coeff(rowIndexByOuterInner(outer, inner),
@@ -112,7 +108,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
*
* \sa operator()(Index,Index), operator[](Index)
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const
{
eigen_assert(row >= 0 && row < rows()
@@ -135,7 +130,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
* \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType
coeff(Index index) const
{
@@ -152,12 +146,13 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
* z() const, w() const
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType
operator[](Index index) const
{
#ifndef EIGEN2_SUPPORT
EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
#endif
eigen_assert(index >= 0 && index < size());
return derived().coeff(index);
}
@@ -172,7 +167,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
* z() const, w() const
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType
operator()(Index index) const
{
@@ -182,25 +176,21 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
/** equivalent to operator[](0). */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType
x() const { return (*this)[0]; }
/** equivalent to operator[](1). */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType
y() const { return (*this)[1]; }
/** equivalent to operator[](2). */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType
z() const { return (*this)[2]; }
/** equivalent to operator[](3). */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType
w() const { return (*this)[3]; }
@@ -321,7 +311,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
*
* \sa operator()(Index,Index), coeff(Index, Index) const, coeffRef(Index)
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
{
eigen_internal_assert(row >= 0 && row < rows()
@@ -329,7 +318,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
return derived().coeffRef(row, col);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar&
coeffRefByOuterInner(Index outer, Index inner)
{
@@ -342,7 +330,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
* \sa operator[](Index)
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar&
operator()(Index row, Index col)
{
@@ -367,7 +354,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
* \sa operator[](Index), coeff(Index) const, coeffRef(Index,Index)
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar&
coeffRef(Index index)
{
@@ -382,12 +368,13 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
* \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar&
operator[](Index index)
{
#ifndef EIGEN2_SUPPORT
EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
#endif
eigen_assert(index >= 0 && index < size());
return derived().coeffRef(index);
}
@@ -401,7 +388,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
* \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar&
operator()(Index index)
{
@@ -411,25 +397,21 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
/** equivalent to operator[](0). */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar&
x() { return (*this)[0]; }
/** equivalent to operator[](1). */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar&
y() { return (*this)[1]; }
/** equivalent to operator[](2). */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar&
z() { return (*this)[2]; }
/** equivalent to operator[](3). */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar&
w() { return (*this)[3]; }
@@ -491,7 +473,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
{
eigen_internal_assert(row >= 0 && row < rows()
@@ -508,7 +489,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
{
eigen_internal_assert(index >= 0 && index < size());
@@ -517,7 +497,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void copyCoeffByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
{
const Index row = rowIndexByOuterInner(outer,inner);
@@ -602,7 +581,6 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
*
* \sa outerStride(), rowStride(), colStride()
*/
EIGEN_DEVICE_FUNC
inline Index innerStride() const
{
return derived().innerStride();
@@ -613,7 +591,6 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
*
* \sa innerStride(), rowStride(), colStride()
*/
EIGEN_DEVICE_FUNC
inline Index outerStride() const
{
return derived().outerStride();
@@ -629,7 +606,6 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
*
* \sa innerStride(), outerStride(), colStride()
*/
EIGEN_DEVICE_FUNC
inline Index rowStride() const
{
return Derived::IsRowMajor ? outerStride() : innerStride();
@@ -639,7 +615,6 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
*
* \sa innerStride(), outerStride(), rowStride()
*/
EIGEN_DEVICE_FUNC
inline Index colStride() const
{
return Derived::IsRowMajor ? innerStride() : outerStride();
@@ -677,7 +652,6 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
*
* \sa outerStride(), rowStride(), colStride()
*/
EIGEN_DEVICE_FUNC
inline Index innerStride() const
{
return derived().innerStride();
@@ -688,7 +662,6 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
*
* \sa innerStride(), rowStride(), colStride()
*/
EIGEN_DEVICE_FUNC
inline Index outerStride() const
{
return derived().outerStride();
@@ -704,7 +677,6 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
*
* \sa innerStride(), outerStride(), colStride()
*/
EIGEN_DEVICE_FUNC
inline Index rowStride() const
{
return Derived::IsRowMajor ? outerStride() : innerStride();
@@ -714,7 +686,6 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
*
* \sa innerStride(), outerStride(), rowStride()
*/
EIGEN_DEVICE_FUNC
inline Index colStride() const
{
return Derived::IsRowMajor ? innerStride() : outerStride();
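To make the accessor and stride layer above concrete, a short sketch: operator() and operator[] forward to coeff()/coeffRef() after range checks, x()/y()/z()/w() are operator[](0..3), and for a default column-major matrix the inner stride is 1 while the outer stride equals the number of rows:

#include <Eigen/Dense>
#include <cassert>

int main()
{
  Eigen::Matrix3d a = Eigen::Matrix3d::Identity();
  a(0, 2) = 5.0;              // writable coeffRef path
  double x = a(0, 2);         // read-only coeff path
  assert(x == 5.0);

  Eigen::Vector4d p(1, 2, 3, 4);
  assert(p.x() == 1 && p.w() == 4);   // x()/y()/z()/w() index coefficients 0..3

  // Column-major storage: inner stride 1, outer stride == rows().
  assert(a.innerStride() == 1 && a.outerStride() == 3);
  return 0;
}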

Eigen/src/Core/DenseStorage.h

@@ -3,7 +3,7 @@
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
- // Copyright (C) 2010-2013 Hauke Heibel <hauke.heibel@gmail.com>
+ // Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -24,9 +24,7 @@ namespace internal {
struct constructor_without_unaligned_array_assert {};
- template<typename T, int Size>
- EIGEN_DEVICE_FUNC
- void check_static_allocation_size()
+ template<typename T, int Size> void check_static_allocation_size()
{
// if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit
#if EIGEN_STACK_ALLOCATION_LIMIT
@@ -40,20 +38,18 @@ void check_static_allocation_size()
*/
template <typename T, int Size, int MatrixOrArrayOptions,
int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
- : (((Size*sizeof(T))%EIGEN_ALIGN_BYTES)==0) ? EIGEN_ALIGN_BYTES
+ : (((Size*sizeof(T))%16)==0) ? 16
: 0 >
struct plain_array
{
T array[Size];
- EIGEN_DEVICE_FUNC
- plain_array()
+ plain_array()
  {
    check_static_allocation_size<T,Size>();
  }
- EIGEN_DEVICE_FUNC
- plain_array(constructor_without_unaligned_array_assert)
+ plain_array(constructor_without_unaligned_array_assert)
  {
    check_static_allocation_size<T,Size>();
  }
@@ -68,31 +64,29 @@ struct plain_array
template<typename PtrType>
EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; }
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
- eigen_assert((reinterpret_cast<size_t>(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \
+ eigen_assert((reinterpret_cast<size_t>(eigen_unaligned_array_assert_workaround_gcc47(array)) & sizemask) == 0 \
&& "this assertion is explained here: " \
"http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
" **** READ THIS WEB PAGE !!! ****");
#else
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
- eigen_assert((reinterpret_cast<size_t>(array) & (sizemask)) == 0 \
+ eigen_assert((reinterpret_cast<size_t>(array) & sizemask) == 0 \
&& "this assertion is explained here: " \
"http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
" **** READ THIS WEB PAGE !!! ****");
#endif
template <typename T, int Size, int MatrixOrArrayOptions>
- struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES>
+ struct plain_array<T, Size, MatrixOrArrayOptions, 16>
{
- EIGEN_USER_ALIGN_DEFAULT T array[Size];
+ EIGEN_USER_ALIGN16 T array[Size];
EIGEN_DEVICE_FUNC
plain_array()
{
- EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(EIGEN_ALIGN_BYTES-1);
+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf);
check_static_allocation_size<T,Size>();
}
EIGEN_DEVICE_FUNC
plain_array(constructor_without_unaligned_array_assert)
{
check_static_allocation_size<T,Size>();
@@ -102,9 +96,9 @@ struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES>
template <typename T, int MatrixOrArrayOptions, int Alignment>
struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
{
- EIGEN_USER_ALIGN_DEFAULT T array[1];
- EIGEN_DEVICE_FUNC plain_array() {}
- EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
+ EIGEN_USER_ALIGN16 T array[1];
+ plain_array() {}
+ plain_array(constructor_without_unaligned_array_assert) {}
};
} // end namespace internal
@@ -128,44 +122,41 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
{
internal::plain_array<T,Size,_Options> m_data;
public:
- EIGEN_DEVICE_FUNC DenseStorage() {}
- EIGEN_DEVICE_FUNC
+ DenseStorage() {}
  DenseStorage(internal::constructor_without_unaligned_array_assert)
    : m_data(internal::constructor_without_unaligned_array_assert()) {}
- EIGEN_DEVICE_FUNC
  DenseStorage(const DenseStorage& other) : m_data(other.m_data) {}
- EIGEN_DEVICE_FUNC
  DenseStorage& operator=(const DenseStorage& other)
  {
    if (this != &other) m_data = other.m_data;
    return *this;
  }
- EIGEN_DEVICE_FUNC DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
- EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
- EIGEN_DEVICE_FUNC static DenseIndex rows(void) {return _Rows;}
- EIGEN_DEVICE_FUNC static DenseIndex cols(void) {return _Cols;}
- EIGEN_DEVICE_FUNC void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
- EIGEN_DEVICE_FUNC void resize(DenseIndex,DenseIndex,DenseIndex) {}
- EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
- EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+ DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
+ void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
+ static DenseIndex rows(void) {return _Rows;}
+ static DenseIndex cols(void) {return _Cols;}
+ void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
+ void resize(DenseIndex,DenseIndex,DenseIndex) {}
+ const T *data() const { return m_data.array; }
+ T *data() { return m_data.array; }
};
// null matrix
template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0, _Rows, _Cols, _Options>
{
public:
- EIGEN_DEVICE_FUNC DenseStorage() {}
- EIGEN_DEVICE_FUNC DenseStorage(internal::constructor_without_unaligned_array_assert) {}
- EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {}
- EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
- EIGEN_DEVICE_FUNC DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
- EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}
- EIGEN_DEVICE_FUNC static DenseIndex rows(void) {return _Rows;}
- EIGEN_DEVICE_FUNC static DenseIndex cols(void) {return _Cols;}
- EIGEN_DEVICE_FUNC void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
- EIGEN_DEVICE_FUNC void resize(DenseIndex,DenseIndex,DenseIndex) {}
- EIGEN_DEVICE_FUNC const T *data() const { return 0; }
- EIGEN_DEVICE_FUNC T *data() { return 0; }
+ DenseStorage() {}
+ DenseStorage(internal::constructor_without_unaligned_array_assert) {}
+ DenseStorage(const DenseStorage&) {}
+ DenseStorage& operator=(const DenseStorage&) { return *this; }
+ DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
+ void swap(DenseStorage& ) {}
+ static DenseIndex rows(void) {return _Rows;}
+ static DenseIndex cols(void) {return _Cols;}
+ void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
+ void resize(DenseIndex,DenseIndex,DenseIndex) {}
+ const T *data() const { return 0; }
+ T *data() { return 0; }
};
// more specializations for null matrices; these are necessary to resolve ambiguities
@@ -185,29 +176,29 @@ template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic
DenseIndex m_rows;
DenseIndex m_cols;
public:
- EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
+ DenseStorage() : m_rows(0), m_cols(0) {}
DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
DenseStorage& operator=(const DenseStorage& other)
{
  if (this != &other)
  {
    m_data = other.m_data;
    m_rows = other.m_rows;
    m_cols = other.m_cols;
  }
  return *this;
}
DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) : m_rows(nbRows), m_cols(nbCols) {}
void swap(DenseStorage& other)
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
- EIGEN_DEVICE_FUNC DenseIndex rows() const {return m_rows;}
- EIGEN_DEVICE_FUNC DenseIndex cols() const {return m_cols;}
+ DenseIndex rows() const {return m_rows;}
+ DenseIndex cols() const {return m_cols;}
  void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
  void resize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
- EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
- EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+ const T *data() const { return m_data.array; }
+ T *data() { return m_data.array; }
};
// dynamic-size matrix with fixed-size storage and fixed width
@@ -216,27 +207,27 @@ template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Si
internal::plain_array<T,Size,_Options> m_data;
DenseIndex m_rows;
public:
- EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
+ DenseStorage() : m_rows(0) {}
DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
DenseStorage& operator=(const DenseStorage& other)
{
  if (this != &other)
  {
    m_data = other.m_data;
    m_rows = other.m_rows;
  }
  return *this;
}
DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex) : m_rows(nbRows) {}
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
- EIGEN_DEVICE_FUNC DenseIndex rows(void) const {return m_rows;}
- EIGEN_DEVICE_FUNC DenseIndex cols(void) const {return _Cols;}
+ DenseIndex rows(void) const {return m_rows;}
+ DenseIndex cols(void) const {return _Cols;}
void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
void resize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
- EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
- EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+ const T *data() const { return m_data.array; }
+ T *data() { return m_data.array; }
};
// dynamic-size matrix with fixed-size storage and fixed height
@@ -245,7 +236,7 @@ template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Si
internal::plain_array<T,Size,_Options> m_data;
DenseIndex m_cols;
public:
- EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
+ DenseStorage() : m_cols(0) {}
DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
@@ -260,12 +251,12 @@ template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Si
}
DenseStorage(DenseIndex, DenseIndex, DenseIndex nbCols) : m_cols(nbCols) {}
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
- EIGEN_DEVICE_FUNC DenseIndex rows(void) const {return _Rows;}
- EIGEN_DEVICE_FUNC DenseIndex cols(void) const {return m_cols;}
+ DenseIndex rows(void) const {return _Rows;}
+ DenseIndex cols(void) const {return m_cols;}
  void conservativeResize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
  void resize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
- EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
- EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+ const T *data() const { return m_data.array; }
+ T *data() { return m_data.array; }
};
// purely dynamic matrix.
@@ -275,28 +266,12 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
DenseIndex m_rows;
DenseIndex m_cols;
public:
- EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
+ DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(0), m_rows(0), m_cols(0) {}
DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows), m_cols(nbCols)
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
DenseStorage(const DenseStorage& other)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols))
, m_rows(other.m_rows)
, m_cols(other.m_cols)
{
internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data);
}
DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
{
DenseStorage tmp(other);
this->swap(tmp);
}
return *this;
}
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
DenseStorage(DenseStorage&& other)
: m_data(std::move(other.m_data))
@@ -317,8 +292,8 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
void swap(DenseStorage& other)
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
- EIGEN_DEVICE_FUNC DenseIndex rows(void) const {return m_rows;}
- EIGEN_DEVICE_FUNC DenseIndex cols(void) const {return m_cols;}
+ DenseIndex rows(void) const {return m_rows;}
+ DenseIndex cols(void) const {return m_cols;}
void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
{
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
@@ -339,8 +314,11 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
m_rows = nbRows;
m_cols = nbCols;
}
- EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
- EIGEN_DEVICE_FUNC T *data() { return m_data; }
+ const T *data() const { return m_data; }
+ T *data() { return m_data; }
private:
DenseStorage(const DenseStorage&);
DenseStorage& operator=(const DenseStorage&);
};
// matrix with dynamic width and fixed height (so that matrix has dynamic size).
@@ -349,25 +327,10 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
T *m_data;
DenseIndex m_cols;
public:
- EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
+ DenseStorage() : m_data(0), m_cols(0) {}
DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
DenseStorage(DenseIndex size, DenseIndex, DenseIndex nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(nbCols)
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
DenseStorage(const DenseStorage& other)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
, m_cols(other.m_cols)
{
internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);
}
DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
{
DenseStorage tmp(other);
this->swap(tmp);
}
return *this;
}
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
DenseStorage(DenseStorage&& other)
: m_data(std::move(other.m_data))
@@ -385,8 +348,8 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
#endif
~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
- EIGEN_DEVICE_FUNC static DenseIndex rows(void) {return _Rows;}
- EIGEN_DEVICE_FUNC DenseIndex cols(void) const {return m_cols;}
+ static DenseIndex rows(void) {return _Rows;}
+ DenseIndex cols(void) const {return m_cols;}
void conservativeResize(DenseIndex size, DenseIndex, DenseIndex nbCols)
{
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
@@ -405,8 +368,11 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
}
m_cols = nbCols;
}
- EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
- EIGEN_DEVICE_FUNC T *data() { return m_data; }
+ const T *data() const { return m_data; }
+ T *data() { return m_data; }
private:
DenseStorage(const DenseStorage&);
DenseStorage& operator=(const DenseStorage&);
};
// matrix with dynamic height and fixed width (so that matrix has dynamic size).
@@ -415,25 +381,10 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
T *m_data;
DenseIndex m_rows;
public:
- EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
+ DenseStorage() : m_data(0), m_rows(0) {}
DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows)
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
DenseStorage(const DenseStorage& other)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
, m_rows(other.m_rows)
{
internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);
}
DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
{
DenseStorage tmp(other);
this->swap(tmp);
}
return *this;
}
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
DenseStorage(DenseStorage&& other)
: m_data(std::move(other.m_data))
@@ -451,8 +402,8 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
#endif
~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
- EIGEN_DEVICE_FUNC DenseIndex rows(void) const {return m_rows;}
- EIGEN_DEVICE_FUNC static DenseIndex cols(void) {return _Cols;}
+ DenseIndex rows(void) const {return m_rows;}
+ static DenseIndex cols(void) {return _Cols;}
void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex)
{
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
@@ -471,8 +422,11 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
}
m_rows = nbRows;
}
- EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
- EIGEN_DEVICE_FUNC T *data() { return m_data; }
+ const T *data() const { return m_data; }
+ T *data() { return m_data; }
private:
DenseStorage(const DenseStorage&);
DenseStorage& operator=(const DenseStorage&);
};
} // end namespace Eigen
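DenseStorage is internal, but its specializations have user-visible consequences: fixed sizes keep the coefficients inline in a plain_array (no heap traffic, optional alignment), while Dynamic sizes store a pointer plus run-time dimensions. A hedged illustration of those consequences only:

#include <Eigen/Dense>
#include <cassert>

int main()
{
  // Fixed size: the coefficients live inside the object (internal::plain_array).
  Eigen::Matrix2f f;
  assert(sizeof(f) >= 4 * sizeof(float));

  // Dynamic size: the object holds a pointer plus run-time row/column counts.
  Eigen::MatrixXf d(100, 100);   // conditional_aligned_new_auto allocates here
  d.resize(10, 10);              // DenseStorage::resize reallocates as needed
  assert(d.rows() == 10 && d.cols() == 10);

  // DontAlign selects the unaligned plain_array specialization shown above.
  Eigen::Matrix<float, 4, 4, Eigen::DontAlign> u;
  (void)f; (void)u;
  return 0;
}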

Eigen/src/Core/Diagonal.h

@@ -70,30 +70,20 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
typedef typename internal::dense_xpr_base<Diagonal>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)
EIGEN_DEVICE_FUNC
inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
EIGEN_DEVICE_FUNC
  inline Index rows() const
- {
-   EIGEN_USING_STD_MATH(min);
-   return m_index.value()<0 ? (min)(Index(m_matrix.cols()),Index(m_matrix.rows()+m_index.value()))
-                            : (min)(Index(m_matrix.rows()),Index(m_matrix.cols()-m_index.value()));
- }
+ { return m_index.value()<0 ? (std::min<Index>)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min<Index>)(m_matrix.rows(),m_matrix.cols()-m_index.value()); }
EIGEN_DEVICE_FUNC
inline Index cols() const { return 1; }
EIGEN_DEVICE_FUNC
inline Index innerStride() const
{
return m_matrix.outerStride() + 1;
}
EIGEN_DEVICE_FUNC
inline Index outerStride() const
{
return 0;
@@ -105,57 +95,47 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
const Scalar
>::type ScalarWithConstIfNotLvalue;
EIGEN_DEVICE_FUNC
inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
EIGEN_DEVICE_FUNC
inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index row, Index)
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index row, Index) const
{
return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
}
EIGEN_DEVICE_FUNC
inline CoeffReturnType coeff(Index row, Index) const
{
return m_matrix.coeff(row+rowOffset(), row+colOffset());
}
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index idx)
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index idx) const
{
return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
}
EIGEN_DEVICE_FUNC
inline CoeffReturnType coeff(Index idx) const
{
return m_matrix.coeff(idx+rowOffset(), idx+colOffset());
}
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const
{
return m_matrix;
}
EIGEN_DEVICE_FUNC
int index() const
{
return m_index.value();
@@ -167,11 +147,8 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
private:
// some compilers may fail to optimize std::max etc in case of compile-time constants...
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
// trigger a compile-time error if someone tries to call packet
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
@@ -213,18 +190,18 @@ MatrixBase<Derived>::diagonal() const
*
* \sa MatrixBase::diagonal(), class Diagonal */
template<typename Derived>
- inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<DynamicIndex>::Type
+ inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType
  MatrixBase<Derived>::diagonal(Index index)
  {
-   return typename DiagonalIndexReturnType<DynamicIndex>::Type(derived(), index);
+   return DiagonalDynamicIndexReturnType(derived(), index);
}
/** This is the const version of diagonal(Index). */
template<typename Derived>
- inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<DynamicIndex>::Type
+ inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType
  MatrixBase<Derived>::diagonal(Index index) const
  {
-   return typename ConstDiagonalIndexReturnType<DynamicIndex>::Type(derived(), index);
+   return ConstDiagonalDynamicIndexReturnType(derived(), index);
}
/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
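A short sketch of the diagonal() overloads implemented above; the run-time-index form builds the dynamic-index Diagonal expression, and the result is writable whenever the nested matrix is:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Matrix3i m;
  m << 1, 2, 3,
       4, 5, 6,
       7, 8, 9;

  std::cout << m.diagonal().transpose()   << "\n"; // 1 5 9 (main diagonal)
  std::cout << m.diagonal(1).transpose()  << "\n"; // 2 6   (super-diagonal)
  std::cout << m.diagonal(-1).transpose() << "\n"; // 4 8   (sub-diagonal)

  m.diagonal().setZero();  // Diagonal is an lvalue expression here
  return 0;
}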

Eigen/src/Core/DiagonalMatrix.h

@@ -37,64 +37,63 @@ class DiagonalBase : public EigenBase<Derived>
typedef DenseMatrixType DenseType;
typedef DiagonalMatrix<Scalar,DiagonalVectorType::SizeAtCompileTime,DiagonalVectorType::MaxSizeAtCompileTime> PlainObject;
EIGEN_DEVICE_FUNC
inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
EIGEN_DEVICE_FUNC
inline Derived& derived() { return *static_cast<Derived*>(this); }
EIGEN_DEVICE_FUNC
DenseMatrixType toDenseMatrix() const { return derived(); }
template<typename DenseDerived>
EIGEN_DEVICE_FUNC
void evalTo(MatrixBase<DenseDerived> &other) const;
template<typename DenseDerived>
EIGEN_DEVICE_FUNC
void addTo(MatrixBase<DenseDerived> &other) const
{ other.diagonal() += diagonal(); }
template<typename DenseDerived>
EIGEN_DEVICE_FUNC
void subTo(MatrixBase<DenseDerived> &other) const
{ other.diagonal() -= diagonal(); }
EIGEN_DEVICE_FUNC
inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); }
EIGEN_DEVICE_FUNC
inline DiagonalVectorType& diagonal() { return derived().diagonal(); }
EIGEN_DEVICE_FUNC
inline Index rows() const { return diagonal().size(); }
EIGEN_DEVICE_FUNC
inline Index cols() const { return diagonal().size(); }
/** \returns the diagonal matrix product of \c *this by the matrix \a matrix.
*/
template<typename MatrixDerived>
EIGEN_DEVICE_FUNC
const DiagonalProduct<MatrixDerived, Derived, OnTheLeft>
operator*(const MatrixBase<MatrixDerived> &matrix) const
{
return DiagonalProduct<MatrixDerived, Derived, OnTheLeft>(matrix.derived(), derived());
}
EIGEN_DEVICE_FUNC
inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
inverse() const
{
return diagonal().cwiseInverse();
}
EIGEN_DEVICE_FUNC
inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
operator*(const Scalar& scalar) const
{
return diagonal() * scalar;
}
EIGEN_DEVICE_FUNC
friend inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
operator*(const Scalar& scalar, const DiagonalBase& other)
{
return other.diagonal() * scalar;
}
#ifdef EIGEN2_SUPPORT
template<typename OtherDerived>
bool isApprox(const DiagonalBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
{
return diagonal().isApprox(other.diagonal(), precision);
}
template<typename OtherDerived>
bool isApprox(const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
{
return toDenseMatrix().isApprox(other, precision);
}
#endif
};
template<typename Derived>
@@ -152,31 +151,24 @@ class DiagonalMatrix
public:
/** const version of diagonal(). */
EIGEN_DEVICE_FUNC
inline const DiagonalVectorType& diagonal() const { return m_diagonal; }
/** \returns a reference to the stored vector of diagonal coefficients. */
EIGEN_DEVICE_FUNC
inline DiagonalVectorType& diagonal() { return m_diagonal; }
/** Default constructor without initialization */
EIGEN_DEVICE_FUNC
inline DiagonalMatrix() {}
/** Constructs a diagonal matrix with given dimension */
EIGEN_DEVICE_FUNC
inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}
/** 2D constructor. */
EIGEN_DEVICE_FUNC
inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x,y) {}
/** 3D constructor. */
EIGEN_DEVICE_FUNC
inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {}
/** Copy constructor. */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
inline DiagonalMatrix(const DiagonalBase<OtherDerived>& other) : m_diagonal(other.diagonal()) {}
#ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -186,13 +178,11 @@ class DiagonalMatrix
/** generic constructor from expression of the diagonal coefficients */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
explicit inline DiagonalMatrix(const MatrixBase<OtherDerived>& other) : m_diagonal(other)
{}
/** Copy operator. */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
DiagonalMatrix& operator=(const DiagonalBase<OtherDerived>& other)
{
m_diagonal = other.diagonal();
@@ -203,7 +193,6 @@ class DiagonalMatrix
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
EIGEN_DEVICE_FUNC
DiagonalMatrix& operator=(const DiagonalMatrix& other)
{
m_diagonal = other.diagonal();
@@ -212,19 +201,14 @@ class DiagonalMatrix
#endif
/** Resizes to given size. */
EIGEN_DEVICE_FUNC
inline void resize(Index size) { m_diagonal.resize(size); }
/** Sets all coefficients to zero. */
EIGEN_DEVICE_FUNC
inline void setZero() { m_diagonal.setZero(); }
/** Resizes and sets all coefficients to zero. */
EIGEN_DEVICE_FUNC
inline void setZero(Index size) { m_diagonal.setZero(size); }
/** Sets this matrix to be the identity matrix of the current size. */
EIGEN_DEVICE_FUNC
inline void setIdentity() { m_diagonal.setOnes(); }
/** Sets this matrix to be the identity matrix of the given size. */
EIGEN_DEVICE_FUNC
inline void setIdentity(Index size) { m_diagonal.setOnes(size); }
};
@@ -271,11 +255,9 @@ class DiagonalWrapper
#endif
/** Constructor from expression of diagonal coefficients to wrap. */
EIGEN_DEVICE_FUNC
inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {}
/** \returns a const reference to the wrapped expression of diagonal coefficients. */
EIGEN_DEVICE_FUNC
const DiagonalVectorType& diagonal() const { return m_diagonal; }
protected:
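A minimal usage sketch of the DiagonalBase/DiagonalMatrix API shown above:

#include <Eigen/Dense>

int main()
{
  Eigen::DiagonalMatrix<double, 3> d(1.0, 2.0, 3.0);  // the 3D constructor above
  Eigen::Vector3d v(1.0, 1.0, 1.0);

  Eigen::Vector3d w = d * v;                 // diagonal product: (1, 2, 3)
  Eigen::Matrix3d dense = d.toDenseMatrix(); // evalTo via the dense conversion

  d.setIdentity();                           // sets the stored vector to ones
  Eigen::DiagonalMatrix<double, 3> inv(d.inverse()); // coefficient-wise inverse
  (void)w; (void)dense; (void)inv;
  return 0;
}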

Eigen/src/Core/DiagonalProduct.h

@@ -34,8 +34,9 @@ struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))),
_LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0,
- Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit,//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit),
- CoeffReadCost = NumTraits<Scalar>::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost
+ Flags = ((HereditaryBits|_LinearAccessMask|AlignedBit) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0),//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit),
+ Cost0 = EIGEN_ADD_COST(NumTraits<Scalar>::MulCost, MatrixType::CoeffReadCost),
+ CoeffReadCost = EIGEN_ADD_COST(Cost0,DiagonalType::DiagonalVectorType::CoeffReadCost)
};
};
}
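The switch from a plain + to EIGEN_ADD_COST in CoeffReadCost matches the cost-computation fixes in this series: Dynamic is a sentinel value (-1 in Eigen), so compile-time cost arithmetic has to propagate it instead of adding through it. A hedged sketch of the idea; the name SaturatingAdd is illustrative, not Eigen's:

// Illustrative only: a compile-time "add" that propagates a Dynamic sentinel,
// which is the guarantee EIGEN_ADD_COST provides for CoeffReadCost above.
const int Dynamic = -1; // Eigen's actual sentinel value

template<int A, int B>
struct SaturatingAdd
{
  enum { value = (A == Dynamic || B == Dynamic) ? Dynamic : A + B };
};

int main()
{
  int ok  = SaturatingAdd<3, 4>::value;        // 7
  int dyn = SaturatingAdd<3, Dynamic>::value;  // stays Dynamic instead of 2
  (void)ok; (void)dyn;
  return 0;
}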

Eigen/src/Core/Dot.h

@@ -29,7 +29,6 @@ template<typename T, typename U,
struct dot_nocheck
{
typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
EIGEN_DEVICE_FUNC
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{
return a.template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
@@ -40,7 +39,6 @@ template<typename T, typename U>
struct dot_nocheck<T, U, true>
{
typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
EIGEN_DEVICE_FUNC
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{
return a.transpose().template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
@@ -61,7 +59,6 @@ struct dot_nocheck<T, U, true>
*/
template<typename Derived>
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
{
@@ -76,6 +73,34 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);
}
#ifdef EIGEN2_SUPPORT
/** \returns the dot product of *this with other, with the Eigen2 convention that the dot product is linear in the first variable
* (conjugating the second variable). Of course this only makes a difference in the complex case.
*
* This method is only available in EIGEN2_SUPPORT mode.
*
* \only_for_vectors
*
* \sa dot()
*/
template<typename Derived>
template<typename OtherDerived>
typename internal::traits<Derived>::Scalar
MatrixBase<Derived>::eigen2_dot(const MatrixBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
eigen_assert(size() == other.size());
return internal::dot_nocheck<OtherDerived,Derived>::run(other,*this);
}
#endif
//---------- implementation of L2 norm and related functions ----------
/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm.
@@ -139,7 +164,6 @@ template<typename Derived, int p>
struct lpNorm_selector
{
typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
EIGEN_DEVICE_FUNC
static inline RealScalar run(const MatrixBase<Derived>& m)
{
using std::pow;
@@ -150,7 +174,6 @@ struct lpNorm_selector
template<typename Derived>
struct lpNorm_selector<Derived, 1>
{
EIGEN_DEVICE_FUNC
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{
return m.cwiseAbs().sum();
@@ -160,7 +183,6 @@ struct lpNorm_selector<Derived, 1>
template<typename Derived>
struct lpNorm_selector<Derived, 2>
{
EIGEN_DEVICE_FUNC
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{
return m.norm();
@@ -170,7 +192,6 @@ struct lpNorm_selector<Derived, 2>
template<typename Derived>
struct lpNorm_selector<Derived, Infinity>
{
EIGEN_DEVICE_FUNC
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
{
return m.cwiseAbs().maxCoeff();
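The selector above dispatches lpNorm<p>() to three closed forms; for general p it computes (sum_i |x_i|^p)^(1/p) via std::pow. A sketch:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Vector3d v(1.0, -2.0, 2.0);

  std::cout << v.lpNorm<1>() << "\n";               // 5: sum of |x_i|
  std::cout << v.lpNorm<2>() << "\n";               // 3: same as v.norm()
  std::cout << v.lpNorm<Eigen::Infinity>() << "\n"; // 2: max of |x_i|
  return 0;
}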

Eigen/src/Core/EigenBase.h

@@ -31,40 +31,29 @@ template<typename Derived> struct EigenBase
typedef typename internal::traits<Derived>::Index Index;
/** \returns a reference to the derived object */
EIGEN_DEVICE_FUNC
Derived& derived() { return *static_cast<Derived*>(this); }
/** \returns a const reference to the derived object */
EIGEN_DEVICE_FUNC
const Derived& derived() const { return *static_cast<const Derived*>(this); }
EIGEN_DEVICE_FUNC
inline Derived& const_cast_derived() const
{ return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
EIGEN_DEVICE_FUNC
inline const Derived& const_derived() const
{ return *static_cast<const Derived*>(this); }
/** \returns the number of rows. \sa cols(), RowsAtCompileTime */
EIGEN_DEVICE_FUNC
inline Index rows() const { return derived().rows(); }
/** \returns the number of columns. \sa rows(), ColsAtCompileTime*/
EIGEN_DEVICE_FUNC
inline Index cols() const { return derived().cols(); }
/** \returns the number of coefficients, which is rows()*cols().
* \sa rows(), cols(), SizeAtCompileTime. */
EIGEN_DEVICE_FUNC
inline Index size() const { return rows() * cols(); }
/** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */
- template<typename Dest>
- EIGEN_DEVICE_FUNC
- inline void evalTo(Dest& dst) const
+ template<typename Dest> inline void evalTo(Dest& dst) const
{ derived().evalTo(dst); }
/** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */
- template<typename Dest>
- EIGEN_DEVICE_FUNC
- inline void addTo(Dest& dst) const
+ template<typename Dest> inline void addTo(Dest& dst) const
{
// This is the default implementation,
// derived class can reimplement it in a more optimized way.
@@ -74,9 +63,7 @@ template<typename Derived> struct EigenBase
}
/** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */
- template<typename Dest>
- EIGEN_DEVICE_FUNC
- inline void subTo(Dest& dst) const
+ template<typename Dest> inline void subTo(Dest& dst) const
{
// This is the default implementation,
// derived class can reimplement it in a more optimized way.
@@ -86,8 +73,7 @@ template<typename Derived> struct EigenBase
}
/** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */
- template<typename Dest>
- EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const
+ template<typename Dest> inline void applyThisOnTheRight(Dest& dst) const
{
// This is the default implementation,
// derived class can reimplement it in a more optimized way.
@@ -95,8 +81,7 @@ template<typename Derived> struct EigenBase
}
/** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */
- template<typename Dest>
- EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const
+ template<typename Dest> inline void applyThisOnTheLeft(Dest& dst) const
{
// This is the default implementation,
// derived class can reimplement it in a more optimized way.
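EigenBase is the common root of every Eigen expression, so it is the loosest type a generic function can accept; a sketch:

#include <Eigen/Dense>
#include <iostream>

// Accepts anything deriving from EigenBase (matrices, arrays, expressions...).
template<typename Derived>
void printShape(const Eigen::EigenBase<Derived>& x)
{
  std::cout << x.rows() << " x " << x.cols()
            << " (" << x.size() << " coefficients)\n";
}

int main()
{
  Eigen::Matrix2f m;
  printShape(m);                  // 2 x 2 (4 coefficients)
  printShape(Eigen::Vector3d());  // 3 x 1 (3 coefficients)
  return 0;
}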

Eigen/src/Core/Functors.h (new file, 1026 lines): diff suppressed because it is too large.

Eigen/src/Core/Fuzzy.h

@@ -19,10 +19,9 @@ namespace internal
template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
struct isApprox_selector
{
EIGEN_DEVICE_FUNC
static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
{
- EIGEN_USING_STD_MATH(min);
+ using std::min;
typename internal::nested<Derived,2>::type nested(x);
typename internal::nested<OtherDerived,2>::type otherNested(y);
return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
@@ -32,7 +31,6 @@ struct isApprox_selector
template<typename Derived, typename OtherDerived>
struct isApprox_selector<Derived, OtherDerived, true>
{
EIGEN_DEVICE_FUNC
static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&)
{
return x.matrix() == y.matrix();
@@ -42,7 +40,6 @@ struct isApprox_selector<Derived, OtherDerived, true>
template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
struct isMuchSmallerThan_object_selector
{
EIGEN_DEVICE_FUNC
static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
{
return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum();
@@ -52,7 +49,6 @@ struct isMuchSmallerThan_object_selector
template<typename Derived, typename OtherDerived>
struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
{
EIGEN_DEVICE_FUNC
static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&)
{
return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
@@ -62,7 +58,6 @@ struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
template<typename Derived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
struct isMuchSmallerThan_scalar_selector
{
EIGEN_DEVICE_FUNC
static bool run(const Derived& x, const typename Derived::RealScalar& y, const typename Derived::RealScalar& prec)
{
return x.cwiseAbs2().sum() <= numext::abs2(prec * y);
@@ -72,7 +67,6 @@ struct isMuchSmallerThan_scalar_selector
template<typename Derived>
struct isMuchSmallerThan_scalar_selector<Derived, true>
{
EIGEN_DEVICE_FUNC
static bool run(const Derived& x, const typename Derived::RealScalar&, const typename Derived::RealScalar&)
{
return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
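In words, the selectors above implement ||x - y||^2 <= prec^2 * min(||x||^2, ||y||^2) for isApprox and ||x||^2 <= prec^2 * ||y||^2 for isMuchSmallerThan, falling back to exact equality for integer scalars. A sketch:

#include <Eigen/Dense>
#include <cassert>

int main()
{
  Eigen::Vector2d x(1.0, 1.0);
  Eigen::Vector2d y = x + Eigen::Vector2d::Constant(1e-13);

  assert(x.isApprox(y));          // well within the default dummy_precision
  assert(!x.isApprox(y, 1e-16));  // fails once the precision is tightened

  // 1e-10 * x is "much smaller than" x at the default precision.
  assert((1e-10 * x).isMuchSmallerThan(x));
  return 0;
}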

Eigen/src/Core/GeneralProduct.h

@@ -66,7 +66,8 @@ template<typename Lhs, typename Rhs> struct product_type
MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
_Rhs::MaxRowsAtCompileTime),
Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
- _Rhs::RowsAtCompileTime)
+ _Rhs::RowsAtCompileTime),
+ LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
};
// the splitting into different lines of code here, introducing the _select enums and the typedef below,
@@ -256,7 +257,7 @@ template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, OuterProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
{
- template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
+ template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
@@ -280,22 +281,22 @@ class GeneralProduct<Lhs, Rhs, OuterProduct>
template<typename Dest>
inline void evalTo(Dest& dest) const {
- internal::outer_product_selector_run(*this, dest, set(), IsRowMajor<Dest>());
+ internal::outer_product_selector_run(*this, dest, set(), is_row_major<Dest>());
}
template<typename Dest>
inline void addTo(Dest& dest) const {
- internal::outer_product_selector_run(*this, dest, add(), IsRowMajor<Dest>());
+ internal::outer_product_selector_run(*this, dest, add(), is_row_major<Dest>());
}
template<typename Dest>
inline void subTo(Dest& dest) const {
- internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor<Dest>());
+ internal::outer_product_selector_run(*this, dest, sub(), is_row_major<Dest>());
}
template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
{
- internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor<Dest>());
+ internal::outer_product_selector_run(*this, dest, adds(alpha), is_row_major<Dest>());
}
};
@@ -396,7 +397,7 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() {
return ForceAlignment
- ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES)
+ ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(15))) + 16)
: m_data.array;
}
#endif
@@ -445,7 +446,7 @@ template<> struct gemv_selector<OnTheRight,ColMajor,true>
if(!evalToDest)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
- Index size = dest.size();
+ int size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if(!alphaIsCompatible)
@@ -510,7 +511,7 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true>
if(!DirectlyUseRhs)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
- Index size = actualRhs.size();
+ int size = actualRhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
@@ -564,40 +565,6 @@ template<> struct gemv_selector<OnTheRight,RowMajor,false>
*
* \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
*/
- #ifndef __CUDACC__
- #ifdef EIGEN_TEST_EVALUATORS
- template<typename Derived>
- template<typename OtherDerived>
- inline const Product<Derived, OtherDerived>
- MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
- {
-   // A note regarding the function declaration: In MSVC, this function will sometimes
-   // not be inlined since DenseStorage is an unwindable object for dynamic
-   // matrices and product types are holding a member to store the result.
-   // Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
-   enum {
-     ProductIsValid = Derived::ColsAtCompileTime==Dynamic
-                   || OtherDerived::RowsAtCompileTime==Dynamic
-                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
-     AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
-     SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
-   };
-   // note to the lost user:
-   //   * for a dot product use: v1.dot(v2)
-   //   * for a coeff-wise product use: v1.cwiseProduct(v2)
-   EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
-     INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
-   EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
-     INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
-   EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
- #ifdef EIGEN_DEBUG_PRODUCT
-   internal::product_type<Derived,OtherDerived>::debug();
- #endif
-   return Product<Derived, OtherDerived>(derived(), other.derived());
- }
- #else
  template<typename Derived>
  template<typename OtherDerived>
  inline const typename ProductReturnType<Derived, OtherDerived>::Type
@@ -627,9 +594,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
  #endif
    return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
  }
- #endif
- #endif
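The static asserts above encode the "note to the lost user" in code; the three intended spellings for vector operands are shown in this sketch:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Vector3d v1(1, 2, 3), v2(4, 5, 6);

  double d = v1.dot(v2);                        // scalar dot product: 32
  Eigen::Vector3d c = v1.cwiseProduct(v2);      // coefficient-wise: 4 10 18
  Eigen::Matrix3d outer = v1 * v2.transpose();  // a valid (outer) matrix product

  // v1 * v2 would trip INVALID_VECTOR_VECTOR_PRODUCT... at compile time.
  std::cout << d << "\n" << c.transpose() << "\n" << outer << "\n";
  return 0;
}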
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
*
* The returned product will behave like any other expressions: the coefficients of the product will be

View File

@@ -42,8 +42,6 @@ namespace internal {
struct default_packet_traits
{
enum {
HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
HasMul = 1,
@@ -73,12 +71,10 @@ struct default_packet_traits
template<typename T> struct packet_traits : default_packet_traits
{
typedef T type;
typedef T half;
enum {
Vectorizable = 0,
size = 1,
AlignedOnScalar = 0,
HasHalfPacket = 0
AlignedOnScalar = 0
};
enum {
HasAdd = 0,
@@ -95,149 +91,94 @@ template<typename T> struct packet_traits : default_packet_traits
};
/** \internal \returns a + b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> inline Packet
padd(const Packet& a,
const Packet& b) { return a+b; }
/** \internal \returns a - b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> inline Packet
psub(const Packet& a,
const Packet& b) { return a-b; }
/** \internal \returns -a (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> inline Packet
pnegate(const Packet& a) { return -a; }
/** \internal \returns conj(a) (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> inline Packet
pconj(const Packet& a) { return numext::conj(a); }
/** \internal \returns a * b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> inline Packet
pmul(const Packet& a,
const Packet& b) { return a*b; }
/** \internal \returns a / b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> inline Packet
pdiv(const Packet& a,
const Packet& b) { return a/b; }
/** \internal \returns the min of \a a and \a b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> inline Packet
pmin(const Packet& a,
const Packet& b) { EIGEN_USING_STD_MATH(min); return (min)(a, b); }
const Packet& b) { using std::min; return (min)(a, b); }
/** \internal \returns the max of \a a and \a b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> inline Packet
pmax(const Packet& a,
const Packet& b) { EIGEN_USING_STD_MATH(max); return (max)(a, b); }
const Packet& b) { using std::max; return (max)(a, b); }
/** \internal \returns the absolute value of \a a */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> inline Packet
pabs(const Packet& a) { using std::abs; return abs(a); }
/** \internal \returns the bitwise and of \a a and \a b */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> inline Packet
pand(const Packet& a, const Packet& b) { return a & b; }
/** \internal \returns the bitwise or of \a a and \a b */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> inline Packet
por(const Packet& a, const Packet& b) { return a | b; }
/** \internal \returns the bitwise xor of \a a and \a b */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> inline Packet
pxor(const Packet& a, const Packet& b) { return a ^ b; }
/** \internal \returns the bitwise andnot of \a a and \a b */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> inline Packet
pandnot(const Packet& a, const Packet& b) { return a & (!b); }
/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> inline Packet
pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
/** \internal \returns a packet version of \a *from, (un-aligned load) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> inline Packet
ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); }
/** \internal \returns a packet with elements of \a *from duplicated.
* For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
* duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
* For instance, for a packet of 8 elements, 4 scalar will be read from \a *from and
* duplicated to form: {from[0],from[0],from[1],from[1],,from[2],from[2],,from[3],from[3]}
* Currently, this function is only used for scalar * complex products.
*/
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
*/
template<typename Packet> inline Packet
ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
/** \internal \returns a packet with elements of \a *from quadrupled.
* For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
* replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
* Currently, this function is only used in matrix products.
* For packet-size smaller or equal to 4, this function is equivalent to pload1
*/
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
ploadquad(const typename unpacket_traits<Packet>::type* from)
{ return pload1<Packet>(from); }
/** \internal equivalent to
* \code
* a0 = pload1(a+0);
* a1 = pload1(a+1);
* a2 = pload1(a+2);
* a3 = pload1(a+3);
* \endcode
* \sa pset1, pload1, ploaddup, pbroadcast2
*/
template<typename Packet> EIGEN_DEVICE_FUNC
inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
Packet& a0, Packet& a1, Packet& a2, Packet& a3)
{
a0 = pload1<Packet>(a+0);
a1 = pload1<Packet>(a+1);
a2 = pload1<Packet>(a+2);
a3 = pload1<Packet>(a+3);
}
/** \internal equivalent to
* \code
* a0 = pload1(a+0);
* a1 = pload1(a+1);
* \endcode
* \sa pset1, pload1, ploaddup, pbroadcast4
*/
template<typename Packet> EIGEN_DEVICE_FUNC
inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
Packet& a0, Packet& a1)
{
a0 = pload1<Packet>(a+0);
a1 = pload1<Packet>(a+1);
}
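For the generic scalar case the broadcast helpers above reduce to plain loads. A standalone sketch with Packet = float, where pload1 is just a dereference (broadcast4_sketch is an illustrative name):
void broadcast4_sketch(const float* a, float& a0, float& a1,
                       float& a2, float& a3) {
  a0 = a[0]; a1 = a[1]; a2 = a[2]; a3 = a[3];  // one "packet" per coefficient
}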
/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
template<typename Packet> inline Packet
pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
template<typename Scalar> inline typename packet_traits<Scalar>::type
plset(const Scalar& a) { return a; }
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
template<typename Scalar, typename Packet> inline void pstore(Scalar* to, const Packet& from)
{ (*to) = from; }
/** \internal copy the packet \a from to \a *to, (un-aligned store) */
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
{ (*to) = from; }
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, DenseIndex /*stride*/)
{ return ploadu<Packet>(from); }
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, DenseIndex /*stride*/)
{ pstore(to, from); }
template<typename Scalar, typename Packet> inline void pstoreu(Scalar* to, const Packet& from)
{ (*to) = from; }
/** \internal tries to do cache prefetching of \a addr */
template<typename Scalar> inline void prefetch(const Scalar* addr)
@@ -248,45 +189,36 @@ __builtin_prefetch(addr);
}
/** \internal \returns the first element of a packet */
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
template<typename Packet> inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
{ return a; }
/** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> inline Packet
preduxp(const Packet* vecs) { return vecs[0]; }
/** \internal \returns the sum of the elements of \a a*/
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
{ return a; }
/** \internal \returns the sum of the elements of \a a by block of 4 elements.
* For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
* For packet-size smaller or equal to 4, this boils down to a noop.
*/
template<typename Packet> EIGEN_DEVICE_FUNC inline
typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
predux4(const Packet& a)
template<typename Packet> inline typename unpacket_traits<Packet>::type predux(const Packet& a)
{ return a; }
/** \internal \returns the product of the elements of \a a*/
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
template<typename Packet> inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
{ return a; }
/** \internal \returns the min of the elements of \a a*/
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
template<typename Packet> inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
{ return a; }
/** \internal \returns the max of the elements of \a a*/
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
template<typename Packet> inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
{ return a; }
/** \internal \returns the reversed elements of \a a*/
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
template<typename Packet> inline Packet preverse(const Packet& a)
{ return a; }
/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
template<typename Packet> inline Packet pcplxflip(const Packet& a)
{
// FIXME: uncomment the following in case we drop the internal imag and real functions.
// using std::imag;
@@ -318,10 +250,6 @@ Packet pasin(const Packet& a) { using std::asin; return asin(a); }
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pacos(const Packet& a) { using std::acos; return acos(a); }
/** \internal \returns the atan of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet patan(const Packet& a) { using std::atan; return atan(a); }
/** \internal \returns the exp of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pexp(const Packet& a) { using std::exp; return exp(a); }
@@ -347,7 +275,7 @@ inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename u
}
/** \internal \returns a * b + c (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> inline Packet
pmadd(const Packet& a,
const Packet& b,
const Packet& c)
@@ -408,33 +336,15 @@ inline void palign(PacketType& first, const PacketType& second)
* Fast complex products (GCC generates a function call which is very slow)
***************************************************************************/
// Eigen+CUDA does not support complexes.
#ifndef __CUDACC__
template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
{ return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
{ return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
#endif
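The specializations above inline the textbook complex product to sidestep the slow library call. A self-contained equivalent (fast_cmul is an illustrative name; like the code above it performs no inf/NaN fix-ups):
#include <complex>
inline std::complex<float> fast_cmul(const std::complex<float>& a,
                                     const std::complex<float>& b) {
  return std::complex<float>(a.real()*b.real() - a.imag()*b.imag(),
                             a.imag()*b.real() + a.real()*b.imag());
}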
/***************************************************************************
* PacketBlock, that is a collection of N packets where the number of words
* in the packet is a multiple of N.
***************************************************************************/
template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
Packet packet[N];
};
template<typename Packet> EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
// Nothing to do in the scalar case, i.e. a 1x1 matrix.
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_GENERIC_PACKET_MATH_H
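Because every primitive in this header has a scalar fallback, a generic kernel can be instantiated with Packet = float and no SIMD at all. A minimal sketch against the internal API as declared above (internal namespace, so subject to change):
#include <Eigen/Core>
float sum_two(const float* x) {
  using namespace Eigen::internal;
  float p0 = pload<float>(x);      // scalar "packet": a plain dereference
  float p1 = pload<float>(x + 1);
  return predux(padd(p0, p1));     // size-1 reduction: returns the sum itself
}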

View File

@@ -45,7 +45,6 @@ namespace Eigen
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op)

View File

@@ -49,7 +49,7 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
*/
struct IOFormat
{
/** Default constructor, see class IOFormat for the meaning of the parameters */
/** Default contructor, see class IOFormat for the meaning of the parameters */
IOFormat(int _precision = StreamPrecision, int _flags = 0,
const std::string& _coeffSeparator = " ",
const std::string& _rowSeparator = "\n", const std::string& _rowPrefix="", const std::string& _rowSuffix="",
@@ -57,10 +57,6 @@ struct IOFormat
: matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator),
rowSpacer(""), coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags)
{
// TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline
// don't add rowSpacer if columns are not to be aligned
if((flags & DontAlignCols))
return;
int i = int(matSuffix.length())-1;
while (i>=0 && matSuffix[i]!='\n')
{
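A usage sketch for the constructor documented above, matching the parameter order (precision, flags, coeffSeparator, rowSeparator, rowPrefix, rowSuffix):
#include <Eigen/Dense>
#include <iostream>
int main() {
  Eigen::IOFormat fmt(4, 0, ", ", "\n", "[", "]");
  Eigen::Matrix2d m;
  m << 1, 2, 3, 4;
  std::cout << m.format(fmt) << "\n";  // prints "[1, 2]" then "[3, 4]"
}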

View File

@@ -88,7 +88,7 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
&& ( bool(IsDynamicSize)
|| HasNoOuterStride
|| ( OuterStrideAtCompileTime!=Dynamic
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ),
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ),
Flags0 = TraitsBase::Flags & (~NestByRefBit),
Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
@@ -110,17 +110,19 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
EIGEN_DENSE_PUBLIC_INTERFACE(Map)
typedef typename Base::PointerType PointerType;
#if EIGEN2_SUPPORT_STAGE <= STAGE30_FULL_EIGEN3_API
typedef const Scalar* PointerArgType;
inline PointerType cast_to_pointer_type(PointerArgType ptr) { return const_cast<PointerType>(ptr); }
#else
typedef PointerType PointerArgType;
EIGEN_DEVICE_FUNC
inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
#endif
EIGEN_DEVICE_FUNC
inline Index innerStride() const
{
return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
}
EIGEN_DEVICE_FUNC
inline Index outerStride() const
{
return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
@@ -134,7 +136,6 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
* \param dataPtr pointer to the array to map
* \param a_stride optional Stride object, passing the strides.
*/
EIGEN_DEVICE_FUNC
inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType())
: Base(cast_to_pointer_type(dataPtr)), m_stride(a_stride)
{
@@ -147,7 +148,6 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
* \param a_size the size of the vector expression
* \param a_stride optional Stride object, passing the strides.
*/
EIGEN_DEVICE_FUNC
inline Map(PointerArgType dataPtr, Index a_size, const StrideType& a_stride = StrideType())
: Base(cast_to_pointer_type(dataPtr), a_size), m_stride(a_stride)
{
@@ -161,7 +161,6 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
* \param nbCols the number of columns of the matrix expression
* \param a_stride optional Stride object, passing the strides.
*/
EIGEN_DEVICE_FUNC
inline Map(PointerArgType dataPtr, Index nbRows, Index nbCols, const StrideType& a_stride = StrideType())
: Base(cast_to_pointer_type(dataPtr), nbRows, nbCols), m_stride(a_stride)
{
@@ -174,6 +173,19 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
StrideType m_stride;
};
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
inline Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>
::Array(const Scalar *data)
{
this->_set_noalias(Eigen::Map<const Array>(data));
}
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
inline Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>
::Matrix(const Scalar *data)
{
this->_set_noalias(Eigen::Map<const Matrix>(data));
}
} // end namespace Eigen
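A usage sketch for the stride-aware Map constructors above: viewing every other element of a raw buffer as a vector.
#include <Eigen/Dense>
double data[8] = {1, 2, 3, 4, 5, 6, 7, 8};
// inner stride 2: the view reads data[0], data[2], data[4], data[6]
Eigen::Map<Eigen::VectorXd, 0, Eigen::InnerStride<2> > v(data, 4);
// v == [1, 3, 5, 7]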

View File

@@ -76,8 +76,8 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
typedef typename Base::CoeffReturnType CoeffReturnType;
EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); }
EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); }
inline Index rows() const { return m_rows.value(); }
inline Index cols() const { return m_cols.value(); }
/** Returns a pointer to the first coefficient of the matrix or vector.
*
@@ -87,26 +87,22 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
*/
inline const Scalar* data() const { return m_data; }
EIGEN_DEVICE_FUNC
inline const Scalar& coeff(Index rowId, Index colId) const
{
return m_data[colId * colStride() + rowId * rowStride()];
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeff(Index index) const
{
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
return m_data[index * innerStride()];
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const
{
return this->m_data[colId * colStride() + rowId * rowStride()];
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const
{
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
@@ -127,14 +123,12 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
}
EIGEN_DEVICE_FUNC
inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
{
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
checkSanity();
}
EIGEN_DEVICE_FUNC
inline MapBase(PointerType dataPtr, Index vecSize)
: m_data(dataPtr),
m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)),
@@ -146,7 +140,6 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
checkSanity();
}
EIGEN_DEVICE_FUNC
inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols)
: m_data(dataPtr), m_rows(nbRows), m_cols(nbCols)
{
@@ -158,14 +151,13 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
protected:
EIGEN_DEVICE_FUNC
void checkSanity() const
{
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
internal::inner_stride_at_compile_time<Derived>::ret==1),
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0)
&& "data is not aligned");
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0)
&& "input pointer is not aligned on a 16 byte boundary");
}
PointerType m_data;
@@ -176,6 +168,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
template<typename Derived> class MapBase<Derived, WriteAccessors>
: public MapBase<Derived, ReadOnlyAccessors>
{
typedef MapBase<Derived, ReadOnlyAccessors> ReadOnlyMapBase;
public:
typedef MapBase<Derived, ReadOnlyAccessors> Base;
@@ -203,18 +196,14 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
const Scalar
>::type ScalarWithConstIfNotLvalue;
EIGEN_DEVICE_FUNC
inline const Scalar* data() const { return this->m_data; }
EIGEN_DEVICE_FUNC
inline ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error
EIGEN_DEVICE_FUNC
inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)
{
return this->m_data[col * colStride() + row * rowStride()];
}
EIGEN_DEVICE_FUNC
inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
{
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
@@ -236,18 +225,19 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
(this->m_data + index * innerStride(), val);
}
EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) : Base(dataPtr, nbRows, nbCols) {}
explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) : Base(dataPtr, nbRows, nbCols) {}
EIGEN_DEVICE_FUNC
Derived& operator=(const MapBase& other)
{
Base::Base::operator=(other);
ReadOnlyMapBase::Base::operator=(other);
return derived();
}
using Base::Base::operator=;
// In theory we could simply refer to Base::Base::operator=, but MSVC does not like Base::Base,

// see bugs 821 and 920.
using ReadOnlyMapBase::Base::operator=;
};
#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS

View File

@@ -12,15 +12,6 @@
namespace Eigen {
// On WINCE, std::abs is defined for int only, so let's define our own overloads:
// This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too.
#if defined(_WIN32_WCE) && defined(_MSC_VER) && _MSC_VER<=1500
long abs(long x) { return (labs(x)); }
double abs(double x) { return (fabs(x)); }
float abs(float x) { return (fabsf(x)); }
long double abs(long double x) { return (fabsl(x)); }
#endif
namespace internal {
/** \internal \struct global_math_functions_filtering_base
@@ -71,7 +62,6 @@ template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
struct real_default_impl
{
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_DEVICE_FUNC
static inline RealScalar run(const Scalar& x)
{
return x;
@@ -82,7 +72,6 @@ template<typename Scalar>
struct real_default_impl<Scalar,true>
{
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_DEVICE_FUNC
static inline RealScalar run(const Scalar& x)
{
using std::real;
@@ -98,6 +87,7 @@ struct real_retval
typedef typename NumTraits<Scalar>::Real type;
};
/****************************************************************************
* Implementation of imag *
****************************************************************************/
@@ -106,7 +96,6 @@ template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
struct imag_default_impl
{
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_DEVICE_FUNC
static inline RealScalar run(const Scalar&)
{
return RealScalar(0);
@@ -117,7 +106,6 @@ template<typename Scalar>
struct imag_default_impl<Scalar,true>
{
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_DEVICE_FUNC
static inline RealScalar run(const Scalar& x)
{
using std::imag;
@@ -141,12 +129,10 @@ template<typename Scalar>
struct real_ref_impl
{
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_DEVICE_FUNC
static inline RealScalar& run(Scalar& x)
{
return reinterpret_cast<RealScalar*>(&x)[0];
}
EIGEN_DEVICE_FUNC
static inline const RealScalar& run(const Scalar& x)
{
return reinterpret_cast<const RealScalar*>(&x)[0];
@@ -167,12 +153,10 @@ template<typename Scalar, bool IsComplex>
struct imag_ref_default_impl
{
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_DEVICE_FUNC
static inline RealScalar& run(Scalar& x)
{
return reinterpret_cast<RealScalar*>(&x)[1];
}
EIGEN_DEVICE_FUNC
static inline const RealScalar& run(const Scalar& x)
{
return reinterpret_cast<RealScalar*>(&x)[1];
@@ -182,12 +166,10 @@ struct imag_ref_default_impl
template<typename Scalar>
struct imag_ref_default_impl<Scalar, false>
{
EIGEN_DEVICE_FUNC
static inline Scalar run(Scalar&)
{
return Scalar(0);
}
EIGEN_DEVICE_FUNC
static inline const Scalar run(const Scalar&)
{
return Scalar(0);
@@ -210,7 +192,6 @@ struct imag_ref_retval
template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
struct conj_impl
{
EIGEN_DEVICE_FUNC
static inline Scalar run(const Scalar& x)
{
return x;
@@ -220,7 +201,6 @@ struct conj_impl
template<typename Scalar>
struct conj_impl<Scalar,true>
{
EIGEN_DEVICE_FUNC
static inline Scalar run(const Scalar& x)
{
using std::conj;
@@ -242,7 +222,6 @@ template<typename Scalar>
struct abs2_impl
{
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_DEVICE_FUNC
static inline RealScalar run(const Scalar& x)
{
return x*x;
@@ -252,7 +231,6 @@ struct abs2_impl
template<typename RealScalar>
struct abs2_impl<std::complex<RealScalar> >
{
EIGEN_DEVICE_FUNC
static inline RealScalar run(const std::complex<RealScalar>& x)
{
return real(x)*real(x) + imag(x)*imag(x);
@@ -273,7 +251,6 @@ template<typename Scalar, bool IsComplex>
struct norm1_default_impl
{
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_DEVICE_FUNC
static inline RealScalar run(const Scalar& x)
{
using std::abs;
@@ -284,7 +261,6 @@ struct norm1_default_impl
template<typename Scalar>
struct norm1_default_impl<Scalar, false>
{
EIGEN_DEVICE_FUNC
static inline Scalar run(const Scalar& x)
{
using std::abs;
@@ -311,23 +287,16 @@ struct hypot_impl
typedef typename NumTraits<Scalar>::Real RealScalar;
static inline RealScalar run(const Scalar& x, const Scalar& y)
{
EIGEN_USING_STD_MATH(max);
EIGEN_USING_STD_MATH(min);
using std::max;
using std::min;
using std::abs;
using std::sqrt;
RealScalar _x = abs(x);
RealScalar _y = abs(y);
Scalar p, qp;
if(_x>_y)
{
p = _x;
qp = _y / p;
}
else
{
p = _y;
qp = _x / p;
}
RealScalar p = (max)(_x, _y);
if(p==RealScalar(0)) return RealScalar(0);
RealScalar q = (min)(_x, _y);
RealScalar qp = q/p;
return p * sqrt(RealScalar(1) + qp*qp);
}
};
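Both versions above implement the same overflow-safe hypot; a standalone restatement of the idea (stable_hypot is an illustrative name): factor out the larger magnitude so the squared ratio stays at most 1.
#include <algorithm>
#include <cmath>
double stable_hypot(double x, double y) {
  double ax = std::abs(x), ay = std::abs(y);
  double p = std::max(ax, ay);
  if (p == 0.0) return 0.0;           // avoid 0/0
  double qp = std::min(ax, ay) / p;   // qp <= 1, so qp*qp cannot overflow
  return p * std::sqrt(1.0 + qp * qp);
}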
@@ -363,45 +332,37 @@ inline NewType cast(const OldType& x)
* Implementation of atanh2 *
****************************************************************************/
template<typename Scalar>
struct atanh2_impl
template<typename Scalar, bool IsInteger>
struct atanh2_default_impl
{
static inline Scalar run(const Scalar& x, const Scalar& r)
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
#if (__cplusplus >= 201103L) && !defined(__CYGWIN__)
using std::log1p;
return log1p(2 * x / (r - x)) / 2;
#else
using std::abs;
using std::log;
using std::sqrt;
Scalar z = x / r;
if (r == 0 || abs(z) > sqrt(NumTraits<Scalar>::epsilon()))
return log((r + x) / (r - x)) / 2;
else
return z + z*z*z / 3;
#endif
}
};
template<typename RealScalar>
struct atanh2_impl<std::complex<RealScalar> >
{
typedef std::complex<RealScalar> Scalar;
static inline Scalar run(const Scalar& x, const Scalar& r)
typedef Scalar retval;
typedef typename NumTraits<Scalar>::Real RealScalar;
static inline Scalar run(const Scalar& x, const Scalar& y)
{
using std::abs;
using std::log;
using std::norm;
using std::sqrt;
Scalar z = x / r;
if (r == Scalar(0) || norm(z) > NumTraits<RealScalar>::epsilon())
return RealScalar(0.5) * log((r + x) / (r - x));
Scalar z = x / y;
if (y == Scalar(0) || abs(z) > sqrt(NumTraits<RealScalar>::epsilon()))
return RealScalar(0.5) * log((y + x) / (y - x));
else
return z + z*z*z / RealScalar(3);
}
};
template<typename Scalar>
struct atanh2_default_impl<Scalar, true>
{
static inline Scalar run(const Scalar&, const Scalar&)
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
return Scalar(0);
}
};
template<typename Scalar>
struct atanh2_impl : atanh2_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
template<typename Scalar>
struct atanh2_retval
{
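A standalone sketch of the real-scalar branch above, where atanh2(x, y) = atanh(x/y) with a series fallback when x/y is below sqrt(epsilon); one branch in the diff switches to log1p when C++11 is available. atanh2_sketch is an illustrative name.
#include <cmath>
#include <limits>
double atanh2_sketch(double x, double y) {
  double z = x / y;
  if (y == 0.0 || std::abs(z) > std::sqrt(std::numeric_limits<double>::epsilon()))
    return 0.5 * std::log((y + x) / (y - x));
  return z + z*z*z / 3.0;  // 3-term Taylor series, accurate for tiny z
}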
@@ -593,84 +554,72 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()
namespace numext {
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
}
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x)
{
return internal::real_ref_impl<Scalar>::run(x);
}
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x);
}
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);
}
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x)
{
return internal::imag_ref_impl<Scalar>::run(x);
}
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x);
}
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x);
}
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x);
}
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x);
}
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y)
{
return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y);
}
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(atanh2, Scalar) atanh2(const Scalar& x, const Scalar& y)
{
return EIGEN_MATHFUNC_IMPL(atanh2, Scalar)::run(x, y);
}
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y)
{
return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y);
@@ -678,22 +627,11 @@ inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y)
// std::isfinite is non standard, so let's define our own version,
// even though it is not very efficient.
template<typename T>
EIGEN_DEVICE_FUNC
bool (isfinite)(const T& x)
template<typename T> bool (isfinite)(const T& x)
{
return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
}
template<typename T>
EIGEN_DEVICE_FUNC
bool (isfinite)(const std::complex<T>& x)
{
using std::real;
using std::imag;
return isfinite(real(x)) && isfinite(imag(x));
}
} // end namespace numext
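The isfinite above is a pure range check. A standalone restatement, assuming NumTraits<T>::highest()/lowest() map to plus/minus numeric_limits<T>::max() for floating-point types (is_finite_sketch is an illustrative name):
#include <limits>
template <typename T>
bool is_finite_sketch(const T& x) {
  // NaN fails both comparisons; +inf fails the first, -inf the second.
  return x < std::numeric_limits<T>::max() && x > -std::numeric_limits<T>::max();
}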
namespace internal {
@@ -711,20 +649,18 @@ template<typename Scalar>
struct scalar_fuzzy_default_impl<Scalar, false, false>
{
typedef typename NumTraits<Scalar>::Real RealScalar;
template<typename OtherScalar> EIGEN_DEVICE_FUNC
template<typename OtherScalar>
static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
{
using std::abs;
return abs(x) <= abs(y) * prec;
}
EIGEN_DEVICE_FUNC
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
{
EIGEN_USING_STD_MATH(min);
using std::min;
using std::abs;
return abs(x - y) <= (min)(abs(x), abs(y)) * prec;
}
EIGEN_DEVICE_FUNC
static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
{
return x <= y || isApprox(x, y, prec);
@@ -735,17 +671,15 @@ template<typename Scalar>
struct scalar_fuzzy_default_impl<Scalar, false, true>
{
typedef typename NumTraits<Scalar>::Real RealScalar;
template<typename OtherScalar> EIGEN_DEVICE_FUNC
template<typename OtherScalar>
static inline bool isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&)
{
return x == Scalar(0);
}
EIGEN_DEVICE_FUNC
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&)
{
return x == y;
}
EIGEN_DEVICE_FUNC
static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&)
{
return x <= y;
@@ -763,7 +697,7 @@ struct scalar_fuzzy_default_impl<Scalar, true, false>
}
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
{
EIGEN_USING_STD_MATH(min);
using std::min;
return numext::abs2(x - y) <= (min)(numext::abs2(x), numext::abs2(y)) * prec * prec;
}
};
@@ -771,21 +705,21 @@ struct scalar_fuzzy_default_impl<Scalar, true, false>
template<typename Scalar>
struct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
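The non-integer isApprox above measures the difference against the smaller of the two magnitudes. A standalone restatement for double (is_approx_sketch is an illustrative name):
#include <algorithm>
#include <cmath>
bool is_approx_sketch(double x, double y, double prec = 1e-12) {
  // relative test: true for 1.0 vs 1.0 + 1e-13, false for 1e-20 vs 0.0
  return std::abs(x - y) <= std::min(std::abs(x), std::abs(y)) * prec;
}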
template<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC
template<typename Scalar, typename OtherScalar>
inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
{
return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision);
}
template<typename Scalar> EIGEN_DEVICE_FUNC
template<typename Scalar>
inline bool isApprox(const Scalar& x, const Scalar& y,
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
{
return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision);
}
template<typename Scalar> EIGEN_DEVICE_FUNC
template<typename Scalar>
inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y,
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
{
@@ -808,19 +742,17 @@ template<> struct scalar_fuzzy_impl<bool>
{
typedef bool RealScalar;
template<typename OtherScalar> EIGEN_DEVICE_FUNC
template<typename OtherScalar>
static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&)
{
return !x;
}
EIGEN_DEVICE_FUNC
static inline bool isApprox(bool x, bool y, bool)
{
return x == y;
}
EIGEN_DEVICE_FUNC
static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&)
{
return (!x) || y;

View File

@@ -151,7 +151,6 @@ class Matrix
*
* \callgraph
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other)
{
return Base::_set(other);
@@ -168,7 +167,6 @@ class Matrix
* remain row-vectors and vectors remain vectors.
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix& operator=(const MatrixBase<OtherDerived>& other)
{
return Base::_set(other);
@@ -181,14 +179,12 @@ class Matrix
* \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived> &other)
{
return Base::operator=(other);
}
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue<OtherDerived>& func)
{
return Base::operator=(func);
@@ -204,7 +200,6 @@ class Matrix
*
* \sa resize(Index,Index)
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix() : Base()
{
Base::_check_template_params();
@@ -212,7 +207,6 @@ class Matrix
}
// FIXME is it still needed
EIGEN_DEVICE_FUNC
Matrix(internal::constructor_without_unaligned_array_assert)
: Base(internal::constructor_without_unaligned_array_assert())
{ Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
@@ -232,65 +226,41 @@ class Matrix
}
#endif
#ifndef EIGEN_PARSED_BY_DOXYGEN
// This constructor is for both 1x1 matrices and dynamic vectors
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE explicit Matrix(const T& x)
/** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
*
* Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
* it is redundant to pass the dimension here, so it makes more sense to use the default
* constructor Matrix() instead.
*/
EIGEN_STRONG_INLINE explicit Matrix(Index dim)
: Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim)
{
Base::_check_template_params();
Base::template _init1<T>(x);
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix)
eigen_assert(dim >= 0);
eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename T0, typename T1>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y)
{
Base::_check_template_params();
Base::template _init2<T0,T1>(x, y);
}
#else
/** \brief Constructs a fixed-sized matrix initialized with coefficients starting at \a data */
EIGEN_DEVICE_FUNC
explicit Matrix(const Scalar *data);
/** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
*
* This is useful for dynamic-size vectors. For fixed-size vectors,
* it is redundant to pass these parameters, so one should use the default constructor
* Matrix() instead.
*
* \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance,
* calling Matrix<double,1,1>(1) will call the initialization constructor: Matrix(const Scalar&).
* For fixed-size \c 1x1 matrices it is therefore recommended to use the default
* constructor Matrix() instead, especially when using one of the non standard
* \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
*/
EIGEN_STRONG_INLINE explicit Matrix(Index dim);
/** \brief Constructs an initialized 1x1 matrix with the given coefficient */
Matrix(const Scalar& x);
/** \brief Constructs an uninitialized matrix with \a rows rows and \a cols columns.
*
* This is useful for dynamic-size matrices. For fixed-size matrices,
* it is redundant to pass these parameters, so one should use the default constructor
* Matrix() instead.
*
* \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance,
* calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y).
* For fixed-size \c 1x2 or \c 2x1 vectors it is therefore recommended to use the default
* constructor Matrix() instead, especially when using one of the non standard
* \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
*/
EIGEN_DEVICE_FUNC
* Matrix() instead. */
Matrix(Index rows, Index cols);
/** \brief Constructs an initialized 2D vector with given coefficients */
Matrix(const Scalar& x, const Scalar& y);
#endif
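A usage sketch of the constructor set documented above:
#include <Eigen/Dense>
Eigen::VectorXd v(5);                // dynamic vector of dimension 5
Eigen::MatrixXd m(2, 3);             // uninitialized 2x3 dynamic matrix
Eigen::Vector2d p(1.0, 2.0);         // 2D vector with given coefficients
Eigen::Matrix<double, 1, 1> s(7.0);  // fixed 1x1: the Scalar-initializing ctor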
/** \brief Constructs an initialized 3D vector with given coefficients */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z)
{
Base::_check_template_params();
@@ -300,7 +270,6 @@ class Matrix
m_storage.data()[2] = z;
}
/** \brief Constructs an initialized 4D vector with given coefficients */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w)
{
Base::_check_template_params();
@@ -311,10 +280,10 @@ class Matrix
m_storage.data()[3] = w;
}
explicit Matrix(const Scalar *data);
/** \brief Constructor copying the value of the expression \a other */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix(const MatrixBase<OtherDerived>& other)
: Base(other.rows() * other.cols(), other.rows(), other.cols())
{
@@ -327,7 +296,6 @@ class Matrix
Base::_set_noalias(other);
}
/** \brief Copy constructor */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix(const Matrix& other)
: Base(other.rows() * other.cols(), other.rows(), other.cols())
{
@@ -336,7 +304,6 @@ class Matrix
}
/** \brief Copy constructor with in-place evaluation */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix(const ReturnByValue<OtherDerived>& other)
{
Base::_check_template_params();
@@ -348,7 +315,6 @@ class Matrix
* \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
: Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
{
@@ -364,22 +330,26 @@ class Matrix
* of same type it is enough to swap the data pointers.
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void swap(MatrixBase<OtherDerived> const & other)
{ this->_swap(other.derived()); }
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
inline Index innerStride() const { return 1; }
inline Index outerStride() const { return this->innerSize(); }
/////////// Geometry module ///////////
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
explicit Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Matrix& operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
#ifdef EIGEN2_SUPPORT
template<typename OtherDerived>
explicit Matrix(const eigen2_RotationBase<OtherDerived,ColsAtCompileTime>& r);
template<typename OtherDerived>
Matrix& operator=(const eigen2_RotationBase<OtherDerived,ColsAtCompileTime>& r);
#endif
// allow to extend Matrix outside Eigen
#ifdef EIGEN_MATRIX_PLUGIN
#include EIGEN_MATRIX_PLUGIN

View File

@@ -81,7 +81,6 @@ template<typename Derived> class MatrixBase
using Base::operator-=;
using Base::operator*=;
using Base::operator/=;
using Base::operator*;
typedef typename Base::CoeffReturnType CoeffReturnType;
typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
@@ -99,7 +98,6 @@ template<typename Derived> class MatrixBase
/** \returns the size of the main diagonal, which is min(rows(),cols()).
* \sa rows(), cols(), SizeAtCompileTime. */
EIGEN_DEVICE_FUNC
inline Index diagonalSize() const { return (std::min)(rows(),cols()); }
/** \brief The plain matrix type corresponding to this expression.
@@ -147,59 +145,36 @@ template<typename Derived> class MatrixBase
/** Special case of the template operator=, in order to prevent the compiler
* from generating a default operator= (issue hit with g++ 4.1)
*/
EIGEN_DEVICE_FUNC
Derived& operator=(const MatrixBase& other);
// We cannot inherit here via Base::operator= since it is causing
// trouble with MSVC.
template <typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& operator=(const DenseBase<OtherDerived>& other);
template <typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& operator=(const EigenBase<OtherDerived>& other);
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& operator=(const ReturnByValue<OtherDerived>& other);
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_DEVICE_FUNC
Derived& lazyAssign(const ProductBase<ProductDerived, Lhs,Rhs>& other);
#endif // not EIGEN_PARSED_BY_DOXYGEN
template<typename MatrixPower, typename Lhs, typename Rhs>
Derived& lazyAssign(const MatrixPowerProduct<MatrixPower, Lhs,Rhs>& other);
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& operator+=(const MatrixBase<OtherDerived>& other);
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& operator-=(const MatrixBase<OtherDerived>& other);
#ifdef __CUDACC__
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
const typename LazyProductReturnType<Derived,OtherDerived>::Type
operator*(const MatrixBase<OtherDerived> &other) const
{ return this->lazyProduct(other); }
#else
#ifdef EIGEN_TEST_EVALUATORS
template<typename OtherDerived>
const Product<Derived,OtherDerived>
operator*(const MatrixBase<OtherDerived> &other) const;
#else
template<typename OtherDerived>
const typename ProductReturnType<Derived,OtherDerived>::Type
operator*(const MatrixBase<OtherDerived> &other) const;
#endif
#endif
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
const typename LazyProductReturnType<Derived,OtherDerived>::Type
lazyProduct(const MatrixBase<OtherDerived> &other) const;
@@ -213,96 +188,84 @@ template<typename Derived> class MatrixBase
void applyOnTheRight(const EigenBase<OtherDerived>& other);
template<typename DiagonalDerived>
EIGEN_DEVICE_FUNC
const DiagonalProduct<Derived, DiagonalDerived, OnTheRight>
operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
dot(const MatrixBase<OtherDerived>& other) const;
EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;
EIGEN_DEVICE_FUNC RealScalar norm() const;
#ifdef EIGEN2_SUPPORT
template<typename OtherDerived>
Scalar eigen2_dot(const MatrixBase<OtherDerived>& other) const;
#endif
RealScalar squaredNorm() const;
RealScalar norm() const;
RealScalar stableNorm() const;
RealScalar blueNorm() const;
RealScalar hypotNorm() const;
EIGEN_DEVICE_FUNC const PlainObject normalized() const;
EIGEN_DEVICE_FUNC void normalize();
const PlainObject normalized() const;
void normalize();
EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
EIGEN_DEVICE_FUNC void adjointInPlace();
const AdjointReturnType adjoint() const;
void adjointInPlace();
typedef Diagonal<Derived> DiagonalReturnType;
EIGEN_DEVICE_FUNC
DiagonalReturnType diagonal();
typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
EIGEN_DEVICE_FUNC
ConstDiagonalReturnType diagonal() const;
template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
template<int Index>
EIGEN_DEVICE_FUNC
typename DiagonalIndexReturnType<Index>::Type diagonal();
template<int Index>
EIGEN_DEVICE_FUNC
typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
// Note: The "MatrixBase::" prefixes are added to help MSVC9 to match these declarations with the later implementations.
// On the other hand they confuse MSVC8...
#if (defined _MSC_VER) && (_MSC_VER >= 1500) // 2008 or later
typename MatrixBase::template DiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index);
typename MatrixBase::template ConstDiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index) const;
#else
EIGEN_DEVICE_FUNC
typename DiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index);
template<int Index> typename DiagonalIndexReturnType<Index>::Type diagonal();
template<int Index> typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
EIGEN_DEVICE_FUNC
typename ConstDiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index) const;
#endif
typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType;
typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType;
DiagonalDynamicIndexReturnType diagonal(Index index);
ConstDiagonalDynamicIndexReturnType diagonal(Index index) const;
#ifdef EIGEN2_SUPPORT
template<unsigned int Mode> typename internal::eigen2_part_return_type<Derived, Mode>::type part();
template<unsigned int Mode> const typename internal::eigen2_part_return_type<Derived, Mode>::type part() const;
// huuuge hack. make Eigen2's matrix.part<Diagonal>() work in eigen3. Problem: Diagonal is now a class template instead
// of an integer constant. Solution: overload the part() method template wrt template parameters list.
template<template<typename T, int N> class U>
const DiagonalWrapper<ConstDiagonalReturnType> part() const
{ return diagonal().asDiagonal(); }
#endif // EIGEN2_SUPPORT
template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
template<unsigned int Mode>
EIGEN_DEVICE_FUNC
typename TriangularViewReturnType<Mode>::Type triangularView();
template<unsigned int Mode>
EIGEN_DEVICE_FUNC
typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
template<unsigned int Mode> typename TriangularViewReturnType<Mode>::Type triangularView();
template<unsigned int Mode> typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };
template<unsigned int UpLo>
EIGEN_DEVICE_FUNC
typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
template<unsigned int UpLo>
EIGEN_DEVICE_FUNC
typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
template<unsigned int UpLo> typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
template<unsigned int UpLo> typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
const SparseView<Derived> sparseView(const Scalar& m_reference = Scalar(0),
const typename NumTraits<Scalar>::Real& m_epsilon = NumTraits<Scalar>::dummy_precision()) const;
EIGEN_DEVICE_FUNC static const IdentityReturnType Identity();
EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols);
EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i);
EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i);
EIGEN_DEVICE_FUNC static const BasisReturnType UnitX();
EIGEN_DEVICE_FUNC static const BasisReturnType UnitY();
EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ();
EIGEN_DEVICE_FUNC static const BasisReturnType UnitW();
static const IdentityReturnType Identity();
static const IdentityReturnType Identity(Index rows, Index cols);
static const BasisReturnType Unit(Index size, Index i);
static const BasisReturnType Unit(Index i);
static const BasisReturnType UnitX();
static const BasisReturnType UnitY();
static const BasisReturnType UnitZ();
static const BasisReturnType UnitW();
EIGEN_DEVICE_FUNC
const DiagonalWrapper<const Derived> asDiagonal() const;
const PermutationWrapper<const Derived> asPermutation() const;
EIGEN_DEVICE_FUNC
Derived& setIdentity();
EIGEN_DEVICE_FUNC
Derived& setIdentity(Index rows, Index cols);
bool isIdentity(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
@@ -341,26 +304,42 @@ template<typename Derived> class MatrixBase
Scalar trace() const;
template<int p> EIGEN_DEVICE_FUNC RealScalar lpNorm() const;
/////////// Array module ///////////
EIGEN_DEVICE_FUNC MatrixBase<Derived>& matrix() { return *this; }
EIGEN_DEVICE_FUNC const MatrixBase<Derived>& matrix() const { return *this; }
template<int p> RealScalar lpNorm() const;
MatrixBase<Derived>& matrix() { return *this; }
const MatrixBase<Derived>& matrix() const { return *this; }
/** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix
* \sa ArrayBase::matrix() */
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return derived(); }
/** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix
* \sa ArrayBase::matrix() */
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return derived(); }
ArrayWrapper<Derived> array() { return derived(); }
const ArrayWrapper<const Derived> array() const { return derived(); }
/////////// LU module ///////////
EIGEN_DEVICE_FUNC const FullPivLU<PlainObject> fullPivLu() const;
EIGEN_DEVICE_FUNC const PartialPivLU<PlainObject> partialPivLu() const;
const FullPivLU<PlainObject> fullPivLu() const;
const PartialPivLU<PlainObject> partialPivLu() const;
#if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS
const LU<PlainObject> lu() const;
#endif
#ifdef EIGEN2_SUPPORT
const LU<PlainObject> eigen2_lu() const;
#endif
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
const PartialPivLU<PlainObject> lu() const;
#endif
#ifdef EIGEN2_SUPPORT
template<typename ResultType>
void computeInverse(MatrixBase<ResultType> *result) const {
*result = this->inverse();
}
#endif
EIGEN_DEVICE_FUNC
const internal::inverse_impl<Derived> inverse() const;
template<typename ResultType>
void computeInverseAndDetWithCheck(
@@ -387,6 +366,10 @@ template<typename Derived> class MatrixBase
const HouseholderQR<PlainObject> householderQr() const;
const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
#ifdef EIGEN2_SUPPORT
const QR<PlainObject> qr() const;
#endif
EigenvaluesReturnType eigenvalues() const;
RealScalar operatorNorm() const;
@@ -395,6 +378,10 @@ template<typename Derived> class MatrixBase
JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
#ifdef EIGEN2_SUPPORT
SVD<PlainObject> svd() const;
#endif
/////////// Geometry module ///////////
#ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -405,24 +392,20 @@ template<typename Derived> class MatrixBase
};
#endif // EIGEN_PARSED_BY_DOXYGEN
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
typename cross_product_return_type<OtherDerived>::type
cross(const MatrixBase<OtherDerived>& other) const;
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
EIGEN_DEVICE_FUNC
PlainObject unitOrthogonal(void) const;
Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
// put this as separate enum value to work around possible GCC 4.3 bug (?)
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1?Vertical:Horizontal };
typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
HomogeneousReturnType homogeneous() const;
#endif
enum {
SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
@@ -457,6 +440,15 @@ template<typename Derived> class MatrixBase
template<typename OtherScalar>
void applyOnTheRight(Index p, Index q, const JacobiRotation<OtherScalar>& j);
///////// SparseCore module /////////
template<typename OtherDerived>
EIGEN_STRONG_INLINE const typename SparseMatrixBase<OtherDerived>::template CwiseProductDenseReturnType<Derived>::Type
cwiseProduct(const SparseMatrixBase<OtherDerived> &other) const
{
return other.cwiseProduct(derived());
}
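A usage sketch of the overload above, which forwards dense.cwiseProduct(sparse) to the sparse side; assuming the dense/sparse support backported in this branch, the result is a sparse expression.
#include <Eigen/Dense>
#include <Eigen/Sparse>
Eigen::MatrixXd d = Eigen::MatrixXd::Random(3, 3);
Eigen::SparseMatrix<double> s(3, 3);
s.insert(0, 0) = 2.0;
Eigen::SparseMatrix<double> r = d.cwiseProduct(s);  // only (0,0) can be nonzero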
///////// MatrixFunctions module /////////
typedef typename internal::stem_function<Scalar>::type StemFunction;
@@ -469,15 +461,49 @@ template<typename Derived> class MatrixBase
const MatrixSquareRootReturnValue<Derived> sqrt() const;
const MatrixLogarithmReturnValue<Derived> log() const;
const MatrixPowerReturnValue<Derived> pow(const RealScalar& p) const;
const MatrixComplexPowerReturnValue<Derived> pow(const std::complex<RealScalar>& p) const;
#ifdef EIGEN2_SUPPORT
template<typename ProductDerived, typename Lhs, typename Rhs>
Derived& operator+=(const Flagged<ProductBase<ProductDerived, Lhs,Rhs>, 0,
EvalBeforeAssigningBit>& other);
template<typename ProductDerived, typename Lhs, typename Rhs>
Derived& operator-=(const Flagged<ProductBase<ProductDerived, Lhs,Rhs>, 0,
EvalBeforeAssigningBit>& other);
/** \deprecated because .lazy() is deprecated
* Overloaded for cache friendly product evaluation */
template<typename OtherDerived>
Derived& lazyAssign(const Flagged<OtherDerived, 0, EvalBeforeAssigningBit>& other)
{ return lazyAssign(other._expression()); }
template<unsigned int Added>
const Flagged<Derived, Added, 0> marked() const;
const Flagged<Derived, 0, EvalBeforeAssigningBit> lazy() const;
inline const Cwise<Derived> cwise() const;
inline Cwise<Derived> cwise();
VectorBlock<Derived> start(Index size);
const VectorBlock<const Derived> start(Index size) const;
VectorBlock<Derived> end(Index size);
const VectorBlock<const Derived> end(Index size) const;
template<int Size> VectorBlock<Derived,Size> start();
template<int Size> const VectorBlock<const Derived,Size> start() const;
template<int Size> VectorBlock<Derived,Size> end();
template<int Size> const VectorBlock<const Derived,Size> end() const;
Minor<Derived> minor(Index row, Index col);
const Minor<Derived> minor(Index row, Index col) const;
#endif
protected:
EIGEN_DEVICE_FUNC MatrixBase() : Base() {}
MatrixBase() : Base() {}
private:
EIGEN_DEVICE_FUNC explicit MatrixBase(int);
EIGEN_DEVICE_FUNC MatrixBase(int,int);
template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase<OtherDerived>&);
explicit MatrixBase(int);
MatrixBase(int,int);
template<typename OtherDerived> explicit MatrixBase(const MatrixBase<OtherDerived>&);
protected:
// mixing arrays and matrices is not legal
template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )


@@ -37,13 +37,11 @@ class NoAlias
/** Behaves like MatrixBase::lazyAssign(other)
* \sa MatrixBase::lazyAssign() */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
{ return internal::assign_selector<ExpressionType,OtherDerived,false>::run(m_expression,other.derived()); }
/** \sa MatrixBase::operator+= */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
{
typedef SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
@@ -56,7 +54,6 @@ class NoAlias
/** \sa MatrixBase::operator-= */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
{
typedef SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
@@ -69,12 +66,10 @@ class NoAlias
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE ExpressionType& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
{ other.derived().addTo(m_expression); return m_expression; }
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE ExpressionType& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
{ other.derived().subTo(m_expression); return m_expression; }
@@ -83,7 +78,6 @@ class NoAlias
{ return m_expression.derived() += CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
template<typename Lhs, typename Rhs, int NestingFlags>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE ExpressionType& operator-=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other)
{ return m_expression.derived() -= CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
@@ -92,7 +86,6 @@ class NoAlias
{ return m_expression = func; }
#endif
EIGEN_DEVICE_FUNC
ExpressionType& expression() const
{
return m_expression;
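// Usage sketch (assuming the usual dense typedefs): noalias() promises that the
// destination does not alias the right-hand side, so the assignment operators
// above can evaluate a product directly into it, skipping the usual temporary:
//   MatrixXd A(3,3), B(3,3), C(3,3);
//   A.setRandom(); B.setRandom();
//   C.noalias() = A * B;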


@@ -68,16 +68,7 @@ template<typename T> struct GenericNumTraits
>::type NonInteger;
typedef T Nested;
EIGEN_DEVICE_FUNC
static inline Real epsilon()
{
#if defined(__CUDA_ARCH__)
return internal::device::numeric_limits<T>::epsilon();
#else
return std::numeric_limits<T>::epsilon();
#endif
}
EIGEN_DEVICE_FUNC
static inline Real epsilon() { return std::numeric_limits<T>::epsilon(); }
static inline Real dummy_precision()
{
// make sure to override this for floating-point types
@@ -85,6 +76,13 @@ template<typename T> struct GenericNumTraits
}
static inline T highest() { return (std::numeric_limits<T>::max)(); }
static inline T lowest() { return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)()); }
#ifdef EIGEN2_SUPPORT
enum {
HasFloatingPoint = !IsInteger
};
typedef NonInteger FloatingPoint;
#endif
};
template<typename T> struct NumTraits : GenericNumTraits<T>
@@ -93,13 +91,11 @@ template<typename T> struct NumTraits : GenericNumTraits<T>
template<> struct NumTraits<float>
: GenericNumTraits<float>
{
EIGEN_DEVICE_FUNC
static inline float dummy_precision() { return 1e-5f; }
};
template<> struct NumTraits<double> : GenericNumTraits<double>
{
EIGEN_DEVICE_FUNC
static inline double dummy_precision() { return 1e-12; }
};
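// Usage sketch: dummy_precision() supplies the default tolerance of Eigen's fuzzy
// comparisons, e.g. for float matrices a and b (assumed usage):
//   bool close = a.isApprox(b);          // tolerance = NumTraits<float>::dummy_precision() = 1e-5f
//   bool tight = a.isApprox(b, 1e-7f);   // an explicit precision overrides the default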


@@ -66,11 +66,11 @@ class PermutationBase : public EigenBase<Derived>
MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
MaxColsAtCompileTime = Traits::MaxColsAtCompileTime
};
typedef typename Traits::StorageIndexType StorageIndexType;
typedef typename Traits::Scalar Scalar;
typedef typename Traits::Index Index;
typedef Matrix<StorageIndexType,RowsAtCompileTime,ColsAtCompileTime,0,MaxRowsAtCompileTime,MaxColsAtCompileTime>
typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime,0,MaxRowsAtCompileTime,MaxColsAtCompileTime>
DenseMatrixType;
typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,StorageIndexType>
typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,Index>
PlainPermutationType;
using Base::derived;
#endif
@@ -147,7 +147,7 @@ class PermutationBase : public EigenBase<Derived>
/** Sets *this to be the identity permutation matrix */
void setIdentity()
{
for(StorageIndexType i = 0; i < size(); ++i)
for(Index i = 0; i < size(); ++i)
indices().coeffRef(i) = i;
}
@@ -173,8 +173,8 @@ class PermutationBase : public EigenBase<Derived>
eigen_assert(i>=0 && j>=0 && i<size() && j<size());
for(Index k = 0; k < size(); ++k)
{
if(indices().coeff(k) == i) indices().coeffRef(k) = StorageIndexType(j);
else if(indices().coeff(k) == j) indices().coeffRef(k) = StorageIndexType(i);
if(indices().coeff(k) == i) indices().coeffRef(k) = j;
else if(indices().coeff(k) == j) indices().coeffRef(k) = i;
}
return derived();
}
@@ -250,6 +250,35 @@ class PermutationBase : public EigenBase<Derived>
template<typename Other> friend
inline PlainPermutationType operator*(const Transpose<PermutationBase<Other> >& other, const PermutationBase& perm)
{ return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); }
/** \returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the permutation.
*
* This is an O(\c n) procedure that allocates a buffer of \c n booleans.
*/
Index determinant() const
{
Index res = 1;
Index n = size();
Matrix<bool,RowsAtCompileTime,1,0,MaxRowsAtCompileTime> mask(n);
mask.fill(false);
Index r = 0;
while(r < n)
{
// search for the next seed
while(r<n && mask[r]) r++;
if(r>=n)
break;
// we got one, let's follow it until we are back to the seed
Index k0 = r++;
mask.coeffRef(k0) = true;
for(Index k=indices().coeff(k0); k!=k0; k=indices().coeff(k))
{
mask.coeffRef(k) = true;
res = -res;
}
}
return res;
}
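// Behavior sketch (assumed usage): the cycle walk above flips `res` once per
// non-seed element of each cycle, so a cycle of length k contributes (-1)^(k-1):
//   PermutationMatrix<Dynamic,Dynamic> P(3);
//   P.setIdentity();                       // P.determinant() == 1
//   P.applyTranspositionOnTheRight(0,1);   // now P.determinant() == -1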
protected:
@@ -262,7 +291,7 @@ class PermutationBase : public EigenBase<Derived>
*
* \param SizeAtCompileTime the number of rows/cols, or Dynamic
* \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
* \param StorageIndexType the integer type of the indices
* \param IndexType the integer type of the indices
*
* This class represents a permutation matrix, internally stored as a vector of integers.
*
@@ -270,18 +299,17 @@ class PermutationBase : public EigenBase<Derived>
*/
namespace internal {
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType>
struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType> >
: traits<Matrix<_StorageIndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
: traits<Matrix<IndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
{
typedef Matrix<_StorageIndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
typedef typename IndicesType::Index Index;
typedef _StorageIndexType StorageIndexType;
typedef IndexType Index;
typedef Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
};
}
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType>
class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType> >
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
{
typedef PermutationBase<PermutationMatrix> Base;
typedef internal::traits<PermutationMatrix> Traits;
@@ -289,8 +317,6 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
#ifndef EIGEN_PARSED_BY_DOXYGEN
typedef typename Traits::IndicesType IndicesType;
typedef typename Traits::StorageIndexType StorageIndexType;
typedef typename Traits::Index Index;
#endif
inline PermutationMatrix()
@@ -298,7 +324,7 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
/** Constructs an uninitialized permutation matrix of given size.
*/
inline PermutationMatrix(Index size) : m_indices(size)
inline PermutationMatrix(int size) : m_indices(size)
{}
/** Copy constructor. */
@@ -387,19 +413,18 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
namespace internal {
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType, int _PacketAccess>
struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType>,_PacketAccess> >
: traits<Matrix<_StorageIndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess> >
: traits<Matrix<IndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
{
typedef Map<const Matrix<_StorageIndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
typedef typename IndicesType::Index Index;
typedef _StorageIndexType StorageIndexType;
typedef IndexType Index;
typedef Map<const Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
};
}
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType, int _PacketAccess>
class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType>,_PacketAccess>
: public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType>,_PacketAccess> >
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess>
: public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess> >
{
typedef PermutationBase<Map> Base;
typedef internal::traits<Map> Traits;
@@ -407,15 +432,14 @@ class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageInd
#ifndef EIGEN_PARSED_BY_DOXYGEN
typedef typename Traits::IndicesType IndicesType;
typedef typename IndicesType::Scalar StorageIndexType;
typedef typename IndicesType::Index Index;
typedef typename IndicesType::Scalar Index;
#endif
inline Map(const StorageIndexType* indicesPtr)
inline Map(const Index* indicesPtr)
: m_indices(indicesPtr)
{}
inline Map(const StorageIndexType* indicesPtr, Index size)
inline Map(const Index* indicesPtr, Index size)
: m_indices(indicesPtr,size)
{}
@@ -471,8 +495,7 @@ struct traits<PermutationWrapper<_IndicesType> >
{
typedef PermutationStorage StorageKind;
typedef typename _IndicesType::Scalar Scalar;
typedef typename _IndicesType::Scalar StorageIndexType;
typedef typename _IndicesType::Index Index;
typedef typename _IndicesType::Scalar Index;
typedef _IndicesType IndicesType;
enum {
RowsAtCompileTime = _IndicesType::SizeAtCompileTime,
@@ -561,7 +584,10 @@ struct permut_matrix_product_retval
const Index n = Side==OnTheLeft ? rows() : cols();
// FIXME we need an is_same for expression that is not sensitive to constness. For instance
// is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
if(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix))
if( is_same<MatrixTypeNestedCleaned,Dest>::value
&& blas_traits<MatrixTypeNestedCleaned>::HasUsableDirectAccess
&& blas_traits<Dest>::HasUsableDirectAccess
&& extract_data(dst) == extract_data(m_matrix))
{
// apply the permutation inplace
Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
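// Sketch of what the strengthened check above guards (assumed usage):
//   VectorXd v(3); v << 1, 2, 3;
//   v = P * v;   // dst aliases the source: the permutation is applied in place,
//                // following cycles with the boolean mask instead of copying
// Operands without usable direct access no longer take this in-place path.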


@@ -28,7 +28,6 @@ namespace internal {
template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
template<typename Index>
EIGEN_DEVICE_FUNC
static EIGEN_ALWAYS_INLINE void run(Index, Index)
{
}
@@ -36,7 +35,6 @@ template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
template<> struct check_rows_cols_for_overflow<Dynamic> {
template<typename Index>
EIGEN_DEVICE_FUNC
static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols)
{
// http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
@@ -131,17 +129,12 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::Flags & AlignedBit) != 0 };
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
EIGEN_DEVICE_FUNC
Base& base() { return *static_cast<Base*>(this); }
EIGEN_DEVICE_FUNC
const Base& base() const { return *static_cast<const Base*>(this); }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const
{
if(Flags & RowMajorBit)
@@ -150,13 +143,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
return m_storage.data()[rowId + colId * m_storage.rows()];
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
{
return m_storage.data()[index];
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId)
{
if(Flags & RowMajorBit)
@@ -165,13 +156,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
return m_storage.data()[rowId + colId * m_storage.rows()];
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
{
return m_storage.data()[index];
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const
{
if(Flags & RowMajorBit)
@@ -180,7 +169,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
return m_storage.data()[rowId + colId * m_storage.rows()];
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const
{
return m_storage.data()[index];
@@ -244,7 +232,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
*
* \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t)
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void resize(Index nbRows, Index nbCols)
{
eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,nbRows==RowsAtCompileTime)
@@ -275,7 +262,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
*
* \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t)
*/
EIGEN_DEVICE_FUNC
inline void resize(Index size)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
@@ -300,7 +286,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
*
* \sa resize(Index,Index)
*/
EIGEN_DEVICE_FUNC
inline void resize(NoChange_t, Index nbCols)
{
resize(rows(), nbCols);
@@ -314,7 +299,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
*
* \sa resize(Index,Index)
*/
EIGEN_DEVICE_FUNC
inline void resize(Index nbRows, NoChange_t)
{
resize(nbRows, cols());
@@ -328,7 +312,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* remain row-vectors and vectors remain vectors.
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
{
const OtherDerived& other = _other.derived();
@@ -356,7 +339,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* Matrices are resized relative to the top-left element. In case values need to be
* appended to the matrix they will be uninitialized.
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, Index nbCols)
{
internal::conservative_resize_like_impl<Derived>::run(*this, nbRows, nbCols);
@@ -369,7 +351,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
*
* In case the matrix is growing, new rows will be uninitialized.
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, NoChange_t)
{
// Note: see the comment in conservativeResize(Index,Index)
@@ -383,7 +364,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
*
* In case the matrix is growing, new columns will be uninitialized.
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index nbCols)
{
// Note: see the comment in conservativeResize(Index,Index)
@@ -398,7 +378,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
*
* When values are appended, they will be uninitialized.
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void conservativeResize(Index size)
{
internal::conservative_resize_like_impl<Derived>::run(*this, size);
@@ -414,7 +393,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* appended to the matrix they will copied from \c other.
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase<OtherDerived>& other)
{
internal::conservative_resize_like_impl<Derived,OtherDerived>::run(*this, other);
@@ -423,7 +401,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other)
{
return _set(other);
@@ -431,7 +408,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
/** \sa MatrixBase::lazyAssign() */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase<OtherDerived>& other)
{
_resize_to_match(other);
@@ -439,14 +415,12 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
}
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue<OtherDerived>& func)
{
resize(func.rows(), func.cols());
return Base::operator=(func);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE PlainObjectBase() : m_storage()
{
// _check_template_params();
@@ -456,7 +430,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
#ifndef EIGEN_PARSED_BY_DOXYGEN
// FIXME is it still needed ?
/** \internal */
EIGEN_DEVICE_FUNC
PlainObjectBase(internal::constructor_without_unaligned_array_assert)
: m_storage(internal::constructor_without_unaligned_array_assert())
{
@@ -465,13 +438,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
#endif
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC
PlainObjectBase(PlainObjectBase&& other)
: m_storage( std::move(other.m_storage) )
{
}
EIGEN_DEVICE_FUNC
PlainObjectBase& operator=(PlainObjectBase&& other)
{
using std::swap;
@@ -480,7 +451,22 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
}
#endif
EIGEN_DEVICE_FUNC
/** Copy constructor */
EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other)
: m_storage()
{
_check_template_params();
lazyAssign(other);
}
template<typename OtherDerived>
EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived> &other)
: m_storage()
{
_check_template_params();
lazyAssign(other);
}
EIGEN_STRONG_INLINE PlainObjectBase(Index a_size, Index nbRows, Index nbCols)
: m_storage(a_size, nbRows, nbCols)
{
@@ -491,7 +477,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
/** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other)
{
_resize_to_match(other);
@@ -501,7 +486,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
: m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
{
@@ -584,16 +568,16 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
//@}
using Base::setConstant;
EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& value);
EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& value);
Derived& setConstant(Index size, const Scalar& value);
Derived& setConstant(Index rows, Index cols, const Scalar& value);
using Base::setZero;
EIGEN_DEVICE_FUNC Derived& setZero(Index size);
EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols);
Derived& setZero(Index size);
Derived& setZero(Index rows, Index cols);
using Base::setOnes;
EIGEN_DEVICE_FUNC Derived& setOnes(Index size);
EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols);
Derived& setOnes(Index size);
Derived& setOnes(Index rows, Index cols);
using Base::setRandom;
Derived& setRandom(Index size);
@@ -612,7 +596,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* remain row-vectors and vectors remain vectors.
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase<OtherDerived>& other)
{
#ifdef EIGEN_NO_AUTOMATIC_RESIZING
@@ -620,6 +603,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
: (rows() == other.rows() && cols() == other.cols())))
&& "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
EIGEN_ONLY_USED_FOR_DEBUG(other);
if(this->size()==0)
resizeLike(other);
#else
resizeLike(other);
#endif
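// Behavior sketch for the branch above: with EIGEN_NO_AUTOMATIC_RESIZING defined,
// only an empty destination may still be resized on assignment (assumed usage):
//   MatrixXd a;                // 0x0: the first assignment below resizes it
//   a = MatrixXd::Ones(2,2);   // ok
//   a = MatrixXd::Ones(3,3);   // would hit the size-mismatch assertion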
@@ -640,7 +625,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* \internal
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other)
{
_set_selector(other.derived(), typename internal::conditional<static_cast<bool>(int(OtherDerived::Flags) & EvalBeforeAssigningBit), internal::true_type, internal::false_type>::type());
@@ -648,11 +632,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
}
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::true_type&) { _set_noalias(other.eval()); }
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::false_type&) { _set_noalias(other); }
/** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which
@@ -661,7 +643,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* \sa operator=(const MatrixBase<OtherDerived>&), _set()
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase<OtherDerived>& other)
{
// I don't think we need this resize call since the lazyAssign will resize anyway
@@ -673,7 +654,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
}
template<typename T0, typename T1>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init2(Index nbRows, Index nbCols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
{
EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
@@ -681,91 +661,13 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
resize(nbRows,nbCols);
}
template<typename T0, typename T1>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init2(const Scalar& val0, const Scalar& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
m_storage.data()[0] = val0;
m_storage.data()[1] = val1;
}
template<typename T0, typename T1>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1,
typename internal::enable_if< (!internal::is_same<Index,Scalar>::value)
&& (internal::is_same<T0,Index>::value)
&& (internal::is_same<T1,Index>::value)
&& Base::SizeAtCompileTime==2,T1>::type* = 0)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
m_storage.data()[0] = Scalar(val0);
m_storage.data()[1] = Scalar(val1);
}
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if<Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value,T>::type* = 0)
{
// NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
const bool is_integer = NumTraits<T>::IsInteger;
EIGEN_STATIC_ASSERT(is_integer,
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
resize(size);
}
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if<Base::SizeAtCompileTime==1 && internal::is_convertible<T, Scalar>::value,T>::type* = 0)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
m_storage.data()[0] = val0;
}
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(const Index& val0,
typename internal::enable_if< (!internal::is_same<Index,Scalar>::value)
&& (internal::is_same<Index,T>::value)
&& Base::SizeAtCompileTime==1
&& internal::is_convertible<T, Scalar>::value,T*>::type* = 0)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
m_storage.data()[0] = Scalar(val0);
}
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(const Scalar* data){
this->_set_noalias(ConstMapType(data));
}
template<typename T, typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(const DenseBase<OtherDerived>& other){
this->_set_noalias(other);
}
template<typename T, typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(const EigenBase<OtherDerived>& other){
this->derived() = other;
}
template<typename T, typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(const ReturnByValue<OtherDerived>& other)
{
resize(other.rows(), other.cols());
other.evalTo(this->derived());
}
template<typename T, typename OtherDerived, int ColsAtCompileTime>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
{
this->derived() = r;
}
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
friend struct internal::matrix_swap_impl;
@@ -774,7 +676,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* data pointers.
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void _swap(DenseBase<OtherDerived> const & other)
{
enum { SwapPointers = internal::is_same<Derived, OtherDerived>::value && Base::SizeAtCompileTime==Dynamic };
@@ -783,7 +684,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void _check_template_params()
{
EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
@@ -900,7 +800,6 @@ struct conservative_resize_like_impl<Derived,OtherDerived,true>
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
struct matrix_swap_impl
{
EIGEN_DEVICE_FUNC
static inline void run(MatrixTypeA& a, MatrixTypeB& b)
{
a.base().swap(b);
@@ -910,7 +809,6 @@ struct matrix_swap_impl
template<typename MatrixTypeA, typename MatrixTypeB>
struct matrix_swap_impl<MatrixTypeA, MatrixTypeB, true>
{
EIGEN_DEVICE_FUNC
static inline void run(MatrixTypeA& a, MatrixTypeB& b)
{
static_cast<typename MatrixTypeA::Base&>(a).m_storage.swap(static_cast<typename MatrixTypeB::Base&>(b).m_storage);
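// Usage sketch: for two dynamic-size matrices of the same type, swap() resolves to
// the specialization above and exchanges the underlying storage without copying:
//   MatrixXd a(2,2), b(3,3);
//   a.swap(b);   // storage pointers swapped: a is now 3x3, b is 2x2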


@@ -1,107 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_PRODUCT_H
#define EIGEN_PRODUCT_H
namespace Eigen {
template<typename Lhs, typename Rhs> class Product;
template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl;
/** \class Product
* \ingroup Core_Module
*
* \brief Expression of the product of two arbitrary matrices or vectors
*
* \param Lhs the type of the left-hand side expression
* \param Rhs the type of the right-hand side expression
*
* This class represents an expression of the product of two arbitrary matrices.
*
*/
// Use ProductReturnType to get correct traits, in particular vectorization flags
namespace internal {
template<typename Lhs, typename Rhs>
struct traits<Product<Lhs, Rhs> >
: traits<typename ProductReturnType<Lhs, Rhs>::Type>
{
// We want A+B*C to be of type Product<Matrix, Sum> and not Product<Matrix, Matrix>
// TODO: This flag should eventually go in a separate evaluator traits class
enum {
Flags = traits<typename ProductReturnType<Lhs, Rhs>::Type>::Flags & ~(EvalBeforeNestingBit | DirectAccessBit)
};
};
} // end namespace internal
template<typename Lhs, typename Rhs>
class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
typename internal::traits<Rhs>::StorageKind>::ret>
{
public:
typedef typename ProductImpl<
Lhs, Rhs,
typename internal::promote_storage_type<typename Lhs::StorageKind,
typename Rhs::StorageKind>::ret>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
typedef typename Lhs::Nested LhsNested;
typedef typename Rhs::Nested RhsNested;
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
{
eigen_assert(lhs.cols() == rhs.rows()
&& "invalid matrix product"
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
}
inline Index rows() const { return m_lhs.rows(); }
inline Index cols() const { return m_rhs.cols(); }
const LhsNestedCleaned& lhs() const { return m_lhs; }
const RhsNestedCleaned& rhs() const { return m_rhs; }
protected:
LhsNested m_lhs;
RhsNested m_rhs;
};
template<typename Lhs, typename Rhs>
class ProductImpl<Lhs,Rhs,Dense> : public internal::dense_xpr_base<Product<Lhs,Rhs> >::type
{
typedef Product<Lhs, Rhs> Derived;
public:
typedef typename internal::dense_xpr_base<Product<Lhs, Rhs> >::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
};
/***************************************************************************
* Implementation of matrix base methods
***************************************************************************/
/** \internal used to test the evaluator only
*/
template<typename Lhs,typename Rhs>
const Product<Lhs,Rhs>
prod(const Lhs& lhs, const Rhs& rhs)
{
return Product<Lhs,Rhs>(lhs,rhs);
}
} // end namespace Eigen
#endif // EIGEN_PRODUCT_H


@@ -85,7 +85,14 @@ class ProductBase : public MatrixBase<Derived>
public:
#ifndef EIGEN_NO_MALLOC
typedef typename Base::PlainObject BasePlainObject;
typedef Matrix<Scalar,RowsAtCompileTime==1?1:Dynamic,ColsAtCompileTime==1?1:Dynamic,BasePlainObject::Options> DynPlainObject;
typedef typename internal::conditional<(BasePlainObject::SizeAtCompileTime==Dynamic) || (BasePlainObject::SizeAtCompileTime*int(sizeof(Scalar)) < int(EIGEN_STACK_ALLOCATION_LIMIT)),
BasePlainObject, DynPlainObject>::type PlainObject;
#else
typedef typename Base::PlainObject PlainObject;
#endif
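// Sizing sketch for the typedefs above (numbers assumed): a fixed-size product
// result such as Matrix<double,256,256> would occupy 256*256*8 = 524288 bytes;
// once that exceeds EIGEN_STACK_ALLOCATION_LIMIT, PlainObject resolves to the
// heap-backed DynPlainObject rather than a stack-allocated temporary.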
ProductBase(const Lhs& a_lhs, const Rhs& a_rhs)
: m_lhs(a_lhs), m_rhs(a_rhs)
@@ -131,13 +138,17 @@ class ProductBase : public MatrixBase<Derived>
const Diagonal<FullyLazyCoeffBaseProductType,Dynamic> diagonal(Index index) const
{ return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); }
// restrict coeff accessors to 1x1 expressions. No need to care about mutators here since this isn't an Lvalue expression
typename Base::CoeffReturnType coeff(Index row, Index col) const
{
#ifdef EIGEN2_SUPPORT
return lhs().row(row).cwiseProduct(rhs().col(col).transpose()).sum();
#else
EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
eigen_assert(this->rows() == 1 && this->cols() == 1);
Matrix<Scalar,1,1> result = *this;
return result.coeff(row,col);
#endif
}
typename Base::CoeffReturnType coeff(Index i) const
@@ -176,7 +187,12 @@ namespace internal {
template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject>
struct nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject>
{
typedef PlainObject const& type;
typedef typename GeneralProduct<Lhs,Rhs,Mode>::PlainObject const& type;
};
template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject>
struct nested<const GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject>
{
typedef typename GeneralProduct<Lhs,Rhs,Mode>::PlainObject const& type;
};
}
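// Usage sketch for the 1x1 restriction above: an inner product written as a matrix
// product is statically 1x1, so coeff() is allowed and evaluates the product into a
// Matrix<Scalar,1,1> temporary (assumed usage):
//   VectorXd u(3), v(3); u.setOnes(); v.setOnes();
//   double d = (u.transpose() * v).coeff(0,0);   // d == 3.0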


@@ -1,411 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_PRODUCTEVALUATORS_H
#define EIGEN_PRODUCTEVALUATORS_H
namespace Eigen {
namespace internal {
// We can evaluate the product either all at once, like GeneralProduct and its evalTo() function, or
// traverse the matrix coefficient by coefficient, like CoeffBasedProduct. Use the existing logic
// in ProductReturnType to decide.
template<typename XprType, typename ProductType>
struct product_evaluator_dispatcher;
template<typename Lhs, typename Rhs>
struct evaluator_impl<Product<Lhs, Rhs> >
: product_evaluator_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type>
{
typedef Product<Lhs, Rhs> XprType;
typedef product_evaluator_dispatcher<XprType, typename ProductReturnType<Lhs, Rhs>::Type> Base;
evaluator_impl(const XprType& xpr) : Base(xpr)
{ }
};
template<typename XprType, typename ProductType>
struct product_evaluator_traits_dispatcher;
template<typename Lhs, typename Rhs>
struct evaluator_traits<Product<Lhs, Rhs> >
: product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type>
{
static const int AssumeAliasing = 1;
};
// Case 1: Evaluate all at once
//
// We can view the GeneralProduct class as a part of the product evaluator.
// Four sub-cases: InnerProduct, OuterProduct, GemmProduct and GemvProduct.
// InnerProduct is special because GeneralProduct does not have an evalTo() method in this case.
template<typename Lhs, typename Rhs>
struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> >
{
static const int HasEvalTo = 0;
};
template<typename Lhs, typename Rhs>
struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> >
: public evaluator<typename Product<Lhs, Rhs>::PlainObject>::type
{
typedef Product<Lhs, Rhs> XprType;
typedef typename XprType::PlainObject PlainObject;
typedef typename evaluator<PlainObject>::type evaluator_base;
// TODO: Computation is too early (?)
product_evaluator_dispatcher(const XprType& xpr) : evaluator_base(m_result)
{
m_result.coeffRef(0,0) = (xpr.lhs().transpose().cwiseProduct(xpr.rhs())).sum();
}
protected:
PlainObject m_result;
};
// For the other three subcases, simply call the evalTo() method of GeneralProduct
// TODO: GeneralProduct should take evaluators, not expression objects.
template<typename Lhs, typename Rhs, int ProductType>
struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> >
{
static const int HasEvalTo = 1;
};
template<typename Lhs, typename Rhs, int ProductType>
struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> >
{
typedef Product<Lhs, Rhs> XprType;
typedef typename XprType::PlainObject PlainObject;
typedef typename evaluator<PlainObject>::type evaluator_base;
product_evaluator_dispatcher(const XprType& xpr) : m_xpr(xpr)
{ }
template<typename DstEvaluatorType, typename DstXprType>
void evalTo(DstEvaluatorType /* not used */, DstXprType& dst) const
{
dst.resize(m_xpr.rows(), m_xpr.cols());
GeneralProduct<Lhs, Rhs, ProductType>(m_xpr.lhs(), m_xpr.rhs()).evalTo(dst);
}
protected:
const XprType& m_xpr;
};
// Case 2: Evaluate coeff by coeff
//
// This is mostly taken from CoeffBasedProduct.h
// The main difference is that we add an extra argument to the etor_product_*_impl::run() function
// for the inner dimension of the product, because evaluator objects do not know their size.
template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl;
template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl;
template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags>
struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> >
{
static const int HasEvalTo = 0;
};
template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags>
struct product_evaluator_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> >
: evaluator_impl_base<Product<Lhs, Rhs> >
{
typedef Product<Lhs, Rhs> XprType;
typedef CoeffBasedProduct<LhsNested, RhsNested, Flags> CoeffBasedProductType;
product_evaluator_dispatcher(const XprType& xpr)
: m_lhsImpl(xpr.lhs()),
m_rhsImpl(xpr.rhs()),
m_innerDim(xpr.lhs().cols())
{ }
typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketScalar PacketScalar;
typedef typename XprType::PacketReturnType PacketReturnType;
// Everything below here is taken from CoeffBasedProduct.h
enum {
RowsAtCompileTime = traits<CoeffBasedProductType>::RowsAtCompileTime,
PacketSize = packet_traits<Scalar>::size,
InnerSize = traits<CoeffBasedProductType>::InnerSize,
CoeffReadCost = traits<CoeffBasedProductType>::CoeffReadCost,
Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
CanVectorizeInner = traits<CoeffBasedProductType>::CanVectorizeInner
};
typedef typename evaluator<Lhs>::type LhsEtorType;
typedef typename evaluator<Rhs>::type RhsEtorType;
typedef etor_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
Unroll ? InnerSize-1 : Dynamic,
LhsEtorType, RhsEtorType, Scalar> CoeffImpl;
const CoeffReturnType coeff(Index row, Index col) const
{
Scalar res;
CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
return res;
}
/* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
* which is why we don't set the LinearAccessBit.
*/
const CoeffReturnType coeff(Index index) const
{
Scalar res;
const Index row = RowsAtCompileTime == 1 ? 0 : index;
const Index col = RowsAtCompileTime == 1 ? index : 0;
CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
return res;
}
template<int LoadMode>
const PacketReturnType packet(Index row, Index col) const
{
PacketScalar res;
typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
Unroll ? InnerSize-1 : Dynamic,
LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl;
PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
return res;
}
protected:
typename evaluator<Lhs>::type m_lhsImpl;
typename evaluator<Rhs>::type m_rhsImpl;
// TODO: Get rid of m_innerDim if known at compile time
Index m_innerDim;
};
/***************************************************************************
* Normal product .coeff() implementation (with meta-unrolling)
***************************************************************************/
/**************************************
*** Scalar path - no vectorization ***
**************************************/
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
{
etor_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, innerDim, res);
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
}
};
template<typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res)
{
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
}
};
template<typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res)
{
eigen_assert(innerDim>0 && "you are using an uninitialized matrix");
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
for(Index i = 1; i < innerDim; ++i)
res += lhs.coeff(row, i) * rhs.coeff(i, col);
}
};
/*******************************************
*** Scalar path with inner vectorization ***
*******************************************/
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
struct etor_product_coeff_vectorized_unroller
{
typedef typename Lhs::Index Index;
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres)
{
etor_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
}
};
template<typename Lhs, typename Rhs, typename Packet>
struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres)
{
pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
}
};
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::PacketScalar Packet;
typedef typename Lhs::Index Index;
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
{
Packet pres;
etor_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
etor_product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, innerDim, res);
res = predux(pres);
}
};
template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
struct etor_product_coeff_vectorized_dyn_selector
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
}
};
// NOTE the following 3 specializations exist because taking .col(0) on a vector is a bit slower
// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
template<typename Lhs, typename Rhs, int RhsCols>
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
}
};
template<typename Lhs, typename Rhs, int LhsRows>
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
}
};
template<typename Lhs, typename Rhs>
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.transpose().cwiseProduct(rhs).sum();
}
};
template<typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res)
{
etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, innerDim, res);
}
};
/*******************
*** Packet path ***
*******************/
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
}
};
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
eigen_assert(innerDim>0 && "you are using an uninitialized matrix");
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
for(Index i = 1; i < innerDim; ++i)
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
eigen_assert(innerDim>0 && "you are using an uninitialized matrix");
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
for(Index i = 1; i < innerDim; ++i)
res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
}
};
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_PRODUCT_EVALUATORS_H
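// Usage sketch: this removed evaluator path was exercised through the likewise
// removed prod() helper, e.g. (assumed test usage)
//   MatrixXd A(2,3), B(3,4); A.setRandom(); B.setRandom();
//   MatrixXd C = prod(A, B);   // Product<> expression evaluated by the dispatchers above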


@@ -28,18 +28,12 @@ struct functor_traits<scalar_random_op<Scalar> >
/** \returns a random matrix expression
*
* Numbers are uniformly spread through their whole definition range for integer types,
* and in the [-1:1] range for floating point scalar types.
*
* The parameters \a rows and \a cols are the number of rows and of columns of
* the returned matrix. Must be compatible with this MatrixBase type.
*
* \not_reentrant
*
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
* it is redundant to pass \a rows and \a cols as arguments, so Random() should be used
* instead.
*
*
* Example: \include MatrixBase_random_int_int.cpp
* Output: \verbinclude MatrixBase_random_int_int.out
@@ -47,10 +41,8 @@ struct functor_traits<scalar_random_op<Scalar> >
* This expression has the "evaluate before nesting" flag so that it will be evaluated into
* a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
* behavior with expressions involving random matrices.
*
* See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators.
*
* \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random()
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index), MatrixBase::Random()
*/
template<typename Derived>
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
@@ -60,15 +52,11 @@ DenseBase<Derived>::Random(Index rows, Index cols)
}
/** \returns a random vector expression
*
* Numbers are uniformly spread through their whole definition range for integer types,
* and in the [-1:1] range for floating point scalar types.
*
* The parameter \a size is the size of the returned vector.
* Must be compatible with this MatrixBase type.
*
* \only_for_vectors
* \not_reentrant
*
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
* it is redundant to pass \a size as argument, so Random() should be used
@@ -81,7 +69,7 @@ DenseBase<Derived>::Random(Index rows, Index cols)
* a temporary vector whenever it is nested in a larger expression. This prevents unexpected
* behavior with expressions involving random matrices.
*
* \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random()
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random()
*/
template<typename Derived>
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
@@ -92,9 +80,6 @@ DenseBase<Derived>::Random(Index size)
/** \returns a fixed-size random matrix or vector expression
*
* Numbers are uniformly spread through their whole definition range for integer types,
* and in the [-1:1] range for floating point scalar types.
*
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
* need to use the variants taking size arguments.
*
@@ -104,10 +89,8 @@ DenseBase<Derived>::Random(Index size)
* This expression has the "evaluate before nesting" flag so that it will be evaluated into
* a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
* behavior with expressions involving random matrices.
*
* \not_reentrant
*
* \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index)
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random(Index)
*/
template<typename Derived>
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
@@ -118,11 +101,6 @@ DenseBase<Derived>::Random()
/** Sets all coefficients in this expression to random values.
*
* Numbers are uniformly spread through their whole definition range for integer types,
* and in the [-1:1] range for floating point scalar types.
*
* \not_reentrant
*
* Example: \include MatrixBase_setRandom.cpp
* Output: \verbinclude MatrixBase_setRandom.out
*
@@ -136,16 +114,12 @@ inline Derived& DenseBase<Derived>::setRandom()
/** Resizes to the given \a newSize, and sets all coefficients in this expression to random values.
*
* Numbers are uniformly spread through their whole definition range for integer types,
* and in the [-1:1] range for floating point scalar types.
*
* \only_for_vectors
* \not_reentrant
*
* Example: \include Matrix_setRandom_int.cpp
* Output: \verbinclude Matrix_setRandom_int.out
*
* \sa DenseBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, DenseBase::Random()
* \sa MatrixBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, MatrixBase::Random()
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
@@ -157,18 +131,13 @@ PlainObjectBase<Derived>::setRandom(Index newSize)
/** Resizes to the given size, and sets all coefficients in this expression to random values.
*
* Numbers are uniformly spread through their whole definition range for integer types,
* and in the [-1:1] range for floating point scalar types.
*
* \not_reentrant
*
* \param nbRows the new number of rows
* \param nbCols the new number of columns
*
* Example: \include Matrix_setRandom_int_int.cpp
* Output: \verbinclude Matrix_setRandom_int_int.out
*
* \sa DenseBase::setRandom(), setRandom(Index), class CwiseNullaryOp, DenseBase::Random()
* \sa MatrixBase::setRandom(), setRandom(Index), class CwiseNullaryOp, MatrixBase::Random()
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&


@@ -82,7 +82,6 @@ struct redux_novec_unroller
typedef typename Derived::Scalar Scalar;
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
{
return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
@@ -100,7 +99,6 @@ struct redux_novec_unroller<Func, Derived, Start, 1>
typedef typename Derived::Scalar Scalar;
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
{
return mat.coeffByOuterInner(outer, inner);
@@ -114,7 +112,6 @@ template<typename Func, typename Derived, int Start>
struct redux_novec_unroller<Func, Derived, Start, 0>
{
typedef typename Derived::Scalar Scalar;
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
};
@@ -173,7 +170,6 @@ struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename Derived::Index Index;
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
{
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
@@ -207,7 +203,7 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
const Index packetSize = packet_traits<Scalar>::size;
const Index alignedStart = internal::first_aligned(mat);
enum {
alignment = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) || bool(Derived::Flags & AlignedBit)
alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
? Aligned : Unaligned
};
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
@@ -251,8 +247,9 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
}
};
template<typename Func, typename Derived>
struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
template<typename Func, typename Derived, int Unrolling>
struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename packet_traits<Scalar>::type PacketScalar;
@@ -303,15 +300,10 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
{
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
if (VectorizedSize > 0) {
Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
if (VectorizedSize != Size)
res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
return res;
}
else {
return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
}
Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
if (VectorizedSize != Size)
res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
return res;
}
};

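The hunks above rework the reduction kernels: the slice-vectorized specialization now bypasses unrolling for any requested Unrolling mode, and the fully unrolled vectorized path drops the VectorizedSize > 0 guard. Which kernel actually runs is an internal decision driven by expression flags, sizes and packet width; the pairing below is therefore only a plausible illustration, not a guarantee:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXf m = Eigen::MatrixXf::Random(7, 5);
  // An inner block is generally not linearly addressable, which steers
  // sum() toward the slice-vectorized reduction kernel patched above.
  float s1 = m.block(1, 1, 4, 3).sum();
  // A small fixed-size matrix can take the completely unrolled kernel.
  Eigen::Matrix3f f = Eigen::Matrix3f::Random();
  float s2 = f.sum();
  std::cout << s1 << " " << s2 << std::endl;
}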
View File

@@ -19,17 +19,17 @@ template<typename PlainObjectType, int Options = 0,
/** \class Ref
* \ingroup Core_Module
*
* \brief A matrix or vector expression mapping an existing expression
* \brief A matrix or vector expression mapping an existing expressions
*
* \tparam PlainObjectType the equivalent matrix type of the mapped data
* \tparam Options specifies whether the pointer is \c #Aligned, or \c #Unaligned.
* The default is \c #Unaligned.
* \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
* but accepts a variable outer stride (leading dimension).
* but accept a variable outer stride (leading dimension).
* This can be overridden by specifying strides.
* The type passed here must be a specialization of the Stride template, see examples below.
*
* This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
* This class permits to write non template functions taking Eigen's object as parameters while limiting the number of copies.
* A Ref<> object can represent either a const expression or a l-value:
* \code
* // in-out argument:
@@ -39,10 +39,10 @@ template<typename PlainObjectType, int Options = 0,
* void foo2(const Ref<const VectorXf>& x);
* \endcode
*
* In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
* In the in-out case, the input argument must satisfies the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
* By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
* Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with
* the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)
* Likewise, a Ref<MatrixXf> can reference any column major dense matrix expression of float whose column's elements are contiguously stored with
* the possibility to have a constant space inbetween each column, i.e.: the inner stride mmust be equal to 1, but the outer-stride (or leading dimension),
* can be greater than the number of rows.
*
* In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
@@ -58,15 +58,15 @@ template<typename PlainObjectType, int Options = 0,
* foo2(A.col().segment(2,4)); // No temporary
* \endcode
*
* The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters.
* The range of inputs that can be referenced without temporary can be enlarged using the last two template parameter.
* Here is an example accepting an inner stride != 1:
* \code
* // in-out argument:
* void foo3(Ref<VectorXf,0,InnerStride<> > x);
* foo3(A.row()); // OK
* \endcode
* The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
* expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a
* The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involved more
* expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overloads internally calling a
* template function, e.g.:
* \code
* // in the .h:
@@ -108,7 +108,8 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
OuterStrideMatch = Derived::IsVectorAtCompileTime
|| int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits<Derived>::Flags&AlignedBit)==AlignedBit),
MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch
ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
};
typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;
};
@@ -187,7 +188,11 @@ protected:
template<typename PlainObjectType, int Options, typename StrideType> class Ref
: public RefBase<Ref<PlainObjectType, Options, StrideType> >
{
private:
typedef internal::traits<Ref> Traits;
template<typename Derived>
inline Ref(const PlainObjectBase<Derived>& expr,
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0);
public:
typedef RefBase<Ref> Base;
@@ -199,17 +204,20 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
inline Ref(PlainObjectBase<Derived>& expr,
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
{
Base::construct(expr);
EIGEN_STATIC_ASSERT(static_cast<bool>(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
Base::construct(expr.derived());
}
template<typename Derived>
inline Ref(const DenseBase<Derived>& expr,
typename internal::enable_if<bool(internal::is_lvalue<Derived>::value&&bool(Traits::template match<Derived>::MatchAtCompileTime)),Derived>::type* = 0,
int = Derived::ThisConstantIsPrivateInPlainObjectBase)
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
#else
template<typename Derived>
inline Ref(DenseBase<Derived>& expr)
#endif
{
EIGEN_STATIC_ASSERT(static_cast<bool>(internal::is_lvalue<Derived>::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
EIGEN_STATIC_ASSERT(static_cast<bool>(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
enum { THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY = Derived::ThisConstantIsPrivateInPlainObjectBase};
Base::construct(expr.const_cast_derived());
}
@@ -228,13 +236,23 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<
EIGEN_DENSE_PUBLIC_INTERFACE(Ref)
template<typename Derived>
inline Ref(const DenseBase<Derived>& expr)
inline Ref(const DenseBase<Derived>& expr,
typename internal::enable_if<bool(Traits::template match<Derived>::ScalarTypeMatch),Derived>::type* = 0)
{
// std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n";
// std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n";
// std::cout << int(StrideType::InnerStrideAtCompileTime) << " - " << int(Derived::InnerStrideAtCompileTime) << "\n";
construct(expr.derived(), typename Traits::template match<Derived>::type());
}
inline Ref(const Ref& other) : Base(other) {
// copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy
}
template<typename OtherRef>
inline Ref(const RefBase<OtherRef>& other) {
construct(other.derived(), typename Traits::template match<OtherRef>::type());
}
protected:

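The documentation above sketches a public entry point forwarding to an internal template so the work is written only once; a hypothetical completion of that sketch (norm1 and norm1_impl are illustrative names, not Eigen API):

#include <Eigen/Dense>
#include <iostream>

// the actual work, written once as an internal template
template<typename Derived>
float norm1_impl(const Eigen::MatrixBase<Derived>& x)
{ return x.cwiseAbs().sum(); }

// public non-template API: accepts any float vector, arbitrary inner stride
float norm1(const Eigen::Ref<const Eigen::VectorXf, 0, Eigen::InnerStride<> >& x)
{ return norm1_impl(x); }

int main()
{
  Eigen::MatrixXf A = Eigen::MatrixXf::Random(4, 4);
  std::cout << norm1(A.col(1)) << "\n";        // contiguous: no temporary
  std::cout << norm1(A.row(2)) << "\n";        // inner stride != 1: no temporary
  std::cout << norm1(2.f * A.col(0)) << "\n";  // expression: evaluated into a temporary
}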
View File

@@ -135,7 +135,7 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
*/
template<typename Derived>
template<int RowFactor, int ColFactor>
inline const Replicate<Derived,RowFactor,ColFactor>
const Replicate<Derived,RowFactor,ColFactor>
DenseBase<Derived>::replicate() const
{
return Replicate<Derived,RowFactor,ColFactor>(derived());
@@ -150,7 +150,7 @@ DenseBase<Derived>::replicate() const
* \sa VectorwiseOp::replicate(), DenseBase::replicate<int,int>(), class Replicate
*/
template<typename Derived>
inline const Replicate<Derived,Dynamic,Dynamic>
const typename DenseBase<Derived>::ReplicateReturnType
DenseBase<Derived>::replicate(Index rowFactor,Index colFactor) const
{
return Replicate<Derived,Dynamic,Dynamic>(derived(),rowFactor,colFactor);

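For reference, the behaviour of the two replicate() overloads touched above (values chosen for illustration):

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Vector3i v(1, 2, 3);
  // compile-time factors: returns Replicate<Derived,1,3>, a 3x3 expression
  std::cout << v.replicate<1, 3>() << "\n\n";
  // run-time factors: returns the ReplicateReturnType alias used above (6x2 here)
  std::cout << v.replicate(2, 2) << std::endl;
}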
View File

@@ -57,11 +57,10 @@ template<typename Derived> class ReturnByValue
EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue)
template<typename Dest>
EIGEN_DEVICE_FUNC
inline void evalTo(Dest& dst) const
{ static_cast<const Derived*>(this)->evalTo(dst); }
EIGEN_DEVICE_FUNC inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
EIGEN_DEVICE_FUNC inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
#ifndef EIGEN_PARSED_BY_DOXYGEN
#define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT
@@ -73,6 +72,8 @@ template<typename Derived> class ReturnByValue
const Unusable& coeff(Index,Index) const { return *reinterpret_cast<const Unusable*>(this); }
Unusable& coeffRef(Index) { return *reinterpret_cast<Unusable*>(this); }
Unusable& coeffRef(Index,Index) { return *reinterpret_cast<Unusable*>(this); }
template<int LoadMode> Unusable& packet(Index) const;
template<int LoadMode> Unusable& packet(Index, Index) const;
#endif
};
@@ -84,6 +85,15 @@ Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
return derived();
}
template<typename Derived>
template<typename OtherDerived>
Derived& DenseBase<Derived>::lazyAssign(const ReturnByValue<OtherDerived>& other)
{
other.evalTo(derived());
return derived();
}
} // end namespace Eigen
#endif // EIGEN_RETURNBYVALUE_H

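A ReturnByValue expression can only be consumed by assignment: the poisoned coeff()/packet() members above turn any per-coefficient access into a compile error, and operator=/lazyAssign simply forward to evalTo(). A quick sketch, assuming inverse() is implemented on top of ReturnByValue in this generation of Eigen:

#include <Eigen/Dense>

int main()
{
  Eigen::Matrix3d A = Eigen::Matrix3d::Random();
  Eigen::Matrix3d B;
  B = A.inverse();          // OK: assignment calls evalTo(B) once
  // double d = A.inverse().coeff(0,0); // would not compile: coeff() is Unusable
  double d = B.coeff(0, 0); // evaluate first, then access coefficients
  (void)d;
  return 0;
}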
View File

@@ -69,23 +69,17 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
};
typedef typename MatrixType::PlainObject PlainObject;
EIGEN_DEVICE_FUNC
inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
{}
EIGEN_DEVICE_FUNC
inline Index rows() const { return m_matrix.rows(); }
EIGEN_DEVICE_FUNC
inline Index cols() const { return m_matrix.cols(); }
EIGEN_DEVICE_FUNC
inline Index outerStride() const { return m_matrix.outerStride(); }
EIGEN_DEVICE_FUNC
inline Index innerStride() const { return m_matrix.innerStride(); }
/** \sa MatrixBase::coeff()
* \warning the coordinates must fit into the referenced triangular part
*/
EIGEN_DEVICE_FUNC
inline Scalar coeff(Index row, Index col) const
{
Base::check_coordinates_internal(row, col);
@@ -95,7 +89,6 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
/** \sa MatrixBase::coeffRef()
* \warning the coordinates must fit into the referenced triangular part
*/
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index row, Index col)
{
Base::check_coordinates_internal(row, col);
@@ -103,17 +96,13 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
}
/** \internal */
EIGEN_DEVICE_FUNC
const MatrixTypeNestedCleaned& _expression() const { return m_matrix; }
EIGEN_DEVICE_FUNC
const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
EIGEN_DEVICE_FUNC
MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
/** Efficient self-adjoint matrix times vector/matrix product */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
SelfadjointProductMatrix<MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime>
operator*(const MatrixBase<OtherDerived>& rhs) const
{
@@ -124,7 +113,6 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
/** Efficient vector/matrix times self-adjoint matrix product */
template<typename OtherDerived> friend
EIGEN_DEVICE_FUNC
SelfadjointProductMatrix<OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false>
operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView& rhs)
{
@@ -144,7 +132,6 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
* \sa rankUpdate(const MatrixBase<DerivedU>&, Scalar)
*/
template<typename DerivedU, typename DerivedV>
EIGEN_DEVICE_FUNC
SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha = Scalar(1));
/** Perform a symmetric rank K update of the selfadjoint matrix \c *this:
@@ -158,7 +145,6 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
* \sa rankUpdate(const MatrixBase<DerivedU>&, const MatrixBase<DerivedV>&, Scalar)
*/
template<typename DerivedU>
EIGEN_DEVICE_FUNC
SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
/////////// Cholesky module ///////////
@@ -173,10 +159,31 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
/** Return type of eigenvalues() */
typedef Matrix<RealScalar, internal::traits<MatrixType>::ColsAtCompileTime, 1> EigenvaluesReturnType;
EIGEN_DEVICE_FUNC
EigenvaluesReturnType eigenvalues() const;
EIGEN_DEVICE_FUNC
RealScalar operatorNorm() const;
#ifdef EIGEN2_SUPPORT
template<typename OtherDerived>
SelfAdjointView& operator=(const MatrixBase<OtherDerived>& other)
{
enum {
OtherPart = UpLo == Upper ? StrictlyLower : StrictlyUpper
};
m_matrix.const_cast_derived().template triangularView<UpLo>() = other;
m_matrix.const_cast_derived().template triangularView<OtherPart>() = other.adjoint();
return *this;
}
template<typename OtherMatrixType, unsigned int OtherMode>
SelfAdjointView& operator=(const TriangularView<OtherMatrixType, OtherMode>& other)
{
enum {
OtherPart = UpLo == Upper ? StrictlyLower : StrictlyUpper
};
m_matrix.const_cast_derived().template triangularView<UpLo>() = other.toDenseMatrix();
m_matrix.const_cast_derived().template triangularView<OtherPart>() = other.toDenseMatrix().adjoint();
return *this;
}
#endif
protected:
MatrixTypeNested m_matrix;
@@ -202,7 +209,6 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
};
EIGEN_DEVICE_FUNC
static inline void run(Derived1 &dst, const Derived2 &src)
{
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);
@@ -217,7 +223,6 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
{
EIGEN_DEVICE_FUNC
static inline void run(Derived1 &, const Derived2 &) {}
};
@@ -229,7 +234,6 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
};
EIGEN_DEVICE_FUNC
static inline void run(Derived1 &dst, const Derived2 &src)
{
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);
@@ -244,7 +248,6 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
{
EIGEN_DEVICE_FUNC
static inline void run(Derived1 &, const Derived2 &) {}
};
@@ -252,7 +255,6 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
EIGEN_DEVICE_FUNC
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
@@ -270,7 +272,6 @@ struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dyn
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
{
EIGEN_DEVICE_FUNC
static inline void run(Derived1 &dst, const Derived2 &src)
{
typedef typename Derived1::Index Index;

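A short sketch of the rankUpdate() members declared above (the EIGEN_DEVICE_FUNC removals do not change their semantics):

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd m = Eigen::MatrixXd::Zero(3, 3);
  Eigen::VectorXd u = Eigen::VectorXd::Random(3);
  Eigen::VectorXd v = Eigen::VectorXd::Random(3);
  // rank-1 update: m += 2 * u * u^T, touching only the Lower triangle
  m.selfadjointView<Eigen::Lower>().rankUpdate(u, 2.0);
  // rank-2 update: m += u*v^T + v*u^T (alpha defaults to 1)
  m.selfadjointView<Eigen::Lower>().rankUpdate(u, v);
  // materialize the full symmetric matrix from the stored triangle
  std::cout << m.selfadjointView<Eigen::Lower>().toDenseMatrix() << std::endl;
}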
View File

@@ -35,7 +35,7 @@ struct traits<SelfCwiseBinaryOp<BinaryOp,Lhs,Rhs> >
enum {
// Note that it is still a good idea to preserve the DirectAccessBit
// so that assign can correctly align the data.
Flags = traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&AlignedBit) | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit),
Flags = traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit),
OuterStrideAtCompileTime = Lhs::OuterStrideAtCompileTime,
InnerStrideAtCompileTime = Lhs::InnerStrideAtCompileTime
};
@@ -52,24 +52,21 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
typedef typename internal::packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC
inline SelfCwiseBinaryOp(Lhs& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}
EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_matrix.outerStride(); }
EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_matrix.innerStride(); }
EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_matrix.data(); }
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
inline Index outerStride() const { return m_matrix.outerStride(); }
inline Index innerStride() const { return m_matrix.innerStride(); }
inline const Scalar* data() const { return m_matrix.data(); }
// note that this function is needed by assign to correctly align loads/stores
// TODO make Assign use .data()
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index row, Index col)
{
EIGEN_STATIC_ASSERT_LVALUE(Lhs)
return m_matrix.const_cast_derived().coeffRef(row, col);
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index row, Index col) const
{
return m_matrix.coeffRef(row, col);
@@ -77,20 +74,17 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
// note that this function is needed by assign to correctly align loads/stores
// TODO make Assign use .data()
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index index)
{
EIGEN_STATIC_ASSERT_LVALUE(Lhs)
return m_matrix.const_cast_derived().coeffRef(index);
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const
{
return m_matrix.const_cast_derived().coeffRef(index);
}
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
@@ -101,7 +95,6 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
}
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
@@ -132,7 +125,6 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
// reimplement lazyAssign to handle complex *= real
// see CwiseBinaryOp ctor for details
template<typename RhsDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE SelfCwiseBinaryOp& lazyAssign(const DenseBase<RhsDerived>& rhs)
{
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs,RhsDerived)
@@ -152,20 +144,17 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
// overloaded to honor evaluation of special matrices
// maybe another solution would be to not use SelfCwiseBinaryOp
// at first...
EIGEN_DEVICE_FUNC
SelfCwiseBinaryOp& operator=(const Rhs& _rhs)
{
typename internal::nested<Rhs>::type rhs(_rhs);
return Base::operator=(rhs);
}
EIGEN_DEVICE_FUNC
Lhs& expression() const
{
return m_matrix;
}
EIGEN_DEVICE_FUNC
const BinaryOp& functor() const
{
return m_functor;
@@ -188,24 +177,6 @@ inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
return derived();
}
template<typename Derived>
inline Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
tmp = PlainObject::Constant(rows(),cols(),other);
return derived();
}
template<typename Derived>
inline Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
tmp = PlainObject::Constant(rows(),cols(),other);
return derived();
}
template<typename Derived>
inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
{

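The compound scalar operators that remain in this file route through SelfCwiseBinaryOp so coefficients are updated in place; operator/= now performs a true coefficient-wise division rather than multiplying by the reciprocal. For reference:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::VectorXf v = Eigen::VectorXf::Constant(4, 1.0f);
  v *= 2.0f;  // in-place scaling via SelfCwiseBinaryOp
  v /= 3.0f;  // a true division by 3, not v *= (1/3)
  Eigen::ArrayXf a = Eigen::ArrayXf::Zero(4);
  a += 1.5f;  // scalar += on arrays is unaffected by the refactoring above
  std::cout << v.transpose() << "\n" << a.transpose() << std::endl;
}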
View File

@@ -20,7 +20,7 @@ inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& sc
using std::max;
Scalar maxCoeff = bl.cwiseAbs().maxCoeff();
if(maxCoeff>scale)
if (maxCoeff>scale)
{
ssq = ssq * numext::abs2(scale/maxCoeff);
Scalar tmp = Scalar(1)/maxCoeff;
@@ -29,21 +29,12 @@ inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& sc
invScale = NumTraits<Scalar>::highest();
scale = Scalar(1)/invScale;
}
else if(maxCoeff>NumTraits<Scalar>::highest()) // we got a INF
{
invScale = Scalar(1);
scale = maxCoeff;
}
else
{
scale = maxCoeff;
invScale = tmp;
}
}
else if(maxCoeff!=maxCoeff) // we got a NaN
{
scale = maxCoeff;
}
// TODO if the maxCoeff is much much smaller than the current scale,
// then we can neglect this sub vector
@@ -58,13 +49,13 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
typedef typename Derived::RealScalar RealScalar;
typedef typename Derived::Index Index;
using std::pow;
EIGEN_USING_STD_MATH(min);
EIGEN_USING_STD_MATH(max);
using std::min;
using std::max;
using std::sqrt;
using std::abs;
const Derived& vec(_vec.derived());
static bool initialized = false;
static RealScalar b1, b2, s1m, s2m, rbig, relerr;
static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr;
if(!initialized)
{
int ibeta, it, iemin, iemax, iexp;
@@ -93,6 +84,7 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
iexp = - ((iemax+it)/2);
s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
overfl = rbig*s2m; // overflow boundary for abig
eps = RealScalar(pow(double(ibeta), 1-it));
relerr = sqrt(eps); // tolerance for neglecting asml
initialized = true;
@@ -109,13 +101,13 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
else if(ax < b1) asml += numext::abs2(ax*s1m);
else amed += numext::abs2(ax);
}
if(amed!=amed)
return amed; // we got a NaN
if(abig > RealScalar(0))
{
abig = sqrt(abig);
if(abig > rbig) // overflow, or *this contains INF values
return abig; // return INF
if(abig > overfl)
{
return rbig;
}
if(amed > RealScalar(0))
{
abig = abig/s2m;
@@ -160,7 +152,7 @@ template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::stableNorm() const
{
EIGEN_USING_STD_MATH(min);
using std::min;
using std::sqrt;
const Index blockSize = 4096;
RealScalar scale(0);

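What the rescaling above buys numerically, in a quick demonstration (exact digits depend on the platform):

#include <Eigen/Dense>
#include <iostream>
#include <limits>

int main()
{
  const double big = std::numeric_limits<double>::max() / 4;
  Eigen::Vector2d v(big, big);
  std::cout << v.norm()       << "\n";  // squaredNorm overflows: prints inf
  std::cout << v.stableNorm() << "\n";  // blockwise rescaling: ~6.4e307
  std::cout << v.blueNorm()   << "\n";  // Blue's algorithm: same value
  std::cout << v.hypotNorm()  << "\n";  // chained hypot(): same value
}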
View File

@@ -51,7 +51,6 @@ class Stride
};
/** Default constructor, for use when strides are fixed at compile time */
EIGEN_DEVICE_FUNC
Stride()
: m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
{
@@ -59,7 +58,6 @@ class Stride
}
/** Constructor allowing the strides to be specified at runtime */
EIGEN_DEVICE_FUNC
Stride(Index outerStride, Index innerStride)
: m_outer(outerStride), m_inner(innerStride)
{
@@ -67,16 +65,13 @@ class Stride
}
/** Copy constructor */
EIGEN_DEVICE_FUNC
Stride(const Stride& other)
: m_outer(other.outer()), m_inner(other.inner())
{}
/** \returns the outer stride */
EIGEN_DEVICE_FUNC
inline Index outer() const { return m_outer.value(); }
/** \returns the inner stride */
EIGEN_DEVICE_FUNC
inline Index inner() const { return m_inner.value(); }
protected:
@@ -92,8 +87,8 @@ class InnerStride : public Stride<0, Value>
typedef Stride<0, Value> Base;
public:
typedef DenseIndex Index;
EIGEN_DEVICE_FUNC InnerStride() : Base() {}
EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {}
InnerStride() : Base() {}
InnerStride(Index v) : Base(0, v) {}
};
/** \brief Convenience specialization of Stride to specify only an outer stride
@@ -104,8 +99,8 @@ class OuterStride : public Stride<Value, 0>
typedef Stride<Value, 0> Base;
public:
typedef DenseIndex Index;
EIGEN_DEVICE_FUNC OuterStride() : Base() {}
EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v,0) {}
OuterStride() : Base() {}
OuterStride(Index v) : Base(v,0) {}
};
} // end namespace Eigen

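Stride and its InnerStride/OuterStride convenience specializations are consumed by Map (and Ref); a small sketch of both:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  float data[] = {1, 2, 3, 4, 5, 6, 7, 8};
  // view every other element of data as a 4-vector: inner stride 2
  Eigen::Map<Eigen::VectorXf, 0, Eigen::InnerStride<2> > v(data, 4);
  // view data as a 2x2 matrix whose columns start 3 floats apart
  Eigen::Map<Eigen::MatrixXf, 0, Eigen::OuterStride<> > m(data, 2, 2,
      Eigen::OuterStride<>(3));
  std::cout << v.transpose() << "\n\n" << m << std::endl;
}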
View File

@@ -33,16 +33,11 @@ template<typename ExpressionType> class SwapWrapper
EIGEN_DENSE_PUBLIC_INTERFACE(SwapWrapper)
typedef typename internal::packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC
inline SwapWrapper(ExpressionType& xpr) : m_expression(xpr) {}
EIGEN_DEVICE_FUNC
inline Index rows() const { return m_expression.rows(); }
EIGEN_DEVICE_FUNC
inline Index cols() const { return m_expression.cols(); }
EIGEN_DEVICE_FUNC
inline Index outerStride() const { return m_expression.outerStride(); }
EIGEN_DEVICE_FUNC
inline Index innerStride() const { return m_expression.innerStride(); }
typedef typename internal::conditional<
@@ -51,37 +46,30 @@ template<typename ExpressionType> class SwapWrapper
const Scalar
>::type ScalarWithConstIfNotLvalue;
EIGEN_DEVICE_FUNC
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
EIGEN_DEVICE_FUNC
inline const Scalar* data() const { return m_expression.data(); }
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index rowId, Index colId)
{
return m_expression.const_cast_derived().coeffRef(rowId, colId);
}
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index index)
{
return m_expression.const_cast_derived().coeffRef(index);
}
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index rowId, Index colId) const
{
return m_expression.coeffRef(rowId, colId);
}
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index index) const
{
return m_expression.coeffRef(index);
}
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void copyCoeff(Index rowId, Index colId, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
@@ -93,7 +81,6 @@ template<typename ExpressionType> class SwapWrapper
}
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
@@ -128,7 +115,6 @@ template<typename ExpressionType> class SwapWrapper
_other.template writePacket<LoadMode>(index, tmp);
}
EIGEN_DEVICE_FUNC
ExpressionType& expression() const { return m_expression; }
protected:

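SwapWrapper is internal plumbing behind DenseBase::swap(); for reference, the user-facing behaviour it implements:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Matrix2i a, b;
  a << 1, 2,
       3, 4;
  b << 5, 6,
       7, 8;
  a.row(0).swap(b.row(0)); // coefficient-wise swap through SwapWrapper
  a.col(0).swap(a.col(1)); // also works between disjoint parts of one matrix
  std::cout << a << "\n\n" << b << std::endl;
}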
View File

@@ -62,21 +62,18 @@ template<typename MatrixType> class Transpose
typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
EIGEN_DEVICE_FUNC
inline Transpose(MatrixType& a_matrix) : m_matrix(a_matrix) {}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.cols(); }
EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.rows(); }
inline Index rows() const { return m_matrix.cols(); }
inline Index cols() const { return m_matrix.rows(); }
/** \returns the nested expression */
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const { return m_matrix; }
/** \returns the nested expression */
EIGEN_DEVICE_FUNC
typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() { return m_matrix.const_cast_derived(); }
@@ -109,8 +106,8 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl)
EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
typedef typename internal::conditional<
internal::is_lvalue<MatrixType>::value,
@@ -121,39 +118,33 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
inline const Scalar* data() const { return derived().nestedExpression().data(); }
EIGEN_DEVICE_FUNC
inline ScalarWithConstIfNotLvalue& coeffRef(Index rowId, Index colId)
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return derived().nestedExpression().const_cast_derived().coeffRef(colId, rowId);
}
EIGEN_DEVICE_FUNC
inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return derived().nestedExpression().const_cast_derived().coeffRef(index);
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const
{
return derived().nestedExpression().coeffRef(colId, rowId);
}
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const
{
return derived().nestedExpression().coeffRef(index);
}
EIGEN_DEVICE_FUNC
inline CoeffReturnType coeff(Index rowId, Index colId) const
{
return derived().nestedExpression().coeff(colId, rowId);
}
EIGEN_DEVICE_FUNC
inline CoeffReturnType coeff(Index index) const
{
return derived().nestedExpression().coeff(index);

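Transpose is a view: coeffRef(row,col) above simply forwards to the nested expression with the indices exchanged, so writes go straight through to the underlying matrix.

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Matrix2i m;
  m << 1, 2,
       3, 4;
  m.transpose().coeffRef(0, 1) = 9; // writes m(1,0) through the view
  m.transposeInPlace();             // safe alternative to the aliasing m = m.transpose()
  std::cout << m << std::endl;
}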
View File

@@ -53,8 +53,7 @@ class TranspositionsBase
public:
typedef typename Traits::IndicesType IndicesType;
typedef typename IndicesType::Scalar StorageIndexType;
typedef typename IndicesType::Index Index;
typedef typename IndicesType::Scalar Index;
Derived& derived() { return *static_cast<Derived*>(this); }
const Derived& derived() const { return *static_cast<const Derived*>(this); }
@@ -82,17 +81,17 @@ class TranspositionsBase
inline Index size() const { return indices().size(); }
/** Direct access to the underlying index vector */
inline const StorageIndexType& coeff(Index i) const { return indices().coeff(i); }
inline const Index& coeff(Index i) const { return indices().coeff(i); }
/** Direct access to the underlying index vector */
inline StorageIndexType& coeffRef(Index i) { return indices().coeffRef(i); }
inline Index& coeffRef(Index i) { return indices().coeffRef(i); }
/** Direct access to the underlying index vector */
inline const StorageIndexType& operator()(Index i) const { return indices()(i); }
inline const Index& operator()(Index i) const { return indices()(i); }
/** Direct access to the underlying index vector */
inline StorageIndexType& operator()(Index i) { return indices()(i); }
inline Index& operator()(Index i) { return indices()(i); }
/** Direct access to the underlying index vector */
inline const StorageIndexType& operator[](Index i) const { return indices()(i); }
inline const Index& operator[](Index i) const { return indices()(i); }
/** Direct access to the underlying index vector */
inline StorageIndexType& operator[](Index i) { return indices()(i); }
inline Index& operator[](Index i) { return indices()(i); }
/** const version of indices(). */
const IndicesType& indices() const { return derived().indices(); }
@@ -100,7 +99,7 @@ class TranspositionsBase
IndicesType& indices() { return derived().indices(); }
/** Resizes to given size. */
inline void resize(Index newSize)
inline void resize(int newSize)
{
indices().resize(newSize);
}
@@ -108,7 +107,7 @@ class TranspositionsBase
/** Sets \c *this to represent an identity transformation */
void setIdentity()
{
for(StorageIndexType i = 0; i < indices().size(); ++i)
for(int i = 0; i < indices().size(); ++i)
coeffRef(i) = i;
}
@@ -145,26 +144,23 @@ class TranspositionsBase
};
namespace internal {
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType>
struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndexType> >
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType> >
{
typedef Matrix<_StorageIndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
typedef typename IndicesType::Index Index;
typedef _StorageIndexType StorageIndexType;
typedef IndexType Index;
typedef Matrix<Index, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
};
}
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType>
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndexType> >
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType> >
{
typedef internal::traits<Transpositions> Traits;
public:
typedef TranspositionsBase<Transpositions> Base;
typedef typename Traits::IndicesType IndicesType;
typedef typename IndicesType::Scalar StorageIndexType;
typedef typename IndicesType::Index Index;
typedef typename IndicesType::Scalar Index;
inline Transpositions() {}
@@ -219,32 +215,30 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim
namespace internal {
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType, int _PacketAccess>
struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndexType>,_PacketAccess> >
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,_PacketAccess> >
{
typedef Map<const Matrix<_StorageIndexType,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;
typedef typename IndicesType::Index Index;
typedef _StorageIndexType StorageIndexType;
typedef IndexType Index;
typedef Map<const Matrix<Index,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;
};
}
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndexType, int PacketAccess>
class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndexType>,PacketAccess>
: public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndexType>,PacketAccess> >
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int PacketAccess>
class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,PacketAccess>
: public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,PacketAccess> >
{
typedef internal::traits<Map> Traits;
public:
typedef TranspositionsBase<Map> Base;
typedef typename Traits::IndicesType IndicesType;
typedef typename IndicesType::Scalar StorageIndexType;
typedef typename IndicesType::Index Index;
typedef typename IndicesType::Scalar Index;
inline Map(const StorageIndexType* indicesPtr)
inline Map(const Index* indicesPtr)
: m_indices(indicesPtr)
{}
inline Map(const StorageIndexType* indicesPtr, Index size)
inline Map(const Index* indicesPtr, Index size)
: m_indices(indicesPtr,size)
{}
@@ -281,8 +275,7 @@ namespace internal {
template<typename _IndicesType>
struct traits<TranspositionsWrapper<_IndicesType> >
{
typedef typename _IndicesType::Scalar StorageIndexType;
typedef typename _IndicesType::Index Index;
typedef typename _IndicesType::Scalar Index;
typedef _IndicesType IndicesType;
};
}
@@ -296,8 +289,7 @@ class TranspositionsWrapper
typedef TranspositionsBase<TranspositionsWrapper> Base;
typedef typename Traits::IndicesType IndicesType;
typedef typename IndicesType::Scalar StorageIndexType;
typedef typename IndicesType::Index Index;
typedef typename IndicesType::Scalar Index;
inline TranspositionsWrapper(IndicesType& a_indices)
: m_indices(a_indices)
@@ -371,25 +363,24 @@ struct transposition_matrix_product_retval
{
typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
typedef typename TranspositionType::Index Index;
typedef typename TranspositionType::StorageIndexType StorageIndexType;
transposition_matrix_product_retval(const TranspositionType& tr, const MatrixType& matrix)
: m_transpositions(tr), m_matrix(matrix)
{}
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
inline int rows() const { return m_matrix.rows(); }
inline int cols() const { return m_matrix.cols(); }
template<typename Dest> inline void evalTo(Dest& dst) const
{
const Index size = m_transpositions.size();
StorageIndexType j = 0;
const int size = m_transpositions.size();
Index j = 0;
if(!(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix)))
dst = m_matrix;
for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
if(Index(j=m_transpositions.coeff(k))!=k)
for(int k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
if((j=m_transpositions.coeff(k))!=k)
{
if(Side==OnTheLeft)
dst.row(k).swap(dst.row(j));

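A Transpositions object stores a sequence of elementary row/column swaps (as produced by the pivoting decompositions); applying it is exactly what the evalTo() loop above implements:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Transpositions<3> tr;
  tr.setIdentity();
  tr[1] = 2;                   // step k=1: swap indices 1 and 2
  Eigen::Matrix3d m = Eigen::Matrix3d::Identity();
  Eigen::Matrix3d p = tr * m;  // apply the swaps to the rows
  Eigen::Matrix3d q = m * tr;  // or apply them to the columns
  std::cout << p << "\n\n" << q << std::endl;
}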
View File

@@ -44,39 +44,29 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
typedef typename internal::traits<Derived>::DenseMatrixType DenseMatrixType;
typedef DenseMatrixType DenseType;
EIGEN_DEVICE_FUNC
inline TriangularBase() { eigen_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); }
EIGEN_DEVICE_FUNC
inline Index rows() const { return derived().rows(); }
EIGEN_DEVICE_FUNC
inline Index cols() const { return derived().cols(); }
EIGEN_DEVICE_FUNC
inline Index outerStride() const { return derived().outerStride(); }
EIGEN_DEVICE_FUNC
inline Index innerStride() const { return derived().innerStride(); }
EIGEN_DEVICE_FUNC
inline Scalar coeff(Index row, Index col) const { return derived().coeff(row,col); }
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index row, Index col) { return derived().coeffRef(row,col); }
/** \see MatrixBase::copyCoeff(row,col)
*/
template<typename Other>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, Other& other)
{
derived().coeffRef(row, col) = other.coeff(row, col);
}
EIGEN_DEVICE_FUNC
inline Scalar operator()(Index row, Index col) const
{
check_coordinates(row, col);
return coeff(row,col);
}
EIGEN_DEVICE_FUNC
inline Scalar& operator()(Index row, Index col)
{
check_coordinates(row, col);
@@ -84,20 +74,15 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
EIGEN_DEVICE_FUNC
inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
EIGEN_DEVICE_FUNC
inline Derived& derived() { return *static_cast<Derived*>(this); }
#endif // not EIGEN_PARSED_BY_DOXYGEN
template<typename DenseDerived>
EIGEN_DEVICE_FUNC
void evalTo(MatrixBase<DenseDerived> &other) const;
template<typename DenseDerived>
EIGEN_DEVICE_FUNC
void evalToLazy(MatrixBase<DenseDerived> &other) const;
EIGEN_DEVICE_FUNC
DenseMatrixType toDenseMatrix() const
{
DenseMatrixType res(rows(), cols());
@@ -204,52 +189,36 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
| (Mode & (ZeroDiag))
};
EIGEN_DEVICE_FUNC
inline TriangularView(const MatrixType& matrix) : m_matrix(matrix)
{}
EIGEN_DEVICE_FUNC
inline Index rows() const { return m_matrix.rows(); }
EIGEN_DEVICE_FUNC
inline Index cols() const { return m_matrix.cols(); }
EIGEN_DEVICE_FUNC
inline Index outerStride() const { return m_matrix.outerStride(); }
EIGEN_DEVICE_FUNC
inline Index innerStride() const { return m_matrix.innerStride(); }
/** \sa MatrixBase::operator+=() */
template<typename Other>
EIGEN_DEVICE_FUNC
TriangularView& operator+=(const DenseBase<Other>& other) { return *this = m_matrix + other.derived(); }
/** \sa MatrixBase::operator+=() */
template<typename Other> TriangularView& operator+=(const DenseBase<Other>& other) { return *this = m_matrix + other.derived(); }
/** \sa MatrixBase::operator-=() */
template<typename Other>
EIGEN_DEVICE_FUNC
TriangularView& operator-=(const DenseBase<Other>& other) { return *this = m_matrix - other.derived(); }
template<typename Other> TriangularView& operator-=(const DenseBase<Other>& other) { return *this = m_matrix - other.derived(); }
/** \sa MatrixBase::operator*=() */
EIGEN_DEVICE_FUNC
TriangularView& operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix * other; }
/** \sa MatrixBase::operator/=() */
EIGEN_DEVICE_FUNC
TriangularView& operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix / other; }
/** \sa MatrixBase::fill() */
EIGEN_DEVICE_FUNC
void fill(const Scalar& value) { setConstant(value); }
/** \sa MatrixBase::setConstant() */
EIGEN_DEVICE_FUNC
TriangularView& setConstant(const Scalar& value)
{ return *this = MatrixType::Constant(rows(), cols(), value); }
/** \sa MatrixBase::setZero() */
EIGEN_DEVICE_FUNC
TriangularView& setZero() { return setConstant(Scalar(0)); }
/** \sa MatrixBase::setOnes() */
EIGEN_DEVICE_FUNC
TriangularView& setOnes() { return setConstant(Scalar(1)); }
/** \sa MatrixBase::coeff()
* \warning the coordinates must fit into the referenced triangular part
*/
EIGEN_DEVICE_FUNC
inline Scalar coeff(Index row, Index col) const
{
Base::check_coordinates_internal(row, col);
@@ -259,62 +228,49 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
/** \sa MatrixBase::coeffRef()
* \warning the coordinates must fit into the referenced triangular part
*/
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index row, Index col)
{
Base::check_coordinates_internal(row, col);
return m_matrix.const_cast_derived().coeffRef(row, col);
}
EIGEN_DEVICE_FUNC
const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
EIGEN_DEVICE_FUNC
MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
/** Assigns a triangular matrix to a triangular part of a dense matrix */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
TriangularView& operator=(const TriangularBase<OtherDerived>& other);
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
TriangularView& operator=(const MatrixBase<OtherDerived>& other);
EIGEN_DEVICE_FUNC
TriangularView& operator=(const TriangularView& other)
{ return *this = other.nestedExpression(); }
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void lazyAssign(const TriangularBase<OtherDerived>& other);
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void lazyAssign(const MatrixBase<OtherDerived>& other);
/** \sa MatrixBase::conjugate() */
EIGEN_DEVICE_FUNC
inline TriangularView<MatrixConjugateReturnType,Mode> conjugate()
{ return m_matrix.conjugate(); }
/** \sa MatrixBase::conjugate() const */
EIGEN_DEVICE_FUNC
inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const
{ return m_matrix.conjugate(); }
/** \sa MatrixBase::adjoint() const */
EIGEN_DEVICE_FUNC
inline const TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
{ return m_matrix.adjoint(); }
/** \sa MatrixBase::transpose() */
EIGEN_DEVICE_FUNC
inline TriangularView<Transpose<MatrixType>,TransposeMode> transpose()
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return m_matrix.const_cast_derived().transpose();
}
/** \sa MatrixBase::transpose() const */
EIGEN_DEVICE_FUNC
inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const
{
return m_matrix.transpose();
@@ -322,7 +278,6 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
/** Efficient triangular matrix times vector/matrix product */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
TriangularProduct<Mode, true, MatrixType, false, OtherDerived, OtherDerived::ColsAtCompileTime==1>
operator*(const MatrixBase<OtherDerived>& rhs) const
{
@@ -333,7 +288,6 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
/** Efficient vector/matrix times triangular matrix product */
template<typename OtherDerived> friend
EIGEN_DEVICE_FUNC
TriangularProduct<Mode, false, OtherDerived, OtherDerived::RowsAtCompileTime==1, MatrixType, false>
operator*(const MatrixBase<OtherDerived>& lhs, const TriangularView& rhs)
{
@@ -342,33 +296,56 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
(lhs.derived(),rhs.m_matrix);
}
#ifdef EIGEN2_SUPPORT
template<typename OtherDerived>
struct eigen2_product_return_type
{
typedef typename TriangularView<MatrixType,Mode>::DenseMatrixType DenseMatrixType;
typedef typename OtherDerived::PlainObject::DenseType OtherPlainObject;
typedef typename ProductReturnType<DenseMatrixType, OtherPlainObject>::Type ProdRetType;
typedef typename ProdRetType::PlainObject type;
};
template<typename OtherDerived>
const typename eigen2_product_return_type<OtherDerived>::type
operator*(const EigenBase<OtherDerived>& rhs) const
{
typename OtherDerived::PlainObject::DenseType rhsPlainObject;
rhs.evalTo(rhsPlainObject);
return this->toDenseMatrix() * rhsPlainObject;
}
template<typename OtherMatrixType>
bool isApprox(const TriangularView<OtherMatrixType, Mode>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
{
return this->toDenseMatrix().isApprox(other.toDenseMatrix(), precision);
}
template<typename OtherDerived>
bool isApprox(const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
{
return this->toDenseMatrix().isApprox(other, precision);
}
#endif // EIGEN2_SUPPORT
template<int Side, typename Other>
EIGEN_DEVICE_FUNC
inline const internal::triangular_solve_retval<Side,TriangularView, Other>
solve(const MatrixBase<Other>& other) const;
template<int Side, typename OtherDerived>
EIGEN_DEVICE_FUNC
void solveInPlace(const MatrixBase<OtherDerived>& other) const;
template<typename Other>
EIGEN_DEVICE_FUNC
inline const internal::triangular_solve_retval<OnTheLeft,TriangularView, Other>
solve(const MatrixBase<Other>& other) const
{ return solve<OnTheLeft>(other); }
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void solveInPlace(const MatrixBase<OtherDerived>& other) const
{ return solveInPlace<OnTheLeft>(other); }
EIGEN_DEVICE_FUNC
const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const
{
EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
}
EIGEN_DEVICE_FUNC
SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView()
{
EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
@@ -376,21 +353,18 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
}
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void swap(TriangularBase<OtherDerived> const & other)
{
TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived());
}
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void swap(MatrixBase<OtherDerived> const & other)
{
SwapWrapper<MatrixType> swaper(const_cast<MatrixType&>(m_matrix));
TriangularView<SwapWrapper<MatrixType>,Mode>(swaper).lazyAssign(other.derived());
}
EIGEN_DEVICE_FUNC
Scalar determinant() const
{
if (Mode & UnitDiag)
@@ -403,55 +377,57 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
// TODO simplify the following:
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TriangularView& operator=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
{
setZero();
return assignProduct(other,1);
return assignProduct(other.derived(),1);
}
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TriangularView& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
{
return assignProduct(other,1);
return assignProduct(other.derived(),1);
}
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TriangularView& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
{
return assignProduct(other,-1);
return assignProduct(other.derived(),-1);
}
template<typename ProductDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TriangularView& operator=(const ScaledProduct<ProductDerived>& other)
{
setZero();
return assignProduct(other,other.alpha());
return assignProduct(other.derived(),other.alpha());
}
template<typename ProductDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TriangularView& operator+=(const ScaledProduct<ProductDerived>& other)
{
return assignProduct(other,other.alpha());
return assignProduct(other.derived(),other.alpha());
}
template<typename ProductDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TriangularView& operator-=(const ScaledProduct<ProductDerived>& other)
{
return assignProduct(other,-other.alpha());
return assignProduct(other.derived(),-other.alpha());
}
protected:
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha);
template<int Mode, bool LhsIsTriangular,
typename Lhs, bool LhsIsVector,
typename Rhs, bool RhsIsVector>
EIGEN_STRONG_INLINE TriangularView& assignProduct(const TriangularProduct<Mode, LhsIsTriangular, Lhs, LhsIsVector, Rhs, RhsIsVector>& prod, const Scalar& alpha)
{
lazyAssign(alpha*prod.eval());
return *this;
}
MatrixTypeNested m_matrix;
};
@@ -472,7 +448,6 @@ struct triangular_assignment_selector
typedef typename Derived1::Scalar Scalar;
EIGEN_DEVICE_FUNC
static inline void run(Derived1 &dst, const Derived2 &src)
{
triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src);
@@ -501,7 +476,6 @@ struct triangular_assignment_selector
template<typename Derived1, typename Derived2, unsigned int Mode, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite>
{
EIGEN_DEVICE_FUNC
static inline void run(Derived1 &, const Derived2 &) {}
};
@@ -510,7 +484,6 @@ struct triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearO
{
typedef typename Derived1::Index Index;
typedef typename Derived1::Scalar Scalar;
EIGEN_DEVICE_FUNC
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
@@ -529,7 +502,6 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
EIGEN_DEVICE_FUNC
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
@@ -549,7 +521,6 @@ struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic
{
typedef typename Derived1::Index Index;
typedef typename Derived1::Scalar Scalar;
EIGEN_DEVICE_FUNC
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
@@ -568,7 +539,6 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
EIGEN_DEVICE_FUNC
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
@@ -587,7 +557,6 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
EIGEN_DEVICE_FUNC
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
@@ -608,7 +577,6 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
EIGEN_DEVICE_FUNC
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
@@ -751,6 +719,41 @@ void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
* Implementation of MatrixBase methods
***************************************************************************/
#ifdef EIGEN2_SUPPORT
// implementation of part<>(), including the SelfAdjoint case.
namespace internal {
template<typename MatrixType, unsigned int Mode>
struct eigen2_part_return_type
{
typedef TriangularView<MatrixType, Mode> type;
};
template<typename MatrixType>
struct eigen2_part_return_type<MatrixType, SelfAdjoint>
{
typedef SelfAdjointView<MatrixType, Upper> type;
};
}
/** \deprecated use MatrixBase::triangularView() */
template<typename Derived>
template<unsigned int Mode>
const typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Derived>::part() const
{
return derived();
}
/** \deprecated use MatrixBase::triangularView() */
template<typename Derived>
template<unsigned int Mode>
typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Derived>::part()
{
return derived();
}
#endif
/**
* \returns an expression of a triangular view extracted from the current matrix
*

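The members above in action: assignment through a triangular view, and the solve()/solveInPlace() pair (random data, purely illustrative):

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Matrix3d A = Eigen::Matrix3d::Random();
  A.diagonal().array() += 4.0;  // keep the triangular factor well conditioned
  Eigen::Vector3d b = Eigen::Vector3d::Ones();
  // solve L x = b where L is the lower triangle of A
  Eigen::Vector3d x = A.triangularView<Eigen::Lower>().solve(b);
  A.triangularView<Eigen::Lower>().solveInPlace(b); // same, overwriting b
  std::cout << (x - b).norm() << std::endl;         // ~0
}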
View File

@@ -72,7 +72,6 @@ template<typename VectorType, int Size> class VectorBlock
/** Dynamic-size constructor
*/
EIGEN_DEVICE_FUNC
inline VectorBlock(VectorType& vector, Index start, Index size)
: Base(vector,
IsColVector ? start : 0, IsColVector ? 0 : start,
@@ -83,7 +82,6 @@ template<typename VectorType, int Size> class VectorBlock
/** Fixed-size constructor
*/
EIGEN_DEVICE_FUNC
inline VectorBlock(VectorType& vector, Index start)
: Base(vector, IsColVector ? start : 0, IsColVector ? 0 : start)
{

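VectorBlock is the type returned by segment()/head()/tail(); the two constructors above correspond to its dynamic- and fixed-size flavours:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::VectorXi v(6);
  v << 0, 1, 2, 3, 4, 5;
  v.segment(1, 3).setZero();   // dynamic-size ctor: start=1, size=3
  v.head<2>().setConstant(7);  // fixed-size ctor: start=0, Size=2
  std::cout << v.transpose() << std::endl;  // 7 7 0 0 4 5
}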
View File

@@ -302,7 +302,6 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \returns a row (or column) vector expression of the squared norm
* of each column (or row) of the referenced expression.
* This is a vector with real entries, even if the original matrix has complex entries.
*
* Example: \include PartialRedux_squaredNorm.cpp
* Output: \verbinclude PartialRedux_squaredNorm.out
@@ -313,7 +312,6 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \returns a row (or column) vector expression of the norm
* of each column (or row) of the referenced expression.
* This is a vector with real entries, even if the original matrix has complex entries.
*
* Example: \include PartialRedux_norm.cpp
* Output: \verbinclude PartialRedux_norm.out
@@ -325,8 +323,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \returns a row (or column) vector expression of the norm
* of each column (or row) of the referenced expression, using
* Blue's algorithm.
* This is a vector with real entries, even if the original matrix has complex entries.
* blue's algorithm.
*
* \sa DenseBase::blueNorm() */
const typename ReturnType<internal::member_blueNorm,RealScalar>::Type blueNorm() const
@@ -336,7 +333,6 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \returns a row (or column) vector expression of the norm
* of each column (or row) of the referenced expression, avoiding
* underflow and overflow.
* This is a vector with real entries, even if the original matrix has complex entries.
*
* \sa DenseBase::stableNorm() */
const typename ReturnType<internal::member_stableNorm,RealScalar>::Type stableNorm() const
@@ -346,7 +342,6 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \returns a row (or column) vector expression of the norm
* of each column (or row) of the referenced expression, avoiding
* underflow and overflow using a concatenation of hypot() calls.
* This is a vector with real entries, even if the original matrix has complex entries.
*
* \sa DenseBase::hypotNorm() */
const typename ReturnType<internal::member_hypotNorm,RealScalar>::Type hypotNorm() const
@@ -371,7 +366,6 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \returns a row (or column) vector expression representing
* whether \b all coefficients of each respective column (or row) are \c true.
* This expression can be assigned to a vector with entries of type \c bool.
*
* \sa DenseBase::all() */
const typename ReturnType<internal::member_all>::Type all() const
@@ -379,7 +373,6 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \returns a row (or column) vector expression representing
* whether \b at \b least one coefficient of each respective column (or row) is \c true.
* This expression can be assigned to a vector with entries of type \c bool.
*
* \sa DenseBase::any() */
const typename ReturnType<internal::member_any>::Type any() const
@@ -387,8 +380,6 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \returns a row (or column) vector expression representing
* the number of \c true coefficients of each respective column (or row).
* This expression can be assigned to a vector whose entries have the same type as is used to
* index entries of the original matrix; for dense matrices, this is \c std::ptrdiff_t .
*
* Example: \include PartialRedux_count.cpp
* Output: \verbinclude PartialRedux_count.out
@@ -560,7 +551,9 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/////////// Geometry module ///////////
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
Homogeneous<ExpressionType,Direction> homogeneous() const;
#endif
typedef typename ExpressionType::PlainObject CrossReturnType;
template<typename OtherDerived>


@@ -194,7 +194,7 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
internal::min_coeff_visitor<Derived> minVisitor;
this->visit(minVisitor);
*index = IndexType((RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row);
*index = (RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row;
return minVisitor.res;
}


@@ -1,6 +0,0 @@
FILE(GLOB Eigen_Core_arch_AVX_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_arch_AVX_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AVX COMPONENT Devel
)


@@ -1,463 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_COMPLEX_AVX_H
#define EIGEN_COMPLEX_AVX_H
namespace Eigen {
namespace internal {
//---------- float ----------
struct Packet4cf
{
EIGEN_STRONG_INLINE Packet4cf() {}
EIGEN_STRONG_INLINE explicit Packet4cf(const __m256& a) : v(a) {}
__m256 v;
};
template<> struct packet_traits<std::complex<float> > : default_packet_traits
{
typedef Packet4cf type;
typedef Packet2cf half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
HasHalfPacket = 1,
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasSetLinear = 0
};
};
template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cf pnegate(const Packet4cf& a)
{
return Packet4cf(pnegate(a.v));
}
template<> EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a)
{
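// Conjugation flips the sign of the imaginary parts: xor the sign bit
// (0x80000000) of the odd (imaginary) lanes, leaving the reals untouched.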
const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000));
return Packet4cf(_mm256_xor_ps(a.v,mask));
}
template<> EIGEN_STRONG_INLINE Packet4cf pmul<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
{
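// Complex product (ar + i*ai)*(br + i*bi) = (ar*br - ai*bi) + i*(ar*bi + ai*br):
// moveldup duplicates a's real parts so tmp1 = (ar*br, ar*bi), movehdup
// duplicates a's imaginary parts while the permute swaps b's components so
// tmp2 = (ai*bi, ai*br); addsub subtracts in the real lanes and adds in the
// imaginary lanes, yielding the result above.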
__m256 tmp1 = _mm256_mul_ps(_mm256_moveldup_ps(a.v), b.v);
__m256 tmp2 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2,3,0,1)));
__m256 result = _mm256_addsub_ps(tmp1, tmp2);
return Packet4cf(result);
}
template<> EIGEN_STRONG_INLINE Packet4cf pand <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_and_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cf por <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_or_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cf pxor <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_xor_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_andnot_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cf pload <Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(pload<Packet8f>(&numext::real_ref(*from))); }
template<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(ploadu<Packet8f>(&numext::real_ref(*from))); }
template<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)
{
return Packet4cf(_mm256_castpd_ps(_mm256_broadcast_sd((const double*)(const void*)&from)));
}
template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)
{
// FIXME The following might be optimized using _mm256_movedup_pd
Packet2cf a = ploaddup<Packet2cf>(from);
Packet2cf b = ploaddup<Packet2cf>(from+1);
return Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(a.v), b.v, 1));
}
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }
template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, DenseIndex stride)
{
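// _mm256_set_ps lists lanes from most significant down to least, hence the
// (imag, real) pairs appear in reverse order, from stride 3 down to stride 0.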
return Packet4cf(_mm256_set_ps(std::imag(from[3*stride]), std::real(from[3*stride]),
std::imag(from[2*stride]), std::real(from[2*stride]),
std::imag(from[1*stride]), std::real(from[1*stride]),
std::imag(from[0*stride]), std::real(from[0*stride])));
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, DenseIndex stride)
{
__m128 low = _mm256_extractf128_ps(from.v, 0);
to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)),
_mm_cvtss_f32(_mm_shuffle_ps(low, low, 1)));
to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)),
_mm_cvtss_f32(_mm_shuffle_ps(low, low, 3)));
__m128 high = _mm256_extractf128_ps(from.v, 1);
to[stride*2] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 0)),
_mm_cvtss_f32(_mm_shuffle_ps(high, high, 1)));
to[stride*3] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)),
_mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)));
}
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet4cf>(const Packet4cf& a)
{
return pfirst(Packet2cf(_mm256_castps256_ps128(a.v)));
}
template<> EIGEN_STRONG_INLINE Packet4cf preverse(const Packet4cf& a) {
__m128 low = _mm256_extractf128_ps(a.v, 0);
__m128 high = _mm256_extractf128_ps(a.v, 1);
__m128d lowd = _mm_castps_pd(low);
__m128d highd = _mm_castps_pd(high);
low = _mm_castpd_ps(_mm_shuffle_pd(lowd,lowd,0x1));
high = _mm_castpd_ps(_mm_shuffle_pd(highd,highd,0x1));
__m256 result = _mm256_setzero_ps();
result = _mm256_insertf128_ps(result, low, 1);
result = _mm256_insertf128_ps(result, high, 0);
return Packet4cf(result);
}
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(const Packet4cf& a)
{
return predux(padd(Packet2cf(_mm256_extractf128_ps(a.v,0)),
Packet2cf(_mm256_extractf128_ps(a.v,1))));
}
template<> EIGEN_STRONG_INLINE Packet4cf preduxp<Packet4cf>(const Packet4cf* vecs)
{
Packet8f t0 = _mm256_shuffle_ps(vecs[0].v, vecs[0].v, _MM_SHUFFLE(3, 1, 2 ,0));
Packet8f t1 = _mm256_shuffle_ps(vecs[1].v, vecs[1].v, _MM_SHUFFLE(3, 1, 2 ,0));
t0 = _mm256_hadd_ps(t0,t1);
Packet8f t2 = _mm256_shuffle_ps(vecs[2].v, vecs[2].v, _MM_SHUFFLE(3, 1, 2 ,0));
Packet8f t3 = _mm256_shuffle_ps(vecs[3].v, vecs[3].v, _MM_SHUFFLE(3, 1, 2 ,0));
t2 = _mm256_hadd_ps(t2,t3);
t1 = _mm256_permute2f128_ps(t0,t2, 0 + (2<<4));
t3 = _mm256_permute2f128_ps(t0,t2, 1 + (3<<4));
return Packet4cf(_mm256_add_ps(t1,t3));
}
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a)
{
return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)),
Packet2cf(_mm256_extractf128_ps(a.v, 1))));
}
template<int Offset>
struct palign_impl<Offset,Packet4cf>
{
static EIGEN_STRONG_INLINE void run(Packet4cf& first, const Packet4cf& second)
{
if (Offset==0) return;
palign_impl<Offset*2,Packet8f>::run(first.v, second.v);
}
};
template<> struct conj_helper<Packet4cf, Packet4cf, false,true>
{
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
{
return internal::pmul(a, pconj(b));
}
};
template<> struct conj_helper<Packet4cf, Packet4cf, true,false>
{
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
{
return internal::pmul(pconj(a), b);
}
};
template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
{
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
{
return pconj(internal::pmul(a, b));
}
};
template<> struct conj_helper<Packet8f, Packet4cf, false,false>
{
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const
{ return Packet4cf(Eigen::internal::pmul(x, y.v)); }
};
template<> struct conj_helper<Packet4cf, Packet8f, false,false>
{
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const
{ return Packet4cf(Eigen::internal::pmul(x.v, y)); }
};
template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
{
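// a/b = a*conj(b) / |b|^2: num holds a*conj(b); tmp = (br^2, bi^2) and the
// shuffle swaps each pair, so denom carries |b|^2 in both lanes of each
// complex before the final element-wise division.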
Packet4cf num = pmul(a, pconj(b));
__m256 tmp = _mm256_mul_ps(b.v, b.v);
__m256 tmp2 = _mm256_shuffle_ps(tmp,tmp,0xB1);
__m256 denom = _mm256_add_ps(tmp, tmp2);
return Packet4cf(_mm256_div_ps(num.v, denom));
}
template<> EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x)
{
return Packet4cf(_mm256_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0 ,1)));
}
//---------- double ----------
struct Packet2cd
{
EIGEN_STRONG_INLINE Packet2cd() {}
EIGEN_STRONG_INLINE explicit Packet2cd(const __m256d& a) : v(a) {}
__m256d v;
};
template<> struct packet_traits<std::complex<double> > : default_packet_traits
{
typedef Packet2cd type;
typedef Packet1cd half;
enum {
Vectorizable = 1,
AlignedOnScalar = 0,
size = 2,
HasHalfPacket = 1,
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasSetLinear = 0
};
};
template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cd pnegate(const Packet2cd& a) { return Packet2cd(pnegate(a.v)); }
template<> EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a)
{
const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000,0x0,0x0,0x0,0x80000000,0x0,0x0,0x0));
return Packet2cd(_mm256_xor_pd(a.v,mask));
}
template<> EIGEN_STRONG_INLINE Packet2cd pmul<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
{
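// Same scheme as the float path: shuffle mask 0x0 duplicates a's real parts,
// 0xF duplicates its imaginary parts, 0x5 swaps b's components, and addsub
// combines the partial products into real and imaginary lanes.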
__m256d tmp1 = _mm256_shuffle_pd(a.v,a.v,0x0);
__m256d even = _mm256_mul_pd(tmp1, b.v);
__m256d tmp2 = _mm256_shuffle_pd(a.v,a.v,0xF);
__m256d tmp3 = _mm256_shuffle_pd(b.v,b.v,0x5);
__m256d odd = _mm256_mul_pd(tmp2, tmp3);
return Packet2cd(_mm256_addsub_pd(even, odd));
}
template<> EIGEN_STRONG_INLINE Packet2cd pand <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_and_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cd por <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_or_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cd pxor <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_xor_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_andnot_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cd pload <Packet2cd>(const std::complex<double>* from)
{ EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(pload<Packet4d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<double>* from)
{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(ploadu<Packet4d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from)
{
// If casting to a __m128d* is really not safe, we can still fall back to this (much slower) version:
// return Packet2cd(_mm256_loadu2_m128d((const double*)&from,(const double*)&from));
return Packet2cd(_mm256_broadcast_pd((const __m128d*)(const void*)&from));
}
template<> EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex<double>* from) { return pset1<Packet2cd>(*from); }
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, DenseIndex stride)
{
return Packet2cd(_mm256_set_pd(std::imag(from[1*stride]), std::real(from[1*stride]),
std::imag(from[0*stride]), std::real(from[0*stride])));
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, DenseIndex stride)
{
__m128d low = _mm256_extractf128_pd(from.v, 0);
to[stride*0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));
__m128d high = _mm256_extractf128_pd(from.v, 1);
to[stride*1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)));
}
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet2cd>(const Packet2cd& a)
{
__m128d low = _mm256_extractf128_pd(a.v, 0);
EIGEN_ALIGN16 double res[2];
_mm_store_pd(res, low);
return std::complex<double>(res[0],res[1]);
}
template<> EIGEN_STRONG_INLINE Packet2cd preverse(const Packet2cd& a) {
__m256d result = _mm256_permute2f128_pd(a.v, a.v, 1);
return Packet2cd(result);
}
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(const Packet2cd& a)
{
return predux(padd(Packet1cd(_mm256_extractf128_pd(a.v,0)),
Packet1cd(_mm256_extractf128_pd(a.v,1))));
}
template<> EIGEN_STRONG_INLINE Packet2cd preduxp<Packet2cd>(const Packet2cd* vecs)
{
Packet4d t0 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 0 + (2<<4));
Packet4d t1 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 1 + (3<<4));
return Packet2cd(_mm256_add_pd(t0,t1));
}
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a)
{
return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v,0)),
Packet1cd(_mm256_extractf128_pd(a.v,1))));
}
template<int Offset>
struct palign_impl<Offset,Packet2cd>
{
static EIGEN_STRONG_INLINE void run(Packet2cd& first, const Packet2cd& second)
{
if (Offset==0) return;
palign_impl<Offset*2,Packet4d>::run(first.v, second.v);
}
};
template<> struct conj_helper<Packet2cd, Packet2cd, false,true>
{
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
{
return internal::pmul(a, pconj(b));
}
};
template<> struct conj_helper<Packet2cd, Packet2cd, true,false>
{
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
{
return internal::pmul(pconj(a), b);
}
};
template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
{
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
{
return pconj(internal::pmul(a, b));
}
};
template<> struct conj_helper<Packet4d, Packet2cd, false,false>
{
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const
{ return Packet2cd(Eigen::internal::pmul(x, y.v)); }
};
template<> struct conj_helper<Packet2cd, Packet4d, false,false>
{
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const
{ return Packet2cd(Eigen::internal::pmul(x.v, y)); }
};
template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
{
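// a/b = a*conj(b) / |b|^2: hadd_pd sums (br^2, bi^2) within each 128-bit
// half, placing |b|^2 in both lanes of the corresponding complex.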
Packet2cd num = pmul(a, pconj(b));
__m256d tmp = _mm256_mul_pd(b.v, b.v);
__m256d denom = _mm256_hadd_pd(tmp, tmp);
return Packet2cd(_mm256_div_pd(num.v, denom));
}
template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x)
{
return Packet2cd(_mm256_shuffle_pd(x.v, x.v, 0x5));
}
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4cf,4>& kernel) {
__m256d P0 = _mm256_castps_pd(kernel.packet[0].v);
__m256d P1 = _mm256_castps_pd(kernel.packet[1].v);
__m256d P2 = _mm256_castps_pd(kernel.packet[2].v);
__m256d P3 = _mm256_castps_pd(kernel.packet[3].v);
__m256d T0 = _mm256_shuffle_pd(P0, P1, 15);
__m256d T1 = _mm256_shuffle_pd(P0, P1, 0);
__m256d T2 = _mm256_shuffle_pd(P2, P3, 15);
__m256d T3 = _mm256_shuffle_pd(P2, P3, 0);
kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 32));
kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 49));
kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 32));
kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49));
}
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet2cd,2>& kernel) {
__m256d tmp = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0+(2<<4));
kernel.packet[1].v = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1+(3<<4));
kernel.packet[0].v = tmp;
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_COMPLEX_AVX_H


@@ -1,564 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_PACKET_MATH_AVX_H
#define EIGEN_PACKET_MATH_AVX_H
namespace Eigen {
namespace internal {
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
#endif
#ifdef EIGEN_VECTORIZE_FMA
#ifndef EIGEN_HAS_FUSED_MADD
#define EIGEN_HAS_FUSED_MADD 1
#endif
#endif
typedef __m256 Packet8f;
typedef __m256i Packet8i;
typedef __m256d Packet4d;
template<> struct is_arithmetic<__m256> { enum { value = true }; };
template<> struct is_arithmetic<__m256i> { enum { value = true }; };
template<> struct is_arithmetic<__m256d> { enum { value = true }; };
#define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \
const Packet8f p8f_##NAME = pset1<Packet8f>(X)
#define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \
const Packet4d p4d_##NAME = pset1<Packet4d>(X)
template<> struct packet_traits<float> : default_packet_traits
{
typedef Packet8f type;
typedef Packet4f half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=8,
HasHalfPacket = 1,
HasDiv = 1,
HasSin = 0,
HasCos = 0,
HasLog = 0,
HasExp = 0,
HasSqrt = 0
};
};
template<> struct packet_traits<double> : default_packet_traits
{
typedef Packet4d type;
typedef Packet2d half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=4,
HasHalfPacket = 1,
HasDiv = 1,
HasExp = 0
};
};
/* Proper support for integers is only provided by AVX2. In the meantime, we'll
use SSE instructions and packets to deal with integers.
template<> struct packet_traits<int> : default_packet_traits
{
typedef Packet8i type;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=8
};
};
*/
template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; enum {size=8}; };
template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4}; };
template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8}; };
template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
template<> EIGEN_STRONG_INLINE Packet8i pset1<Packet8i>(const int& from) { return _mm256_set1_epi32(from); }
template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float* from) { return _mm256_broadcast_ss(from); }
template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }
template<> EIGEN_STRONG_INLINE Packet8f plset<float>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
template<> EIGEN_STRONG_INLINE Packet4d plset<double>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
{
return _mm256_sub_ps(_mm256_set1_ps(0.0),a);
}
template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a)
{
return _mm256_sub_pd(_mm256_set1_pd(0.0),a);
}
template<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet4d pconj(const Packet4d& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet8i pconj(const Packet8i& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet8f pmul<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_mul_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d pmul<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_mul_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8f pdiv<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_div_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d pdiv<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_div_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, const Packet8i& /*b*/)
{ eigen_assert(false && "packet integer division is not supported by AVX");
return pset1<Packet8i>(0);
}
#ifdef EIGEN_VECTORIZE_FMA
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
#if defined(__clang__) || defined(__GNUC__)
// clang generates a vfmadd213ps instruction plus some vmovaps on registers,
// and gcc generates a vfmadd132ps instruction, so force the compiler to emit
// a vfmadd231ps instruction instead, since the most common use case is to
// accumulate the result of the product.
Packet8f res = c;
__asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
return res;
#else
return _mm256_fmadd_ps(a,b,c);
#endif
}
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
#if defined(__clang__) || defined(__GNUC__)
// see above
Packet4d res = c;
__asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
return res;
#else
return _mm256_fmadd_pd(a,b,c);
#endif
}
#endif
template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8f por<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_or_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d por<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_or_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8f pxor<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_xor_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d pxor<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_xor_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8f pandnot<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_andnot_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d pandnot<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_andnot_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8f pload<Packet8f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_ps(from); }
template<> EIGEN_STRONG_INLINE Packet4d pload<Packet4d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_pd(from); }
template<> EIGEN_STRONG_INLINE Packet8i pload<Packet8i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); }
template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_ps(from); }
template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_pd(from); }
template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); }
// Loads 4 floats from memory and returns the packet {a0, a0, a1, a1, a2, a2, a3, a3}
template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)
{
// TODO: try to find a way to avoid the need for a temporary register
// Packet8f tmp = _mm256_castps128_ps256(_mm_loadu_ps(from));
// tmp = _mm256_insertf128_ps(tmp, _mm_movehl_ps(_mm256_castps256_ps128(tmp),_mm256_castps256_ps128(tmp)), 1);
// return _mm256_unpacklo_ps(tmp,tmp);
// _mm256_insertf128_ps is very slow on Haswell, thus:
Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from);
// mimic an "in-place" permutation of the lower 128 bits using a blend
tmp = _mm256_blend_ps(tmp,_mm256_castps128_ps256(_mm_permute_ps( _mm256_castps256_ps128(tmp), _MM_SHUFFLE(1,0,1,0))), 15);
// then we can perform a consistent permutation on the global register to get everything in shape:
return _mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2));
}
// Loads 2 doubles from memory and returns the packet {a0, a0, a1, a1}
template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from)
{
Packet4d tmp = _mm256_broadcast_pd((const __m128d*)(const void*)from);
return _mm256_permute_pd(tmp, 3<<2);
}
// Loads 2 floats from memory and returns the packet {a0, a0, a0, a0, a1, a1, a1, a1}
template<> EIGEN_STRONG_INLINE Packet8f ploadquad<Packet8f>(const float* from)
{
Packet8f tmp = _mm256_castps128_ps256(_mm_broadcast_ss(from));
return _mm256_insertf128_ps(tmp, _mm_broadcast_ss(from+1), 1);
}
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(to, from); }
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd(to, from); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
// NOTE: leverage _mm256_i32gather_ps and _mm256_i32gather_pd if AVX2 instructions are available
// NOTE: for the record the following seems to be slower: return _mm256_i32gather_ps(from, _mm256_set1_epi32(stride), 4);
template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, DenseIndex stride)
{
return _mm256_set_ps(from[7*stride], from[6*stride], from[5*stride], from[4*stride],
from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
}
template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, DenseIndex stride)
{
return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, DenseIndex stride)
{
__m128 low = _mm256_extractf128_ps(from, 0);
to[stride*0] = _mm_cvtss_f32(low);
to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1));
to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 2));
to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3));
__m128 high = _mm256_extractf128_ps(from, 1);
to[stride*4] = _mm_cvtss_f32(high);
to[stride*5] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1));
to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2));
to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3));
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, DenseIndex stride)
{
__m128d low = _mm256_extractf128_pd(from, 0);
to[stride*0] = _mm_cvtsd_f64(low);
to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1));
__m128d high = _mm256_extractf128_pd(from, 1);
to[stride*2] = _mm_cvtsd_f64(high);
to[stride*3] = _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1));
}
template<> EIGEN_STRONG_INLINE void pstore1<Packet8f>(float* to, const float& a)
{
Packet8f pa = pset1<Packet8f>(a);
pstore(to, pa);
}
template<> EIGEN_STRONG_INLINE void pstore1<Packet4d>(double* to, const double& a)
{
Packet4d pa = pset1<Packet4d>(a);
pstore(to, pa);
}
template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
{
Packet8i pa = pset1<Packet8i>(a);
pstore(to, pa);
}
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
return _mm_cvtss_f32(_mm256_castps256_ps128(a));
}
template<> EIGEN_STRONG_INLINE double pfirst<Packet4d>(const Packet4d& a) {
return _mm_cvtsd_f64(_mm256_castpd256_pd128(a));
}
template<> EIGEN_STRONG_INLINE int pfirst<Packet8i>(const Packet8i& a) {
return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
}
template<> EIGEN_STRONG_INLINE Packet8f preverse(const Packet8f& a)
{
__m256 tmp = _mm256_shuffle_ps(a,a,0x1b);
return _mm256_permute2f128_ps(tmp, tmp, 1);
}
template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
{
__m256d tmp = _mm256_shuffle_pd(a,a,5);
return _mm256_permute2f128_pd(tmp, tmp, 1);
// Equivalent alternative, unreachable above and kept only for reference:
// __m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
// return _mm256_permute_pd(swap_halves,5);
}
// pabs should be ok
template<> EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a)
{
const Packet8f mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
return _mm256_and_ps(a,mask);
}
template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a)
{
const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
return _mm256_and_pd(a,mask);
}
// preduxp should be ok
// FIXME: why is this ok? why isn't the simple implementation working as expected?
template<> EIGEN_STRONG_INLINE Packet8f preduxp<Packet8f>(const Packet8f* vecs)
{
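// Returns a packet whose i-th lane holds the horizontal sum of vecs[i]:
// hadd pairs the inputs and a second hadd finishes the in-lane sums, the
// permute2f128/add steps fold the two 128-bit halves together, and the
// final blends gather the eight per-vector sums into one register.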
__m256 hsum1 = _mm256_hadd_ps(vecs[0], vecs[1]);
__m256 hsum2 = _mm256_hadd_ps(vecs[2], vecs[3]);
__m256 hsum3 = _mm256_hadd_ps(vecs[4], vecs[5]);
__m256 hsum4 = _mm256_hadd_ps(vecs[6], vecs[7]);
__m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
__m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
__m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
__m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
__m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
__m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
__m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
__m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
__m256 sum1 = _mm256_add_ps(perm1, hsum5);
__m256 sum2 = _mm256_add_ps(perm2, hsum6);
__m256 sum3 = _mm256_add_ps(perm3, hsum7);
__m256 sum4 = _mm256_add_ps(perm4, hsum8);
__m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
__m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
__m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
return final;
}
template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
{
Packet4d tmp0, tmp1;
tmp0 = _mm256_hadd_pd(vecs[0], vecs[1]);
tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
tmp1 = _mm256_hadd_pd(vecs[2], vecs[3]);
tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
return _mm256_blend_pd(tmp0, tmp1, 0xC);
}
template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
{
Packet8f tmp0 = _mm256_hadd_ps(a,_mm256_permute2f128_ps(a,a,1));
tmp0 = _mm256_hadd_ps(tmp0,tmp0);
return pfirst(_mm256_hadd_ps(tmp0, tmp0));
}
template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
{
Packet4d tmp0 = _mm256_hadd_pd(a,_mm256_permute2f128_pd(a,a,1));
return pfirst(_mm256_hadd_pd(tmp0,tmp0));
}
template<> EIGEN_STRONG_INLINE Packet4f predux4<Packet8f>(const Packet8f& a)
{
return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
}
template<> EIGEN_STRONG_INLINE float predux_mul<Packet8f>(const Packet8f& a)
{
Packet8f tmp;
tmp = _mm256_mul_ps(a, _mm256_permute2f128_ps(a,a,1));
tmp = _mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
return pfirst(_mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
}
template<> EIGEN_STRONG_INLINE double predux_mul<Packet4d>(const Packet4d& a)
{
Packet4d tmp;
tmp = _mm256_mul_pd(a, _mm256_permute2f128_pd(a,a,1));
return pfirst(_mm256_mul_pd(tmp, _mm256_shuffle_pd(tmp,tmp,1)));
}
template<> EIGEN_STRONG_INLINE float predux_min<Packet8f>(const Packet8f& a)
{
Packet8f tmp = _mm256_min_ps(a, _mm256_permute2f128_ps(a,a,1));
tmp = _mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
return pfirst(_mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
}
template<> EIGEN_STRONG_INLINE double predux_min<Packet4d>(const Packet4d& a)
{
Packet4d tmp = _mm256_min_pd(a, _mm256_permute2f128_pd(a,a,1));
return pfirst(_mm256_min_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
}
template<> EIGEN_STRONG_INLINE float predux_max<Packet8f>(const Packet8f& a)
{
Packet8f tmp = _mm256_max_ps(a, _mm256_permute2f128_ps(a,a,1));
tmp = _mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
return pfirst(_mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
}
template<> EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a)
{
Packet4d tmp = _mm256_max_pd(a, _mm256_permute2f128_pd(a,a,1));
return pfirst(_mm256_max_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
}
template<int Offset>
struct palign_impl<Offset,Packet8f>
{
static EIGEN_STRONG_INLINE void run(Packet8f& first, const Packet8f& second)
{
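// Shifts the 16-element concatenation [first, second] left by Offset floats,
// writing the result into 'first'. AVX1 has no cross-lane element shift, so
// each case blends in Offset elements from 'second', permutes within the
// 128-bit lanes, and uses permute2f128 to move data across the lane boundary.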
if (Offset==1)
{
first = _mm256_blend_ps(first, second, 1);
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0x88);
}
else if (Offset==2)
{
first = _mm256_blend_ps(first, second, 3);
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0xcc);
}
else if (Offset==3)
{
first = _mm256_blend_ps(first, second, 7);
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0xee);
}
else if (Offset==4)
{
first = _mm256_blend_ps(first, second, 15);
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
first = _mm256_permute_ps(_mm256_permute2f128_ps (tmp, tmp, 1), _MM_SHUFFLE(3,2,1,0));
}
else if (Offset==5)
{
first = _mm256_blend_ps(first, second, 31);
first = _mm256_permute2f128_ps(first, first, 1);
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
first = _mm256_permute2f128_ps(tmp, tmp, 1);
first = _mm256_blend_ps(tmp, first, 0x88);
}
else if (Offset==6)
{
first = _mm256_blend_ps(first, second, 63);
first = _mm256_permute2f128_ps(first, first, 1);
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
first = _mm256_permute2f128_ps(tmp, tmp, 1);
first = _mm256_blend_ps(tmp, first, 0xcc);
}
else if (Offset==7)
{
first = _mm256_blend_ps(first, second, 127);
first = _mm256_permute2f128_ps(first, first, 1);
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
first = _mm256_permute2f128_ps(tmp, tmp, 1);
first = _mm256_blend_ps(tmp, first, 0xee);
}
}
};
template<int Offset>
struct palign_impl<Offset,Packet4d>
{
static EIGEN_STRONG_INLINE void run(Packet4d& first, const Packet4d& second)
{
if (Offset==1)
{
first = _mm256_blend_pd(first, second, 1);
__m256d tmp = _mm256_permute_pd(first, 5);
first = _mm256_permute2f128_pd(tmp, tmp, 1);
first = _mm256_blend_pd(tmp, first, 0xA);
}
else if (Offset==2)
{
first = _mm256_blend_pd(first, second, 3);
first = _mm256_permute2f128_pd(first, first, 1);
}
else if (Offset==3)
{
first = _mm256_blend_pd(first, second, 7);
__m256d tmp = _mm256_permute_pd(first, 5);
first = _mm256_permute2f128_pd(tmp, tmp, 1);
first = _mm256_blend_pd(tmp, first, 5);
}
}
};
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet8f,8>& kernel) {
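// 8x8 float transpose in three stages: unpacklo/unpackhi interleave pairs of
// rows, shuffle_ps gathers the 4-wide column fragments within each 128-bit
// half, and permute2f128 finally exchanges the halves across registers.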
__m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
__m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
__m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
__m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
__m256 T4 = _mm256_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
__m256 T5 = _mm256_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
__m256 T6 = _mm256_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
__m256 T7 = _mm256_unpackhi_ps(kernel.packet[6], kernel.packet[7]);
__m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
__m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
__m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
__m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
__m256 S4 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(1,0,1,0));
__m256 S5 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(3,2,3,2));
__m256 S6 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(1,0,1,0));
__m256 S7 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(3,2,3,2));
kernel.packet[0] = _mm256_permute2f128_ps(S0, S4, 0x20);
kernel.packet[1] = _mm256_permute2f128_ps(S1, S5, 0x20);
kernel.packet[2] = _mm256_permute2f128_ps(S2, S6, 0x20);
kernel.packet[3] = _mm256_permute2f128_ps(S3, S7, 0x20);
kernel.packet[4] = _mm256_permute2f128_ps(S0, S4, 0x31);
kernel.packet[5] = _mm256_permute2f128_ps(S1, S5, 0x31);
kernel.packet[6] = _mm256_permute2f128_ps(S2, S6, 0x31);
kernel.packet[7] = _mm256_permute2f128_ps(S3, S7, 0x31);
}
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet8f,4>& kernel) {
__m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
__m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
__m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
__m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
__m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
__m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
__m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
__m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
kernel.packet[0] = _mm256_permute2f128_ps(S0, S1, 0x20);
kernel.packet[1] = _mm256_permute2f128_ps(S2, S3, 0x20);
kernel.packet[2] = _mm256_permute2f128_ps(S0, S1, 0x31);
kernel.packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31);
}
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4d,4>& kernel) {
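// 4x4 double transpose: shuffle_pd interleaves each row pair within the
// 128-bit halves, then permute2f128 (0x20/0x31) recombines the lower and
// upper halves across registers.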
__m256d T0 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 15);
__m256d T1 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);
__m256d T2 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 15);
__m256d T3 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);
kernel.packet[1] = _mm256_permute2f128_pd(T0, T2, 32);
kernel.packet[3] = _mm256_permute2f128_pd(T0, T2, 49);
kernel.packet[0] = _mm256_permute2f128_pd(T1, T3, 32);
kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49);
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_PACKET_MATH_AVX_H


@@ -16,14 +16,11 @@ namespace internal {
static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
static Packet16uc p16uc_COMPLEX_RE = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
static Packet16uc p16uc_COMPLEX_IM = vec_sld(p16uc_DUPLICATE, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
static Packet16uc p16uc_COMPLEX_IM = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
static Packet16uc p16uc_COMPLEX_REV = vec_sld(p16uc_REVERSE, p16uc_REVERSE, 8);//{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
static Packet16uc p16uc_COMPLEX_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_COMPLEX_RE, (Packet4ui)p16uc_COMPLEX_IM);//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_COMPLEX_RE, (Packet4ui)p16uc_COMPLEX_IM);//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
static Packet16uc p16uc_COMPLEX_MASK16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8);//{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
static Packet16uc p16uc_COMPLEX_TRANSPOSE_0 = vec_add(p16uc_PSET_HI, p16uc_COMPLEX_MASK16);//{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
static Packet16uc p16uc_COMPLEX_TRANSPOSE_1 = vec_add(p16uc_PSET_LO, p16uc_COMPLEX_MASK16);//{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};
static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 1));//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 2), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 3));//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
//---------- float ----------
struct Packet2cf
@@ -36,7 +33,6 @@ struct Packet2cf
template<> struct packet_traits<std::complex<float> > : default_packet_traits
{
typedef Packet2cf type;
typedef Packet2cf half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
@@ -55,7 +51,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
};
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
@@ -69,22 +65,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo
return res;
}
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride)
{
std::complex<float> EIGEN_ALIGN16 af[2];
af[0] = from[0*stride];
af[1] = from[1*stride];
return Packet2cf(vec_ld(0, (const float*)af));
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride)
{
std::complex<float> EIGEN_ALIGN16 af[2];
vec_st(from.v, 0, (float*)af);
to[0*stride] = af[0];
to[1*stride] = af[1];
}
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_add(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_sub(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
@@ -230,13 +210,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x
return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV));
}
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
{
Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_0);
kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_1);
kernel.packet[0].v = tmp;
}
} // end namespace internal
} // end namespace Eigen

Eigen/src/Core/arch/AltiVec/PacketMath.h Executable file → Normal file

@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2014 Konstantinos Margaritis <markos@freevec.org>
// Copyright (C) 2008 Konstantinos Margaritis <markos@codex.gr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -18,10 +18,6 @@ namespace internal {
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
#endif
#ifndef EIGEN_HAS_FUSED_MADD
#define EIGEN_HAS_FUSED_MADD 1
#endif
#ifndef EIGEN_HAS_FUSE_CJMADD
#define EIGEN_HAS_FUSE_CJMADD 1
#endif
@@ -60,32 +56,29 @@ typedef __vector unsigned char Packet16uc;
#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
// Define global static constants:
static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
static Packet16uc p16uc_REVERSE = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0); //{ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}
static Packet16uc p16uc_DUPLICATE = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7};
static Packet4f p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 };
static Packet4i p4i_COUNTDOWN = { 3, 2, 1, 0 };
static Packet16uc p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
static Packet16uc p16uc_DUPLICATE = {0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7};
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1}
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16}
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}
static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000}
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0);
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0);
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1);
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16);
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1);
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0);
static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1);
template<> struct packet_traits<float> : default_packet_traits
{
typedef Packet4f type;
typedef Packet4f half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=4,
HasHalfPacket=0,
// FIXME check the Has*
HasDiv = 1,
HasSin = 0,
HasCos = 0,
HasLog = 0,
@@ -96,7 +89,6 @@ template<> struct packet_traits<float> : default_packet_traits
template<> struct packet_traits<int> : default_packet_traits
{
typedef Packet4i type;
typedef Packet4i half;
enum {
// FIXME check the Has*
Vectorizable = 1,
@@ -105,8 +97,8 @@ template<> struct packet_traits<int> : default_packet_traits
};
};
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
/*
inline std::ostream & operator <<(std::ostream & s, const Packet4f & v)
{
@@ -152,7 +144,6 @@ inline std::ostream & operator <<(std::ostream & s, const Packetbi & v)
return s;
}
*/
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
float EIGEN_ALIGN16 af[4];
@@ -170,65 +161,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
return vc;
}
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet4f>(const float *a,
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
{
a3 = vec_ld(0,a);
a0 = vec_splat(a3, 0);
a1 = vec_splat(a3, 1);
a2 = vec_splat(a3, 2);
a3 = vec_splat(a3, 3);
}
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet4i>(const int *a,
Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
{
a3 = vec_ld(0,a);
a0 = vec_splat(a3, 0);
a1 = vec_splat(a3, 1);
a2 = vec_splat(a3, 2);
a3 = vec_splat(a3, 3);
}
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride)
{
float EIGEN_ALIGN16 af[4];
af[0] = from[0*stride];
af[1] = from[1*stride];
af[2] = from[2*stride];
af[3] = from[3*stride];
return vec_ld(0, af);
}
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride)
{
int EIGEN_ALIGN16 ai[4];
ai[0] = from[0*stride];
ai[1] = from[1*stride];
ai[2] = from[2*stride];
ai[3] = from[3*stride];
return vec_ld(0, ai);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride)
{
float EIGEN_ALIGN16 af[4];
vec_st(from, 0, af);
to[0*stride] = af[0];
to[1*stride] = af[1];
to[2*stride] = af[2];
to[3*stride] = af[3];
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride)
{
int EIGEN_ALIGN16 ai[4];
vec_st(from, 0, ai);
to[0*stride] = ai[0];
to[1*stride] = ai[1];
to[2*stride] = ai[2];
to[3*stride] = ai[3];
}
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return vec_add(pset1<Packet4f>(a), p4f_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return vec_add(pset1<Packet4i>(a), p4i_COUNTDOWN); }
@@ -354,15 +286,15 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
{
Packet4f p;
if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from);
else p = ploadu<Packet4f>(from);
if((ptrdiff_t(&from) % 16) == 0) p = pload<Packet4f>(from);
else p = ploadu<Packet4f>(from);
return vec_perm(p, p, p16uc_DUPLICATE);
}
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
{
Packet4i p;
if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from);
else p = ploadu<Packet4i>(from);
if((ptrdiff_t(&from) % 16) == 0) p = pload<Packet4i>(from);
else p = ploadu<Packet4i>(from);
return vec_perm(p, p, p16uc_DUPLICATE);
}
@@ -562,32 +494,6 @@ struct palign_impl<Offset,Packet4i>
}
};
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4f,4>& kernel) {
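// Classic 4x4 transpose: the first vec_mergeh/vec_mergel round interleaves
// rows 0/2 and 1/3, the second round recombines the halves into columns.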
Packet4f t0, t1, t2, t3;
t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
kernel.packet[0] = vec_mergeh(t0, t2);
kernel.packet[1] = vec_mergel(t0, t2);
kernel.packet[2] = vec_mergeh(t1, t3);
kernel.packet[3] = vec_mergel(t1, t3);
}
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4i,4>& kernel) {
Packet4i t0, t1, t2, t3;
t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
kernel.packet[0] = vec_mergeh(t0, t2);
kernel.packet[1] = vec_mergel(t0, t2);
kernel.packet[2] = vec_mergeh(t1, t3);
kernel.packet[3] = vec_mergel(t1, t3);
}
} // end namespace internal
} // end namespace Eigen


@@ -1,5 +1,4 @@
ADD_SUBDIRECTORY(SSE)
ADD_SUBDIRECTORY(AltiVec)
ADD_SUBDIRECTORY(NEON)
ADD_SUBDIRECTORY(AVX)
ADD_SUBDIRECTORY(Default)


@@ -28,7 +28,6 @@ struct Packet2cf
template<> struct packet_traits<std::complex<float> > : default_packet_traits
{
typedef Packet2cf type;
typedef Packet2cf half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
@@ -47,7 +46,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
};
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
@@ -111,22 +110,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride)
{
Packet4f res;
res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
return Packet2cf(res);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride)
{
to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
}
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
@@ -263,14 +246,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
}
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet2cf,2>& kernel) {
float32x4_t tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
kernel.packet[1].v = tmp;
}
} // end namespace internal
} // end namespace Eigen

View File

@@ -49,7 +49,6 @@ typedef uint32x4_t Packet4ui;
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
#endif
// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function
// which is available on LLVM and GCC (at least)
#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
@@ -66,7 +65,6 @@ typedef uint32x4_t Packet4ui;
template<> struct packet_traits<float> : default_packet_traits
{
typedef Packet4f type;
typedef Packet4f half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
@@ -84,7 +82,6 @@ template<> struct packet_traits<float> : default_packet_traits
template<> struct packet_traits<int> : default_packet_traits
{
typedef Packet4i type;
typedef Packet4i half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
@@ -97,13 +94,12 @@ template<> struct packet_traits<int> : default_packet_traits
// workaround for gcc 4.2, 4.3 and 4.4 compilation issue
EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
EIGEN_STRONG_INLINE float32x2_t vld1_dup_f32 (const float* x) { return ::vld1_dup_f32 ((const float32_t*)x); }
EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q_f32((float32_t*)to,from); }
EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
#endif
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); }
@@ -222,40 +218,6 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& f
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride)
{
Packet4f res;
res = vsetq_lane_f32(from[0*stride], res, 0);
res = vsetq_lane_f32(from[1*stride], res, 1);
res = vsetq_lane_f32(from[2*stride], res, 2);
res = vsetq_lane_f32(from[3*stride], res, 3);
return res;
}
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride)
{
Packet4i res;
res = vsetq_lane_s32(from[0*stride], res, 0);
res = vsetq_lane_s32(from[1*stride], res, 1);
res = vsetq_lane_s32(from[2*stride], res, 2);
res = vsetq_lane_s32(from[3*stride], res, 3);
return res;
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride)
{
to[stride*0] = vgetq_lane_f32(from, 0);
to[stride*1] = vgetq_lane_f32(from, 1);
to[stride*2] = vgetq_lane_f32(from, 2);
to[stride*3] = vgetq_lane_f32(from, 3);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride)
{
to[stride*0] = vgetq_lane_s32(from, 0);
to[stride*1] = vgetq_lane_s32(from, 1);
to[stride*2] = vgetq_lane_s32(from, 2);
to[stride*3] = vgetq_lane_s32(from, 3);
}
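pgather and pscatter give packets strided-access semantics: a gather fills lane i from from[i*stride], a scatter writes lane i to to[i*stride]. A scalar reference of the contract these NEON overloads implement (illustrative helpers, not Eigen API):
// Reference semantics of pgather/pscatter for a 4-lane integer packet.
static inline void pgather_ref(const int* from, long stride, int lanes[4])
{
  for (int i = 0; i < 4; ++i)
    lanes[i] = from[i * stride];
}
static inline void pscatter_ref(int* to, const int lanes[4], long stride)
{
  for (int i = 0; i < 4; ++i)
    to[i * stride] = lanes[i];
}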
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ARM_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ARM_PREFETCH(addr); }
@@ -448,30 +410,9 @@ PALIGN_NEON(0,Packet4i,vextq_s32)
PALIGN_NEON(1,Packet4i,vextq_s32)
PALIGN_NEON(2,Packet4i,vextq_s32)
PALIGN_NEON(3,Packet4i,vextq_s32)
#undef PALIGN_NEON
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4f,4>& kernel) {
float32x4x2_t tmp1 = vzipq_f32(kernel.packet[0], kernel.packet[1]);
float32x4x2_t tmp2 = vzipq_f32(kernel.packet[2], kernel.packet[3]);
kernel.packet[0] = vcombine_f32(vget_low_f32(tmp1.val[0]), vget_low_f32(tmp2.val[0]));
kernel.packet[1] = vcombine_f32(vget_high_f32(tmp1.val[0]), vget_high_f32(tmp2.val[0]));
kernel.packet[2] = vcombine_f32(vget_low_f32(tmp1.val[1]), vget_low_f32(tmp2.val[1]));
kernel.packet[3] = vcombine_f32(vget_high_f32(tmp1.val[1]), vget_high_f32(tmp2.val[1]));
}
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4i,4>& kernel) {
int32x4x2_t tmp1 = vzipq_s32(kernel.packet[0], kernel.packet[1]);
int32x4x2_t tmp2 = vzipq_s32(kernel.packet[2], kernel.packet[3]);
kernel.packet[0] = vcombine_s32(vget_low_s32(tmp1.val[0]), vget_low_s32(tmp2.val[0]));
kernel.packet[1] = vcombine_s32(vget_high_s32(tmp1.val[0]), vget_high_s32(tmp2.val[0]));
kernel.packet[2] = vcombine_s32(vget_low_s32(tmp1.val[1]), vget_low_s32(tmp2.val[1]));
kernel.packet[3] = vcombine_s32(vget_high_s32(tmp1.val[1]), vget_high_s32(tmp2.val[1]));
}
} // end namespace internal
} // end namespace Eigen

View File

@@ -22,18 +22,13 @@ struct Packet2cf
__m128 v;
};
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
// to leverage AVX instructions.
#ifndef EIGEN_VECTORIZE_AVX
template<> struct packet_traits<std::complex<float> > : default_packet_traits
{
typedef Packet2cf type;
typedef Packet2cf half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
@@ -47,9 +42,8 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
HasSetLinear = 0
};
};
#endif
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
@@ -110,23 +104,8 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v)); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); }
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride)
{
return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]),
std::imag(from[0*stride]), std::real(from[0*stride])));
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride)
{
to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
}
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
@@ -145,7 +124,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Pack
#endif
}
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v))))); }
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(_mm_castps_pd(a.v)))); }
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
{
@@ -235,7 +214,7 @@ template<> struct conj_helper<Packet4f, Packet2cf, false,false>
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
{ return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }
{ return Packet2cf(Eigen::internal::pmul(x, y.v)); }
};
template<> struct conj_helper<Packet2cf, Packet4f, false,false>
@@ -244,7 +223,7 @@ template<> struct conj_helper<Packet2cf, Packet4f, false,false>
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
{ return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); }
{ return Packet2cf(Eigen::internal::pmul(x.v, y)); }
};
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
@@ -269,18 +248,13 @@ struct Packet1cd
__m128d v;
};
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
// to leverage AVX instructions.
#ifndef EIGEN_VECTORIZE_AVX
template<> struct packet_traits<std::complex<double> > : default_packet_traits
{
typedef Packet1cd type;
typedef Packet1cd half;
enum {
Vectorizable = 1,
AlignedOnScalar = 0,
size = 1,
HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
@@ -294,13 +268,12 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
HasSetLinear = 0
};
};
#endif
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; };
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; };
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(a.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
{
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
@@ -338,8 +311,8 @@ template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<dou
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
// FIXME force unaligned store, this is a temporary fix
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
@@ -437,7 +410,7 @@ template<> struct conj_helper<Packet2d, Packet1cd, false,false>
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
{ return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); }
{ return Packet1cd(Eigen::internal::pmul(x, y.v)); }
};
template<> struct conj_helper<Packet1cd, Packet2d, false,false>
@@ -446,7 +419,7 @@ template<> struct conj_helper<Packet1cd, Packet2d, false,false>
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
{ return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); }
{ return Packet1cd(Eigen::internal::pmul(x.v, y)); }
};
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
@@ -459,17 +432,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, con
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
{
return Packet1cd(preverse(Packet2d(x.v)));
}
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet2cf,2>& kernel) {
__m128d w1 = _mm_castps_pd(kernel.packet[0].v);
__m128d w2 = _mm_castps_pd(kernel.packet[1].v);
__m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
kernel.packet[1].v = tmp;
return Packet1cd(preverse(x.v));
}
} // end namespace internal

View File

@@ -52,7 +52,7 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
Packet4i emm0;
Packet4f invalid_mask = _mm_cmplt_ps(x, _mm_setzero_ps());
Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps()); // not greater equal is true if x is NaN
Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
@@ -63,7 +63,7 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
x = _mm_or_ps(x, p4f_half);
emm0 = _mm_sub_epi32(emm0, p4i_0x7f);
Packet4f e = padd(Packet4f(_mm_cvtepi32_ps(emm0)), p4f_1);
Packet4f e = padd(_mm_cvtepi32_ps(emm0), p4f_1);
/* part2:
if( x < SQRTHF ) {
@@ -72,9 +72,9 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
} else { x = x - 1.0; }
*/
Packet4f mask = _mm_cmplt_ps(x, p4f_cephes_SQRTHF);
Packet4f tmp = pand(x, mask);
Packet4f tmp = _mm_and_ps(x, mask);
x = psub(x, p4f_1);
e = psub(e, pand(p4f_1, mask));
e = psub(e, _mm_and_ps(p4f_1, mask));
x = padd(x, tmp);
Packet4f x2 = pmul(x,x);
@@ -138,7 +138,6 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
#ifdef EIGEN_VECTORIZE_SSE4_1
fx = _mm_floor_ps(fx);
#else
tmp = _mm_setzero_ps();
emm0 = _mm_cvttps_epi32(fx);
tmp = _mm_cvtepi32_ps(emm0);
/* if greater, subtract 1 */
@@ -167,7 +166,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
emm0 = _mm_cvttps_epi32(fx);
emm0 = _mm_add_epi32(emm0, p4i_0x7f);
emm0 = _mm_slli_epi32(emm0, 23);
return pmul(y, Packet4f(_mm_castsi128_ps(emm0)));
return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d pexp<Packet2d>(const Packet2d& _x)
@@ -207,7 +206,6 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
#ifdef EIGEN_VECTORIZE_SSE4_1
fx = _mm_floor_pd(fx);
#else
tmp = _mm_setzero_pd();
emm0 = _mm_cvttpd_epi32(fx);
tmp = _mm_cvtepi32_pd(emm0);
/* if greater, subtract 1 */
@@ -241,7 +239,7 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
emm0 = _mm_add_epi32(emm0, p4i_1023_0);
emm0 = _mm_slli_epi32(emm0, 20);
emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
return pmul(x, Packet2d(_mm_castsi128_pd(emm0)));
return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x);
}
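The new pmax(..., _x) wrapping in both pexp hunks is a cheap safety net: since exp(x) > x for every finite real x, taking the maximum never changes a correctly computed result, while the SSE max semantics (the second operand is returned on an unordered comparison) make a NaN input propagate to the output. A scalar sketch of the same idea (pexp_ref is illustrative, not Eigen API):
#include <cmath>
// std::exp stands in for the vectorized polynomial evaluation above.
inline double pexp_ref(double x)
{
  const double y = std::exp(x);
  // Mirrors _mm_max_pd(y, x): if x is NaN the comparison is false
  // and x (NaN) is returned; otherwise y > x always holds.
  return (y > x) ? y : x;
}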
/* evaluation of 4 sines at onces, using SSE2 intrinsics.

Eigen/src/Core/arch/SSE/PacketMath.h Executable file → Normal file
View File

@@ -22,41 +22,9 @@ namespace internal {
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
#endif
#ifdef EIGEN_VECTORIZE_FMA
#ifndef EIGEN_HAS_FUSED_MADD
#define EIGEN_HAS_FUSED_MADD 1
#endif
#endif
#if defined EIGEN_VECTORIZE_AVX && defined __GNUC__ && !(defined __clang__ || defined __INTEL_COMPILER)
// With GCC's default ABI version, a __m128 and a __m256 are the same type, and therefore we cannot
// have overloads for both types without a linking error.
// One solution is to increase the ABI version using -fabi-version=4 (or greater).
// To work around this inconvenience, we instead wrap the 128-bit types in the following helper
// structure:
// TODO disable this wrapper when abi-version>=4, but how to detect that without asking the user to define a macro?
template<typename T>
struct eigen_packet_wrapper
{
EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
m_val = v;
return *this;
}
T m_val;
};
typedef eigen_packet_wrapper<__m128> Packet4f;
typedef eigen_packet_wrapper<__m128i> Packet4i;
typedef eigen_packet_wrapper<__m128d> Packet2d;
#else
typedef __m128 Packet4f;
typedef __m128i Packet4i;
typedef __m128d Packet2d;
#endif
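A minimal sketch of the overload collision the wrapper avoids (f is a hypothetical function, not Eigen API; assumes AVX is enabled): with the old default ABI, f(__m128) and f(__m256) can mangle to the same symbol and fail to link, whereas a distinct struct type always resolves cleanly.
#include <immintrin.h>
struct wrapped_m128 { __m128 v; };         // same layout, distinct C++ type
inline int f(wrapped_m128) { return 128; } // never clashes with the next one
inline int f(__m256)       { return 256; } // f(__m128) could clash here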
template<> struct is_arithmetic<__m128> { enum { value = true }; };
template<> struct is_arithmetic<__m128i> { enum { value = true }; };
@@ -90,18 +58,13 @@ template<> struct is_arithmetic<__m128d> { enum { value = true }; };
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
// to leverage AVX instructions.
#ifndef EIGEN_VECTORIZE_AVX
template<> struct packet_traits<float> : default_packet_traits
{
typedef Packet4f type;
typedef Packet4f half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=4,
HasHalfPacket = 0,
HasDiv = 1,
HasSin = EIGEN_FAST_MATH,
@@ -114,23 +77,19 @@ template<> struct packet_traits<float> : default_packet_traits
template<> struct packet_traits<double> : default_packet_traits
{
typedef Packet2d type;
typedef Packet2d half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=2,
HasHalfPacket = 0,
HasDiv = 1,
HasExp = 1,
HasSqrt = 1
};
};
#endif
template<> struct packet_traits<int> : default_packet_traits
{
typedef Packet4i type;
typedef Packet4i half;
enum {
// FIXME check the Has*
Vectorizable = 1,
@@ -139,9 +98,9 @@ template<> struct packet_traits<int> : default_packet_traits
};
};
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
#if defined(_MSC_VER) && (_MSC_VER==1500)
// Workaround MSVC 9 internal compiler error.
@@ -151,26 +110,13 @@ template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { re
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
#else
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
#endif
// GCC generates a shufps instruction for _mm_set1_ps/_mm_load1_ps instead of the more efficient pshufd instruction.
// However, using intrinsics for pset1 makes gcc generate crappy code in some cases (see bug 203).
// Using inline assembly is not an option either, because then gcc fails to reorder the instructions properly.
// Therefore, we introduced the pload1 functions to be used in product kernels for which bug 203 does not apply.
// Also note that with AVX, we want it to generate a vbroadcastss.
#if (defined __GNUC__) && (!defined __INTEL_COMPILER) && (!defined __clang__) && (!defined __AVX__)
template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0);
}
#endif
#ifndef EIGEN_VECTORIZE_AVX
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
#endif
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
@@ -193,7 +139,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
}
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
{
return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a);
return psub(_mm_setr_epi32(0,0,0,0), a);
}
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
@@ -227,10 +173,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
// for some weird reasons, it has to be overloaded for packets of integers
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
#ifdef EIGEN_VECTORIZE_FMA
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
#endif
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
@@ -276,7 +218,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, con
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); }
#if defined(_MSC_VER)
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
@@ -294,7 +236,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { E
#endif
}
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from)); }
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); }
#else
// Fast unaligned loads. Note that here we cannot directly use intrinsics: this would
// require pointer casting to incompatible pointer types and lead to invalid code
@@ -303,17 +245,14 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { E
// TODO: do the same for MSVC (ICC is compatible)
// NOTE: with the code below, MSVC's compiler crashes!
#if defined(__GNUC__) && (defined(__i386__) || (defined(__x86_64) && EIGEN_GNUC_AT_LEAST(4, 8)))
#if defined(__GNUC__) && defined(__i386__)
// bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
#define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 1
#elif defined(__clang__)
// bug 201: Segfaults in __mm_loadh_pd with clang 2.8
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
#define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0
#else
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 0
#define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0
#endif
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
@@ -344,7 +283,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
{
EIGEN_DEBUG_UNALIGNED_LOAD
#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from));
#else
__m128d res;
res = _mm_load_sd((const double*)(from)) ;
@@ -363,77 +302,38 @@ template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
{
Packet4i tmp;
tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
tmp = _mm_loadl_epi64(reinterpret_cast<const Packet4i*>(from));
return vec4i_swizzle1(tmp, 0, 0, 1, 1);
}
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<Packet4i*>(to), from); }
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {
EIGEN_DEBUG_UNALIGNED_STORE
#if EIGEN_AVOID_CUSTOM_UNALIGNED_STORES
_mm_storeu_pd(to, from);
#else
_mm_storel_pd((to), from);
_mm_storeh_pd((to+1), from);
#endif
}
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), Packet2d(_mm_castps_pd(from))); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), Packet2d(_mm_castsi128_pd(from))); }
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride)
{
return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
}
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, DenseIndex stride)
{
return _mm_set_pd(from[1*stride], from[0*stride]);
}
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride)
{
return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride)
{
to[stride*0] = _mm_cvtss_f32(from);
to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, DenseIndex stride)
{
to[stride*0] = _mm_cvtsd_f64(from);
to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride)
{
to[stride*0] = _mm_cvtsi128_si32(from);
to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
}
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castps_pd(from)); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castsi128_pd(from)); }
// some compilers might be tempted to perform multiple moves instead of using a vector path.
template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
{
Packet4f pa = _mm_set_ss(a);
pstore(to, Packet4f(vec4f_swizzle1(pa,0,0,0,0)));
pstore(to, vec4f_swizzle1(pa,0,0,0,0));
}
// some compilers might be tempted to perform multiple moves instead of using a vector path.
template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
{
Packet2d pa = _mm_set_sd(a);
pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
pstore(to, vec2d_swizzle1(pa,0,0));
}
#ifndef EIGEN_VECTORIZE_AVX
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
#endif
#if defined(_MSC_VER) && defined(_WIN64) && !defined(__INTEL_COMPILER)
// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
@@ -480,38 +380,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
#endif
}
// with AVX, the default implementations based on pload1 are faster
#ifndef __AVX__
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet4f>(const float *a,
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
{
a3 = pload<Packet4f>(a);
a0 = vec4f_swizzle1(a3, 0,0,0,0);
a1 = vec4f_swizzle1(a3, 1,1,1,1);
a2 = vec4f_swizzle1(a3, 2,2,2,2);
a3 = vec4f_swizzle1(a3, 3,3,3,3);
}
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet2d>(const double *a,
Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
{
#ifdef EIGEN_VECTORIZE_SSE3
a0 = _mm_loaddup_pd(a+0);
a1 = _mm_loaddup_pd(a+1);
a2 = _mm_loaddup_pd(a+2);
a3 = _mm_loaddup_pd(a+3);
#else
a1 = pload<Packet2d>(a);
a0 = vec2d_swizzle1(a1, 0,0);
a1 = vec2d_swizzle1(a1, 1,1);
a3 = pload<Packet2d>(a+2);
a2 = vec2d_swizzle1(a3, 0,0);
a3 = vec2d_swizzle1(a3, 1,1);
#endif
}
#endif
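pbroadcast4 reads four consecutive scalars and splats each one across a whole packet, which is the access pattern of the product kernels. A scalar reference of the contract for the 2-lane double case (illustrative helper, not Eigen API):
// Each output packet is one input scalar replicated across all lanes.
static inline void pbroadcast4_ref(const double* a, double a0[2], double a1[2],
                                   double a2[2], double a3[2])
{
  for (int i = 0; i < 2; ++i) {
    a0[i] = a[0];
    a1[i] = a[1];
    a2[i] = a[2];
    a3[i] = a[3];
  }
}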
EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
{
vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
@@ -539,10 +407,10 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
{
Packet4f tmp0 = _mm_hadd_ps(a,a);
return pfirst<Packet4f>(_mm_hadd_ps(tmp0, tmp0));
return pfirst(_mm_hadd_ps(tmp0, tmp0));
}
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return pfirst<Packet2d>(_mm_hadd_pd(a, a)); }
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return pfirst(_mm_hadd_pd(a, a)); }
// SSSE3 version:
// EIGEN_STRONG_INLINE float predux(const Packet4i& a)
@@ -585,7 +453,7 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
{
Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
return pfirst(tmp) + pfirst(_mm_shuffle_epi32(tmp, 1));
}
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
@@ -608,11 +476,11 @@ template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
{
Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
return pfirst<Packet4f>(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
return pfirst(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
}
template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
{
return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
return pfirst(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
}
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
{
@@ -628,18 +496,14 @@ template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
{
Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
return pfirst<Packet4f>(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
return pfirst(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
}
template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
{
return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
return pfirst(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
}
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
return pfirst<Packet4i>(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
#else
// after some experiments, it seems this is the fastest way to implement it
// for GCC (e.g., it does not like using std::min after the pstore!)
EIGEN_ALIGN16 int aux[4];
@@ -647,25 +511,20 @@ template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
return aux0<aux2 ? aux0 : aux2;
#endif // EIGEN_VECTORIZE_SSE4_1
}
// max
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
{
Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
return pfirst<Packet4f>(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
return pfirst(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
}
template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
{
return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
return pfirst(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
}
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
return pfirst<Packet4i>(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
#else
// after some experiments, it seems this is the fastest way to implement it
// for GCC (e.g., it does not like using std::min after the pstore!)
EIGEN_ALIGN16 int aux[4];
@@ -673,7 +532,6 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
return aux0>aux2 ? aux0 : aux2;
#endif // EIGEN_VECTORIZE_SSE4_1
}
#if (defined __GNUC__)
@@ -784,31 +642,6 @@ struct palign_impl<Offset,Packet2d>
};
#endif
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4f,4>& kernel) {
_MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
}
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet2d,2>& kernel) {
__m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
kernel.packet[1] = tmp;
}
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4i,4>& kernel) {
__m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
__m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
__m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
__m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);
kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);
kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);
kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
}
} // end namespace internal
} // end namespace Eigen

View File

@@ -1,167 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_ASSIGNMENT_FUNCTORS_H
#define EIGEN_ASSIGNMENT_FUNCTORS_H
namespace Eigen {
namespace internal {
/** \internal
* \brief Template functor for scalar/packet assignment
*
*/
template<typename Scalar> struct assign_op {
EIGEN_EMPTY_STRUCT_CTOR(assign_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a = b; }
template<int Alignment, typename Packet>
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
{ internal::pstoret<Scalar,Packet,Alignment>(a,b); }
};
template<typename Scalar>
struct functor_traits<assign_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::ReadCost,
PacketAccess = packet_traits<Scalar>::IsVectorized
};
};
/** \internal
* \brief Template functor for scalar/packet assignment with addition
*
*/
template<typename Scalar> struct add_assign_op {
EIGEN_EMPTY_STRUCT_CTOR(add_assign_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a += b; }
template<int Alignment, typename Packet>
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
{ internal::pstoret<Scalar,Packet,Alignment>(a,internal::padd(internal::ploadt<Packet,Alignment>(a),b)); }
};
template<typename Scalar>
struct functor_traits<add_assign_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasAdd
};
};
/** \internal
* \brief Template functor for scalar/packet assignment with subtraction
*
*/
template<typename Scalar> struct sub_assign_op {
EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a -= b; }
template<int Alignment, typename Packet>
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
{ internal::pstoret<Scalar,Packet,Alignment>(a,internal::psub(internal::ploadt<Packet,Alignment>(a),b)); }
};
template<typename Scalar>
struct functor_traits<sub_assign_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasAdd
};
};
/** \internal
* \brief Template functor for scalar/packet assignment with multiplication
*
*/
template<typename Scalar> struct mul_assign_op {
EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a *= b; }
template<int Alignment, typename Packet>
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
{ internal::pstoret<Scalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); }
};
template<typename Scalar>
struct functor_traits<mul_assign_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasMul
};
};
/** \internal
* \brief Template functor for scalar/packet assignment with division
*
*/
template<typename Scalar> struct div_assign_op {
EIGEN_EMPTY_STRUCT_CTOR(div_assign_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a /= b; }
template<int Alignment, typename Packet>
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
{ internal::pstoret<Scalar,Packet,Alignment>(a,internal::pdiv(internal::ploadt<Packet,Alignment>(a),b)); }
};
template<typename Scalar>
struct functor_traits<div_assign_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasMul
};
};
/** \internal
* \brief Template functor for scalar/packet assignment with swapping
*
* It works as follows. For a non-vectorized evaluation loop, we have:
* for(i) func(A.coeffRef(i), B.coeff(i));
* where B is a SwapWrapper expression. The trick is to make SwapWrapper::coeff behave like a non-const coeffRef.
* Actually, SwapWrapper might not even be needed: since B has to be writable even when it is a plain expression,
* B.coeff already returns a const reference to the underlying scalar value.
*
* The case of a vectorized loop is more tricky:
* for(i,j) func.assignPacket<A_Align>(&A.coeffRef(i,j), B.packet<B_Align>(i,j));
* Here, B must be a SwapWrapper whose packet function actually returns a proxy object holding a Scalar*,
* the actual alignment and Packet type.
*
*/
template<typename Scalar> struct swap_assign_op {
EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
{
using std::swap;
swap(a,const_cast<Scalar&>(b));
}
template<int LhsAlignment, int RhsAlignment, typename Packet>
EIGEN_STRONG_INLINE void swapPacket(Scalar* a, Scalar* b) const
{
Packet tmp = internal::ploadt<Packet,RhsAlignment>(b);
internal::pstoret<Scalar,Packet,RhsAlignment>(b, internal::ploadt<Packet,LhsAlignment>(a));
internal::pstoret<Scalar,Packet,LhsAlignment>(a, tmp);
}
};
template<typename Scalar>
struct functor_traits<swap_assign_op<Scalar> > {
enum {
Cost = 3 * NumTraits<Scalar>::ReadCost,
PacketAccess = packet_traits<Scalar>::IsVectorized
};
};
} // namespace internal
} // namespace Eigen
#endif // EIGEN_ASSIGNMENT_FUNCTORS_H
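A simplified sketch of how an evaluation loop dispatches through these functors (not the actual Eigen kernel): the same loop performs =, +=, -=, *= or /= depending only on the functor it is given.
// cwise_assign_ref is an illustrative name; alignment handling and the
// packet path (assignPacket) are omitted for brevity.
template<typename Scalar, typename Functor>
void cwise_assign_ref(Scalar* dst, const Scalar* src, int n, const Functor& func)
{
  for (int i = 0; i < n; ++i)
    func.assignCoeff(dst[i], src[i]); // e.g. add_assign_op does dst[i] += src[i]
}
For instance, cwise_assign_ref(d, s, n, add_assign_op<double>()) would accumulate s into d.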

View File

@@ -1,456 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_BINARY_FUNCTORS_H
#define EIGEN_BINARY_FUNCTORS_H
namespace Eigen {
namespace internal {
//---------- associative binary functors ----------
/** \internal
* \brief Template functor to compute the sum of two scalars
*
* \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, DenseBase::sum()
*/
template<typename Scalar> struct scalar_sum_op {
// typedef Scalar result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::padd(a,b); }
template<typename Packet>
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
{ return internal::predux(a); }
};
template<typename Scalar>
struct functor_traits<scalar_sum_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasAdd
};
};
/** \internal
* \brief Template specialization to deprecate the summation of boolean expressions.
* This is required to solve Bug 426.
* \sa DenseBase::count(), DenseBase::any(), ArrayBase::cast(), MatrixBase::cast()
*/
template<> struct scalar_sum_op<bool> : scalar_sum_op<int> {
EIGEN_DEPRECATED
scalar_sum_op() {}
};
/** \internal
* \brief Template functor to compute the product of two scalars
*
* \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux()
*/
template<typename LhsScalar,typename RhsScalar> struct scalar_product_op {
enum {
// TODO vectorize mixed product
Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
};
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::pmul(a,b); }
template<typename Packet>
EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
{ return internal::predux_mul(a); }
};
template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
enum {
Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
PacketAccess = scalar_product_op<LhsScalar,RhsScalar>::Vectorizable
};
};
/** \internal
* \brief Template functor to compute the conjugate product of two scalars
*
* This is a shortcut for conj(x) * y, which is needed for optimization purposes; in Eigen2 support mode, this becomes x * conj(y)
*/
template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op {
enum {
Conj = NumTraits<LhsScalar>::IsComplex
};
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
{ return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
};
template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
enum {
Cost = NumTraits<LhsScalar>::MulCost,
PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMul
};
};
/** \internal
* \brief Template functor to compute the min of two scalars
*
* \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff()
*/
template<typename Scalar> struct scalar_min_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { EIGEN_USING_STD_MATH(min); return (min)(a, b); }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::pmin(a,b); }
template<typename Packet>
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
{ return internal::predux_min(a); }
};
template<typename Scalar>
struct functor_traits<scalar_min_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasMin
};
};
/** \internal
* \brief Template functor to compute the max of two scalars
*
* \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff()
*/
template<typename Scalar> struct scalar_max_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { EIGEN_USING_STD_MATH(max); return (max)(a, b); }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::pmax(a,b); }
template<typename Packet>
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
{ return internal::predux_max(a); }
};
template<typename Scalar>
struct functor_traits<scalar_max_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasMax
};
};
/** \internal
* \brief Template functor to compute the hypot of two scalars
*
* \sa MatrixBase::stableNorm(), class Redux
*/
template<typename Scalar> struct scalar_hypot_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
// typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
{
EIGEN_USING_STD_MATH(max);
EIGEN_USING_STD_MATH(min);
using std::sqrt;
Scalar p, qp;
if(_x>_y)
{
p = _x;
qp = _y / p;
}
else
{
p = _y;
qp = _x / p;
}
return p * sqrt(Scalar(1) + qp*qp);
}
};
template<typename Scalar>
struct functor_traits<scalar_hypot_op<Scalar> > {
enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess=0 };
};
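The rescaled form used by scalar_hypot_op matters numerically: the naive sqrt(x*x + y*y) overflows as soon as x*x does, while factoring out the larger magnitude keeps the intermediate close to 1. A sketch under the same assumption as the functor (non-negative inputs, larger one non-zero); the names are illustrative, not Eigen API:
#include <algorithm>
#include <cmath>
inline double hypot_naive(double x, double y)  { return std::sqrt(x*x + y*y); }
inline double hypot_scaled(double x, double y)
{
  const double p = std::max(x, y);
  const double q = std::min(x, y);
  const double qp = q / p;            // <= 1, so the sum below cannot overflow
  return p * std::sqrt(1.0 + qp*qp);
}
// hypot_naive(1e200, 1e200) overflows to inf;
// hypot_scaled(1e200, 1e200) returns about 1.414e200.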
/** \internal
* \brief Template functor to compute the pow of two scalars
*/
template<typename Scalar, typename OtherScalar> struct scalar_binary_pow_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op)
EIGEN_DEVICE_FUNC
inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); }
};
template<typename Scalar, typename OtherScalar>
struct functor_traits<scalar_binary_pow_op<Scalar,OtherScalar> > {
enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
};
//---------- non associative binary functors ----------
/** \internal
* \brief Template functor to compute the difference of two scalars
*
* \sa class CwiseBinaryOp, MatrixBase::operator-
*/
template<typename Scalar> struct scalar_difference_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::psub(a,b); }
};
template<typename Scalar>
struct functor_traits<scalar_difference_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasSub
};
};
/** \internal
* \brief Template functor to compute the quotient of two scalars
*
* \sa class CwiseBinaryOp, Cwise::operator/()
*/
template<typename LhsScalar,typename RhsScalar> struct scalar_quotient_op {
enum {
// TODO vectorize mixed product
Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv
};
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::pdiv(a,b); }
};
template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
enum {
Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost), // rough estimate!
PacketAccess = scalar_quotient_op<LhsScalar,RhsScalar>::Vectorizable
};
};
/** \internal
* \brief Template functor to compute the and of two booleans
*
* \sa class CwiseBinaryOp, ArrayBase::operator&&
*/
struct scalar_boolean_and_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
};
template<> struct functor_traits<scalar_boolean_and_op> {
enum {
Cost = NumTraits<bool>::AddCost,
PacketAccess = false
};
};
/** \internal
* \brief Template functor to compute the or of two booleans
*
* \sa class CwiseBinaryOp, ArrayBase::operator||
*/
struct scalar_boolean_or_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
};
template<> struct functor_traits<scalar_boolean_or_op> {
enum {
Cost = NumTraits<bool>::AddCost,
PacketAccess = false
};
};
//---------- binary functors bound to a constant, thus appearing as a unary functor ----------
/** \internal
* \brief Template functor to multiply a scalar by a fixed other one
*
* \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/
*/
/* NOTE why is doing the pset1() in packetOp an optimization?
* Indeed, it seems better to declare m_other as a Packet and do the pset1() once
* in the constructor. However, in practice:
* - GCC does not like m_other as a Packet and generates a load every time it needs it
* - on the other hand, GCC is able to move the pset1() outside the loop :)
* - simpler code ;)
* (ICC and gcc 4.4 seem to perform well in both cases; the issue is visible with y = a*x + b*y)
*/
template<typename Scalar>
struct scalar_multiple_op {
typedef typename packet_traits<Scalar>::type Packet;
// FIXME default copy constructors seem bugged with std::complex<>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::pmul(a, pset1<Packet>(m_other)); }
typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
};
template<typename Scalar>
struct functor_traits<scalar_multiple_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
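A simplified sketch of the loop these unary functors feed (not the actual Eigen evaluator), showing why the per-call pset1() inside packetOp is harmless: the compiler is expected to hoist the splat out of the loop, as the NOTE above describes.
// apply_unary_ref is an illustrative name; unaligned heads/tails are ignored.
template<typename Scalar, typename Op>
void apply_unary_ref(Scalar* data, int n, const Op& op)
{
  typedef typename packet_traits<Scalar>::type Packet;
  const int step = packet_traits<Scalar>::size;
  int i = 0;
  for (; i + step <= n; i += step)  // vector path: packetOp re-does pset1(m_other)
    pstore(data + i, op.packetOp(pload<Packet>(data + i)));
  for (; i < n; ++i)                // scalar tail
    data[i] = op(data[i]);
}
e.g. apply_unary_ref(ptr, n, scalar_multiple_op<float>(2.f)) scales a buffer by 2.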
template<typename Scalar1, typename Scalar2>
struct scalar_multiple2_op {
typedef typename scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; }
typename add_const_on_value_type<typename NumTraits<Scalar2>::Nested>::type m_other;
};
template<typename Scalar1,typename Scalar2>
struct functor_traits<scalar_multiple2_op<Scalar1,Scalar2> >
{ enum { Cost = NumTraits<Scalar1>::MulCost, PacketAccess = false }; };
/** \internal
* \brief Template functor to divide a scalar by a fixed other one
*
* This functor is used to implement the quotient of a matrix by
* a scalar where the scalar type is not necessarily a floating point type.
*
* \sa class CwiseUnaryOp, MatrixBase::operator/
*/
template<typename Scalar>
struct scalar_quotient1_op {
typedef typename packet_traits<Scalar>::type Packet;
// FIXME default copy constructors seem bugged with std::complex<>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; }
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::pdiv(a, pset1<Packet>(m_other)); }
typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
};
template<typename Scalar>
struct functor_traits<scalar_quotient1_op<Scalar> >
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
// In Eigen, any binary op (Product, CwiseBinaryOp) requires the Lhs and Rhs to have the same scalar type, except for multiplication
// where the mixing of different types is handled by scalar_product_traits
// In particular, real * complex<real> is allowed.
// FIXME move this to functor_traits adding a functor_default
template<typename Functor> struct functor_is_product_like { enum { ret = 0 }; };
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_quotient_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
/** \internal
* \brief Template functor to add a scalar to a fixed other one
* \sa class CwiseUnaryOp, Array::operator+
*/
/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */
template<typename Scalar>
struct scalar_add_op {
typedef typename packet_traits<Scalar>::type Packet;
// FIXME default copy constructors seem bugged with std::complex<>
EIGEN_DEVICE_FUNC inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC inline scalar_add_op(const Scalar& other) : m_other(other) { }
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a + m_other; }
inline const Packet packetOp(const Packet& a) const
{ return internal::padd(a, pset1<Packet>(m_other)); }
const Scalar m_other;
};
template<typename Scalar>
struct functor_traits<scalar_add_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
/** \internal
* \brief Template functor to subtract a fixed scalar from another one
* \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_rsub_op
*/
template<typename Scalar>
struct scalar_sub_op {
typedef typename packet_traits<Scalar>::type Packet;
inline scalar_sub_op(const scalar_sub_op& other) : m_other(other.m_other) { }
inline scalar_sub_op(const Scalar& other) : m_other(other) { }
inline Scalar operator() (const Scalar& a) const { return a - m_other; }
inline const Packet packetOp(const Packet& a) const
{ return internal::psub(a, pset1<Packet>(m_other)); }
const Scalar m_other;
};
template<typename Scalar>
struct functor_traits<scalar_sub_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
/** \internal
* \brief Template functor to subtract a scalar from a fixed other one
* \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_sub_op
*/
template<typename Scalar>
struct scalar_rsub_op {
typedef typename packet_traits<Scalar>::type Packet;
inline scalar_rsub_op(const scalar_rsub_op& other) : m_other(other.m_other) { }
inline scalar_rsub_op(const Scalar& other) : m_other(other) { }
inline Scalar operator() (const Scalar& a) const { return m_other - a; }
inline const Packet packetOp(const Packet& a) const
{ return internal::psub(pset1<Packet>(m_other), a); }
const Scalar m_other;
};
template<typename Scalar>
struct functor_traits<scalar_rsub_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
/** \internal
* \brief Template functor to raise a scalar to a power
* \sa class CwiseUnaryOp, Cwise::pow
*/
template<typename Scalar>
struct scalar_pow_op {
// FIXME default copy constructors seem to be buggy with std::complex<>
inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { }
inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {}
EIGEN_DEVICE_FUNC
inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); }
const Scalar m_exponent;
};
template<typename Scalar>
struct functor_traits<scalar_pow_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
/** \internal
* \brief Template functor to compute the quotient between a scalar and array entries.
* \sa class CwiseUnaryOp, Cwise::inverse()
*/
template<typename Scalar>
struct scalar_inverse_mult_op {
scalar_inverse_mult_op(const Scalar& other) : m_other(other) {}
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other / a; }
template<typename Packet>
inline const Packet packetOp(const Packet& a) const
{ return internal::pdiv(pset1<Packet>(m_other),a); }
Scalar m_other;
};
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_BINARY_FUNCTORS_H
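A minimal usage sketch of the functors above (added for illustration, assuming Eigen 3.2 headers): an array-plus-scalar expression expands to scalar_add_op, and the same functor can be applied explicitly through unaryExpr():

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(4, 0.0, 3.0);
  // array + scalar is implemented with internal::scalar_add_op<double>
  Eigen::ArrayXd b = a + 1.0;
  // the same functor can be passed explicitly to unaryExpr()
  Eigen::ArrayXd c = a.unaryExpr(Eigen::internal::scalar_add_op<double>(1.0));
  std::cout << (b - c).abs().maxCoeff() << "\n"; // prints 0
  return 0;
}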


@@ -1,6 +0,0 @@
FILE(GLOB Eigen_Core_Functor_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_Functor_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/functors COMPONENT Devel
)


@@ -1,158 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_NULLARY_FUNCTORS_H
#define EIGEN_NULLARY_FUNCTORS_H
namespace Eigen {
namespace internal {
template<typename Scalar>
struct scalar_constant_op {
typedef typename packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; }
template<typename Index>
EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return internal::pset1<Packet>(m_other); }
const Scalar m_other;
};
template<typename Scalar>
struct functor_traits<scalar_constant_op<Scalar> >
// FIXME replace this packet test by a safe one
{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };
template<typename Scalar> struct scalar_identity_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? Scalar(1) : Scalar(0); }
};
template<typename Scalar>
struct functor_traits<scalar_identity_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
// linear access for packet ops:
// 1) initialization
// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0])
// 2) each step (where size is 1 for coeff access or PacketSize for packet access)
// base += [size*step, ..., size*step]
//
// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
// in order to avoid the padd() in operator() ?
template <typename Scalar>
struct linspaced_op_impl<Scalar,false>
{
typedef typename packet_traits<Scalar>::type Packet;
linspaced_op_impl(const Scalar& low, const Scalar& step) :
m_low(low), m_step(step),
m_packetStep(pset1<Packet>(packet_traits<Scalar>::size*step)),
m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const
{
m_base = padd(m_base, pset1<Packet>(m_step));
return m_low+Scalar(i)*m_step;
}
template<typename Index>
EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); }
const Scalar m_low;
const Scalar m_step;
const Packet m_packetStep;
mutable Packet m_base;
};
// random access for packet ops:
// 1) each step
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
template <typename Scalar>
struct linspaced_op_impl<Scalar,true>
{
typedef typename packet_traits<Scalar>::type Packet;
linspaced_op_impl(const Scalar& low, const Scalar& step) :
m_low(low), m_step(step),
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Scalar>(0)) {}
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
template<typename Index>
EIGEN_STRONG_INLINE const Packet packetOp(Index i) const
{ return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(Scalar(i)),m_interPacket))); }
const Scalar m_low;
const Scalar m_step;
const Packet m_lowPacket;
const Packet m_stepPacket;
const Packet m_interPacket;
};
// ----- Linspace functor ----------------------------------------------------------------
// Forward declaration (we default to random access, which does not really give
// us a speed gain when using packet access, but it allows using the functor in
// nested expressions).
template <typename Scalar, bool RandomAccess = true> struct linspaced_op;
template <typename Scalar, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,RandomAccess> >
{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
template <typename Scalar, bool RandomAccess> struct linspaced_op
{
typedef typename packet_traits<Scalar>::type Packet;
linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {}
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
// We need this function when assigning e.g. a RowVectorXd to a MatrixXd, since
// in that case row==0 and col is used for the actual iteration.
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const
{
eigen_assert(col==0 || row==0);
return impl(col + row);
}
template<typename Index>
EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); }
// We need this function when assigning e.g. a RowVectorXd to a MatrixXd, since
// in that case row==0 and col is used for the actual iteration.
template<typename Index>
EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
{
eigen_assert(col==0 || row==0);
return impl.packetOp(col + row);
}
// This proxy object handles the actual required temporaries, the different
// implementations (random vs. sequential access) as well as the
// correct piping to size 2/4 packet operations.
const linspaced_op_impl<Scalar,RandomAccess> impl;
};
// All functors allow linear access, except scalar_identity_op. So we define here a quick meta-function
// to indicate whether a functor allows linear access, always answering 'yes' except for
// scalar_identity_op.
// FIXME move this to functor_traits adding a functor_default
template<typename Functor> struct functor_has_linear_access { enum { ret = 1 }; };
template<typename Scalar> struct functor_has_linear_access<scalar_identity_op<Scalar> > { enum { ret = 0 }; };
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_NULLARY_FUNCTORS_H
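For reference, a minimal sketch (added for illustration, assuming Eigen 3.2) of the public entry points that map onto the nullary functors above: LinSpaced() uses linspaced_op, Constant() uses scalar_constant_op, and Identity() uses scalar_identity_op:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(5, 0.0, 1.0); // linspaced_op
  std::cout << v.transpose() << "\n";                          // 0 0.25 0.5 0.75 1
  Eigen::MatrixXd c = Eigen::MatrixXd::Constant(2, 2, 3.0);    // scalar_constant_op
  Eigen::MatrixXd I = Eigen::MatrixXd::Identity(3, 3);         // scalar_identity_op
  std::cout << c.sum() << " " << I.trace() << "\n";            // 12 3
  return 0;
}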


@@ -1,129 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_STL_FUNCTORS_H
#define EIGEN_STL_FUNCTORS_H
namespace Eigen {
namespace internal {
// default functor traits for STL functors:
template<typename T>
struct functor_traits<std::multiplies<T> >
{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
template<typename T>
struct functor_traits<std::divides<T> >
{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
template<typename T>
struct functor_traits<std::plus<T> >
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
template<typename T>
struct functor_traits<std::minus<T> >
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
template<typename T>
struct functor_traits<std::negate<T> >
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
template<typename T>
struct functor_traits<std::logical_or<T> >
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct functor_traits<std::logical_and<T> >
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct functor_traits<std::logical_not<T> >
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct functor_traits<std::greater<T> >
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct functor_traits<std::less<T> >
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct functor_traits<std::greater_equal<T> >
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct functor_traits<std::less_equal<T> >
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct functor_traits<std::equal_to<T> >
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct functor_traits<std::not_equal_to<T> >
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct functor_traits<std::binder2nd<T> >
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
template<typename T>
struct functor_traits<std::binder1st<T> >
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
template<typename T>
struct functor_traits<std::unary_negate<T> >
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
template<typename T>
struct functor_traits<std::binary_negate<T> >
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
#ifdef EIGEN_STDEXT_SUPPORT
template<typename T0,typename T1>
struct functor_traits<std::project1st<T0,T1> >
{ enum { Cost = 0, PacketAccess = false }; };
template<typename T0,typename T1>
struct functor_traits<std::project2nd<T0,T1> >
{ enum { Cost = 0, PacketAccess = false }; };
template<typename T0,typename T1>
struct functor_traits<std::select2nd<std::pair<T0,T1> > >
{ enum { Cost = 0, PacketAccess = false }; };
template<typename T0,typename T1>
struct functor_traits<std::select1st<std::pair<T0,T1> > >
{ enum { Cost = 0, PacketAccess = false }; };
template<typename T0,typename T1>
struct functor_traits<std::unary_compose<T0,T1> >
{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost, PacketAccess = false }; };
template<typename T0,typename T1,typename T2>
struct functor_traits<std::binary_compose<T0,T1,T2> >
{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost + functor_traits<T2>::Cost, PacketAccess = false }; };
#endif // EIGEN_STDEXT_SUPPORT
// Allow adding new functors and specializations of functor_traits from outside Eigen.
// This macro is needed because functor_traits must be specialized after it is declared but before it is used...
#ifdef EIGEN_FUNCTORS_PLUGIN
#include EIGEN_FUNCTORS_PLUGIN
#endif
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_STL_FUNCTORS_H
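These traits exist so that STL functors can be used directly in Eigen expressions with a sensible cost model; a minimal sketch (added for illustration, assuming Eigen 3.2):

#include <Eigen/Dense>
#include <functional>
#include <iostream>

int main()
{
  Eigen::ArrayXd a(3), b(3);
  a << 1, 2, 3;
  b << 10, 20, 30;
  // functor_traits<std::plus<double> > above supplies the cost/packet info
  Eigen::ArrayXd s = a.binaryExpr(b, std::plus<double>());
  std::cout << s.transpose() << "\n"; // 11 22 33
  return 0;
}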


@@ -1,396 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_UNARY_FUNCTORS_H
#define EIGEN_UNARY_FUNCTORS_H
namespace Eigen {
namespace internal {
/** \internal
* \brief Template functor to compute the opposite of a scalar
*
* \sa class CwiseUnaryOp, MatrixBase::operator-
*/
template<typename Scalar> struct scalar_opposite_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::pnegate(a); }
};
template<typename Scalar>
struct functor_traits<scalar_opposite_op<Scalar> >
{ enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasNegate };
};
/** \internal
* \brief Template functor to compute the absolute value of a scalar
*
* \sa class CwiseUnaryOp, Cwise::abs
*/
template<typename Scalar> struct scalar_abs_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::pabs(a); }
};
template<typename Scalar>
struct functor_traits<scalar_abs_op<Scalar> >
{
enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasAbs
};
};
/** \internal
* \brief Template functor to compute the squared absolute value of a scalar
*
* \sa class CwiseUnaryOp, Cwise::abs2
*/
template<typename Scalar> struct scalar_abs2_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op)
typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::pmul(a,a); }
};
template<typename Scalar>
struct functor_traits<scalar_abs2_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasAbs2 }; };
/** \internal
* \brief Template functor to compute the conjugate of a complex value
*
* \sa class CwiseUnaryOp, MatrixBase::conjugate()
*/
template<typename Scalar> struct scalar_conjugate_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
};
template<typename Scalar>
struct functor_traits<scalar_conjugate_op<Scalar> >
{
enum {
Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0,
PacketAccess = packet_traits<Scalar>::HasConj
};
};
/** \internal
* \brief Template functor to cast a scalar to another type
*
* \sa class CwiseUnaryOp, MatrixBase::cast()
*/
template<typename Scalar, typename NewType>
struct scalar_cast_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
typedef NewType result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }
};
template<typename Scalar, typename NewType>
struct functor_traits<scalar_cast_op<Scalar,NewType> >
{ enum { Cost = is_same<Scalar, NewType>::value ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; };
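// Illustrative sketch (not part of the original file): scalar_cast_op backs
// DenseBase::cast<NewType>(), e.g.:
//   Eigen::VectorXf vf = Eigen::VectorXd::Ones(3).cast<float>();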
/** \internal
* \brief Template functor to extract the real part of a complex
*
* \sa class CwiseUnaryOp, MatrixBase::real()
*/
template<typename Scalar>
struct scalar_real_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op)
typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); }
};
template<typename Scalar>
struct functor_traits<scalar_real_op<Scalar> >
{ enum { Cost = 0, PacketAccess = false }; };
/** \internal
* \brief Template functor to extract the imaginary part of a complex
*
* \sa class CwiseUnaryOp, MatrixBase::imag()
*/
template<typename Scalar>
struct scalar_imag_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op)
typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); }
};
template<typename Scalar>
struct functor_traits<scalar_imag_op<Scalar> >
{ enum { Cost = 0, PacketAccess = false }; };
/** \internal
* \brief Template functor to extract the real part of a complex as a reference
*
* \sa class CwiseUnaryOp, MatrixBase::real()
*/
template<typename Scalar>
struct scalar_real_ref_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op)
typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast<Scalar*>(&a)); }
};
template<typename Scalar>
struct functor_traits<scalar_real_ref_op<Scalar> >
{ enum { Cost = 0, PacketAccess = false }; };
/** \internal
* \brief Template functor to extract the imaginary part of a complex as a reference
*
* \sa class CwiseUnaryOp, MatrixBase::imag()
*/
template<typename Scalar>
struct scalar_imag_ref_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op)
typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast<Scalar*>(&a)); }
};
template<typename Scalar>
struct functor_traits<scalar_imag_ref_op<Scalar> >
{ enum { Cost = 0, PacketAccess = false }; };
/** \internal
*
* \brief Template functor to compute the exponential of a scalar
*
* \sa class CwiseUnaryOp, Cwise::exp()
*/
template<typename Scalar> struct scalar_exp_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
};
template<typename Scalar>
struct functor_traits<scalar_exp_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasExp }; };
/** \internal
*
* \brief Template functor to compute the logarithm of a scalar
*
* \sa class CwiseUnaryOp, Cwise::log()
*/
template<typename Scalar> struct scalar_log_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
};
template<typename Scalar>
struct functor_traits<scalar_log_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog }; };
/** \internal
* \brief Template functor to compute the square root of a scalar
* \sa class CwiseUnaryOp, Cwise::sqrt()
*/
template<typename Scalar> struct scalar_sqrt_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
};
template<typename Scalar>
struct functor_traits<scalar_sqrt_op<Scalar> >
{ enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasSqrt
};
};
/** \internal
* \brief Template functor to compute the cosine of a scalar
* \sa class CwiseUnaryOp, ArrayBase::cos()
*/
template<typename Scalar> struct scalar_cos_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
};
template<typename Scalar>
struct functor_traits<scalar_cos_op<Scalar> >
{
enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasCos
};
};
/** \internal
* \brief Template functor to compute the sine of a scalar
* \sa class CwiseUnaryOp, ArrayBase::sin()
*/
template<typename Scalar> struct scalar_sin_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
};
template<typename Scalar>
struct functor_traits<scalar_sin_op<Scalar> >
{
enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasSin
};
};
/** \internal
* \brief Template functor to compute the tan of a scalar
* \sa class CwiseUnaryOp, ArrayBase::tan()
*/
template<typename Scalar> struct scalar_tan_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
};
template<typename Scalar>
struct functor_traits<scalar_tan_op<Scalar> >
{
enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasTan
};
};
/** \internal
* \brief Template functor to compute the arc cosine of a scalar
* \sa class CwiseUnaryOp, ArrayBase::acos()
*/
template<typename Scalar> struct scalar_acos_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
};
template<typename Scalar>
struct functor_traits<scalar_acos_op<Scalar> >
{
enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasACos
};
};
/** \internal
* \brief Template functor to compute the arc sine of a scalar
* \sa class CwiseUnaryOp, ArrayBase::asin()
*/
template<typename Scalar> struct scalar_asin_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
};
template<typename Scalar>
struct functor_traits<scalar_asin_op<Scalar> >
{
enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasASin
};
};
/** \internal
* \brief Template functor to compute the atan of a scalar
* \sa class CwiseUnaryOp, ArrayBase::atan()
*/
template<typename Scalar> struct scalar_atan_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op)
inline const Scalar operator() (const Scalar& a) const { using std::atan; return atan(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::patan(a); }
};
template<typename Scalar>
struct functor_traits<scalar_atan_op<Scalar> >
{
enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasATan
};
};
/** \internal
* \brief Template functor to compute the inverse of a scalar
* \sa class CwiseUnaryOp, Cwise::inverse()
*/
template<typename Scalar>
struct scalar_inverse_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op)
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
template<typename Packet>
inline const Packet packetOp(const Packet& a) const
{ return internal::pdiv(pset1<Packet>(Scalar(1)),a); }
};
template<typename Scalar>
struct functor_traits<scalar_inverse_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
/** \internal
* \brief Template functor to compute the square of a scalar
* \sa class CwiseUnaryOp, Cwise::square()
*/
template<typename Scalar>
struct scalar_square_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op)
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a; }
template<typename Packet>
inline const Packet packetOp(const Packet& a) const
{ return internal::pmul(a,a); }
};
template<typename Scalar>
struct functor_traits<scalar_square_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
/** \internal
* \brief Template functor to compute the cube of a scalar
* \sa class CwiseUnaryOp, Cwise::cube()
*/
template<typename Scalar>
struct scalar_cube_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op)
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a*a; }
template<typename Packet>
inline const Packet packetOp(const Packet& a) const
{ return internal::pmul(a,pmul(a,a)); }
};
template<typename Scalar>
struct functor_traits<scalar_cube_op<Scalar> >
{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_UNARY_FUNCTORS_H
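A minimal sketch of the array API that maps onto the unary functors above (added for illustration, assuming Eigen 3.2):

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(4, 0.25, 1.0);
  std::cout << a.sqrt().transpose()    << "\n"; // scalar_sqrt_op
  std::cout << a.square().transpose()  << "\n"; // scalar_square_op
  std::cout << a.inverse().transpose() << "\n"; // scalar_inverse_op
  return 0;
}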


@@ -85,12 +85,12 @@ struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
| (EvalToRowMajor ? RowMajorBit : 0)
| NestingFlags
| (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0)
| (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0)
| (LhsFlags & RhsFlags & AlignedBit)
// TODO enable vectorization for mixed types
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
CoeffReadCost = InnerSize == Dynamic ? Dynamic
: InnerSize == 0 ? 0
: InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
@@ -134,20 +134,18 @@ class CoeffBasedProduct
};
typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
Unroll ? InnerSize-1 : Dynamic,
Unroll ? InnerSize : Dynamic,
_LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
public:
EIGEN_DEVICE_FUNC
inline CoeffBasedProduct(const CoeffBasedProduct& other)
: Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
{}
template<typename Lhs, typename Rhs>
EIGEN_DEVICE_FUNC
inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs)
{
@@ -160,10 +158,9 @@ class CoeffBasedProduct
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
{
Scalar res;
@@ -174,7 +171,6 @@ class CoeffBasedProduct
/* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
* which is why we don't set the LinearAccessBit.
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
{
Scalar res;
@@ -189,33 +185,29 @@ class CoeffBasedProduct
{
PacketScalar res;
internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
Unroll ? InnerSize-1 : Dynamic,
Unroll ? InnerSize : Dynamic,
_LhsNested, _RhsNested, PacketScalar, LoadMode>
::run(row, col, m_lhs, m_rhs, res);
return res;
}
// Implicit conversion to the nested type (trigger the evaluation of the product)
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE operator const PlainObject& () const
{
m_result.lazyAssign(*this);
return m_result;
}
EIGEN_DEVICE_FUNC const _LhsNested& lhs() const { return m_lhs; }
EIGEN_DEVICE_FUNC const _RhsNested& rhs() const { return m_rhs; }
const _LhsNested& lhs() const { return m_lhs; }
const _RhsNested& rhs() const { return m_rhs; }
EIGEN_DEVICE_FUNC
const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
template<int DiagonalIndex>
EIGEN_DEVICE_FUNC
const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
EIGEN_DEVICE_FUNC
const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
@@ -248,11 +240,20 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
res += lhs.coeff(row, UnrollingIndex-1) * rhs.coeff(UnrollingIndex-1, col);
}
};
template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, 1, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
}
};
@@ -260,10 +261,9 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res)
{
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
res = RetScalar(0);
}
};
@@ -271,13 +271,9 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
{
eigen_assert(lhs.cols()>0 && "you are using an uninitialized matrix");
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
for(Index i = 1; i < lhs.cols(); ++i)
res += lhs.coeff(row, i) * rhs.coeff(i, col);
res = (lhs.row(row).transpose().cwiseProduct( rhs.col(col) )).sum();
}
};
@@ -307,6 +303,16 @@ struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
}
};
template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<InnerVectorizedTraversal, 0, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res)
{
res = 0;
}
};
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
@@ -316,7 +322,7 @@ struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, Re
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
Packet pres;
product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
res = predux(pres);
}
};
@@ -384,7 +390,7 @@ struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode>(UnrollingIndex-1, col), res);
}
};
@@ -395,12 +401,12 @@ struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
{
product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
struct product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
@@ -410,7 +416,7 @@ struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
struct product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
@@ -419,16 +425,35 @@ struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res)
{
res = pset1<Packet>(0);
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res)
{
res = pset1<Packet>(0);
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
{
eigen_assert(lhs.cols()>0 && "you are using an uninitialized matrix");
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
for(Index i = 1; i < lhs.cols(); ++i)
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
res = pset1<Packet>(0);
for(Index i = 0; i < lhs.cols(); ++i)
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
}
};
@@ -438,10 +463,9 @@ struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
typedef typename Lhs::Index Index;
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
{
eigen_assert(lhs.cols()>0 && "you are using an uninitialized matrix");
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
for(Index i = 1; i < lhs.cols(); ++i)
res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
res = pset1<Packet>(0);
for(Index i = 0; i < lhs.cols(); ++i)
res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
}
};
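The zero-size specializations and rewritten Dynamic loops above make products with an empty inner dimension well defined (the old code asserted lhs.cols()>0). A quick sketch of the resulting semantics (added for illustration):

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd A(2, 0), B(0, 2); // the inner dimension is zero
  Eigen::MatrixXd C = A * B;        // 2x2 result, filled with zeros
  std::cout << C << "\n";
  return 0;
}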

File diff suppressed because it is too large


@@ -23,8 +23,6 @@ template<
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
{
typedef gebp_traits<RhsScalar,LhsScalar> Traits;
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(
Index rows, Index cols, Index depth,
@@ -53,8 +51,6 @@ template<
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
{
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static void run(Index rows, Index cols, Index depth,
const LhsScalar* _lhs, Index lhsStride,
@@ -67,9 +63,11 @@ static void run(Index rows, Index cols, Index depth,
const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
Index nc = (std::min)(cols,blocking.nc()); // cache block size along the N direction
//Index nc = blocking.nc(); // cache block size along the N direction
gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
@@ -82,68 +80,68 @@ static void run(Index rows, Index cols, Index depth,
Index tid = omp_get_thread_num();
Index threads = omp_get_num_threads();
LhsScalar* blockA = blocking.blockA();
eigen_internal_assert(blockA!=0);
std::size_t sizeA = kc*mc;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, 0);
ei_declare_aligned_stack_constructed_variable(RhsScalar, w, sizeW, 0);
std::size_t sizeB = kc*nc;
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);
RhsScalar* blockB = blocking.blockB();
eigen_internal_assert(blockB!=0);
// For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
for(Index k=0; k<depth; k+=kc)
{
const Index actual_kc = (std::min)(k+kc,depth)-k; // => rows of B', and cols of the A'
// In order to reduce the chance that a thread has to wait for the other,
// let's start by packing B'.
pack_rhs(blockB, &rhs(k,0), rhsStride, actual_kc, nc);
// let's start by packing A'.
pack_lhs(blockA, &lhs(0,k), lhsStride, actual_kc, mc);
// Pack A_k to A' in a parallel fashion:
// each thread packs the sub block A_k,i to A'_i where i is the thread id.
// Pack B_k to B' in a parallel fashion:
// each thread packs the sub block B_k,j to B'_j where j is the thread id.
// However, before copying to A'_i, we have to make sure that no other thread is still using it,
// However, before copying to B'_j, we have to make sure that no other thread is still using it,
// i.e., we test that info[tid].users equals 0.
// Then, we set info[tid].users to the number of threads to mark that all other threads are going to use it.
while(info[tid].users!=0) {}
info[tid].users += threads;
pack_lhs(blockA+info[tid].lhs_start*actual_kc, &lhs(info[tid].lhs_start,k), lhsStride, actual_kc, info[tid].lhs_length);
// Notify the other threads that the part A'_i is ready to go.
pack_rhs(blockB+info[tid].rhs_start*actual_kc, &rhs(k,info[tid].rhs_start), rhsStride, actual_kc, info[tid].rhs_length);
// Notify the other threads that the part B'_j is ready to go.
info[tid].sync = k;
// Computes C_i += A' * B' per A'_i
// Computes C_i += A' * B' per B'_j
for(Index shift=0; shift<threads; ++shift)
{
Index i = (tid+shift)%threads;
Index j = (tid+shift)%threads;
// At this point we have to make sure that A'_i has been updated by the thread i,
// At this point we have to make sure that B'_j has been updated by the thread j,
// we use testAndSetOrdered to mimic a volatile access.
// However, no need to wait for the B' part which has been updated by the current thread!
if(shift>0)
while(info[i].sync!=k) {}
gebp(res+info[i].lhs_start, resStride, blockA+info[i].lhs_start*actual_kc, blockB, info[i].lhs_length, actual_kc, nc, alpha);
while(info[j].sync!=k) {}
gebp(res+info[j].rhs_start*resStride, resStride, blockA, blockB+info[j].rhs_start*actual_kc, mc, actual_kc, info[j].rhs_length, alpha, -1,-1,0,0, w);
}
// Then keep going as usual with the remaining B'
for(Index j=nc; j<cols; j+=nc)
// Then keep going as usual with the remaining A'
for(Index i=mc; i<rows; i+=mc)
{
const Index actual_nc = (std::min)(j+nc,cols)-j;
const Index actual_mc = (std::min)(i+mc,rows)-i;
// pack B_k,j to B'
pack_rhs(blockB, &rhs(k,j), rhsStride, actual_kc, actual_nc);
// pack A_i,k to A'
pack_lhs(blockA, &lhs(i,k), lhsStride, actual_kc, actual_mc);
// C_j += A' * B'
gebp(res+j*resStride, resStride, blockA, blockB, rows, actual_kc, actual_nc, alpha);
// C_i += A' * B'
gebp(res+i, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1,-1,0,0, w);
}
// Release all the sub blocks A'_i of A' for the current thread,
// Release all the sub blocks B'_j of B' for the current thread,
// i.e., we simply decrement the number of users by 1
#pragma omp critical
{
for(Index i=0; i<threads; ++i)
for(Index j=0; j<threads; ++j)
#pragma omp atomic
--(info[i].users);
}
--(info[j].users);
}
}
else
@@ -153,34 +151,38 @@ static void run(Index rows, Index cols, Index depth,
// this is the sequential version!
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*nc;
std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockW, sizeW, blocking.blockW());
// For each horizontal panel of the rhs, and corresponding panel of the lhs...
// (==GEMM_VAR1)
for(Index k2=0; k2<depth; k2+=kc)
{
const Index actual_kc = (std::min)(k2+kc,depth)-k2;
// OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
// => Pack lhs's panel into a sequential chunk of memory (L2/L3 caching)
// Note that this panel will be read as many times as the number of blocks in the rhs's
// horizontal panel which is, in practice, a very low number.
pack_lhs(blockA, &lhs(0,k2), lhsStride, actual_kc, rows);
// => Pack rhs's panel into a sequential chunk of memory (L2 caching)
// Note that this panel will be read as many times as the number of blocks in the lhs's
// vertical panel which is, in practice, a very low number.
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
// For each kc x nc block of the rhs's horizontal panel...
for(Index j2=0; j2<cols; j2+=nc)
// For each mc x kc block of the lhs's vertical panel...
// (==GEPP_VAR1)
for(Index i2=0; i2<rows; i2+=mc)
{
const Index actual_nc = (std::min)(j2+nc,cols)-j2;
const Index actual_mc = (std::min)(i2+mc,rows)-i2;
// We pack the rhs's block into a sequential chunk of memory (L2 caching)
// We pack the lhs's block into a sequential chunk of memory (L1 caching)
// Note that this block will be read a very high number of times, which is equal to the number of
// micro horizontal panel of the large rhs's panel (e.g., rows/12 times).
pack_rhs(blockB, &rhs(k2,j2), rhsStride, actual_kc, actual_nc);
// micro vertical panel of the large rhs's panel (e.g., cols/4 times).
pack_lhs(blockA, &lhs(i2,k2), lhsStride, actual_kc, actual_mc);
// Everything is packed, we can now call the panel * block kernel:
gebp(res+j2*resStride, resStride, blockA, blockB, rows, actual_kc, actual_nc, alpha);
// Everything is packed, we can now call the block * panel kernel:
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
}
}
}
@@ -201,13 +203,14 @@ struct traits<GeneralProduct<Lhs,Rhs,GemmProduct> >
template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
struct gemm_functor
{
gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha, BlockingType& blocking)
gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha,
BlockingType& blocking)
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
{}
void initParallelSession() const
{
m_blocking.allocateA();
m_blocking.allocateB();
}
void operator() (Index row, Index rows, Index col=0, Index cols=-1, GemmParallelInfo<Index>* info=0) const
@@ -221,8 +224,6 @@ struct gemm_functor
(Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
m_actualAlpha, m_blocking, info);
}
typedef typename Gemm::Traits Traits;
protected:
const Lhs& m_lhs;
@@ -244,6 +245,7 @@ class level3_blocking
protected:
LhsScalar* m_blockA;
RhsScalar* m_blockB;
RhsScalar* m_blockW;
DenseIndex m_mc;
DenseIndex m_nc;
@@ -252,7 +254,7 @@ class level3_blocking
public:
level3_blocking()
: m_blockA(0), m_blockB(0), m_mc(0), m_nc(0), m_kc(0)
: m_blockA(0), m_blockB(0), m_blockW(0), m_mc(0), m_nc(0), m_kc(0)
{}
inline DenseIndex mc() const { return m_mc; }
@@ -261,6 +263,7 @@ class level3_blocking
inline LhsScalar* blockA() { return m_blockA; }
inline RhsScalar* blockB() { return m_blockB; }
inline RhsScalar* blockW() { return m_blockW; }
};
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
@@ -279,25 +282,29 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
enum {
SizeA = ActualRows * MaxDepth,
SizeB = ActualCols * MaxDepth
SizeB = ActualCols * MaxDepth,
SizeW = MaxDepth * Traits::WorkSpaceFactor
};
EIGEN_ALIGN_DEFAULT LhsScalar m_staticA[SizeA];
EIGEN_ALIGN_DEFAULT RhsScalar m_staticB[SizeB];
EIGEN_ALIGN16 LhsScalar m_staticA[SizeA];
EIGEN_ALIGN16 RhsScalar m_staticB[SizeB];
EIGEN_ALIGN16 RhsScalar m_staticW[SizeW];
public:
gemm_blocking_space(DenseIndex /*rows*/, DenseIndex /*cols*/, DenseIndex /*depth*/, bool /*full_rows*/ = false)
gemm_blocking_space(DenseIndex /*rows*/, DenseIndex /*cols*/, DenseIndex /*depth*/)
{
this->m_mc = ActualRows;
this->m_nc = ActualCols;
this->m_kc = MaxDepth;
this->m_blockA = m_staticA;
this->m_blockB = m_staticB;
this->m_blockW = m_staticW;
}
inline void allocateA() {}
inline void allocateB() {}
inline void allocateW() {}
inline void allocateAll() {}
};
@@ -316,28 +323,20 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
DenseIndex m_sizeA;
DenseIndex m_sizeB;
DenseIndex m_sizeW;
public:
gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth, bool full_rows = false)
gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth)
{
this->m_mc = Transpose ? cols : rows;
this->m_nc = Transpose ? rows : cols;
this->m_kc = depth;
if(full_rows)
{
DenseIndex m = this->m_mc;
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc);
}
else // full columns
{
DenseIndex n = this->m_nc;
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, n);
}
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc);
m_sizeA = this->m_mc * this->m_kc;
m_sizeB = this->m_kc * this->m_nc;
m_sizeW = this->m_kc*Traits::WorkSpaceFactor;
}
void allocateA()
@@ -352,16 +351,24 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
}
void allocateW()
{
if(this->m_blockW==0)
this->m_blockW = aligned_new<RhsScalar>(m_sizeW);
}
void allocateAll()
{
allocateA();
allocateB();
allocateW();
}
~gemm_blocking_space()
{
aligned_delete(this->m_blockA, m_sizeA);
aligned_delete(this->m_blockB, m_sizeB);
aligned_delete(this->m_blockW, m_sizeW);
}
};
@@ -386,37 +393,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
typedef internal::scalar_product_op<LhsScalar,RhsScalar> BinOp;
EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
}
template<typename Dest>
inline void evalTo(Dest& dst) const
{
if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
dst.noalias() = m_lhs .lazyProduct( m_rhs );
else
{
dst.setZero();
scaleAndAddTo(dst,Scalar(1));
}
}
template<typename Dest>
inline void addTo(Dest& dst) const
{
if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
dst.noalias() += m_lhs .lazyProduct( m_rhs );
else
scaleAndAddTo(dst,Scalar(1));
}
template<typename Dest>
inline void subTo(Dest& dst) const
{
if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
dst.noalias() -= m_lhs .lazyProduct( m_rhs );
else
scaleAndAddTo(dst,Scalar(-1));
}
template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
@@ -439,7 +416,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
_ActualLhsType, _ActualRhsType, Dest, BlockingType> GemmFunctor;
BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), true);
BlockingType blocking(dst.rows(), dst.cols(), lhs.cols());
internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit);
}
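For orientation, a sketch of a matrix product that takes the blocked GEMM path discussed in this hunk (added for illustration, assuming Eigen 3.2):

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(256, 256);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(256, 256);
  Eigen::MatrixXd C(256, 256);
  C.noalias() = A * B; // dispatches to GeneralProduct<...,GemmProduct>
  std::cout << C(0, 0) << "\n";
  return 0;
}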


@@ -73,8 +73,11 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
if(mc > Traits::nr)
mc = (mc/Traits::nr)*Traits::nr;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*size;
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, kc*size, 0);
ei_declare_aligned_stack_constructed_variable(RhsScalar, allocatedBlockB, sizeB, 0);
RhsScalar* blockB = allocatedBlockB + sizeW;
gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
@@ -100,15 +103,15 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
// 3 - after the diagonal => processed with gebp or skipped
if (UpLo==Lower)
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, (std::min)(size,i2), alpha,
-1, -1, 0, 0);
-1, -1, 0, 0, allocatedBlockB);
sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha);
sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB);
if (UpLo==Upper)
{
Index j2 = i2+actual_mc;
gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, (std::max)(Index(0), size-j2), alpha,
-1, -1, 0, 0);
-1, -1, 0, 0, allocatedBlockB);
}
}
}
@@ -133,7 +136,7 @@ struct tribb_kernel
enum {
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
};
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha)
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha, RhsScalar* workspace)
{
gebp_kernel<LhsScalar, RhsScalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer;
@@ -147,7 +150,7 @@ struct tribb_kernel
if(UpLo==Upper)
gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha,
-1, -1, 0, 0);
-1, -1, 0, 0, workspace);
// selfadjoint micro block
{
@@ -155,7 +158,7 @@ struct tribb_kernel
buffer.setZero();
// 1 - apply the kernel on the temporary buffer
gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha,
-1, -1, 0, 0);
-1, -1, 0, 0, workspace);
// 2 - triangular accumulation
for(Index j1=0; j1<actualBlockSize; ++j1)
{
@@ -170,7 +173,7 @@ struct tribb_kernel
{
Index i = j+actualBlockSize;
gebp_kernel(res+j*resStride+i, resStride, blockA+depth*i, actual_b, size-i, depth, actualBlockSize, alpha,
-1, -1, 0, 0);
-1, -1, 0, 0, workspace);
}
}
}
@@ -265,8 +268,6 @@ template<typename MatrixType, unsigned int UpLo>
template<typename ProductDerived, typename _Lhs, typename _Rhs>
TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha)
{
eigen_assert(m_matrix.rows() == prod.rows() && m_matrix.cols() == prod.cols());
general_product_to_triangular_selector<MatrixType, ProductDerived, UpLo, (_Lhs::ColsAtCompileTime==1) || (_Rhs::RowsAtCompileTime==1)>::run(m_matrix.const_cast_derived(), prod.derived(), alpha);
return *this;
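The assignProduct path above sits on the same kernel that selfadjoint rank updates use; a minimal sketch of one public entry point (added for illustration, assuming Eigen 3.2):

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 3);
  Eigen::MatrixXd C = Eigen::MatrixXd::Zero(4, 4);
  // C += 1.0 * A * A^T, updating only the lower triangle
  C.selfadjointView<Eigen::Lower>().rankUpdate(A, 1.0);
  std::cout << C(0, 0) << "\n";
  return 0;
}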


@@ -53,8 +53,6 @@ template< \
int RhsStorageOrder, bool ConjugateRhs> \
struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \
{ \
typedef gebp_traits<EIGTYPE,EIGTYPE> Traits; \
\
static void run(Index rows, Index cols, Index depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \


@@ -26,34 +26,6 @@ namespace internal {
* |real |cplx |real | alpha is converted to a cplx when calling the run function, no vectorization
* |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp
* |cplx |real |real | optimal case, vectorization possible via real-cplx mul
*
* Accesses to the matrix coefficients obey the following logic:
*
* - if all columns have the same alignment then
* - if the columns have the same alignment as the result vector, then easy! (-> AllAligned case)
* - otherwise perform unaligned loads only (-> NoneAligned case)
* - otherwise
* - if even columns have the same alignment then
* // odd columns are guaranteed to have the same alignment too
* - if even or odd columns have the same alignment as the result, then
* // for a register size of 2 scalars, this is guaranteed to be the case (e.g., SSE with double)
* - perform half aligned and half unaligned loads (-> EvenAligned case)
* - otherwise perform unaligned loads only (-> NoneAligned case)
* - otherwise, if the register size is 4 scalars (e.g., SSE with float) then
* - one out of 4 consecutive columns is guaranteed to be aligned with the result vector,
* perform simple aligned loads for this column and aligned loads plus re-alignment for the other. (-> FirstAligned case)
* // this re-alignment is done by the palign function implemented for SSE in Eigen/src/Core/arch/SSE/PacketMath.h
* - otherwise,
* // if we get here, this means the register size is greater than 4 (e.g., AVX with floats),
* // we currently fall back to the NoneAligned case
*
* The same reasoning applies to the transposed case.
*
* The last case (PacketSize>4) could probably be improved by generalizing the FirstAligned case, but since we do not support AVX yet...
* One might also wonder why in the EvenAligned case we perform unaligned loads instead of using the aligned-loads plus re-alignment
* strategy as in the FirstAligned case. The reason is that we observed that unaligned loads on an 8 byte boundary are not too slow
* compared to unaligned loads on a 4 byte boundary.
*
*/
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
@@ -80,8 +52,7 @@ EIGEN_DONT_INLINE static void run(
Index rows, Index cols,
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsIncr,
ResScalar* res, Index resIncr,
RhsScalar alpha);
ResScalar* res, Index resIncr, RhsScalar alpha);
};
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
@@ -89,10 +60,9 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,Co
Index rows, Index cols,
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsIncr,
ResScalar* res, Index resIncr,
RhsScalar alpha)
ResScalar* res, Index resIncr, RhsScalar alpha)
{
EIGEN_UNUSED_VARIABLE(resIncr);
EIGEN_UNUSED_VARIABLE(resIncr)
eigen_internal_assert(resIncr==1);
#ifdef _EIGEN_ACCUMULATE_PACKETS
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
@@ -141,12 +111,6 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,Co
alignedSize = 0;
alignedStart = 0;
}
else if(LhsPacketSize > 4)
{
// TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
// Currently, it seems to be better to perform unaligned loads anyway
alignmentPattern = NoneAligned;
}
else if (LhsPacketSize>1)
{
eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize);
@@ -351,7 +315,7 @@ EIGEN_DONT_INLINE static void run(
Index rows, Index cols,
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsIncr,
ResScalar* res, Index resIncr,
ResScalar* res, Index resIncr,
ResScalar alpha);
};
@@ -365,7 +329,6 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,Co
{
EIGEN_UNUSED_VARIABLE(rhsIncr);
eigen_internal_assert(rhsIncr==1);
#ifdef _EIGEN_ACCUMULATE_PACKETS
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
#endif
@@ -411,11 +374,6 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,Co
alignedSize = 0;
alignedStart = 0;
}
else if(LhsPacketSize > 4)
{
// TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
alignmentPattern = NoneAligned;
}
else if (LhsPacketSize>1)
{
eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize);
@@ -453,7 +411,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,Co
Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
{
EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0);
ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
// this helps the compiler generate good binary code
@@ -562,7 +520,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,Co
{
for (Index i=start; i<end; ++i)
{
EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0);
ResPacket ptmp0 = pset1<ResPacket>(tmp0);
const LhsScalar* lhs0 = lhs + i*lhsStride;
// process first unaligned result's coeffs

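The long comment in the hunk above documents how the column-major matrix-vector kernel picks one of the AllAligned, EvenAligned, FirstAligned and NoneAligned strategies. A simplified standalone sketch of that decision, assuming 16-byte packets and ignoring the peeling details the real kernel also handles (illustrative only, not Eigen's implementation):

#include <cstddef>
#include <cstdint>
#include <cstdio>

enum AlignmentPattern { AllAligned, EvenAligned, FirstAligned, NoneAligned };

// Offset, in scalars, from p to the first element aligned on a full packet.
template<typename Scalar>
std::size_t first_aligned_offset(const Scalar* p, std::size_t packetSize)
{
  std::size_t byteOff = reinterpret_cast<std::uintptr_t>(p) % (sizeof(Scalar)*packetSize);
  return byteOff==0 ? 0 : packetSize - byteOff/sizeof(Scalar);
}

// Compare the per-column alignment offsets against the result vector's.
template<typename Scalar>
AlignmentPattern choose_pattern(const Scalar* lhs, std::size_t lhsStride,
                                const Scalar* res, std::size_t packetSize)
{
  std::size_t resOff  = first_aligned_offset(res, packetSize);
  std::size_t col0Off = first_aligned_offset(lhs, packetSize);
  std::size_t col1Off = first_aligned_offset(lhs + lhsStride, packetSize);
  if (col0Off == col1Off)                      // all columns share one offset
    return col0Off == resOff ? AllAligned : NoneAligned;
  if (packetSize == 2) return EvenAligned;     // e.g. SSE with double
  if (packetSize == 4) return FirstAligned;    // e.g. SSE with float
  return NoneAligned;                          // PacketSize > 4: fall back
}

int main()
{
  float buf[64];  // hypothetical storage; only the addresses matter here
  std::printf("pattern = %d\n", (int)choose_pattern(buf, 9, buf, 4));
}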

@@ -73,13 +73,13 @@ namespace internal {
template<typename Index> struct GemmParallelInfo
{
GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {}
int volatile sync;
int volatile users;
Index lhs_start;
Index lhs_length;
Index rhs_start;
Index rhs_length;
};
template<bool Condition, typename Functor, typename Index>
@@ -107,7 +107,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
if((!Condition) || (omp_get_num_threads()>1))
return func(0,rows, 0,cols);
Index size = transpose ? rows : cols;
Index size = transpose ? cols : rows;
// 2- compute the maximal number of threads from the size of the product:
// FIXME this has to be fine tuned
@@ -125,26 +125,30 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
if(transpose)
std::swap(rows,cols);
Index blockCols = (cols / threads) & ~Index(0x3);
Index blockRows = (rows / threads);
blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads];
#pragma omp parallel num_threads(threads)
{
Index i = omp_get_thread_num();
// Note that the actual number of threads might be lower than the number of requested ones.
Index actual_threads = omp_get_num_threads();
Index blockCols = (cols / actual_threads) & ~Index(0x3);
Index blockRows = (rows / actual_threads) & ~Index(0x7);
Index r0 = i*blockRows;
Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows;
Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
Index c0 = i*blockCols;
Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols;
Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
info[i].lhs_start = r0;
info[i].lhs_length = actualBlockRows;
info[i].rhs_start = c0;
info[i].rhs_length = actualBlockCols;
if(transpose) func(c0, actualBlockCols, 0, rows, info);
else func(0, rows, c0, actualBlockCols, info);
if(transpose)
func(0, cols, r0, actualBlockRows, info);
else
func(r0, actualBlockRows, 0,cols, info);
}
delete[] info;

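One side of the hunk above computes the block bounds inside the OpenMP parallel region from omp_get_num_threads(), because the runtime may grant fewer threads than requested and stale bounds would leave rows unprocessed. A minimal sketch of the same pattern, with a hypothetical print standing in for the GEMM functor (compile with -fopenmp):

#include <omp.h>
#include <cstdio>

// Row-blocked dispatch in the style of parallelize_gemm: each thread owns
// the slice [r0, r0+actualBlockRows), and the last thread absorbs the
// remainder rows.
void parallel_rows_sketch(int rows, int cols, int requested_threads)
{
  #pragma omp parallel num_threads(requested_threads)
  {
    int i = omp_get_thread_num();
    // The block size must come from the actual team size, not the
    // requested one.
    int actual_threads = omp_get_num_threads();
    int blockRows = (rows / actual_threads) & ~int(0x7); // multiple of 8
    int r0 = i * blockRows;
    int actualBlockRows = (i+1 == actual_threads) ? rows - r0 : blockRows;
    std::printf("thread %d: rows [%d, %d) x cols [0, %d)\n",
                i, r0, r0 + actualBlockRows, cols);
  }
}

int main() { parallel_rows_sketch(1000, 512, 4); }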

@@ -15,7 +15,7 @@ namespace Eigen {
namespace internal {
// pack a selfadjoint block diagonal for use with the gebp_kernel
template<typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
struct symm_pack_lhs
{
template<int BlockRows> inline
@@ -45,32 +45,25 @@ struct symm_pack_lhs
}
void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
{
enum { PacketSize = packet_traits<Scalar>::size };
const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
Index count = 0;
//Index peeled_mc3 = (rows/Pack1)*Pack1;
const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
if(Pack1>=3*PacketSize)
for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
pack<3*PacketSize>(blockA, lhs, cols, i, count);
if(Pack1>=2*PacketSize)
for(Index i=peeled_mc3; i<peeled_mc2; i+=2*PacketSize)
pack<2*PacketSize>(blockA, lhs, cols, i, count);
if(Pack1>=1*PacketSize)
for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
pack<1*PacketSize>(blockA, lhs, cols, i, count);
Index peeled_mc = (rows/Pack1)*Pack1;
for(Index i=0; i<peeled_mc; i+=Pack1)
{
pack<Pack1>(blockA, lhs, cols, i, count);
}
if(rows-peeled_mc>=Pack2)
{
pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
peeled_mc += Pack2;
}
// do the same with mr==1
for(Index i=peeled_mc1; i<rows; i++)
for(Index i=peeled_mc; i<rows; i++)
{
for(Index k=0; k<i; k++)
blockA[count++] = lhs(i, k); // normal
blockA[count++] = lhs(i, k); // normal
blockA[count++] = numext::real(lhs(i, i)); // real (diagonal)
@@ -89,8 +82,7 @@ struct symm_pack_rhs
Index end_k = k2 + rows;
Index count = 0;
const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
Index packet_cols = (cols/nr)*nr;
// first part: normal case
for(Index j2=0; j2<k2; j2+=nr)
@@ -99,151 +91,79 @@ struct symm_pack_rhs
{
blockB[count+0] = rhs(k,j2+0);
blockB[count+1] = rhs(k,j2+1);
if (nr>=4)
if (nr==4)
{
blockB[count+2] = rhs(k,j2+2);
blockB[count+3] = rhs(k,j2+3);
}
if (nr>=8)
{
blockB[count+4] = rhs(k,j2+4);
blockB[count+5] = rhs(k,j2+5);
blockB[count+6] = rhs(k,j2+6);
blockB[count+7] = rhs(k,j2+7);
}
count += nr;
}
}
// second part: diagonal block
Index end8 = nr>=8 ? (std::min)(k2+rows,packet_cols8) : k2;
if(nr>=8)
for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
{
for(Index j2=k2; j2<end8; j2+=8)
// again we can split vertically in three different parts (transpose, symmetric, normal)
// transpose
for(Index k=k2; k<j2; k++)
{
// again we can split vertically in three different parts (transpose, symmetric, normal)
// transpose
for(Index k=k2; k<j2; k++)
blockB[count+0] = numext::conj(rhs(j2+0,k));
blockB[count+1] = numext::conj(rhs(j2+1,k));
if (nr==4)
{
blockB[count+0] = numext::conj(rhs(j2+0,k));
blockB[count+1] = numext::conj(rhs(j2+1,k));
blockB[count+2] = numext::conj(rhs(j2+2,k));
blockB[count+3] = numext::conj(rhs(j2+3,k));
blockB[count+4] = numext::conj(rhs(j2+4,k));
blockB[count+5] = numext::conj(rhs(j2+5,k));
blockB[count+6] = numext::conj(rhs(j2+6,k));
blockB[count+7] = numext::conj(rhs(j2+7,k));
count += 8;
}
// symmetric
Index h = 0;
for(Index k=j2; k<j2+8; k++)
{
// normal
for (Index w=0 ; w<h; ++w)
blockB[count+w] = rhs(k,j2+w);
blockB[count+h] = numext::real(rhs(k,k));
// transpose
for (Index w=h+1 ; w<8; ++w)
blockB[count+w] = numext::conj(rhs(j2+w,k));
count += 8;
++h;
}
// normal
for(Index k=j2+8; k<end_k; k++)
{
blockB[count+0] = rhs(k,j2+0);
blockB[count+1] = rhs(k,j2+1);
blockB[count+2] = rhs(k,j2+2);
blockB[count+3] = rhs(k,j2+3);
blockB[count+4] = rhs(k,j2+4);
blockB[count+5] = rhs(k,j2+5);
blockB[count+6] = rhs(k,j2+6);
blockB[count+7] = rhs(k,j2+7);
count += 8;
}
count += nr;
}
}
if(nr>=4)
{
for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4)
// symmetric
Index h = 0;
for(Index k=j2; k<j2+nr; k++)
{
// again we can split vertically in three different parts (transpose, symmetric, normal)
// transpose
for(Index k=k2; k<j2; k++)
{
blockB[count+0] = numext::conj(rhs(j2+0,k));
blockB[count+1] = numext::conj(rhs(j2+1,k));
blockB[count+2] = numext::conj(rhs(j2+2,k));
blockB[count+3] = numext::conj(rhs(j2+3,k));
count += 4;
}
// symmetric
Index h = 0;
for(Index k=j2; k<j2+4; k++)
{
// normal
for (Index w=0 ; w<h; ++w)
blockB[count+w] = rhs(k,j2+w);
blockB[count+h] = numext::real(rhs(k,k));
// transpose
for (Index w=h+1 ; w<4; ++w)
blockB[count+w] = numext::conj(rhs(j2+w,k));
count += 4;
++h;
}
// normal
for(Index k=j2+4; k<end_k; k++)
for (Index w=0 ; w<h; ++w)
blockB[count+w] = rhs(k,j2+w);
blockB[count+h] = numext::real(rhs(k,k));
// transpose
for (Index w=h+1 ; w<nr; ++w)
blockB[count+w] = numext::conj(rhs(j2+w,k));
count += nr;
++h;
}
// normal
for(Index k=j2+nr; k<end_k; k++)
{
blockB[count+0] = rhs(k,j2+0);
blockB[count+1] = rhs(k,j2+1);
if (nr==4)
{
blockB[count+0] = rhs(k,j2+0);
blockB[count+1] = rhs(k,j2+1);
blockB[count+2] = rhs(k,j2+2);
blockB[count+3] = rhs(k,j2+3);
count += 4;
}
count += nr;
}
}
// third part: transposed
if(nr>=8)
for(Index j2=k2+rows; j2<packet_cols; j2+=nr)
{
for(Index j2=k2+rows; j2<packet_cols8; j2+=8)
for(Index k=k2; k<end_k; k++)
{
for(Index k=k2; k<end_k; k++)
blockB[count+0] = numext::conj(rhs(j2+0,k));
blockB[count+1] = numext::conj(rhs(j2+1,k));
if (nr==4)
{
blockB[count+0] = numext::conj(rhs(j2+0,k));
blockB[count+1] = numext::conj(rhs(j2+1,k));
blockB[count+2] = numext::conj(rhs(j2+2,k));
blockB[count+3] = numext::conj(rhs(j2+3,k));
blockB[count+4] = numext::conj(rhs(j2+4,k));
blockB[count+5] = numext::conj(rhs(j2+5,k));
blockB[count+6] = numext::conj(rhs(j2+6,k));
blockB[count+7] = numext::conj(rhs(j2+7,k));
count += 8;
}
}
}
if(nr>=4)
{
for(Index j2=(std::max)(packet_cols8,k2+rows); j2<packet_cols4; j2+=4)
{
for(Index k=k2; k<end_k; k++)
{
blockB[count+0] = numext::conj(rhs(j2+0,k));
blockB[count+1] = numext::conj(rhs(j2+1,k));
blockB[count+2] = numext::conj(rhs(j2+2,k));
blockB[count+3] = numext::conj(rhs(j2+3,k));
count += 4;
}
count += nr;
}
}
// copy the remaining columns one at a time (=> the same with nr==1)
for(Index j2=packet_cols4; j2<cols; ++j2)
for(Index j2=packet_cols; j2<cols; ++j2)
{
// transpose
Index half = (std::min)(end_k,j2);
@@ -341,10 +261,11 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
// kc must be smaller than mc
kc = (std::min)(kc,mc);
std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
Scalar* blockB = allocatedBlockB;
Scalar* blockB = allocatedBlockB + sizeW;
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@@ -427,10 +348,11 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
Index mc = rows; // cache block size along the M direction
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
Scalar* blockB = allocatedBlockB;
Scalar* blockB = allocatedBlockB + sizeW;
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@@ -500,11 +422,11 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
::run(
lhs.rows(), rhs.cols(), // sizes
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
&dst.coeffRef(0,0), dst.outerStride(), // result info
actualAlpha // alpha
lhs.rows(), rhs.cols(), // sizes
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
&dst.coeffRef(0,0), dst.outerStride(), // result info
actualAlpha // alpha
);
}
};

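symm_pack_rhs above splits each diagonal panel into transpose, symmetric and normal parts so that only one triangle of the matrix is ever read. A small real-valued sketch of that three-part loop for an nr-wide panel (the conj_/real_ helpers are identities here and merely mirror the complex path; all names are hypothetical):

// Minimal column-major accessor standing in for const_blas_data_mapper.
struct MapperSketch {
  const double* data; int stride;
  double operator()(int r, int c) const { return data[r + c*stride]; }
};

inline double conj_(double x) { return x; }  // identity for real scalars
inline double real_(double x) { return x; }

// Pack columns [j2, j2+nr) over rows [k2, end_k) of a symmetric rhs.
void pack_diagonal_panel(double* blockB, const MapperSketch& rhs,
                         int k2, int end_k, int j2, int nr)
{
  int count = 0;
  // transpose: rows above the diagonal block read the opposite triangle
  for (int k = k2; k < j2; ++k, count += nr)
    for (int w = 0; w < nr; ++w)
      blockB[count+w] = conj_(rhs(j2+w, k));
  // symmetric: the nr x nr block crossing the diagonal mixes both triangles
  for (int k = j2, h = 0; k < j2+nr; ++k, ++h, count += nr) {
    for (int w = 0; w < h; ++w)    blockB[count+w] = rhs(k, j2+w);        // normal
    blockB[count+h] = real_(rhs(k, k));                                   // diagonal
    for (int w = h+1; w < nr; ++w) blockB[count+w] = conj_(rhs(j2+w, k)); // transpose
  }
  // normal: remaining rows read their coefficients directly
  for (int k = j2+nr; k < end_k; ++k, count += nr)
    for (int w = 0; w < nr; ++w)
      blockB[count+w] = rhs(k, j2+w);
}

int main()
{
  double A[16] = {0};  // 4x4 symmetric matrix, column-major, lower triangle valid
  double blockB[16];
  MapperSketch rhs = { A, 4 };
  pack_diagonal_panel(blockB, rhs, 0, 4, 0, 4);  // hypothetical panel sizes
}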

@@ -113,9 +113,9 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
for (size_t i=starti; i<alignedStart; ++i)
{
res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
t2 += cj1.pmul(A0[i], rhs[i]);
t3 += cj1.pmul(A1[i], rhs[i]);
res[i] += t0 * A0[i] + t1 * A1[i];
t2 += numext::conj(A0[i]) * rhs[i];
t3 += numext::conj(A1[i]) * rhs[i];
}
// Yes, this is an optimization for gcc 4.3 and 4.4 (=> huge speed up)
// gcc 4.2 does this optimization automatically.
@@ -218,7 +218,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
if(!EvalToDest)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
Index size = dest.size();
int size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
MappedDest(actualDestPtr, dest.size()) = dest;
@@ -227,7 +227,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
if(!UseRhs)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
Index size = rhs.size();
int size = rhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;

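The hunk above toggles between explicit numext::conj calls and the cj0/cj1 conjugation helpers; such a helper folds an optional conjugation into the multiply so one kernel body serves both the conjugated and plain cases. A minimal sketch of the idea (hypothetical, much simpler than Eigen's conj_helper):

#include <complex>
#include <iostream>

// Multiply, conjugating the first operand when Conj is true; the choice is
// a compile-time constant, so the kernel loop itself never branches on it.
template<typename Scalar, bool Conj>
struct conj_helper_sketch {
  Scalar pmul(const Scalar& a, const Scalar& b) const {
    return Conj ? Scalar(std::conj(a) * b) : Scalar(a * b);
  }
};

int main()
{
  std::complex<double> a(1, 2), b(3, -1);
  conj_helper_sketch<std::complex<double>, false> cj0;
  conj_helper_sketch<std::complex<double>, true>  cj1;
  std::cout << cj0.pmul(a, b) << " " << cj1.pmul(a, b) << "\n";
}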

@@ -125,9 +125,11 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
triangularBuffer.setZero();
@@ -185,7 +187,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
pack_lhs(blockA, triangularBuffer.data(), triangularBuffer.outerStride(), actualPanelWidth, actualPanelWidth);
gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols, alpha,
actualPanelWidth, actual_kc, 0, blockBOffset);
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
// GEBP with remaining micro panel
if (lengthTarget>0)
@@ -195,7 +197,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget);
gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha,
actualPanelWidth, actual_kc, 0, blockBOffset);
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
}
}
}
@@ -209,7 +211,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
}
}
}
@@ -263,10 +265,12 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols+EIGEN_ALIGN_BYTES/sizeof(Scalar);
std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
triangularBuffer.setZero();
@@ -300,7 +304,6 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
Scalar* geb = blockB+ts*ts;
geb = geb + internal::first_aligned(geb,EIGEN_ALIGN_BYTES/sizeof(Scalar));
pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, actual_kc, rs);
@@ -354,13 +357,14 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
actual_mc, panelLength, actualPanelWidth,
alpha,
actual_kc, actual_kc, // strides
blockOffset, blockOffset);// offsets
blockOffset, blockOffset,// offsets
blockW); // workspace
}
}
gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride,
blockA, geb, actual_mc, actual_kc, rs,
alpha,
-1, -1, 0, 0);
-1, -1, 0, 0, blockW);
}
}
}
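Several hunks above toggle between a gebp_kernel that takes an explicit blockW workspace, carved from the head of a single allocation (sizeB = sizeW + kc*cols; blockB = allocatedBlockB + sizeW), and one that needs no workspace. A sketch of that head-of-buffer layout, assuming C++17 std::aligned_alloc rather than Eigen's ei_declare_aligned_stack_constructed_variable, with illustrative sizes:

#include <cstdlib>
#include <cstdio>

int main()
{
  const std::size_t kc = 256, cols = 512, workSpaceFactor = 8;
  const std::size_t sizeW = kc * workSpaceFactor;  // gebp scratch, in scalars
  const std::size_t sizeB = sizeW + kc * cols;     // one allocation for both
  // One aligned buffer; the byte count here is a multiple of the alignment,
  // as std::aligned_alloc requires.
  double* allocatedBlockB =
      static_cast<double*>(std::aligned_alloc(16, sizeB * sizeof(double)));
  double* blockW = allocatedBlockB;          // workspace lives at the head
  double* blockB = allocatedBlockB + sizeW;  // packed rhs panels follow it
  std::printf("workspace: %zu scalars at %p, packed B: %zu scalars at %p\n",
              sizeW, (void*)blockW, sizeB - sizeW, (void*)blockB);
  std::free(allocatedBlockB);
}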

Some files were not shown because too many files have changed in this diff.