Compare commits

...

638 Commits

Author SHA1 Message Date
Gael Guennebaud
f9303cc7c5 bump to 3.3-alpha1 2015-09-04 17:26:36 +02:00
Gael Guennebaud
b20a55a608 Workaround wrong instanciation made by VS2010 2015-09-04 15:25:58 +02:00
Gael Guennebaud
ed265258e4 Fix returned index type of inner iterators of sparse blocks. 2015-09-03 15:07:35 +02:00
Gael Guennebaud
a835dfca73 InnerIterator::index() should really return a StorageIndex 2015-09-03 14:53:51 +02:00
Gael Guennebaud
941a99ac1a Add a few missing EIGEN_DEVICE_FUNC declarations 2015-09-03 14:14:54 +02:00
Gael Guennebaud
d91db41a31 Fix documentation example 2015-09-03 14:14:14 +02:00
Gael Guennebaud
3942db9d7c Use inline versus static free functions. 2015-09-03 13:42:54 +02:00
Doug Kwan
5c9ee73eb9 Implement plog and pexp for AltiVec. 2015-07-30 11:12:42 -07:00
Gael Guennebaud
5a1cc5d24c bug #1053: fix SuplerLU::solve with EIGEN_DEFAULT_TO_ROW_MAJOR 2015-09-03 11:25:36 +02:00
Gael Guennebaud
2795ffd6a0 Fix Index vs StorageIndex naming convention 2015-09-03 11:18:27 +02:00
Gael Guennebaud
ef2b54f422 Fix AMD ordering when a column has only one off-diagonal non-zero (also fix bug #1045) 2015-09-03 11:04:06 +02:00
Christoph Hertzberg
5ad7981f73 Use full packet size for Dynamic-sized objects (otherwise, the unalignedcount unit test fails with AVX enabled) 2015-09-02 22:51:43 +02:00
Gael Guennebaud
aa768add0b Since there is no reason for evaluators to be nested by reference, let's remove the evaluator<>::nestedType indirection. 2015-09-02 22:10:39 +02:00
Gael Guennebaud
51455824ea Fix AlignedVector3 wrt previous change 2015-09-02 21:51:58 +02:00
Gael Guennebaud
f8976fdbe0 Make evaluators non-copyable. This guarantee that evaluators storing temporaries do not introduce unwanted copy overhead. 2015-09-02 21:39:49 +02:00
Gael Guennebaud
92b9f0e102 Cleaning pass on evaluators: remove the useless and error prone evaluator<>::type indirection. 2015-09-02 21:38:40 +02:00
Gael Guennebaud
cda55ab245 Fix compilation of cuda unit test 2015-09-02 16:59:07 +02:00
Gael Guennebaud
14458ec0a0 Fix packetmath unit test for exp and log 2015-09-02 15:47:58 +02:00
Gael Guennebaud
6b99afa5ae Fix LSCG::solve with a sparse destination. 2015-09-02 15:34:03 +02:00
Gael Guennebaud
b5ad3d2cf7 Remove deprecated Flagged expression. 2015-09-02 14:53:50 +02:00
Gael Guennebaud
6522c3a6f0 Add regression test for bug #817 2015-09-02 13:16:03 +02:00
Gael Guennebaud
be5e2ecc21 bug #505: add more examples of bad and correct usages of auto and eval(). 2015-09-02 13:04:30 +02:00
Gael Guennebaud
aba8c9ee17 Add a documentation page for common pitfalls 2015-09-02 11:23:55 +02:00
Gael Guennebaud
a75616887e bug #1057: fix a declaration missmatch with MSVC 2015-09-02 09:31:32 +02:00
Gael Guennebaud
280f93ff65 Fix FullPivLU::image documentation 2015-09-02 09:19:27 +02:00
Gael Guennebaud
6059188f9d Simplify implementation of the evaluation's iterator of Sparse*Diagonal products to help the compiler to generate better code. 2015-09-01 22:34:30 +02:00
Gael Guennebaud
0b2412df50 Remove duplicated temporary in Sparse to Sparse assignment 2015-09-01 22:31:30 +02:00
Gael Guennebaud
9001f4a46b Add missing specialization of evaluator of sub-sparse-matrices that can be seen as a SparseCompressedBase. This changeset enable faster iterator for such expressions. 2015-09-01 22:29:17 +02:00
Benoit Steiner
f41831e445 Added support for argmax/argmin 2015-08-31 08:18:53 -07:00
Benoit Steiner
2ab603316a Use numext::mini/numext::maxi instead of std::min/std::max in the tensor code 2015-08-28 08:14:15 -07:00
Benoit Steiner
2ed1495eec nvcc doesn't support std::min or std::max on GPU. Use our own custom implementation instead 2015-08-27 16:59:55 -07:00
Sergiu Dotenco
d4c24eb016 fixed Quaternion identity initialization for non-implicitly convertible types 2015-08-20 20:55:37 +02:00
Christoph Hertzberg
78358a7241 Fixed broken commit a09cfe650f
. Missing } and unprotected min/max calls and definitions.
2015-08-22 15:03:16 +02:00
Benoit Steiner
a09cfe650f std::numeric_limits doesn't work reliably on CUDA devices. Use our own definition of numeric_limit<T>::max() and numeric_limit<T>::min() instead of the stl ones. 2015-08-21 16:01:40 -07:00
Christoph Hertzberg
e5c78d85c8 bug #1043: Avoid integer conversion sign warning 2015-08-19 21:50:21 +02:00
Christoph Hertzberg
1bdd06a199 Fix some trivial warnings 2015-08-19 21:38:18 +02:00
Christoph Hertzberg
0721690dbb Use standard include syntax in Tensor module (<> for include-path and "" for relative path) 2015-08-18 14:34:00 +02:00
Christoph Hertzberg
8097d8d028 surpress some warnings 2015-08-17 21:50:52 +02:00
Christoph Hertzberg
d2e0927127 Define EIGEN_MAX_STATIC_ALIGN_BYTES to 0 for architectures that don't require stack alignment 2015-08-17 16:44:52 +02:00
Gael Guennebaud
dc2c103b3b merge 2015-08-16 14:22:02 +02:00
Christoph Hertzberg
d6a4805fdf Protect further isnan/isfinite/isinf calls 2015-08-16 14:00:02 +02:00
Christoph Hertzberg
a40f6ab276 Merged in ITimer/eigen (pull request PR-133)
[Doc] Fix a spelling error in TopicMultithreading.dox
2015-08-14 17:46:57 +02:00
Christoph Hertzberg
61e0977e10 Protect all calls to isnan, isinf and isfinite with parentheses. 2015-08-14 17:32:34 +02:00
Christoph Hertzberg
712e2fed17 bug #829: Introduce macro EIGEN_HAS_CXX11_CONTAINERS and do not specialize std-containers if it is enabled. 2015-08-14 16:09:48 +02:00
Christoph Hertzberg
a5d1bb2be8 bug #1054: Use set(EIGEN_CXX_FLAG_VERSION "/version") only for Intel compilers on Windows.
Also removed code calling `head -n1` and always use integrated REGEX functionality.
2015-08-14 15:30:59 +02:00
ITimer
93635cafee Fixed a spelling error 2015-08-10 15:11:10 +08:00
Gael Guennebaud
23aab82c0c merge 2015-08-09 21:24:20 +02:00
Gael Guennebaud
0d5e673baa Fix Tensor module wrt nullary functor recent change 2015-08-09 21:20:24 +02:00
Christoph Hertzberg
cac6b23033 bug #1053: SparseLU failed with EIGEN_DEFAULT_TO_ROW_MAJOR 2015-08-07 23:10:56 +02:00
Gael Guennebaud
febcce34f1 Enable vectorization with half-packets 2015-08-07 20:05:31 +02:00
Gael Guennebaud
6245591349 Fix prototype of plset and generalize linspace functor. 2015-08-07 19:27:59 +02:00
Gael Guennebaud
60e4260d0d Some functors were not generic wrt packet-type. 2015-08-07 17:41:39 +02:00
Gael Guennebaud
e68c7b8368 Include SSE packetmath when AVX is enabled, and enable AVX's sine function only in fast-math mode (as SSE) 2015-08-07 17:40:39 +02:00
Gael Guennebaud
65bfa5fce7 Allow to use arbitrary packet-types during evaluation.
This is implemented by adding a PacketType template parameter to packet and writePacket members of evaluator<>.
2015-08-07 12:01:39 +02:00
Gael Guennebaud
3602926ed5 Mark ALignedBit as deprecated. 2015-08-07 10:45:02 +02:00
Gael Guennebaud
ce57dbd937 Let unpacket_traits<> exposes the required alignment and make use of it everywhere 2015-08-07 10:44:01 +02:00
Gael Guennebaud
2afdef6a54 Generalize first_aligned to take the requested alignment as a template parameter, and add a first_default_aligned variante calling first_aligned with the requirement of the largest packet for the given scalar type. 2015-08-06 17:52:01 +02:00
Gael Guennebaud
1f5024332e First part of a big refactoring of alignment control to enable the handling of arbitrarily aligned buffers. It includes:
- AlignedBit flag is deprecated. Alignment is now specified by the evaluator through the 'Alignment' enum, e.g., evaluator<Xpr>::Alignment. Its value is in Bytes.
 - Add several enums to specify alignment: Aligned8, Aligned16, Aligned32, Aligned64, Aligned128. AlignedMax corresponds to EIGEN_MAX_ALIGN_BYTES. Such enums are used to define the above Alignment value, and as the 'Options' template parameter of Map<> and Ref<>.
 - The Aligned enum is now deprecated. It is now an alias for Aligned16.
 - Currently, traits<Matrix<>>, traits<Array<>>, traits<Ref<>>, traits<Map<>>, and traits<Block<>> also expose the Alignment enum.
2015-08-06 15:31:07 +02:00
Gael Guennebaud
65186ef18d Fix logic in compute_default_alignment, extend it to Dynamic size, and move it to XprHelper.h file. 2015-08-06 14:07:59 +02:00
Gael Guennebaud
becd89df29 Enable runtime stack alignment in gemm_blocking_space. 2015-08-06 14:00:26 +02:00
Gael Guennebaud
d4f5efc51a Add a EIGEN_DEFAULT_ALIGN_BYTES macro defining default alignment for alloca and aligned_malloc.
It is defined as the max of EIGEN_IDEAL_MAX_ALIGN_BYTES and EIGEN_MAX_ALIGN_BYTES
2015-08-06 13:56:53 +02:00
Gael Guennebaud
7e0d7a76b8 Remove dense nested loops in IncompleteCholesky 2015-08-04 18:01:38 +02:00
Gael Guennebaud
e31fc50280 Numerous fixes for IncompleteCholesky. Still have to make it fully exploit the sparse structure of the L factor, and improve robustness to illconditionned problems. 2015-08-04 16:16:02 +02:00
Gael Guennebaud
9a4713e505 Add a unit test for IncompleteCholesky 2015-08-04 16:14:06 +02:00
Gael Guennebaud
506964fc29 Propagate precondition info to the iterative solver. 2015-08-04 16:13:34 +02:00
Gael Guennebaud
db0f5c9d90 Fix conversion warning 2015-08-04 16:12:44 +02:00
Gael Guennebaud
b986c147cd Fix ForceNonZeroDiag for complexes 2015-08-04 16:12:16 +02:00
Benoit Steiner
cbce0e3b12 Fixed compilation warning 2015-08-03 21:52:29 -07:00
Benoit Steiner
a5dc49e7e8 Fixed 2 compilation warnings generated by llvm 2015-07-29 15:06:08 -07:00
Benoit Steiner
e1d28b7ea7 Added a test for shuffling 2015-07-29 15:01:21 -07:00
Benoit Steiner
0570594f2c Fixed a few compilation warnings triggered by clang 2015-07-29 11:48:38 -07:00
Benoit Steiner
099597406f Simplified and generalized the DividerTraits code 2015-07-29 10:02:42 -07:00
Gael Guennebaud
6db3a557f4 Add missing specialization of struct DividerTraits<long> 2015-07-29 11:38:53 +02:00
Gael Guennebaud
aec4814370 Many files were missing in previous changeset. 2015-07-29 11:11:23 +02:00
Gael Guennebaud
f7d5b9323d typo 2015-07-29 11:08:49 +02:00
Gael Guennebaud
175ed636ea bug #973: update macro-level control of alignement by introducing user-controllable EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES macros. This changeset also removes EIGEN_ALIGN (replaced by EIGEN_MAX_ALIGN_BYTES>0), EIGEN_ALIGN_STATICALLY (replaced by EIGEN_MAX_STATIC_ALIGN_BYTES>0), EIGEN_USER_ALIGN*, EIGEN_ALIGN_DEFAULT (replaced by EIGEN_ALIGN_MAX). 2015-07-29 10:22:25 +02:00
Gael Guennebaud
76874b128e bug #1047: document the structure layout of class Matrix 2015-07-29 10:21:28 +02:00
Gael Guennebaud
41e1f3498c bug #1048: fix unused variable warning 2015-07-28 22:59:50 +02:00
Benoit Steiner
b9db19aec4 Pulled latest updates from trunk. 2015-07-27 09:39:57 -07:00
Benoit Steiner
f84417d97b Removed an incorrect assertion. 2015-07-27 09:25:22 -07:00
Benoit Steiner
1a30a8e7a2 Merged in godeffroy/eigen_tensor_generalized_contraction (pull request PR-130)
Allowed tensor contraction operation with an empty array of dimension pairs, which performs a tensor product.
2015-07-27 09:19:35 -07:00
Christoph Hertzberg
a44d022caf bug #792: SparseLU::factorize failed for structurally rank deficient matrices 2015-07-26 20:30:30 +02:00
Godeffroy Valet
2195822df6 Allowed tensor contraction operation with an empty array of dimension pairs, which performs a tensor product. 2015-07-25 11:58:36 +02:00
Benoit Steiner
f6282e451a Fixed a typo in an assertion. 2015-07-24 17:35:47 -07:00
Benoit Steiner
4b3052c54d Pulled latest update from trunk 2015-07-23 08:47:33 -07:00
Benoit Steiner
a446020b78 Reenable 2 tests previously disabled by mistake 2015-07-23 08:47:00 -07:00
Christoph Hertzberg
3d951df223 Re-enabled unit tests which were disabled in commit 4200bdec24
.
2015-07-23 10:55:03 +02:00
Benoit Steiner
6d6e6d0b88 Define EIGEN_VECTORIZE_AVX2 and EIGEN_VECTORIZE_FMA when the corresponding instructions can be used by the compiler 2015-07-22 18:22:16 -07:00
Benoit Steiner
ce65c2922a Pulled latest updates from trunk 2015-07-22 18:12:16 -07:00
Benoit Steiner
4200bdec24 Extended the range of value inputs for TensorIntDiv to support tensors with more than 4 billion elements. 2015-07-22 17:02:30 -07:00
Gael Guennebaud
3b0ad02c10 Remove wrongly pushed debugging statements 2015-07-22 14:33:57 +02:00
Jonas Adler
815fa0dbf6 Fixed some compiler bugs in NVCC, now compiles with CUDA.
(chtz: Manually joined sevaral commits to keep the history clean)
2015-07-22 12:29:18 +02:00
Benoit Steiner
d259b719d1 Made sure that the use const expressions are not enabled when compiling with nvcc even when gcc 4.9 is used as the host compiler. 2015-07-21 17:35:58 -07:00
Benoit Steiner
0dda72316f The eigen_check macro doesn't exist anymore: use assert instead 2015-07-21 17:34:15 -07:00
Gael Guennebaud
586d10f7e0 Fix compilation of tri(sparse) * dense with OpenMP 2015-07-21 22:52:21 +02:00
Gael Guennebaud
d3e5db9a80 add regression unit test for previous changeset 2015-07-21 22:23:17 +02:00
Valentin Roussellet
5e635f9ca1 AlignedVector3 accepts implicit conversions from more operators. 2015-07-21 16:42:52 +00:00
Gael Guennebaud
45ee14a13a Fix output of relative error, and add more support for long double 2015-07-21 22:22:12 +02:00
Gael Guennebaud
87f3e533f5 bug #1036: implement verify_is_approx_upto_permutation through a combinatorial search.
The previous implementation was subject to numerical cancellation issues.
2015-07-20 15:34:06 +02:00
Gael Guennebaud
ab8b497a7e Add pow(scalar,array) in quick ref 2015-07-20 13:59:21 +02:00
Gael Guennebaud
6544b49e59 Generalize pow(x,e) such that x and e can be a different expression type or a scalar for either x or e. Add x.pow(e) with e an array expression. 2015-07-20 13:57:55 +02:00
Gael Guennebaud
2d93060291 Fix trivial warnings. 2015-07-20 13:55:48 +02:00
Gael Guennebaud
c11971de37 Fix compilation of isnan(complex) 2015-07-20 12:56:01 +02:00
Gael Guennebaud
88e352adac Add support for replicate in CUDA 2015-07-20 10:53:03 +02:00
Benoit Steiner
6799c26cd6 Fixed a typo in a test and a compilation warning 2015-07-17 16:50:47 -07:00
Benoit Steiner
7a39439904 Rewrote Eigen::dimensions_match to prevent a static assertion when the rank of the tensors is different. 2015-07-17 16:46:30 -07:00
Benoit Steiner
e94f9eb637 Fixed a const correctness issue in TensorLayoutSwap 2015-07-17 15:44:26 -07:00
Benoit Steiner
513e357b48 Added support for prefetching on cuda devices 2015-07-17 15:35:16 -07:00
Benoit Steiner
943035e5bd Pulled latest updates from trunk 2015-07-17 09:42:45 -07:00
Benoit Steiner
06a22ca5bd Added support for sigmoid function to the tensor module 2015-07-17 09:29:00 -07:00
Nicolas Mellado
3275eddc24 Add const getters for LM parameters 2015-07-17 09:11:49 +02:00
Benoit Steiner
979b73cebf Fixed a typo in Macro.h 2015-07-16 14:17:50 -07:00
Benoit Steiner
a5ec25f11c Use the new EIGEN_HAS_INDEX_LIST define to enable the cxx11_tensor_index_list tests 2015-07-16 13:16:08 -07:00
Benoit Steiner
7a243959b4 Define EIGEN_HAS_INDEX_LIST whenever the class is defined. This makes it easier to support compilers that are cxx11 compliant and compilers that aren't. 2015-07-16 13:14:18 -07:00
Benoit Steiner
b756f6af5e Added missing APIs to the Eigen::Sizes class 2015-07-16 12:14:18 -07:00
Benoit Steiner
05787f8367 Added support for tensor inflation. 2015-07-16 09:04:05 -07:00
Benoit Steiner
b900fe47d5 Avoid relying on a default value for the Vectorizable template parameter of the EvalRange functor 2015-07-15 17:17:04 -07:00
Benoit Steiner
4b3d697e12 Fixed compilation error in a cuda test 2015-07-15 17:14:24 -07:00
Benoit Steiner
8315e025e1 Updated the cuda tests to use the new GpuDevice constructor 2015-07-15 12:39:26 -07:00
Benoit Steiner
e892524efe Added support for multi gpu configuration to the GpuDevice class 2015-07-15 12:38:34 -07:00
Gael Guennebaud
f5aa640862 Clean some previous changes and more cuda fixes 2015-07-15 10:57:55 +02:00
Nicolas Mellado
7cecd39a84 Merged eigen/eigen into default 2015-07-15 10:15:54 +02:00
Nicolas Mellado
592ee2a4b4 Add missing EIGEN_DEVICE_FUNC 2015-07-15 10:14:52 +02:00
Gael Guennebaud
6527dbb9f8 Merged in emartin/eigen (pull request PR-123)
Modify GEMM to handle m=0, n=0, and k=0 cases.
2015-07-13 23:58:30 +02:00
Benoit Steiner
b80036abec Enabled the construction of a fixed sized tensor directly from an expression. 2015-07-13 11:16:37 -07:00
Benoit Steiner
3912ca0d53 Fixed a bug in the integer division code that caused some large numerators to be incorrectly handled 2015-07-13 11:14:59 -07:00
Christoph Hertzberg
ea87561564 bug #1039: Redefining EIGEN_DEFAULT_DENSE_INDEX_TYPE may lead to errors 2015-07-13 16:08:25 +02:00
Gael Guennebaud
b8df8815f4 Fix operator<<(ostream,AlignedVector3) 2015-07-13 13:55:59 +02:00
Eric Martin
002c2923c2 Modify GEMM to handle m=0, n=0, and k=0 cases. 2015-07-11 21:46:13 -05:00
Nicolas Mellado
dbb3e2cf8a Cleaning 2015-07-11 18:15:31 +00:00
Nicolas Mellado
0d09845562 Revert files to remove EIGEN_USING_NUMEXT_MATH 2015-07-11 20:11:36 +02:00
Nicolas Mellado
20b96025fd Replace double constants by Scalar constants 2015-07-11 20:02:30 +02:00
Nicolas Mellado
1dd6a329e8 Cuda compatibility: remove explicit call to std math functions 2015-07-11 19:40:15 +02:00
Nicolas Mellado
bc40eb745d Merged eigen/eigen into default 2015-07-11 19:33:43 +02:00
Benoit Steiner
e6297741c9 Added support for generation of random complex numbers on CUDA devices 2015-07-07 17:40:49 -07:00
Benoit Steiner
6de6fa9483 Use NumTraits<T>::RequireInitialization instead of internal::is_arithmetic<T>::value to check whether it's possible to bypass the type constructor in the tensor code. 2015-07-07 15:23:56 -07:00
Benoit Steiner
7b7df7b6b8 Updated internal::is_arithmetic::value to be true for complex numbers 2015-07-07 12:57:35 -07:00
Benoit Steiner
6e55284e51 Pulled latest changes from trunk 2015-07-07 08:54:37 -07:00
Benoit Steiner
a93af65938 Improved and cleaned up the 2d patch extraction code 2015-07-07 08:52:14 -07:00
Gael Guennebaud
7fa6fe8d8c typo 2015-07-07 17:47:24 +02:00
Gael Guennebaud
fa17358c4b Rotation2D: fix slerp to take the shortest path, and add convenient method to get the angle in [-pi,pi] or [0,pi] 2015-07-07 17:27:12 +02:00
Benoit Steiner
3f2101b03b Use numext::swap instead of std::swap 2015-07-06 17:02:29 -07:00
Benoit Steiner
0485a2468d use Eigen smart_copy instead of std::copy 2015-07-06 17:01:51 -07:00
Benoit Steiner
ebdacfc5ea Fixed a compilation warning generated by clang 2015-07-06 15:03:11 -07:00
Benoit Steiner
81f9e968fd Only attempt to use the texture path on GPUs when it's supported by CUDA 2015-07-06 13:32:38 -07:00
Nicolas Mellado
66b30728f8 Merged eigen/eigen into default 2015-07-06 20:58:31 +02:00
Nicolas Mellado
5359e5cdb2 Protect against compilation errors with nvcc and numext/complex.
Disable functions explicitely involving std::complex when compiling with nvcc.
Improve code compatilibity using the new macro EIGEN_USING_NUMEXT_MATH (same spirit than EIGEN_USING_STD_MATH but for numext functions)
2015-07-06 20:55:01 +02:00
Benoit Steiner
864318e508 Misc small fixes to the tensor slicing code. 2015-07-06 11:45:56 -07:00
Gael Guennebaud
c2019dfeb3 Merged in Emie/eigen (pull request PR-121)
typo correction in mathFunction
2015-07-06 16:48:54 +02:00
Emilie Guy
ea7113dd0c typo correction in mathFunction 2015-07-06 14:31:08 +02:00
Nicolas Mellado
9115896590 Merged eigen/eigen into default 2015-07-03 00:41:11 +02:00
Benoit Steiner
95ef94f1ee Fixed a typo in the patch 2015-07-02 07:06:55 +00:00
Benoit Steiner
8f1d547c92 Added a default value for the cuda stream in the GpuDevice constructor 2015-07-01 18:32:18 -07:00
Benoit Steiner
1e911b276c Misc improvements and optimizations 2015-07-01 13:59:11 -07:00
Benoit Steiner
4ed213f97b Improved a previous fix 2015-07-01 13:06:30 -07:00
Benoit Steiner
56e155dd60 Fixed a couple of mistakes in the previous commit. 2015-07-01 12:40:27 -07:00
Benoit Steiner
925d0d375a Enabled the vectorized evaluation of several tensor expressions that was previously disabled by mistake 2015-07-01 11:32:04 -07:00
Benoit Steiner
44eedd8915 Marked the cast functions as EIGEN_DEVICE_FUNC to ensure that we can run casting on GPUs 2015-06-30 15:48:55 -07:00
Benoit Steiner
6021b68d8b Silenced a compilation warning 2015-06-30 15:42:25 -07:00
Benoit Steiner
f1f480b116 Added support for user defined custom tensor op. 2015-06-30 15:36:29 -07:00
Benoit Steiner
dc31fcb9ba Added support for 3D patch extraction 2015-06-30 14:48:26 -07:00
Benoit Steiner
f587075987 Made ThreadPoolDevice inherit from a new pure abstract ThreadPoolInterface class: this enables users to leverage their existing threadpool when using eigen tensors. 2015-06-30 14:21:24 -07:00
Benoit Steiner
28b36632ec Turned Eigen::array::size into a function to make the code compatible with std::array 2015-06-30 13:23:05 -07:00
Benoit Steiner
109005c6c9 Added a test for multithreaded full reductions 2015-06-30 13:08:12 -07:00
Benoit Steiner
a4aa7c6217 Fixed a few compilation warnings 2015-06-30 10:36:17 -07:00
Benoit Steiner
7d41e97fa9 Silenced a number of compilation warnings 2015-06-29 14:47:40 -07:00
Benoit Steiner
fffe63045c Added a test for full reductions on GPU 2015-06-29 14:10:32 -07:00
Benoit Steiner
db9dbbda32 Improved performance of full reduction by 2 order of magnitude on CPU and 3 orders of magnitude on GPU 2015-06-29 14:06:32 -07:00
Benoit Steiner
f0ce85b757 Improved support for fixed size tensors 2015-06-29 14:04:15 -07:00
Benoit Steiner
670c71d906 Express the full reduction operations (such as sum, max, min) using TensorDimensionList 2015-06-29 11:30:36 -07:00
Benoit Steiner
d8098ee7d5 Added support for tanh function to the tensor code 2015-06-29 11:14:42 -07:00
Benoit Steiner
3625734bc8 Moved some utilities to TensorMeta.h to make it easier to reuse them accross several tensor operations.
Created the TensorDimensionList class to encode the list of all the dimensions of a tensor of rank n. This could be done using TensorIndexList, however TensorIndexList require cxx11 which isn't yet supported as widely as we'd like.
2015-06-29 10:49:55 -07:00
Gael Guennebaud
392a30db82 Use VERIFY_IS_EQUAL instead of VERIFY(a==b) to get more feedback in case of failure 2015-06-26 16:22:49 +02:00
Gael Guennebaud
c911fc8dee split compiler intensive bdcsvd_1 unit test 2015-06-26 16:14:23 +02:00
Gael Guennebaud
98ff17eb9e Add special path for matrix<complex>/real.
This also fixes underflow issues when scaling complex matrices through complex/complex operator.
2015-06-26 16:08:15 +02:00
Gael Guennebaud
e102ddbf1f bug #1026: fix infinite loop for an empty input 2015-06-26 14:02:52 +02:00
Gael Guennebaud
555b9c6843 Doc: explain perf and multithreading issues in sparse iterative solvers 2015-06-26 10:49:40 +02:00
Gael Guennebaud
53b930887d Enable OpenMP parallelization of row-major-sparse * dense products.
I observed significant speed-up of the CG solver.
2015-06-26 10:32:34 +02:00
Gael Guennebaud
3f49cf4c90 More msvc 2013/2015 workarounds 2015-06-26 09:07:53 +02:00
Gael Guennebaud
7f824dd613 Optimize CG to enable faster spare row-major * dense vector products when the input matrix is complete (Upper|Lower) but column major. 2015-06-25 17:17:38 +02:00
Gael Guennebaud
c5f9eafcbc Fix assignement to selfadjoint-view when testing real-world problems 2015-06-25 17:08:58 +02:00
Gael Guennebaud
33e699c9fe Remove redundant accessors in Reverse 2015-06-25 14:14:59 +02:00
Gael Guennebaud
6b4d255cab Avoid division by a zero complex 2015-06-25 14:04:05 +02:00
Gael Guennebaud
973b0a90db Clarify documentation of the tolerance and error returned in iterative solvers 2015-06-25 13:51:13 +02:00
Gael Guennebaud
84264ceebc workaround msvc 2013/2015 wrong instanciation of isnan, isfinite, isinf 2015-06-25 10:00:26 +02:00
Gael Guennebaud
b4ab72678c bug #1000: MSVC 2013 does need the operator= workaround 2015-06-25 09:45:22 +02:00
Gael Guennebaud
788941d3b1 Workaround MSVC ambiguous instanciation 2015-06-24 23:35:17 +02:00
Gael Guennebaud
4c8cd13b35 Add explicit ctor for diagonal to sparse conversion 2015-06-24 18:11:06 +02:00
Gael Guennebaud
c38c195321 Document how cross behaves on complex numbers 2015-06-24 18:02:33 +02:00
Gael Guennebaud
23535ed31c Add unit test for dense = SparseQR::matrixQ 2015-06-24 17:55:41 +02:00
Gael Guennebaud
62f21e2d11 Add support for sparse = diagonal 2015-06-24 17:55:00 +02:00
Gael Guennebaud
763c833637 Make SparseSelfAdjointView, twists, and SparseQR more evaluator friendly 2015-06-24 17:54:09 +02:00
Gael Guennebaud
36643eec0c Add a call_assignment_no_alias_no_transpose shortcut 2015-06-24 17:50:43 +02:00
Gael Guennebaud
02db7c9bc6 Inherit operator+= and -= with 'using' kkeyword 2015-06-24 17:49:20 +02:00
Gael Guennebaud
53a61a067b Fallback to CMAKE_CXX_COMPILER_VERSION if VS version unknown 2015-06-24 15:17:37 +02:00
Gael Guennebaud
95e19be381 Fix compilation of MKL Pardiso support 2015-06-24 14:53:43 +02:00
Gael Guennebaud
2a33075aeb std::isnan is c++11 only 2015-06-24 10:29:17 +02:00
Gael Guennebaud
23da99492f Add unit-test for Visual2013 ambiguous call to operator= 2015-06-24 10:27:02 +02:00
Benoit Steiner
6441befbb3 Added more checks to test the correctness of the pexp implementation 2015-06-23 19:12:46 -07:00
Gael Guennebaud
c3e398d138 Fix overflow when checking SVD accuracy 2015-06-23 15:05:20 +02:00
Gael Guennebaud
b0d08869a9 Fix underflow in 3x3 tridiagonalization 2015-06-23 14:54:31 +02:00
Gael Guennebaud
18c9d155f3 Fix the fact that float(int) != float(int(float(int))) 2015-06-23 14:33:00 +02:00
Gael Guennebaud
71523a2e25 Fix a warning with icc 2015-06-23 14:20:20 +02:00
Gael Guennebaud
d9778f3391 Enable VML's pow wrapper on windows (the previous wrapper used the Fortran interface) 2015-06-23 14:04:50 +02:00
Gael Guennebaud
5f9630d7f9 bug #923: update support for Intel's VML wrt new evaluation mechanisms 2015-06-23 14:03:25 +02:00
Gael Guennebaud
793e4c6d77 bug #923: fix EIGEN_USE_BLAS mode 2015-06-23 11:13:24 +02:00
Gael Guennebaud
307c4fc292 Workaround missalignment produced by first_aligned for PacketSize==1 and size==1 2015-06-23 10:10:17 +02:00
Gael Guennebaud
bb3a9b4941 Use Ref<> to bypass forceAlignmentIf 2015-06-22 17:48:28 +02:00
Gael Guennebaud
476beed7f8 bug #1017: apply Christoph's patch preventing underflows in makeHouseholder 2015-06-22 16:51:45 +02:00
Gael Guennebaud
9fc1c92137 Fix isinf unit tests 2015-06-22 16:48:27 +02:00
Gael Guennebaud
9c7cfa7dab Update list of main modules 2015-06-22 14:17:24 +02:00
Gael Guennebaud
3ccd23efc0 Update coeff-wise quick-reference doc. 2015-06-22 14:08:54 +02:00
Gael Guennebaud
0848ba0a6e Fix return nullary return types: it must be based on the PlainObject type instead of the expression type. 2015-06-22 10:52:08 +02:00
Gael Guennebaud
b3b3dcad05 Reduce compiler memory consumption for SVD unit tests 2015-06-22 09:58:06 +02:00
Nicolas Mellado
ad5fdc7ddd Fix double to Scalar unwanted promotions 2015-06-21 20:21:23 +02:00
Gael Guennebaud
40821876ea Fix regression on CompressedStorage::operator= 2015-06-20 13:59:13 +02:00
Michael Abrahams
7043083be4 Use GCC flags in mingw 2015-06-20 18:54:41 +00:00
Gael Guennebaud
84aaef93ba Merged in vanhoucke/eigen_vanhoucke (pull request PR-118)
Fix two small undefined behaviors caught by static analysis.
2015-06-20 13:56:48 +02:00
Gael Guennebaud
6b33b29f00 Get rid of must_nest_by_value 2015-06-19 18:12:40 +02:00
Gael Guennebaud
846b227bb7 Get rid of class internal::nested<> (still have to updated Tensor module) 2015-06-19 17:56:39 +02:00
vanhoucke
368ea23406 Fix undefined behavior. When resizing a default-constructed SparseArray, we end up calling memcpy(ptr, 0, 0), which is technically UB and gets caught by static analysis. 2015-06-19 15:53:30 +00:00
vanhoucke
4cc0c961f3 Fix undefined behavior. 2015-06-19 15:46:46 +00:00
Gael Guennebaud
386d9e5ebd Fix usage of nested versus nested_eval 2015-06-19 17:42:27 +02:00
Gael Guennebaud
a5a7b68b76 Fix ambiguous instanciation using clean class-level SFINAE in product_evaluator 2015-06-19 17:25:13 +02:00
Gael Guennebaud
6fc5438205 Remove a few deprecated internal expressions 2015-06-19 17:06:12 +02:00
Gael Guennebaud
e9edb085c0 Check number of temporaries when applying permutations 2015-06-19 16:39:24 +02:00
Gael Guennebaud
6318d53b41 Factorize VERIFY_EVALUATION_COUNT in unit tests 2015-06-19 16:38:26 +02:00
Gael Guennebaud
5c84dd5665 Fix permutation/transposiitons products wrt nested_eval 2015-06-19 16:37:04 +02:00
Gael Guennebaud
0c8b0e007b Introduce a AliasFreeProduct option for Permutations and Transpositions 2015-06-19 15:38:19 +02:00
Gael Guennebaud
3f6aa4cd5d Remove useless specializations of evaluator_traits 2015-06-19 14:18:29 +02:00
Gael Guennebaud
4a8888dfbc Improbe compatibility of Transpositions and evaluators 2015-06-19 14:10:44 +02:00
Gael Guennebaud
3af4c6c1c9 Make Transpositions use evaluators 2015-06-19 11:50:24 +02:00
Gael Guennebaud
82b6ac0864 Enforce eigenvectors to be column-major (for performance reasons) 2015-06-19 11:25:46 +02:00
Gael Guennebaud
fad36cc814 Clean implementation of permutation * matrix products. 2015-06-19 10:51:57 +02:00
Gael Guennebaud
06036d8bb1 Fix compilation of BDCSVD with DEFAULT_TO_ROWMAJOR 2015-06-19 10:37:25 +02:00
Gael Guennebaud
d2db15016b Fix storage order computation in traits<Product> 2015-06-19 10:36:38 +02:00
Benoit Steiner
6a9a29e96f Fixed a compilation warning 2015-06-17 10:14:13 -07:00
Gael Guennebaud
bb6acc561e Workaround broken complex*real product on old clang versions 2015-06-17 16:11:58 +02:00
Gael Guennebaud
40f326ef2e workaround clang's broken complex division 2015-06-17 15:33:09 +02:00
Benoit Steiner
ab5db86fe9 Fixed merge conflict 2015-06-16 19:52:20 -07:00
Benoit Steiner
ea160a898c Pulled latest updates from trunk 2015-06-16 19:46:23 -07:00
Benoit Steiner
367794e668 Fixed compilation warnings triggered by clang 2015-06-16 19:43:49 -07:00
Gael Guennebaud
736a805883 Add unit test for bug #879 2015-06-16 22:11:41 +02:00
Gael Guennebaud
9ab8ac5c8b Fix compilation in TensorImagePatch 2015-06-16 14:50:08 +02:00
Gael Guennebaud
38874b1651 Fix shadow warnings in Tensor module 2015-06-16 14:43:46 +02:00
Gael Guennebaud
e2e66930c6 Fix compilation of alignedvector3 unit test 2015-06-16 14:40:55 +02:00
Gael Guennebaud
7baa1ba03e Remove the usage of result_of for DenseBase::redux as discussed in bug #1006 2015-06-15 22:40:18 +02:00
Gael Guennebaud
97cbe28829 Remove support of std::binder* in C++11 2015-06-15 15:34:05 +02:00
Gael Guennebaud
972a535288 Remove aligned-on-scalar assert and fallback to non vectorized path at runtime (first_aligned already had this runtime guard) 2015-06-14 15:04:07 +02:00
Gael Guennebaud
e5b490b654 Fix isfinite/isinf/isnan code snippets 2015-06-15 15:09:25 +02:00
Gael Guennebaud
a546be56e0 typo 2015-06-15 15:08:51 +02:00
Gael Guennebaud
3946c981b1 Relax tolerance when testing LDLT on singular problems 2015-06-15 15:08:16 +02:00
Gael Guennebaud
2212e40e95 Extend VERIFY_IS_APPROX to report the magnitude of the relative difference in case of failure. This will ease identifying strongest failing tests 2015-06-15 15:03:19 +02:00
Gael Guennebaud
321a2cbe3d Add missing forward declaration of AlignedBox 2015-06-15 15:01:20 +02:00
Gael Guennebaud
2f2a441a4d Fix use of unitialized buffers. 2015-06-13 22:19:40 +02:00
Gael Guennebaud
91b64a9c65 Relax aligned-on-scalar assert as in the 3.2 branch 2015-06-12 11:25:57 +02:00
Gael Guennebaud
84d103bee8 Enable C++11 math function in a more conservative manner. 2015-06-11 21:45:02 +02:00
Gael Guennebaud
916ef52fff merge 2015-06-11 09:35:49 +02:00
Gael Guennebaud
d93ba137f2 Introduce EIGEN_PI, get rid of M_PI and acos(-1.0) 2015-06-10 17:12:10 +02:00
Gael Guennebaud
9756c7fb4d Make more use of EIGEN_HAS_C99_MATH 2015-06-10 16:26:55 +02:00
Gael Guennebaud
93a62265dc fix isinf(complex(inf,NaN)) to return false. 2015-06-10 16:19:10 +02:00
Gael Guennebaud
b0d5aaafcc Rename free functions isFinite, isInf, isNaN to be compatible with c++11 2015-06-10 16:17:09 +02:00
Gael Guennebaud
25a98be948 bug #80: merge with d_hood branch on adding more coefficient-wise unary array functors 2015-06-10 15:52:05 +02:00
Gael Guennebaud
192bce2795 bug #890, add a more general routine to check that two dense object reference to the same data 2015-06-10 10:09:04 +02:00
Gael Guennebaud
e6832ce93d Add regression test for bug #890 2015-06-10 09:32:10 +02:00
Gael Guennebaud
0b2cbb2bdc bug #897: make umfpack support use Ref<> 2015-06-09 23:30:06 +02:00
Gael Guennebaud
feaf76c001 bug #910: add a StandardCompressedFormat option to Ref<SparseMatrix> to enforce standard compressed storage format.
If the input is not compressed, then this trigger a copy for a const Ref, and a runtime assert for non-const Ref.
2015-06-09 23:11:24 +02:00
Gael Guennebaud
f899aeb301 bug #650: fix sparse * dense wrt noalias and compound assignment 2015-06-09 18:33:24 +02:00
Gael Guennebaud
785b9c0127 bug #1003: assert in MapBase if the provided pointer is not aligned on scalar while it is expected to be. Also add a EIGEN_ALIGN8 macro. 2015-06-09 17:42:09 +02:00
Gael Guennebaud
0eb06f1391 Enable -Wshadow with clang 2015-06-09 17:44:18 +02:00
Gael Guennebaud
64753af3b7 code simplification 2015-06-09 15:35:34 +02:00
Gael Guennebaud
cacbc5679d formatting 2015-06-09 15:23:08 +02:00
Gael Guennebaud
04665ef9e1 remove redundant dynamic allocations in GMRES 2015-06-09 15:18:21 +02:00
Gael Guennebaud
d4c574707e fix some legitimate shadow warnings 2015-06-09 15:17:58 +02:00
Gael Guennebaud
f9350e70eb fix unused variable warning 2015-06-09 15:17:21 +02:00
Gael Guennebaud
4aba24a1b2 Clean argument names of some functions 2015-06-09 13:32:12 +02:00
Gael Guennebaud
302cf8ffe2 Add missing documentation for TriangularViewImpl<MatrixType,Mode,Sparse> 2015-06-09 11:40:07 +02:00
Gael Guennebaud
3a4299b245 bug #872: remove usage of deprecated bind1st. 2015-06-09 10:52:04 +02:00
Gael Guennebaud
9aef0db992 Skip too large real-world problems for solvers that do not scale (e.g., SimplicialLLT without reordering) 2015-06-09 09:29:53 +02:00
Gael Guennebaud
9a2447b0c9 Fix shadow warnings triggered by clang 2015-06-09 09:11:12 +02:00
Gael Guennebaud
cd8b996f99 Extend unit test and documentation of SelfAdjointEigenSolver::computeDirect 2015-06-08 16:16:42 +02:00
Gael Guennebaud
913a61870d Update utility for experimenting with 3x3 eigenvalues 2015-06-08 15:54:53 +02:00
Gael Guennebaud
8f031a3cee bug #997: add missing evaluators for m.lazyProduct(v.homogeneous()) 2015-06-08 15:43:41 +02:00
Gael Guennebaud
e6c5723dcd Add unit test for m.replicate(...)(index). 2015-06-08 15:42:15 +02:00
Gael Guennebaud
274b1f5d7e Fix homogeneous() for 1x1 matrix: in this case, homogeneous follows the storage order guaranteeing that v.transpose().homogeneous() == v.homogeneous().transpose() 2015-06-08 15:40:51 +02:00
Gael Guennebaud
cbe3a1a83e Add missing accessors for 1D index based access to Replicate<> expressions. 2015-06-08 15:39:09 +02:00
Gael Guennebaud
a7ae628c9f bug #1005: fix regression regarding sparse coeff-wise binary operator that did not trigger a static assertion for mismatched storage 2015-06-08 10:14:08 +02:00
Gael Guennebaud
0a9b5d1396 bug #705: fix handling of Lapack potrf return code 2015-06-05 15:59:13 +02:00
Gael Guennebaud
d0b7b5cb55 minor documentation fixes 2015-06-05 14:40:07 +02:00
Gael Guennebaud
56d4ef7ad6 BiCGSTAB: set default guess to 0, and improve restart mechanism by recomputing the accurate residual. 2015-06-05 14:37:57 +02:00
Gael Guennebaud
98a8d43457 Improve unit testing of real-word sparse problem (fix some shortcommings, use VERIFY, etc.) 2015-06-05 14:33:37 +02:00
Gael Guennebaud
b685660b22 Do go to full accuracy when testing BiCGSTAB. 2015-06-05 14:32:26 +02:00
Gael Guennebaud
8bc26562f4 Do not abort if the folder cannot be openned! 2015-06-05 14:31:29 +02:00
Gael Guennebaud
3e7bc8d686 Improve loading of symmetric sparse matrices in MatrixMarketIterator 2015-06-05 10:16:29 +02:00
Gael Guennebaud
acc761cf0c Merged in FlorianGeorge/eigen_blaze_fork_2 (pull request PR-60)
Use trans(X) instead of X.transpose() in Blaze Benchmark
2015-06-04 09:15:22 +02:00
Benoit Steiner
ea1190486f Fixed a compilation error triggered by nvcc 7 2015-05-28 11:57:51 -07:00
Benoit Steiner
0e5fed74e7 Worked around some constexpr related bugs in nvcc 7 2015-05-28 10:14:38 -07:00
Benoit Steiner
f13b3d4433 Added missing include files 2015-05-28 07:57:28 -07:00
Benoit Steiner
abec18bae0 Fixed potential compilation error 2015-05-26 10:11:15 -07:00
Benoit Steiner
9df186c140 Added a few more missing EIGEN_DEVICE_FUNC statements 2015-05-26 09:47:48 -07:00
Benoit Steiner
466bcc589e Added a few missing EIGEN_DEVICE_FUNC statements 2015-05-26 09:37:23 -07:00
Gael Guennebaud
d457734a19 Avoid calling smart_copy with null pointers. 2015-05-25 22:30:56 +02:00
Benoit Steiner
6b800744ce Moved away from std::async and std::future as the underlying mechnism for the thread pool device. On several platforms, the functions passed to std::async are not scheduled in the order in which they are given to std::async, which leads to massive performance issues in the contraction code.
Instead we now have a custom thread pool that ensures that the functions are picked up by the threads in the pool in the order in which they are enqueued in the pool.
2015-05-20 13:52:07 -07:00
Benoit Steiner
48f6b274e2 Fixed compilation error triggered by gcc 4.7 2015-05-20 08:54:44 -07:00
Benoit Steiner
2451679951 Avoid using the cuda memcpy for small tensor slices since the memcpy kernel is very expensive to launch 2015-05-19 15:19:01 -07:00
Benoit Steiner
a81d17b73a Added new version of the TensorIntDiv class optimized for 32 bit signed integers. It saves 1 register on CPU and 2 on GPU. 2015-05-19 13:59:52 -07:00
Benoit Jacob
051d5325cc Abandon blocking size lookup table approach. Not performing as well in real world as in microbenchmark. 2015-05-19 11:03:59 -04:00
Christoph Hertzberg
ebea530782 bug #1014: More stable direct computation of eigenvalues and -vectors for 3x3 matrices 2015-05-17 21:54:32 +02:00
Benoit Jacob
c88e1abaf3 also uninitialized here, see previous cset 2015-05-15 11:34:57 -04:00
Benoit Jacob
807793ec3b Fix uninitialized var warning. The compiler was clearing the register anyway, so this does not change resulting code 2015-05-15 11:15:53 -04:00
Pete Warden
140f85bb99 Check for the macro __ARM_NEON__ (with two underscores at the end) as well as __ARM_NEON. The second macro is correct according to the ARM language extensions specification, but historically the first one has been more common. Some older compilers (e.g. gcc v4.6 on a Beaglebone Black) only define the first, so without this patch NEON isn't enabled. 2015-05-12 16:03:43 -07:00
Gael Guennebaud
a852001196 Add regression test for bugs #854 and #1014, and check that the eigenvector matrix is unitary. 2015-05-12 18:45:39 +02:00
Gael Guennebaud
e66caf48e8 Make test matrices for eigensolver/selfadjoint even more tricky 2015-05-12 18:44:46 +02:00
Gael Guennebaud
ef81730625 Ignore denormal numbers in selfadjoint eigensolver. 2015-05-12 18:38:43 +02:00
Christoph Hertzberg
a605a1d7df Merged in MattPD/eigen/MattPD/doc-fix-wording-typos-in-templatekeywor-1431363009359 (pull request PR-116)
[Doc] Fix wording / typos in TemplateKeyword.dox
2015-05-11 23:37:52 +02:00
MattPD
447e060b81 [Doc] Fix wording / typos in TemplateKeyword.dox 2015-05-11 16:50:18 +00:00
Christoph Hertzberg
494fa991c3 bug #872: Avoid deprecated binder1st/binder2nd usage by providing custom functors for comparison operators 2015-05-07 17:28:40 +02:00
Gael Guennebaud
4a936974a5 bug #1013: fix 2x2 direct eigensolver for identical eiegnvalues 2015-05-07 15:55:12 +02:00
Gael Guennebaud
c2107d30ce Extend unit tests of sefladjoint-eigensolver 2015-05-07 15:54:07 +02:00
Gael Guennebaud
ebf8ca4fa8 Fix bug #1010: m_isInitialized was improperly updated 2015-05-07 14:20:42 +02:00
Konstantinos Margaritis
dd698e6680 Merged in doug_kwan/eigen (pull request PR-103)
Fix bug in pdiv<Packet1cd> which swaps 32-bit halves of a pair of
2015-05-05 20:50:14 +03:00
Benoit Steiner
1dded10cb7 Added a double-precision implementation of the exp() function for AVX. 2015-05-04 10:42:51 -07:00
Christoph Hertzberg
6273aca9b1 small typo 2015-05-04 15:26:31 +00:00
Christoph Hertzberg
4dd7d0b5dc Merged in mvdyck/eigen-3/mvdyck/doc-multithreading-fix-old-n-eigennbthr-1430750928880 (pull request PR-114)
[Doc] Multi-threading fix
2015-05-04 17:23:21 +02:00
michiel van dyck
4b9eddaef8 [Doc] Multi-threading fix
OLD: n = Eigen::nbThreads( n );
NEW: n = Eigen::nbThreads( );

from:
You can query the number of threads that will be used with:
\code
n = Eigen::nbThreads( );
\endcode

Kr Michiel
2015-05-04 14:48:52 +00:00
Christoph Hertzberg
28a4c92cbf bug #998: Started fixing doxygen warnings 2015-05-01 22:10:41 +02:00
Christoph Hertzberg
173b34e9ab bug #999: clarify that behavior of empty AlignedBoxes is undefined, and further improvements in documentation 2015-04-30 19:30:36 +02:00
Christoph Hertzberg
da2baf685d Regression test for bug #302
(transplanted from 80fd8fab87
)
Changed DenseIndex to Index
2015-04-26 21:05:33 +02:00
Christoph Hertzberg
8c6a3b5ace Fix trivial warnings in LevenbergMarquardt module and test 2015-04-24 21:35:30 +02:00
Gael Guennebaud
de18cd413d Disable posix_memalign on Solaris and SunOS, and allows to by-pass built-in posix_memalign detection rules. 2015-04-24 11:26:51 +02:00
Gael Guennebaud
1681a665d9 Extend unit test of Map<,,Stride<>> with stack allocated buffers and less trivial operations. 2015-04-24 10:38:28 +02:00
Gael Guennebaud
834f66e9fc Extend unit test of Map<> with stack allocated buffers and less trivial operations. 2015-04-24 10:10:19 +02:00
Gael Guennebaud
40258078c6 bug #360: add value_type typedef to DenseBase/SparseMatrixBase 2015-04-24 09:44:24 +02:00
Christoph Hertzberg
c460af414e Fix bug #1000: Manually inherit assignment operators for MSVC 2013 and later (as required by the standard). 2015-04-23 13:39:03 +02:00
Benoit Steiner
fd1d4bd86c Silenced a few compilation warnings 2015-04-22 16:16:15 -07:00
Benoit Steiner
91359e1d0a Added the ability to generate a tensor from a custom user defined 'generator'. This simplifies the creation of constant tensors initialized using specific regular patterns.
Created a gaussian window generator as a first use case.
2015-04-22 11:14:58 -07:00
Benoit Steiner
8838ed39f4 Added support for non-deterministic random number generation on GPU 2015-04-22 09:14:38 -07:00
Christoph Hertzberg
e7457e419d Merge with dfa991cbae 2015-04-22 03:39:32 +02:00
Benoit Steiner
dfa991cbae Make sure that the copy constructor of the evaluator is always called before launching the evaluation of a tensor expression on a cuda device. 2015-04-21 16:15:45 -07:00
Gael Guennebaud
dbd12b4cda Make sure that BlockImpl<const SparseMatrix> ctor is called with the right type 2015-04-21 10:15:36 +02:00
Gael Guennebaud
d6a8b43b39 Fix typo in the definition of EIGEN_COMP_GNUC_STRICT 2015-04-21 10:12:38 +02:00
Benoit Steiner
e709488361 Silenced a few compilation warnings 2015-04-20 17:39:45 -07:00
Benoit Steiner
10a1f81822 Sped up the assignment of a tensor to a tensor slice, as well as the assigment of a constant slice to a tensor 2015-04-20 17:34:11 -07:00
Deanna Hood
e5048b5501 Use std::isfinite when available 2015-04-20 14:59:57 -04:00
Deanna Hood
249c48ba00 Incorporate C++11 check into EIGEN_HAS_C99_MATH macro 2015-04-20 14:57:04 -04:00
Deanna Hood
0250f4a9f2 Merged default into unary-array-cwise-functors 2015-04-20 14:01:35 -04:00
Deanna Hood
0339502a4f Only use std::isnan and std::isinf if they are available 2015-04-20 13:14:06 -04:00
Benoit Steiner
43eb2ca6e1 Improved the tensor random number generators:
* Use a mersenne twister whenebver possible instead of the default entropy source since the default one isn't very good at all.
 * Added the ability to seed the generators with a time based seed to make them non-deterministic.
2015-04-20 09:24:09 -07:00
Christoph Hertzberg
016c29f207 Merge with 70bc3b0668 2015-04-20 08:33:39 +02:00
Benoit Steiner
70bc3b0668 Silenced a warning in the tensor code 2015-04-19 12:38:00 -07:00
Benoit Steiner
3220eb2b93 Fixed some compilation warnings 2015-04-19 12:36:35 -07:00
Gael Guennebaud
fc2d5b86ce simplify previous changeset: sub-expressions are nested by value 2015-04-18 22:50:16 +02:00
Gael Guennebaud
5a3c48e3c6 bug #942: fix dangling references in evaluator of diagonal * sparse products. 2015-04-18 22:43:27 +02:00
Benoit Steiner
3b429b71e6 Fixed compilation warning triggered by gcc 4.7 2015-04-18 13:41:06 -07:00
Benoit Steiner
9c6b82bcd5 Use ptrdiff_t instead of size_t to encode fixed sizes. This silences several clang compilation warnings
(transplanted from 4400e4436ac7c5bbd305a03c21aa4bce24ae199b)
2015-04-17 09:12:18 -07:00
Christoph Hertzberg
4f126b862d Add internal assertions to purely fixed-size DenseStorage, mark optional variables always as unused 2015-04-17 11:36:21 +02:00
Benoit Steiner
da5b98a94d Updated the cxx11_tensor_convolution test to avoid using cxx11 features. This should enable the test to compile with gcc 4.7 and older 2015-04-16 12:29:16 -07:00
Benoit Steiner
d19d09ae6a Updated a regression test to avoid compilation errors when compiling with gcc 4.7 2015-04-16 12:15:27 -07:00
Christoph Hertzberg
9d7843d0d0 Add internal assertions to DenseStorage constructor 2015-04-16 15:47:06 +02:00
Christoph Hertzberg
3be9f5c4d7 Constructing a Matrix/Array with implicit transpose could lead to memory leaks.
Also reduced code duplication for Matrix/Array constructors
2015-04-16 13:25:20 +02:00
Gael Guennebaud
e0cff9ae0d Fix bug #996: fix comparisons to 0 instead of Scalar(0) 2015-04-15 14:48:53 +02:00
Gael Guennebaud
5dbe758dc3 Backed out changeset 04c8c5d9ef 2015-04-15 14:47:08 +02:00
Gael Guennebaud
04c8c5d9ef Fix bug #996: fix comparisons to 0 instead of Scalar(0) 2015-04-15 14:44:57 +02:00
Benoit Steiner
0f82399fe9 Pulled latest changes from trunk 2015-04-14 19:13:34 -07:00
Christoph Hertzberg
761691f18d Make conversion from 0 to Scalar explicit (issue reported by Brad Bell) 2015-04-13 17:15:00 +02:00
Benoit Steiner
5401fbcc50 Improved the blocking strategy to speedup multithreaded tensor contractions. 2015-04-09 16:44:10 -07:00
Deanna Hood
085aa8e601 Don't use M_PI since it's only guaranteed to be defined in Eigen/Geometry 2015-04-08 13:59:18 -05:00
Gael Guennebaud
0eb220c00d add a note on bug #992 2015-04-08 09:25:34 +02:00
Benoit Jacob
d7f51feb07 bug #992: don't select a 3p GEMM path with non-vectorizable scalar types, this hits unsupported paths in symm/triangular products code 2015-04-07 15:13:55 -04:00
Benoit Jacob
0e9753c8df Fix compiler flags on Android/ARM:
- generate position-independent code (PIE), a requirement to run binaries on Android 5.0+ devices;
 - correctly handle EIGEN_TEST_FMA + EIGEN_TEST_NEON to pass -mfpu=neon-vfpv4.
2015-04-07 14:03:21 -04:00
Benoit Steiner
1de49ef4c2 Fixed a bug when chipping tensors laid out in row major order. 2015-04-07 10:44:13 -07:00
Benoit Steiner
a1f1e1e51d Fixed the order of 2 #includes 2015-04-06 10:41:39 -07:00
Benoit Steiner
7c18ab921c Pulled latest updates from trunk 2015-04-04 20:07:04 -07:00
Gael Guennebaud
15b5adb327 Fix regression in DynamicSparseMatrix and SuperLUSupport wrt recent change on nonZeros/nonZerosEstimate 2015-04-02 22:21:41 +02:00
Benoit Steiner
74e558cfa8 Pulled latest updates from trunk 2015-04-01 23:24:11 -07:00
Benoit Steiner
03a0df2010 Fixed some compilation warnings triggered by pre-cxx11 comoilers 2015-04-01 22:51:33 -07:00
Benoit Steiner
b8b7807269 Fixed some compilation warning triggered by the cxx11 emulation code 2015-04-01 21:48:18 -07:00
Benoit Steiner
383b6dfafe Fixed 2 typos 2015-04-01 16:44:36 -07:00
Gael Guennebaud
5861cfb55e Remove unused GenericSparseBlockInnerIteratorImpl code. 2015-04-01 22:29:29 +02:00
Gael Guennebaud
3105986e71 bug #875: remove broken SparseMatrixBase::nonZeros and introduce a nonZerosEstimate() method to sparse evaluators for internal uses.
Factorize some code in SparseCompressedBase.
2015-04-01 22:27:34 +02:00
Gael Guennebaud
39dcd01b0a bug #973: enable alignment of multiples of half-packet size (e.g., Vector6d with AVX) 2015-04-01 13:55:09 +02:00
Gael Guennebaud
8481dc21ea bug #986: add support for coefficient-based product with 0 depth. 2015-04-01 13:15:23 +02:00
Gael Guennebaud
79b4e6acaf Fix bug #987: wrong alignement guess in diagonal product. 2015-03-31 23:35:12 +02:00
Gael Guennebaud
3c38589984 Remove most of the dynamic memory allocations that occured in D&C SVD. Still remains the calls to JacobiSVD and UpperBidiagonalization. 2015-03-31 22:54:47 +02:00
Gael Guennebaud
8313fb7df7 Add row/column-wise reverseInPlace feature. 2015-03-31 21:35:53 +02:00
Gael Guennebaud
dfb674a25e Make reverseInPlace really work in-place. 2015-03-31 20:17:10 +02:00
Gael Guennebaud
20d030f207 Fix vectorization of swap for non trivial expressions 2015-03-31 20:16:02 +02:00
Benoit Steiner
678207e02a Added regression tests for tensor convolutions 2015-03-31 09:08:08 -07:00
Benoit Steiner
68d4afe985 Added support for convolution of tensors laid out in RowMajor mode 2015-03-31 09:07:09 -07:00
Benoit Steiner
f873686602 Added documentation for the convolution operation 2015-03-31 08:27:23 -07:00
Benoit Jacob
73cdeae1d3 Only use blocking sizes LUTs for single-thread products for now 2015-03-31 11:17:23 -04:00
Benoit Jacob
0cbd5ae3cb Correctly detect Android with ndk_build 2015-03-31 11:17:21 -04:00
Gael Guennebaud
ae01c05e18 Fix computeProductBlockingSizes with m==0, and add respective unit test. 2015-03-31 15:19:57 +02:00
Gael Guennebaud
bd76d837e6 Fix sign of SuperLU::determinant 2015-03-31 14:57:32 +02:00
Gael Guennebaud
35d3053d55 Fix regression introduced in 3b169d792d 2015-03-31 09:23:53 +02:00
Benoit Steiner
731d7b84b4 Sharded a large test 2015-03-30 23:26:45 -07:00
Christoph Hertzberg
7bd578d11d Change CMake warning to simple message for old Metis versions 2015-03-31 00:50:04 +02:00
Christoph Hertzberg
3b169d792d Suppress unused variable warning 2015-03-31 00:49:08 +02:00
Christoph Hertzberg
3238ca6abc Addendum to last patch: k is Index and not int 2015-03-31 00:42:14 +02:00
Christoph Hertzberg
1efae98fee bug #985: RealQZ failed when either matrix had zero rows or columns (report and patch by Ben Goodrich)
Also added a regression test
2015-03-30 23:56:20 +02:00
Benoit Steiner
35722fa022 Made the index type a template parameter of the tensor class instead of encoding it in the options. 2015-03-30 14:55:54 -07:00
Benoit Steiner
71950f02e5 Deleted unnecessary semicolons 2015-03-30 14:49:10 -07:00
Christoph Hertzberg
58af8bf90c bug #982: Make sure numext::maxi and numext::mini are called correctly, in case Scalar expressions return expression templates. 2015-03-30 16:47:22 +02:00
Gael Guennebaud
2adbf6b8ca fix stupid warning with old GCC 2015-03-28 22:34:54 +01:00
Gael Guennebaud
41e20248f8 merge 2015-03-28 14:43:35 +01:00
Christoph Hertzberg
09a5361d1b bug #983: Pass Vector3 by const reference and not by value 2015-03-28 12:36:24 +01:00
Christoph Hertzberg
266a84558f Optionally build the documentation when building unit tests. 2015-03-27 16:36:59 +01:00
Christoph Hertzberg
1b4bb20cf1 Merged in d_hood/eigen/sparse-tutorial-doc-fix (pull request PR-107)
[Doc] Fix missing image in sparse tutorial
2015-03-27 16:22:16 +01:00
Gael Guennebaud
eb7e4c2b9c Pass Vector3 type by reference 2015-03-27 12:11:24 +01:00
Gael Guennebaud
ad044008da Fix transpose versus adjoint. 2015-03-27 12:07:14 +01:00
Gael Guennebaud
79cb875249 merge 2015-03-27 10:56:04 +01:00
Gael Guennebaud
7e225b6fa4 Suppress some false negatives in SVD unit test 2015-03-27 10:55:53 +01:00
Gael Guennebaud
1b8cc9af43 Slight numerical stability improvement in 2x2 svd 2015-03-27 10:55:00 +01:00
Gael Guennebaud
3d59ae0203 Fix hypot(0,0). 2015-03-27 09:59:24 +01:00
Benoit Steiner
4df8b5a75e Avoid making an unecessary copy of the tensor expression when evaluating it on a GPU device 2015-03-25 14:36:07 -07:00
Benoit Steiner
b3343bfdae Fixed the vectorized implementation of the Tensor select() method 2015-03-25 13:25:53 -07:00
Benoit Steiner
ccf290a65c Cleaned up the TensorDevice code a little bit. 2015-03-25 12:37:38 -07:00
Benoit Steiner
d3f7915aeb Pulled latest update from the eigen main codebase 2015-03-24 13:12:14 -07:00
Benoit Steiner
abdbe8562e Fixed the CUDA packet primitives 2015-03-24 10:45:46 -07:00
Gael Guennebaud
29eaa2b0f1 Make MatrixBase::is* methods aware of nested_eval. 2015-03-24 13:42:42 +01:00
Gael Guennebaud
f42b105f73 Add the possibility to make VERIFY* checks to output a warning instead of abording. 2015-03-24 13:39:14 +01:00
Gael Guennebaud
d27968eb7e D&C SVD: directly falls back to JacobiSVD for very small problems (by-pass upper-bidiagonalization) 2015-03-24 13:38:07 +01:00
Gael Guennebaud
4472f3e578 Avoid SVD: consider denormalized small numbers as zero when computing the rank of the matrix 2015-03-23 09:40:21 +01:00
Deanna Hood
83e5b7656b Use M_PI instead of acos(-1) for pi 2015-03-22 06:04:31 +10:00
Deanna Hood
4bab4790c0 Add \sa tags of isFinite/isInf for each other 2015-03-22 05:39:08 +10:00
Gael Guennebaud
4e2b18d909 Update approx. minimum ordering method to push and keep structural empty diagonal elements to the bottom-right part of the matrix 2015-03-20 16:33:48 +01:00
Gael Guennebaud
8d9bfb3a7b fix loadMarket wrt Index versus int 2015-03-20 16:00:10 +01:00
Benoit Steiner
a6a628ca6b Added the -= operator to the device classes 2015-03-19 23:22:19 -07:00
Benoit Steiner
e134226a03 Fixed a bug in the handling of packets by the MeanReducer 2015-03-19 23:11:42 -07:00
Gael Guennebaud
9ee62fdcd5 Fix random unit test for 32bits systems. 2015-03-19 21:39:37 +01:00
Gael Guennebaud
d6b2f300db Fix MSVC compilation: aligned type must be passed by reference 2015-03-19 17:28:32 +01:00
Gael Guennebaud
61c45d7cfd Fix comparison warning 2015-03-19 17:13:22 +01:00
Gael Guennebaud
d7698c18b7 Split sparse_basic unit test 2015-03-19 15:11:05 +01:00
Gael Guennebaud
f329d0908a Improve random number generation for integer and add unit test 2015-03-19 15:10:36 +01:00
Deanna Hood
2ab4922431 Make html directory before generating output image there 2015-03-18 07:24:13 +10:00
Deanna Hood
41b717de25 More extensive unit tests for recent array-wise functors 2015-03-18 03:11:03 +10:00
Benoit Steiner
cc0f89eb3b Changed the way lvalue operations are declared in TensorBase: this fixes constness isses that prevented some expressions mixing lvalues and rvalues from compiling. 2015-03-17 09:57:20 -07:00
Benoit Jacob
dc04f12967 use unsigned short instead of uint16_t which doesn't exist in c++98 2015-03-17 10:31:45 -04:00
Deanna Hood
8878e1c1de Remove ambiguity with recent numext methods isNaN and isInf 2015-03-17 22:39:51 +10:00
Deanna Hood
596be3cd86 Use std::arg for real numbers when c++11 is used, custom implementation otherwise 2015-03-17 15:28:12 +10:00
Deanna Hood
e26134ed62 Use std::round when c++11 is used, custom implementation otherwise 2015-03-17 14:55:14 +10:00
Deanna Hood
e21e29a088 Update cost of arg call to depend on if the scalar is complex or not 2015-03-17 14:04:33 +10:00
Deanna Hood
447a5a6b01 Fix VML declarations to only be for real/complex as appropriate 2015-03-17 13:33:31 +10:00
Deanna Hood
f52b78491c Remove packet isNaN, isInf, isFinite 2015-03-17 09:26:24 +10:00
Deanna Hood
1c78d6f2a6 Add boolean not operator (!) array support 2015-03-17 08:29:57 +10:00
Deanna Hood
85da0c2281 Remove test of now-missing floor, ceil, round complex implementations 2015-03-17 06:56:47 +10:00
Benoit Jacob
364cfd529d Similar to cset 3589a9c115
, also in 2px4 kernel: actual_panel_rows computation should always be resilient to parameters not consistent with the known L1 cache size, see comment
2015-03-16 16:28:44 -04:00
Benoit Steiner
25664afacd Pulled latest updates from trunk 2015-03-16 13:25:45 -07:00
Deanna Hood
e1d6e6c972 Make cube, inverse and abs2 free-functions 2015-03-17 06:25:24 +10:00
Benoit Jacob
577056aa94 Include stdint.h. Not going for cstdint because it is a C++11 addition. Needed for uint16_t at least, in lookup-table code. 2015-03-16 16:21:50 -04:00
Benoit Steiner
5144f66728 Fixed compilation warning 2015-03-16 13:17:52 -07:00
Benoit Steiner
0fd6d52724 Fixed compilation error with clang 2015-03-16 13:16:12 -07:00
Benoit Jacob
eb6929cb19 fix bug in maxsize calculation, which would cause products of size > 2048 to address the lookup table out of bounds 2015-03-16 16:15:47 -04:00
Benoit Steiner
f218c0181d Fixes the Lvalue computation by actually setting the LvalueBit properly when instantiating tensors of const T. Added a test to check the fix. 2015-03-16 13:05:00 -07:00
Deanna Hood
fef4e071d7 Rename isinf to isInf 2015-03-17 05:58:47 +10:00
Deanna Hood
46cf9cda32 Add isfinite array support as isFinite 2015-03-17 04:33:12 +10:00
Deanna Hood
7b829940d1 Add code snippets for new methods 2015-03-17 03:40:28 +10:00
Deanna Hood
1d76ceab55 Remove floor, ceil, round for complex numbers 2015-03-17 02:36:07 +10:00
Deanna Hood
717b7954ce Update cost of coeff-wise arg call 2015-03-17 02:11:57 +10:00
Deanna Hood
fb68b149cb Rename isnan to isNaN 2015-03-17 02:04:42 +10:00
Benoit Jacob
35c3a8bb84 Update Nexus 5 lookup table from combining now 2 runs of the benchmark, using the analyze-blocking-sizes partition tool. Gives better worst-case performance. 2015-03-16 11:05:51 -04:00
Benoit Jacob
e274607d7f fix compilation with GCC 4.8 2015-03-16 10:48:27 -04:00
Benoit Jacob
151b8b95c6 Fix bug in case where EIGEN_TEST_SPECIFIC_BLOCKING_SIZE is defined but false 2015-03-15 19:10:51 -04:00
Benoit Jacob
02babb9c0f Provide a empirical lookup table for blocking sizes measured on a Nexus 5. Only for float, only for Android on ARM 32bit for now. 2015-03-15 18:13:12 -04:00
Benoit Jacob
3589a9c115 actual_panel_rows computation should always be resilient to parameters not consistent with the known L1 cache size, see comment 2015-03-15 18:12:18 -04:00
Benoit Jacob
1dd3d89818 Fix a unused-var warning 2015-03-15 18:07:19 -04:00
Benoit Jacob
ca5c12587b Polish lookup tables generation 2015-03-15 18:05:53 -04:00
Benoit Jacob
e56aabf205 Refactor computeProductBlockingSizes to make room for the possibility of using lookup tables 2015-03-15 18:05:12 -04:00
Benoit Jacob
b6b88c0808 update perf_monitoring/gemm/changesets.txt 2015-03-13 14:57:05 -07:00
Benoit Jacob
488c15615a organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5) 2015-03-13 14:51:26 -07:00
Gael Guennebaud
9f58524cbd merge 2015-03-13 21:16:39 +01:00
Gael Guennebaud
1330f8bbd1 bug #973, improve AVX support by enabling vectorization of Vector4i-like types, and enforcing alignement of Vector4f/Vector2d-like types to preserve compatibility with SSE and future Eigen versions that will vectorize them with AVX enabled. 2015-03-13 21:15:50 +01:00
Gael Guennebaud
d99ab35f9e Fix internal::random(x,y) for integer types. The previous implementation could return y+1. The new implementation uses rejection sampling to get an unbiased behabior. 2015-03-13 21:12:46 +01:00
Gael Guennebaud
8580eb6808 bug #949: add static assertion for incompatible scalar types in dense end-user decompositions. 2015-03-13 21:06:20 +01:00
Gael Guennebaud
a9df28c95b SparseMatrix::insert: switch to a fully uncompressed mode if sequential insertion is not possible (otherwise an arbitrary large amount of memory was preallocated in some cases) 2015-03-13 21:00:21 +01:00
Gael Guennebaud
5ffe29cb9f Bound pre-allocation to the maximal size representable by StorageIndex and throw bad_alloc if that's not possible. 2015-03-13 20:57:33 +01:00
Benoit Jacob
d73ccd717e Add support for dumping blocking sizes tables 2015-03-13 10:36:01 -07:00
Gael Guennebaud
2f6f8bf31c Add missing coeff/coeffRef members to Block<sparse>, and extend unit tests. 2015-03-13 16:24:40 +01:00
Benoit Jacob
f2c3e2b10f Add --only-cubic-sizes option to analyze-blocking-sizes tool 2015-03-12 13:16:33 -07:00
Doug Kwan
657407227e Fix bug in pdiv<Packet1cd> which swaps 32-bit halves of a pair of
doubles instead of swapping the doubles.
2015-03-11 15:13:37 -07:00
Deanna Hood
f89fcefa79 Add hyperbolic trigonometric functions from std array support 2015-03-11 13:13:30 +10:00
Deanna Hood
a5e49976f5 Add log10 array support 2015-03-11 08:56:42 +10:00
Deanna Hood
19a71056ae Allow calling of square(array) in addition to array.square() 2015-03-11 06:59:28 +10:00
Deanna Hood
31fdd67756 Additional unary coeff-wise functors (isnan, round, arg, e.g.) 2015-03-11 06:39:23 +10:00
Gael Guennebaud
fd78874888 Fix compilation of iterative solvers with dense matrices 2015-03-09 21:31:03 +01:00
Gael Guennebaud
d4317a85e8 Add typedefs for return types of SparseMatrixBase::selfadjointView 2015-03-09 21:29:46 +01:00
Gael Guennebaud
9e885fb766 Add unit tests for CG and sparse-LLT for long int as storage-index 2015-03-09 14:33:15 +01:00
Gael Guennebaud
224a1fe4c6 bug #963: make IncompleteLUT compatible with non-default storage index types. 2015-03-09 13:55:20 +01:00
Gael Guennebaud
cf9940e17b Make sparse unit-test helpers aware of StorageIndex 2015-03-09 13:54:05 +01:00
Benoit Jacob
39228cb224 deserialization assumed benchmarks in same order, but we shuffle them. 2015-03-06 19:29:01 -05:00
Benoit Jacob
a4f956b1da merge 2015-03-06 19:13:36 -05:00
Benoit Jacob
19bf13aa62 Automatically serialize partial results to disk, reboot, and resume, when timings are getting bad 2015-03-06 19:11:50 -05:00
Gael Guennebaud
0ee391863e Avoid undeflow when blocking size are tuned manually. 2015-03-06 21:51:09 +01:00
Gael Guennebaud
14a5f135a3 bug #969: workaround abiguous calls to Ref using enable_if. 2015-03-06 17:51:31 +01:00
Gael Guennebaud
d23fcc0672 bug #978: add unit test for zero-sized products 2015-03-06 16:12:08 +01:00
Gael Guennebaud
87681e508f bug #978: early return for vanishing products 2015-03-06 16:11:22 +01:00
Gael Guennebaud
4c8eeeaed6 update gemm changeset list 2015-03-06 15:08:20 +01:00
Gael Guennebaud
cd3bbffa73 Improve blocking heuristic: if the lhs fit within L1, then block on the rhs in L1 (allows to keep packed rhs in L1) 2015-03-06 14:31:39 +01:00
Gael Guennebaud
eedd5063fd Update gemm performance monitoring tool:
- permit to recompute a subset of changesets
 - update changeset list
 - add a few more cases
2015-03-06 11:47:13 +01:00
Gael Guennebaud
58740ce4c6 Improve product kernel: replace the previous dynamic loop swaping strategy by a more general one:
It consists in increasing the actual number of rows of lhs's micro horizontal panel for small depth such that L1 cache is fully exploited.
2015-03-06 10:30:35 +01:00
Benoit Jacob
4ab01f7c21 slightly increase tolerance to clock speed variation 2015-03-05 14:41:16 -05:00
Benoit Jacob
5db2baa573 Make benchmark-blocking-sizes detect changes to clock speed and be resilient to that. 2015-03-05 13:44:20 -05:00
Gael Guennebaud
4c8b95d5c5 Rename LSCG to LeastSquaresConjugateGradient 2015-03-05 10:16:32 +01:00
Gael Guennebaud
7550107028 Product optimization: implement a dynamic loop-swapping startegy to improve memory accesses to the destination matrix in the case of K-rank-update like products, i.e., for products of the kind: "large x small" * "small x large" 2015-03-05 10:03:46 +01:00
Gael Guennebaud
2dc968e453 bug #824: improve accuracy of Quaternion::angularDistance using atan2 instead of acos. 2015-03-04 17:03:13 +01:00
Benoit Jacob
2231b3dece output to cout, not cerr, the actual results 2015-03-04 09:45:12 -05:00
Benoit Jacob
00ea121881 Complete the tool to analyze the efficiency of default sizes. 2015-03-04 09:30:56 -05:00
Benoit Steiner
0196141938 Fixed the optimized AVX implementation of the fast rsqrt function 2015-03-02 13:49:39 -08:00
Benoit Steiner
b0f2b6f297 Updated the tensor type casting code as follow: in the case where TgtRatio < SrcRatio, disable the vectorization of the source expression unless is has direct-access. 2015-03-02 10:11:40 -08:00
Benoit Steiner
d9cb604a5d Disabled the use of aligned memory loads when converting a tensor from float to doubles since alignment can't always be guaranteed. 2015-03-02 09:41:36 -08:00
Benoit Steiner
4fd7f47692 Added an optimized version of rsqrt for SSE and AVX that is used when EIGEN_FAST_MATH is defined. 2015-03-02 09:38:47 -08:00
Benoit Steiner
ae73859a0a Fixed incorrect assertion 2015-02-28 08:02:02 -08:00
Benoit Steiner
131449298f Fixed clang compilation warning 2015-02-28 03:01:19 -08:00
Benoit Steiner
56ea45ff0f Silenced some compilation warnings 2015-02-28 02:37:41 -08:00
Benoit Steiner
bb483313f6 Fixed another batch of compilation warnings 2015-02-28 02:32:46 -08:00
Benoit Steiner
fb53384b0f Improved the default implementation of prsqrt 2015-02-28 01:51:26 -08:00
Benoit Steiner
61409d9449 Silenced one more comilation warning 2015-02-28 01:49:09 -08:00
Benoit Steiner
1a7b84dc75 Silenced a few compilation warnings 2015-02-28 01:45:15 -08:00
Benoit Steiner
37357a310f Fixed compilation warnings 2015-02-27 23:54:24 -08:00
Benoit Steiner
cf1eea11de Fixed compilation warnings 2015-02-27 23:52:02 -08:00
Benoit Steiner
78732186ee Fixed compilation warnings 2015-02-27 23:51:16 -08:00
Benoit Steiner
4250a0cab0 Fixed compilation warnings 2015-02-27 21:59:10 -08:00
Benoit Steiner
a4e37b0617 Reverted the README 2015-02-27 13:09:49 -08:00
Benoit Steiner
306fceccbe Pulled latest updates from trunk 2015-02-27 13:05:26 -08:00
Benoit Steiner
75e7f381c8 Pulled latest updates from trunk 2015-02-27 12:57:55 -08:00
Benoit Steiner
2386fc8528 Added support for 32bit index on a per tensor/tensor expression. This enables us to use 32bit indices to evaluate expressions on GPU faster while keeping the ability to use 64 bit indices to manipulate large tensors on CPU in the same binary. 2015-02-27 12:57:13 -08:00
Benoit Steiner
e1f6a45b14 README.md edited online with Bitbucket 2015-02-27 20:44:24 +00:00
Benoit Steiner
90893bbe18 README.md edited online with Bitbucket 2015-02-27 20:44:10 +00:00
Benoit Steiner
473e6d4c3d README.md edited online with Bitbucket 2015-02-27 20:41:45 +00:00
Benoit Steiner
4369538227 README.md edited online with Bitbucket 2015-02-27 20:41:33 +00:00
Benoit Steiner
99cfbd6e84 README.md edited online with Bitbucket 2015-02-27 20:41:14 +00:00
Benoit Jacob
6466fa63be Reimplement the selection between rotating and non-rotating kernels
using templates instead of macros and if()'s.
That was needed to fix the build of unit tests on ARM, which I had
broken. My bad for not testing earlier.
2015-02-27 15:30:10 -05:00
Benoit Steiner
05089aba75 Switch to truncated casting when converting floating point types to integer. This ensures that vectorized casts are consistent with scalar casts 2015-02-27 09:27:30 -08:00
Benoit Steiner
bf9877a92a Pulled latest updates from trunk 2015-02-27 09:23:22 -08:00
Benoit Steiner
90f4e90f1d Fixed off-by-one error that prevented the evaluation of small tensor expressions from being vectorized 2015-02-27 09:22:37 -08:00
Benoit Steiner
573b377110 Added support for vectorized type casting of tensors 2015-02-27 08:46:04 -08:00
Benoit Jacob
2fc3b484d7 remove trailing comma 2015-02-27 11:37:45 -05:00
Benoit Jacob
33669348c4 Disable Packet2f/2i halfpacket support in NEON.
I believe that it was erroneously turned on, since Packet2f/2i intrinsics are unimplemented,
and code trying to use halfpackets just fails to compile on NEON, as it tries to use the
default implementation of pload/pstore and the types don't match.
2015-02-27 11:35:37 -05:00
Benoit Jacob
f5ff4d826f Fix NEON build flags: in the current NDK, at least with the clang-3.5 toolchain,
-mfpu=neon is not enough to activate NEON, since it's incompatible with the default float ABI,
and I have to pass -mfloat-abi=softfp (which is what everyone does in practice).
In fact, it would be a good idea to pass -mfloat-abi=softfp all the time, regardless of NEON.
Also removing the -mcpu=cortex-a8, as 1) it's not needed and 2) if we really wanted to pass
a specific -mcpu flag, that would presumably to tune performance for benchmarks, and it would
then not really make sense to tune for the very old cortex-a8 (it reflects ARM CPUs from 5 years ago).
2015-02-27 10:56:50 -05:00
Benoit Jacob
b7fc8746e0 Replace a static assert by a runtime one, fixes the build of unit tests on ARM
Also safely assert in the non-implemented path that should never be taken in practice,
and would return wrong results.
2015-02-27 10:01:59 -05:00
Benoit Steiner
f074bb4b5f Fixed another compilation problem with TensorIntDiv.h 2015-02-26 11:14:23 -08:00
Benoit Steiner
57154fdb32 Can now use the tensor 'reverse' operation as a lvalue 2015-02-26 11:13:42 -08:00
Benoit Steiner
f41b1f1666 Added support for fast reciprocal square root computation. 2015-02-26 09:42:41 -08:00
Benoit Steiner
2fffe69b1b Added missing copy constructor 2015-02-26 09:27:53 -08:00
Gael Guennebaud
bcf9bb5c1f Avoid packing rhs multiple-times when blocking on the lhs only. 2015-02-26 17:01:33 +01:00
Gael Guennebaud
4ec3f04b3a Make sure that the block size computation is tested by our unit test. 2015-02-26 17:00:36 +01:00
Gael Guennebaud
2e9cb06a87 Update changeset list to be checked by perf_monitoring/gemm. 2015-02-26 16:13:33 +01:00
Gael Guennebaud
a46061ab7b Make perf_monitoring/gemm script more flexible:
- skip existing dataset
  - add a "-up" option to recompute the dataset (see script header)
  - allow to specify a filename prefix
2015-02-26 16:12:58 +01:00
Gael Guennebaud
a8ad8887bf Implement a more generic blocking-size selection algorithm. See explanations inlines.
It performs extremely well on Haswell. The main issue is to reliably and quickly find the
actual cache size to be used for our 2nd level of blocking, that is: max(l2,l3/nb_core_sharing_l3)
2015-02-26 16:04:35 +01:00
Gael Guennebaud
400becc591 Fix typos in block-size testing code, and set peeling on k to 8. 2015-02-26 15:57:06 +01:00
Benoit Steiner
bffb6bdf45 Made TensorIntDiv.h compile with MSVC 2015-02-25 23:54:43 -08:00
Benoit Steiner
27f3fb2bcc Fixed another clang warning 2015-02-25 22:54:20 -08:00
Benoit Steiner
f8fbb3f9a6 Fixed several compilation warnings reported by clang 2015-02-25 22:22:37 -08:00
Benoit Steiner
8e817b65d0 Silenced a few more compilation warnings generated by nvcc 2015-02-25 17:46:20 -08:00
Benoit Steiner
410070e5ab Added more tests to validate support for tensors laid out in RowMajor order. 2015-02-25 16:14:59 -08:00
Benoit Steiner
1cfd51908c Added support for RowMajor layout to the tensor patch extraction cofde. 2015-02-25 13:29:12 -08:00
Benoit Steiner
eb21a8173e Pulled latest changes from trunk 2015-02-25 09:49:44 -08:00
Benoit Steiner
8afce86e64 Added support for RowMajor layout to the image patch extraction code
Speeded up the unsupported_cxx11_tensor_image_patch test and reduced its memory footprint
2015-02-25 09:48:54 -08:00
Benoit Jacob
692136350b So I extensively measured the impact of the offset in this prefetch. I tried offset values from 0 to 128 (on this float* pointer, so implicitly times 4 bytes).
On x86, I tested a Sandy Bridge with AVX with 12M cache and a Haswell with AVX+FMA with 6M cache on MatrixXf sizes up to 2400.

I could not see any significant impact of this offset.

On Nexus 5, the offset has a slight effect: values around 32 (times sizeof float) are worst. Anything else is the same: the current 64 (8*pk), or... 0.

So let's just go with 0!

Note that we needed a fix anyway for not accounting for the value of RhsProgress. 0 nicely avoids the issue altogether!
2015-02-25 12:37:14 -05:00
Christoph Hertzberg
531fa9de77 bug #970: Add EIGEN_DEVICE_FUNC to RValue functions, in case Cuda supports RValue-references. 2015-02-24 21:03:28 +01:00
Benoit Jacob
26275b250a Fix my recent prefetch changes:
- the first prefetch is actually harmful on Haswell with FMA,
   but it is the most beneficial on ARM.
 - the second prefetch... I was very stupid and multiplied by sizeof(scalar)
   and offset of a scalar* pointer. The old offset was 64; pk = 8, so 64=pk*8.
   So this effectively restores the older offset. Actually, there were
   two prefetches here, one with offset 48 and one with offset 64. I could not
   confirm any benefit from this strange 48 offset on either the haswell or
   my ARM device.
2015-02-23 16:55:17 -05:00
Benoit Jacob
488874781b Add analyze-blocking-sizes program under bench/ to analyze multiple logs
generated by benchmark-blocking-sizes.
2015-02-23 14:02:29 -05:00
Christoph Hertzberg
052b6b40f1 Fix two trivial warnings 2015-02-22 12:40:51 +01:00
Christoph Hertzberg
ecbf2a6656 log1p is defined only for real Scalars in C++11 2015-02-21 19:58:24 +01:00
Christoph Hertzberg
6af6cf0c2e I can reproduce any problems that justified this hack. However it makes builds fail in C++11 mode. 2015-02-21 19:43:56 +01:00
Gael Guennebaud
3cf642baa3 Fix compilation of unit tests disabling assertion cheking 2015-02-21 14:13:48 +01:00
Benoit Jacob
458cf91cd9 Add benchmark-blocking-sizes.cpp to bench/ per mailing list discussion. 2015-02-20 17:08:04 -05:00
Gael Guennebaud
03ec601ff7 Initial version of a small script to help tracking performance regressions 2015-02-20 19:20:34 +01:00
Gael Guennebaud
333b497383 update bench_gemm 2015-02-20 11:59:49 +01:00
Gael Guennebaud
2da1594750 Fix doc of Ref<> 2015-02-20 11:52:22 +01:00
Gael Guennebaud
01b8440579 With C++11 Matrix<float> + Matrix<complex<float>> does not even compile 2015-02-20 09:32:49 +01:00
Gael Guennebaud
3594451ee0 Remove EIGEN_TEST_C++0x option and let EIGEN_TEST_CXX11 adds the -std=c++11 flag 2015-02-20 09:31:27 +01:00
Gael Guennebaud
b192e29eae In C++11 destructors do not throw by default (fix CommaInitializer unit test) 2015-02-20 09:28:34 +01:00
Benoit Steiner
ab41652d81 Pulled latest changes from trunk 2015-02-19 21:23:37 -08:00
Benoit Steiner
7765039f1c Marked the CUDA packet primitives as EIGEN_DEVICE_FUNC since they'll end up being executed on the GPU device. 2015-02-19 21:22:51 -08:00
Gael Guennebaud
a66f5fc2fd Fix regression with C++11 support of lambda: now internal::result_of falls back to std::result_of in C++11. 2015-02-19 23:32:12 +01:00
Gael Guennebaud
ece6b440f9 Fix a C++11 compilation issue in unit test 2015-02-19 23:31:08 +01:00
Gael Guennebaud
1b7e12847d Fix some calls to result_of on binary functors as unary ones. 2015-02-19 23:30:41 +01:00
Gael Guennebaud
0f4dd15dfc Declare const some const variables 2015-02-19 23:28:57 +01:00
Benoit Steiner
92ceb02c6d Pulle latest updates from trunk 2015-02-19 11:59:52 -08:00
Benoit Steiner
110fb90250 Improved the documentations 2015-02-19 11:59:04 -08:00
Gael Guennebaud
829dddd0fd Add support for C++11 result_of/lambdas 2015-02-19 15:18:37 +01:00
Benoit Jacob
db05f2d01e rotating kernel: avoid compiling anything outside of ARM 2015-02-18 15:43:52 -05:00
Benoit Jacob
0ed00d5438 remove a newly introduced redundant typedef - sorry. 2015-02-18 15:05:01 -05:00
Benoit Jacob
9bd8a4bab5 bug #955 - Implement a rotating kernel alternative in the 3px4 gebp path
This is substantially faster on ARM, where it's important to minimize the number of loads.

This is specific to the case where all packet types are of size 4. I made my best attempt to minimize how dirty this is... opinions welcome.

Eventually one could have a generic rotated kernel, but it would take some work to get there. Also, on sandy bridge, in my experience, it's not beneficial (even about 1% slower).
2015-02-18 15:03:35 -05:00
Hauke Heibel
ee27d50633 Fixed template parameter. 2015-02-18 18:51:08 +01:00
Gael Guennebaud
73a24de424 merge 2015-02-18 15:51:00 +01:00
Gael Guennebaud
63eb0f6fe6 Clean a bit computeProductBlockingSizes (use Index type, remove CEIL macro) 2015-02-18 15:49:05 +01:00
Gael Guennebaud
fc5c3e85e2 Fix bug #961: eigen-doc.tgz included part of itself. 2015-02-18 15:47:01 +01:00
Benoit Jacob
4a3e6c8be1 bug #958 - Allow testing specific blocking sizes
This is only a debugging/testing patch. It allows testing specific
product blocking sizes, typically to study the impact on performance.

Example usage:

int testk, testm, testn;
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K testk
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M testm
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N testn
#include <Eigen/Core>
2015-02-18 09:43:55 -05:00
Gael Guennebaud
c7bb1e8ea8 Fix a regression when using OpenMP, and fix bug #714: the number of threads might be lower than the number of requested ones 2015-02-18 15:19:23 +01:00
Jan Blechta
168ceb271e Really use zero guess in ConjugateGradients::solve as documented
and expected for consistency with other methods.
2015-02-18 14:26:10 +01:00
Gael Guennebaud
8fdcaded5e merge 2015-03-04 10:18:08 +01:00
Gael Guennebaud
c43154bbc5 Check for no-reallocation in SparseMatrix::insert (bug #974) 2015-03-04 10:16:46 +01:00
Gael Guennebaud
1ce0178363 Improve efficiency of SparseMatrix::insert/coeffRef for sequential outer-index insertion strategies (bug #974) 2015-03-04 09:39:26 +01:00
Gael Guennebaud
3dca4a1efc Update manual wrt new LSCG solver. 2015-03-04 09:35:30 +01:00
Gael Guennebaud
05274219a7 Add a CG-based solver for rectangular least-square problems (bug #975). 2015-03-04 09:34:27 +01:00
Benoit Jacob
2aa09e6b4e Fix asm comments in 1px1 kernel 2015-03-03 13:44:00 -05:00
Benoit Steiner
5d2fd64a1a Fixed compilation error when compiling with gcc4.7 2015-03-03 08:56:49 -08:00
Benoit Jacob
f64b4480af Add missing copyright notices 2015-03-03 11:43:56 -05:00
Benoit Jacob
eae8e27b7d Add a benchmark-default-sizes action to benchmark-blocking-sizes.cpp 2015-03-03 11:41:21 -05:00
Marc Glisse
37a93c4263 New scoring functor to select the pivot.
This is can be useful for non-floating point scalars, where choosing the biggest element is generally not the best choice.
2015-03-03 17:08:28 +01:00
Benoit Jacob
ccc1277a42 must also disable complex<double> when disabling double vectorization 2015-03-03 10:17:05 -05:00
Benoit Jacob
f839099512 Work around an ICE in Clang 3.5 in the iOS toolchain with double NEON intrinsics. 2015-03-03 09:35:22 -05:00
Benoit Jacob
9930e9583b Improve analyze-blocking-sizes, and in particular give it a evaluate-defaults tool
that shows the efficiency of Eigen's default blocking sizes choices, using a
previously computed table from benchmark-blocking-sizes.
2015-03-02 18:08:38 -05:00
Benoit Jacob
1ec0f4fadf HalfPacket also needed to be disabled for double, on ARMv8. 2015-03-02 16:08:54 -05:00
Gael Guennebaud
3109f0e74e Add SSE vectorization of Quaternion::conjugate. Significant speed-up when combined with products like q1*q2.conjugate() 2015-03-02 20:09:33 +01:00
Abhijit Kundu
ef09ce4552 Fix for TensorIO for Fixed sized Tensors.
The following code snippet was failing to compile:

TensorFixedSize<double, Sizes<4, 3> > t_4x3;
cout << 4x3;
2015-02-28 21:30:31 -05:00
Abhijit Kundu
3a4b6827b4 Merged eigen/eigen into default 2015-02-28 20:15:28 -05:00
Christoph Hertzberg
31e2ffe82c Replaced POSIX random() by internal::random 2015-02-28 18:39:37 +01:00
Christoph Hertzberg
73dd95e7b0 Use @CMAKE_MAKE_PROGRAM@ instead of make in buildtests.sh 2015-02-28 16:51:53 +01:00
Christoph Hertzberg
682196e9fc Fixed MPRealSupport 2015-02-28 16:41:00 +01:00
Christoph Hertzberg
33f40b2883 Cygwin does not like weak linking either. 2015-02-28 14:53:11 +01:00
Christoph Hertzberg
0f82a1d7b7 bug #967: Automatically add cxx11 suffix when building in C++11 mode 2015-02-28 14:52:26 +01:00
Gael Guennebaud
9aee1e300a Increase unit-test L1 cache size to ensure we are doing at least 2 peeled loop within product kernel. 2015-02-27 22:55:12 +01:00
Gael Guennebaud
b10cd3afd2 Re-enbale detection of min/max parentheses protection, and re-enable mpreal_support unit test. 2015-02-27 22:38:00 +01:00
Abhijit Kundu
4084dce038 Added CMake support for Tensor module. CMake now installs CXX11 Tensor module like the rest of the unsupported modules 2015-02-26 16:50:09 -05:00
Gael Guennebaud
548b781380 Fix bug #945: workaround MSVC warning 2015-02-18 12:53:49 +01:00
Gael Guennebaud
6f4adc9e94 Add missing install directives for arch/CUDA 2015-02-18 11:40:06 +01:00
Gael Guennebaud
371d3bef36 Workaround dead store warnings in unit tests. 2015-02-18 11:30:44 +01:00
Gael Guennebaud
63464754ef Add an internal assertion in makeCompressed to catch a possible risk of null-pointer access. 2015-02-18 11:29:54 +01:00
Gael Guennebaud
eb563049f7 Remove some dead stores. 2015-02-18 11:26:48 +01:00
Gael Guennebaud
dc7e6acc05 Fix possible usage of a null pointer in CholmodSupport 2015-02-18 11:26:25 +01:00
Gael Guennebaud
d4eda01488 Big 957, workaround MSVC/ICC compilation issue 2015-02-18 11:24:32 +01:00
Christoph Hertzberg
24d65ac0b0 Removed redundant typedef which confused old gcc versions. 2015-02-18 01:03:32 +01:00
Gael Guennebaud
20cac72b82 Packet must be passed by const reference and not by value to avoid alignment issue. 2015-02-17 22:58:32 +01:00
Benoit Steiner
36c9d08274 Pulled latest updates from trunk 2015-02-17 10:04:25 -08:00
Benoit Steiner
f77054f43c Silenced compilation warning 2015-02-17 10:02:04 -08:00
Benoit Steiner
1d3b64d32b Added support for tensor concatenation as lvalue 2015-02-17 09:57:41 -08:00
Benoit Steiner
00f048d44f Added support for tensor concatenation as lvalue 2015-02-17 09:54:40 -08:00
Christoph Hertzberg
97a36ecba4 Suppress some remaining Index conversion warnings 2015-02-17 18:52:39 +01:00
Gael Guennebaud
159fb181c2 Disable __m128* wrappers when compiling with AVX and -fabi-version=4 2015-02-17 16:27:20 +01:00
Gael Guennebaud
91ab2489dd Fix compilation with GCC/AVX (workaround __m128 and __m256 being the same type with default ABI) 2015-02-17 16:08:07 +01:00
Gael Guennebaud
9daf8eba6f Fix compilation of Cholmod*(matrix) ctor 2015-02-17 15:24:52 +01:00
Gael Guennebaud
3373c903b3 Fix compilation of int*complex with gcc 2015-02-16 19:18:12 +01:00
Gael Guennebaud
9f49f00feb Extend sparse-determinant unitests 2015-02-16 19:09:48 +01:00
Florian George
f56d452c7e Enable atv in Blaze Benchmark 2014-05-04 17:07:17 +02:00
Florian George
af79b158a1 Use trans(X) instead of X.transpose() in Blaze Benchmark 2014-05-04 17:06:34 +02:00
433 changed files with 19636 additions and 5997 deletions

View File

@@ -147,6 +147,12 @@ if(NOT MSVC)
ei_add_cxx_compiler_flag("-Wenum-conversion")
ei_add_cxx_compiler_flag("-Wc++11-extensions")
# -Wshadow is insanely too strict with gcc, hopefully it will become usable with gcc 6
# if(NOT CMAKE_COMPILER_IS_GNUCXX OR (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "5.0.0"))
if(NOT CMAKE_COMPILER_IS_GNUCXX)
ei_add_cxx_compiler_flag("-Wshadow")
endif()
ei_add_cxx_compiler_flag("-Wno-psabi")
ei_add_cxx_compiler_flag("-Wno-variadic-macros")
ei_add_cxx_compiler_flag("-Wno-long-long")
@@ -168,6 +174,11 @@ if(NOT MSVC)
else()
ei_add_cxx_compiler_flag("-ansi")
endif()
if(ANDROID_NDK)
ei_add_cxx_compiler_flag("-pie")
ei_add_cxx_compiler_flag("-fPIE")
endif()
set(CMAKE_REQUIRED_FLAGS "")
@@ -208,7 +219,7 @@ if(NOT MSVC)
endif()
option(EIGEN_TEST_FMA "Enable/Disable FMA in tests/examples" OFF)
if(EIGEN_TEST_FMA)
if(EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma")
message(STATUS "Enabling FMA in tests/examples")
endif()
@@ -227,7 +238,12 @@ if(NOT MSVC)
option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
if(EIGEN_TEST_NEON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a8")
if(EIGEN_TEST_FMA)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon-vfpv4")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp")
message(STATUS "Enabling NEON in tests/examples")
endif()
@@ -321,7 +337,7 @@ if(EIGEN_TEST_NO_EXCEPTIONS)
message(STATUS "Disabling exceptions in tests/examples")
endif()
option(EIGEN_TEST_C++0x "Enables all C++0x features." OFF)
option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tensor module)." OFF)
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})

View File

@@ -24,9 +24,15 @@
#ifdef EIGEN_INTERNAL_DEBUGGING
#undef EIGEN_INTERNAL_DEBUGGING
#endif
// Do not try to vectorize on CUDA!
#ifndef EIGEN_DONT_VECTORIZE
#define EIGEN_DONT_VECTORIZE
#endif
#ifdef EIGEN_EXCEPTIONS
#undef EIGEN_EXCEPTIONS
#endif
// All functions callable from CUDA code must be qualified with __device__
#define EIGEN_DEVICE_FUNC __host__ __device__
@@ -67,9 +73,9 @@
// and inclusion of their respective header files
#include "src/Core/util/MKL_support.h"
// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into
// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks
#if !EIGEN_ALIGN
// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
#if EIGEN_MAX_ALIGN_BYTES==0
#ifndef EIGEN_DONT_VECTORIZE
#define EIGEN_DONT_VECTORIZE
#endif
@@ -125,6 +131,12 @@
#define EIGEN_VECTORIZE_SSE4_1
#define EIGEN_VECTORIZE_SSE4_2
#endif
#ifdef __AVX2__
#define EIGEN_VECTORIZE_AVX2
#endif
#ifdef __FMA__
#define EIGEN_VECTORIZE_FMA
#endif
// include files
@@ -178,7 +190,7 @@
#undef bool
#undef vector
#undef pixel
#elif defined __ARM_NEON
#elif (defined __ARM_NEON) || (defined __ARM_NEON__)
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_NEON
#include <arm_neon.h>
@@ -294,15 +306,19 @@ using std::ptrdiff_t;
// Use AVX for floats and doubles, SSE for integers
#include "src/Core/arch/SSE/PacketMath.h"
#include "src/Core/arch/SSE/Complex.h"
#include "src/Core/arch/SSE/MathFunctions.h"
#include "src/Core/arch/AVX/PacketMath.h"
#include "src/Core/arch/AVX/MathFunctions.h"
#include "src/Core/arch/AVX/Complex.h"
#include "src/Core/arch/AVX/TypeCasting.h"
#elif defined EIGEN_VECTORIZE_SSE
#include "src/Core/arch/SSE/PacketMath.h"
#include "src/Core/arch/SSE/MathFunctions.h"
#include "src/Core/arch/SSE/Complex.h"
#include "src/Core/arch/SSE/TypeCasting.h"
#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
#include "src/Core/arch/AltiVec/PacketMath.h"
#include "src/Core/arch/AltiVec/MathFunctions.h"
#include "src/Core/arch/AltiVec/Complex.h"
#elif defined EIGEN_VECTORIZE_NEON
#include "src/Core/arch/NEON/PacketMath.h"
@@ -343,7 +359,6 @@ using std::ptrdiff_t;
#include "src/Core/NestByValue.h"
// #include "src/Core/ForceAlignedAccess.h"
// #include "src/Core/Flagged.h"
#include "src/Core/ReturnByValue.h"
#include "src/Core/NoAlias.h"
@@ -375,7 +390,6 @@ using std::ptrdiff_t;
#include "src/Core/IO.h"
#include "src/Core/Swap.h"
#include "src/Core/CommaInitializer.h"
#include "src/Core/ProductBase.h"
#include "src/Core/GeneralProduct.h"
#include "src/Core/Solve.h"
#include "src/Core/Inverse.h"

View File

@@ -9,10 +9,6 @@
#include "LU"
#include <limits>
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
/** \defgroup Geometry_Module Geometry module
*
*

View File

@@ -12,24 +12,26 @@
* This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a squared matrix, usually very large and sparse.
* Those solvers are accessible via the following classes:
* - ConjugateGradient for selfadjoint (hermitian) matrices,
* - LeastSquaresConjugateGradient for rectangular least-square problems,
* - BiCGSTAB for general square matrices.
*
* These iterative solvers are associated with some preconditioners:
* - IdentityPreconditioner - not really useful
* - DiagonalPreconditioner - also called JAcobi preconditioner, work very well on diagonal dominant matrices.
* - IncompleteILUT - incomplete LU factorization with dual thresholding
* - DiagonalPreconditioner - also called Jacobi preconditioner, work very well on diagonal dominant matrices.
* - IncompleteLUT - incomplete LU factorization with dual thresholding
*
* Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport.
*
* \code
* #include <Eigen/IterativeLinearSolvers>
* \endcode
\code
#include <Eigen/IterativeLinearSolvers>
\endcode
*/
#include "src/IterativeLinearSolvers/SolveWithGuess.h"
#include "src/IterativeLinearSolvers/IterativeSolverBase.h"
#include "src/IterativeLinearSolvers/BasicPreconditioners.h"
#include "src/IterativeLinearSolvers/ConjugateGradient.h"
#include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h"
#include "src/IterativeLinearSolvers/BiCGSTAB.h"
#include "src/IterativeLinearSolvers/IncompleteLUT.h"

View File

@@ -11,9 +11,9 @@
* - \ref SparseQR_Module
* - \ref IterativeLinearSolvers_Module
*
* \code
* #include <Eigen/Sparse>
* \endcode
\code
#include <Eigen/Sparse>
\endcode
*/
#include "SparseCore"

View File

@@ -226,6 +226,11 @@ template<typename _MatrixType, int _UpLo> class LDLT
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
/** \internal
* Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U.
@@ -309,9 +314,9 @@ template<> struct ldlt_inplace<Lower>
}
// In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot
// was smaller than the cutoff value. However, soince LDLT is not rank-revealing
// we should only make sure we do not introduce INF or NaN values.
// LAPACK also uses 0 as the cutoff value.
// was smaller than the cutoff value. However, since LDLT is not rank-revealing
// we should only make sure that we do not introduce INF or NaN values.
// Remark that LAPACK also uses 0 as the cutoff value.
RealScalar realAkk = numext::real(mat.coeffRef(k,k));
if((rs>0) && (abs(realAkk) > RealScalar(0)))
A21 /= realAkk;
@@ -424,6 +429,8 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
template<typename MatrixType, int _UpLo>
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
{
check_template_parameters();
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
@@ -447,7 +454,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
*/
template<typename MatrixType, int _UpLo>
template<typename Derived>
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename NumTraits<typename MatrixType::Scalar>::Real& sigma)
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename LDLT<MatrixType,_UpLo>::RealScalar& sigma)
{
typedef typename TranspositionType::StorageIndex IndexType;
const Index size = w.rows();
@@ -490,9 +497,9 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) cons
const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD());
// In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon
// as motivated by LAPACK's xGELSS:
// RealScalar tolerance = numext::maxi(vectorD.array().abs().maxCoeff() *NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
// RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
// However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
// diagonal element is not well justified and to numerical issues in some cases.
// diagonal element is not well justified and leads to numerical issues in some cases.
// Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();

View File

@@ -170,6 +170,12 @@ template<typename _MatrixType, int _UpLo> class LLT
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
/** \internal
* Used to compute and store L
* The strict upper part is not used and even not initialized.
@@ -377,6 +383,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
template<typename MatrixType, int _UpLo>
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
{
check_template_parameters();
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
m_matrix.resize(size, size);

View File

@@ -60,7 +60,7 @@ template<> struct mkl_llt<EIGTYPE> \
lda = m.outerStride(); \
\
info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \
info = (info==0) ? Success : NumericalIssue; \
info = (info==0) ? -1 : info>0 ? info-1 : size; \
return info; \
} \
}; \

View File

@@ -277,6 +277,7 @@ class CholmodBase : public SparseSolverBase<Derived>
if(!x_cd)
{
this->m_info = NumericalIssue;
return;
}
// TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
dest = Matrix<Scalar,Dest::RowsAtCompileTime,Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),b.rows(),b.cols());
@@ -298,6 +299,7 @@ class CholmodBase : public SparseSolverBase<Derived>
if(!x_cs)
{
this->m_info = NumericalIssue;
return;
}
// TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
dest = viewAsEigen<DestScalar,DestOptions,DestIndex>(*x_cs);
@@ -367,7 +369,7 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
CholmodSimplicialLLT(const MatrixType& matrix) : Base()
{
init();
compute(matrix);
this->compute(matrix);
}
~CholmodSimplicialLLT() {}
@@ -414,7 +416,7 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
CholmodSimplicialLDLT(const MatrixType& matrix) : Base()
{
init();
compute(matrix);
this->compute(matrix);
}
~CholmodSimplicialLDLT() {}
@@ -459,7 +461,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
CholmodSupernodalLLT(const MatrixType& matrix) : Base()
{
init();
compute(matrix);
this->compute(matrix);
}
~CholmodSupernodalLLT() {}
@@ -506,7 +508,7 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom
CholmodDecomposition(const MatrixType& matrix) : Base()
{
init();
compute(matrix);
this->compute(matrix);
}
~CholmodDecomposition() {}

View File

@@ -24,6 +24,9 @@ namespace Eigen {
* API for the %Matrix class provides easy access to linear-algebra
* operations.
*
* See documentation of class Matrix for detailed information on the template parameters
* storage layout.
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
*
@@ -74,7 +77,7 @@ class Array
{
return Base::operator=(other);
}
/** Set all the entries to \a value.
* \sa DenseBase::setConstant(), DenseBase::fill()
*/
@@ -101,7 +104,7 @@ class Array
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array& operator=(const ArrayBase<OtherDerived>& other)
EIGEN_STRONG_INLINE Array& operator=(const DenseBase<OtherDerived>& other)
{
return Base::_set(other);
}
@@ -145,6 +148,7 @@ class Array
#endif
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC
Array(Array&& other)
: Base(std::move(other))
{
@@ -152,6 +156,7 @@ class Array
if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
Base::_set_noalias(other);
}
EIGEN_DEVICE_FUNC
Array& operator=(Array&& other)
{
other.swap(*this);
@@ -220,43 +225,18 @@ class Array
m_storage.data()[3] = val3;
}
/** Constructor copying the value of the expression \a other */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const ArrayBase<OtherDerived>& other)
: Base(other.rows() * other.cols(), other.rows(), other.cols())
{
Base::_check_template_params();
Base::_set_noalias(other);
}
/** Copy constructor */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const Array& other)
: Base(other.rows() * other.cols(), other.rows(), other.cols())
{
Base::_check_template_params();
Base::_set_noalias(other);
}
/** Copy constructor with in-place evaluation */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const ReturnByValue<OtherDerived>& other)
{
Base::_check_template_params();
Base::resize(other.rows(), other.cols());
other.evalTo(*this);
}
: Base(other)
{ }
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other)
: Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
{
Base::_check_template_params();
Base::_resize_to_match(other);
*this = other;
}
: Base(other.derived())
{ }
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }

View File

@@ -83,22 +83,10 @@ template<typename Derived> class ArrayBase
#endif // not EIGEN_PARSED_BY_DOXYGEN
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal the plain matrix type corresponding to this expression. Note that is not necessarily
* exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const
* reference to a matrix, not a matrix! It is however guaranteed that the return type of eval() is either
* PlainObject or const PlainObject&.
*/
typedef Array<typename internal::traits<Derived>::Scalar,
internal::traits<Derived>::RowsAtCompileTime,
internal::traits<Derived>::ColsAtCompileTime,
AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
internal::traits<Derived>::MaxRowsAtCompileTime,
internal::traits<Derived>::MaxColsAtCompileTime
> PlainObject;
typedef typename Base::PlainObject PlainObject;
/** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
#endif // not EIGEN_PARSED_BY_DOXYGEN
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase

View File

@@ -52,7 +52,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
const Scalar
>::type ScalarWithConstIfNotLvalue;
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType;
EIGEN_DEVICE_FUNC
explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
@@ -149,7 +149,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
/** Forwards the resizing request to the nested expression
* \sa DenseBase::resize(Index,Index)*/
EIGEN_DEVICE_FUNC
void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }
void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); }
protected:
NestedExpressionType m_expression;
@@ -195,10 +195,10 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
const Scalar
>::type ScalarWithConstIfNotLvalue;
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType;
EIGEN_DEVICE_FUNC
explicit inline MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {}
explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
EIGEN_DEVICE_FUNC
inline Index rows() const { return m_expression.rows(); }
@@ -288,7 +288,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
/** Forwards the resizing request to the nested expression
* \sa DenseBase::resize(Index,Index)*/
EIGEN_DEVICE_FUNC
void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }
void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); }
protected:
NestedExpressionType m_expression;

View File

@@ -28,18 +28,22 @@ template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
struct copy_using_evaluator_traits
{
typedef typename DstEvaluator::XprType Dst;
typedef typename Dst::Scalar DstScalar;
// TODO distinguish between linear traversal and inner-traversals
typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type PacketType;
enum {
DstFlags = DstEvaluator::Flags,
SrcFlags = SrcEvaluator::Flags
SrcFlags = SrcEvaluator::Flags,
RequiredAlignment = unpacket_traits<PacketType>::alignment
};
public:
enum {
DstIsAligned = DstFlags & AlignedBit,
DstAlignment = DstEvaluator::Alignment,
SrcAlignment = SrcEvaluator::Alignment,
DstHasDirectAccess = DstFlags & DirectAccessBit,
SrcIsAligned = SrcFlags & AlignedBit,
JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned
JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
};
private:
@@ -51,7 +55,7 @@ private:
: int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
: int(Dst::MaxRowsAtCompileTime),
MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
PacketSize = packet_traits<typename Dst::Scalar>::size
PacketSize = unpacket_traits<PacketType>::size
};
enum {
@@ -62,10 +66,10 @@ private:
&& (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
&& (functor_traits<AssignFunc>::PacketAccess),
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
&& int(DstIsAligned) && int(SrcIsAligned),
&& int(JointAlignment)>=int(RequiredAlignment),
MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
&& (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
&& ((int(DstAlignment)>=int(RequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */
MaySliceVectorize = MightVectorize && DstHasDirectAccess
@@ -107,8 +111,8 @@ public:
: int(NoUnrolling)
)
: int(Traversal) == int(LinearVectorizedTraversal)
? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling)
: int(NoUnrolling) )
? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(RequiredAlignment)) ? int(CompleteUnrolling)
: int(NoUnrolling) )
: int(Traversal) == int(LinearTraversal)
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(NoUnrolling) )
@@ -124,8 +128,9 @@ public:
EIGEN_DEBUG_VAR(DstFlags)
EIGEN_DEBUG_VAR(SrcFlags)
std::cerr.unsetf(std::ios::hex);
EIGEN_DEBUG_VAR(DstIsAligned)
EIGEN_DEBUG_VAR(SrcIsAligned)
EIGEN_DEBUG_VAR(DstAlignment)
EIGEN_DEBUG_VAR(SrcAlignment)
EIGEN_DEBUG_VAR(RequiredAlignment)
EIGEN_DEBUG_VAR(JointAlignment)
EIGEN_DEBUG_VAR(InnerSize)
EIGEN_DEBUG_VAR(InnerMaxSize)
@@ -225,6 +230,7 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
// FIXME: this is not very clean, perhaps this information should be provided by the kernel?
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
typedef typename DstEvaluatorType::XprType DstXprType;
typedef typename Kernel::PacketType PacketType;
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
@@ -234,8 +240,8 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
kernel.template assignPacketByOuterInner<Aligned, JointAlignment>(outer, inner);
enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
kernel.template assignPacketByOuterInner<Aligned, JointAlignment, PacketType>(outer, inner);
enum { NextIndex = Index + unpacket_traits<PacketType>::size };
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
}
};
@@ -249,10 +255,11 @@ struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
template<typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling
{
typedef typename Kernel::PacketType PacketType;
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
{
kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, Index_);
enum { NextIndex = Index_ + packet_traits<typename Kernel::Scalar>::size };
kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, Index_);
enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
}
};
@@ -360,20 +367,23 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
const Index size = kernel.size();
typedef packet_traits<typename Kernel::Scalar> PacketTraits;
typedef typename Kernel::Scalar Scalar;
typedef typename Kernel::PacketType PacketType;
enum {
packetSize = PacketTraits::size,
dstIsAligned = int(Kernel::AssignmentTraits::DstIsAligned),
dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned,
requestedAlignment = Kernel::AssignmentTraits::RequiredAlignment,
packetSize = unpacket_traits<PacketType>::size,
dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
: int(Kernel::AssignmentTraits::DstAlignment),
srcAlignment = Kernel::AssignmentTraits::JointAlignment
};
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&kernel.dstEvaluator().coeffRef(0), size);
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(&kernel.dstEvaluator().coeffRef(0), size);
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
for(Index index = alignedStart; index < alignedEnd; index += packetSize)
kernel.template assignPacket<dstAlignment, srcAlignment>(index);
kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
}
@@ -403,14 +413,15 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin
template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
{
typedef typename Kernel::PacketType PacketType;
EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
{
const Index innerSize = kernel.innerSize();
const Index outerSize = kernel.outerSize();
const Index packetSize = packet_traits<typename Kernel::Scalar>::size;
const Index packetSize = unpacket_traits<PacketType>::size;
for(Index outer = 0; outer < outerSize; ++outer)
for(Index inner = 0; inner < innerSize; inner+=packetSize)
kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, inner);
kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, inner);
}
};
@@ -471,18 +482,27 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
{
EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
{
typedef packet_traits<typename Kernel::Scalar> PacketTraits;
typedef typename Kernel::Scalar Scalar;
typedef typename Kernel::PacketType PacketType;
enum {
packetSize = PacketTraits::size,
alignable = PacketTraits::AlignedOnScalar,
dstAlignment = alignable ? Aligned : int(Kernel::AssignmentTraits::DstIsAligned)
packetSize = unpacket_traits<PacketType>::size,
requestedAlignment = int(Kernel::AssignmentTraits::RequiredAlignment),
alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
dstAlignment = alignable ? int(requestedAlignment)
: int(Kernel::AssignmentTraits::DstAlignment)
};
const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0);
if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0)
{
// the pointer is not aligend-on scalar, so alignment is not possible
return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
}
const Index packetAlignedMask = packetSize - 1;
const Index innerSize = kernel.innerSize();
const Index outerSize = kernel.outerSize();
const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
Index alignedStart = ((!alignable) || Kernel::AssignmentTraits::DstIsAligned) ? 0
: internal::first_aligned(&kernel.dstEvaluator().coeffRef(0,0), innerSize);
Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
for(Index outer = 0; outer < outerSize; ++outer)
{
@@ -493,7 +513,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
// do the vectorizable part of the assignment
for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
kernel.template assignPacketByOuterInner<dstAlignment, Unaligned>(outer, inner);
kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
// do the non-vectorizable part of the assignment
for(Index inner = alignedEnd; inner<innerSize ; ++inner)
@@ -527,6 +547,7 @@ public:
typedef typename DstEvaluatorType::Scalar Scalar;
typedef typename DstEvaluatorType::StorageIndex StorageIndex;
typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
typedef typename AssignmentTraits::PacketType PacketType;
EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
@@ -571,24 +592,24 @@ public:
}
template<int StoreMode, int LoadMode>
template<int StoreMode, int LoadMode, typename PacketType>
EIGEN_DEVICE_FUNC void assignPacket(Index row, Index col)
{
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode>(row,col));
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
}
template<int StoreMode, int LoadMode>
template<int StoreMode, int LoadMode, typename PacketType>
EIGEN_DEVICE_FUNC void assignPacket(Index index)
{
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode>(index));
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
}
template<int StoreMode, int LoadMode>
template<int StoreMode, int LoadMode, typename PacketType>
EIGEN_DEVICE_FUNC void assignPacketByOuterInner(Index outer, Index inner)
{
Index row = rowIndexByOuterInner(outer, inner);
Index col = colIndexByOuterInner(outer, inner);
assignPacket<StoreMode,LoadMode>(row, col);
assignPacket<StoreMode,LoadMode,PacketType>(row, col);
}
EIGEN_DEVICE_FUNC static Index rowIndexByOuterInner(Index outer, Index inner)
@@ -626,8 +647,8 @@ EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const S
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
typedef evaluator<DstXprType> DstEvaluatorType;
typedef evaluator<SrcXprType> SrcEvaluatorType;
DstEvaluatorType dstEvaluator(dst);
SrcEvaluatorType srcEvaluator(src);
@@ -749,6 +770,26 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src)
call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>());
}
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
// TODO check whether this is the right place to perform these checks:
EIGEN_STATIC_ASSERT_LVALUE(Dst)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
Assignment<Dst,Src,Func>::run(dst, src, func);
}
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
{
call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>());
}
// forward declaration
template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
@@ -776,7 +817,6 @@ struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar>
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
src.evalTo(dst);
}
};

254
Eigen/src/Core/Assign_MKL.h Normal file → Executable file
View File

@@ -1,6 +1,7 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
@@ -37,17 +38,13 @@ namespace Eigen {
namespace internal {
template<typename Op> struct vml_call
{ enum { IsSupported = 0 }; };
template<typename Dst, typename Src, typename UnaryOp>
template<typename Dst, typename Src>
class vml_assign_traits
{
private:
enum {
DstHasDirectAccess = Dst::Flags & DirectAccessBit,
SrcHasDirectAccess = Src::Flags & DirectAccessBit,
StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
: int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
@@ -57,165 +54,118 @@ class vml_assign_traits
: int(Dst::MaxRowsAtCompileTime),
MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
&& Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
MayEnableVml = MightEnableVml && LargeEnough,
MayLinearize = MayEnableVml && MightLinearize
LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD
};
public:
enum {
Traversal = MayLinearize ? LinearVectorizedTraversal
: MayEnableVml ? InnerVectorizedTraversal
: DefaultTraversal
EnableVml = MightEnableVml && LargeEnough,
Traversal = MightLinearize ? LinearTraversal : DefaultTraversal
};
};
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
struct vml_assign_impl
: assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
{
};
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
{
typedef typename Derived1::Scalar Scalar;
static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
{
// in case we want to (or have to) skip VML at runtime we can call:
// assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer) {
const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :
&(src.nestedExpression().coeffRef(0, outer));
Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
}
}
};
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
{
static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
{
// in case we want to (or have to) skip VML at runtime we can call:
// assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
}
};
// Macroses
#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \
template<typename Derived1, typename Derived2, typename UnaryOp> \
struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> { \
static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \
vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \
} \
};
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)
#define EIGEN_PP_EXPAND(ARG) ARG
#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
#define EIGEN_MKL_VML_MODE VML_HA
#define EIGEN_VMLMODE_EXPAND_LA , VML_HA
#else
#define EIGEN_MKL_VML_MODE VML_LA
#define EIGEN_VMLMODE_EXPAND_LA , VML_LA
#endif
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
enum { IsSupported = 1 }; \
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \
} \
#define EIGEN_VMLMODE_EXPAND__
#define EIGEN_VMLMODE_PREFIX_LA vm
#define EIGEN_VMLMODE_PREFIX__ v
#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_,VMLMODE)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
template< typename DstXprType, typename SrcXprNested> \
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE>, \
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml,EIGENTYPE>::type> { \
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE> &/*func*/) { \
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
(VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \
} else { \
const Index outerSize = dst.outerSize(); \
for(Index outer = 0; outer < outerSize; ++outer) { \
const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \
&(src.nestedExpression().coeffRef(0, outer)); \
EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, \
(VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \
} \
} \
} \
}; \
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin, Sin, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin, Asin, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh, Sinh, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos, Cos, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos, Acos, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh, Cosh, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan, Tan, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan, Atan, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh, Tanh, LA)
// EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp, Exp, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log, Ln, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt, Sqrt, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
template< typename DstXprType, typename SrcXprNested> \
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE>, \
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml,EIGENTYPE>::type> { \
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE> &/*func*/) { \
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.functor().m_exponent); \
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \
{ \
VMLOP( dst.size(), (const VMLTYPE*)src.nestedExpression().data(), exponent, \
(VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \
} else { \
const Index outerSize = dst.outerSize(); \
for(Index outer = 0; outer < outerSize; ++outer) { \
const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \
&(src.nestedExpression().coeffRef(0, outer)); \
EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \
(VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \
} \
} \
} \
};
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
enum { IsSupported = 1 }; \
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \
} \
};
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
enum { IsSupported = 1 }; \
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
EIGENTYPE exponent = func.m_exponent; \
MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \
(VMLTYPE*)dst, &vmlMode); \
} \
};
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(atan, Atan)
//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
// The vm*powx functions are not avaibale in the windows version of MKL.
#ifndef _WIN32
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
#endif
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float, float, LA)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double, double, LA)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8, LA)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA)
} // end namespace internal

View File

@@ -55,7 +55,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
typedef typename traits<XprType>::Scalar Scalar;
typedef typename traits<XprType>::StorageKind StorageKind;
typedef typename traits<XprType>::XprKind XprKind;
typedef typename nested<XprType>::type XprTypeNested;
typedef typename ref_selector<XprType>::type XprTypeNested;
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
enum{
MatrixRows = traits<XprType>::RowsAtCompileTime,
@@ -81,14 +81,16 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
? int(outer_stride_at_compile_time<XprType>::ret)
: int(inner_stride_at_compile_time<XprType>::ret),
// IsAligned is needed by MapBase's assertions
// We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
IsAligned = 0,
// FIXME, this traits is rather specialized for dense object and it needs to be cleaned further
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit
Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit,
// FIXME DirectAccessBit should not be handled by expressions
//
// Alignment is needed by MapBase's assertions
// We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
Alignment = 0
};
};
@@ -124,26 +126,26 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
/** Fixed-size constructor
*/
EIGEN_DEVICE_FUNC
inline Block(XprType& xpr, Index a_startRow, Index a_startCol)
: Impl(xpr, a_startRow, a_startCol)
inline Block(XprType& xpr, Index startRow, Index startCol)
: Impl(xpr, startRow, startCol)
{
EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
eigen_assert(a_startRow >= 0 && BlockRows >= 1 && a_startRow + BlockRows <= xpr.rows()
&& a_startCol >= 0 && BlockCols >= 1 && a_startCol + BlockCols <= xpr.cols());
eigen_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows()
&& startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols());
}
/** Dynamic-size constructor
*/
EIGEN_DEVICE_FUNC
inline Block(XprType& xpr,
Index a_startRow, Index a_startCol,
Index startRow, Index startCol,
Index blockRows, Index blockCols)
: Impl(xpr, a_startRow, a_startCol, blockRows, blockCols)
: Impl(xpr, startRow, startCol, blockRows, blockCols)
{
eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
&& (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
eigen_assert(a_startRow >= 0 && blockRows >= 0 && a_startRow <= xpr.rows() - blockRows
&& a_startCol >= 0 && blockCols >= 0 && a_startCol <= xpr.cols() - blockCols);
eigen_assert(startRow >= 0 && blockRows >= 0 && startRow <= xpr.rows() - blockRows
&& startCol >= 0 && blockCols >= 0 && startCol <= xpr.cols() - blockCols);
}
};
@@ -159,10 +161,10 @@ class BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, Dense>
typedef Impl Base;
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol) : Impl(xpr, a_startRow, a_startCol) {}
EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {}
EIGEN_DEVICE_FUNC
inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol, Index blockRows, Index blockCols)
: Impl(xpr, a_startRow, a_startCol, blockRows, blockCols) {}
inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
: Impl(xpr, startRow, startCol, blockRows, blockCols) {}
};
namespace internal {
@@ -198,8 +200,8 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
/** Fixed-size constructor
*/
EIGEN_DEVICE_FUNC
inline BlockImpl_dense(XprType& xpr, Index a_startRow, Index a_startCol)
: m_xpr(xpr), m_startRow(a_startRow), m_startCol(a_startCol),
inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
: m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
m_blockRows(BlockRows), m_blockCols(BlockCols)
{}
@@ -207,9 +209,9 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
*/
EIGEN_DEVICE_FUNC
inline BlockImpl_dense(XprType& xpr,
Index a_startRow, Index a_startCol,
Index startRow, Index startCol,
Index blockRows, Index blockCols)
: m_xpr(xpr), m_startRow(a_startRow), m_startCol(a_startCol),
: m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
m_blockRows(blockRows), m_blockCols(blockCols)
{}

View File

@@ -80,7 +80,7 @@ struct any_unroller<Derived, Dynamic>
template<typename Derived>
inline bool DenseBase<Derived>::all() const
{
typedef typename internal::evaluator<Derived>::type Evaluator;
typedef internal::evaluator<Derived> Evaluator;
enum {
unroll = SizeAtCompileTime != Dynamic
&& Evaluator::CoeffReadCost != Dynamic
@@ -106,7 +106,7 @@ inline bool DenseBase<Derived>::all() const
template<typename Derived>
inline bool DenseBase<Derived>::any() const
{
typedef typename internal::evaluator<Derived>::type Evaluator;
typedef internal::evaluator<Derived> Evaluator;
enum {
unroll = SizeAtCompileTime != Dynamic
&& Evaluator::CoeffReadCost != Dynamic

View File

@@ -105,6 +105,9 @@ struct CommaInitializer
EIGEN_DEVICE_FUNC
inline ~CommaInitializer()
#if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS
throw(Eigen::eigen_assert_exception)
#endif
{
eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows()
&& m_col == m_xpr.cols()

View File

@@ -28,13 +28,9 @@ struct storage_kind_to_evaluator_kind {
// It can be Dense, Sparse, Triangular, Diagonal, SelfAdjoint, Band, etc.
template<typename StorageKind> struct storage_kind_to_shape;
template<> struct storage_kind_to_shape<Dense> { typedef DenseShape Shape; };
// FIXME Is this necessary? And why was it not before refactoring???
template<> struct storage_kind_to_shape<PermutationStorage> { typedef PermutationShape Shape; };
template<> struct storage_kind_to_shape<Dense> { typedef DenseShape Shape; };
template<> struct storage_kind_to_shape<PermutationStorage> { typedef PermutationShape Shape; };
template<> struct storage_kind_to_shape<TranspositionsStorage> { typedef TranspositionsShape Shape; };
// Evaluators have to be specialized with respect to various criteria such as:
// - storage/structure/shape
@@ -63,11 +59,6 @@ template< typename T,
template<typename T>
struct evaluator_traits_base
{
// TODO check whether these two indirections are really needed.
// Basically, if nobody overwrite type and nestedType, then, they can be dropped
// typedef evaluator<T> type;
// typedef evaluator<T> nestedType;
// by default, get evaluator kind and shape from storage
typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind;
typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape;
@@ -94,27 +85,28 @@ struct evaluator : public unary_evaluator<T>
// TODO: Think about const-correctness
template<typename T>
struct evaluator<const T>
: evaluator<T>
{ };
// ---------- base class for all writable evaluators ----------
// TODO this class does not seem to be necessary anymore
template<typename ExpressionType>
struct evaluator_base
{
// typedef typename evaluator_traits<ExpressionType>::type type;
// typedef typename evaluator_traits<ExpressionType>::nestedType nestedType;
typedef evaluator<ExpressionType> type;
typedef evaluator<ExpressionType> nestedType;
EIGEN_DEVICE_FUNC
explicit evaluator(const T& xpr) : evaluator<T>(xpr) {}
};
// ---------- base class for all evaluators ----------
template<typename ExpressionType>
struct evaluator_base : public noncopyable
{
// FIXME is it really usefull?
typedef typename traits<ExpressionType>::StorageIndex StorageIndex;
// TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices.
typedef traits<ExpressionType> ExpressionTraits;
enum {
Alignment = 0
};
};
// -------------------- Matrix and Array --------------------
@@ -131,8 +123,6 @@ struct evaluator<PlainObjectBase<Derived> >
typedef PlainObjectBase<Derived> PlainObjectType;
typedef typename PlainObjectType::Scalar Scalar;
typedef typename PlainObjectType::CoeffReturnType CoeffReturnType;
typedef typename PlainObjectType::PacketScalar PacketScalar;
typedef typename PlainObjectType::PacketReturnType PacketReturnType;
enum {
IsRowMajor = PlainObjectType::IsRowMajor,
@@ -141,8 +131,8 @@ struct evaluator<PlainObjectBase<Derived> >
ColsAtCompileTime = PlainObjectType::ColsAtCompileTime,
CoeffReadCost = NumTraits<Scalar>::ReadCost,
Flags = compute_matrix_evaluator_flags< Scalar,Derived::RowsAtCompileTime,Derived::ColsAtCompileTime,
Derived::Options,Derived::MaxRowsAtCompileTime,Derived::MaxColsAtCompileTime>::ret
Flags = traits<Derived>::EvaluatorFlags,
Alignment = traits<Derived>::Alignment
};
EIGEN_DEVICE_FUNC evaluator()
@@ -182,36 +172,36 @@ struct evaluator<PlainObjectBase<Derived> >
return const_cast<Scalar*>(m_data)[index];
}
template<int LoadMode>
PacketReturnType packet(Index row, Index col) const
template<int LoadMode, typename PacketType>
PacketType packet(Index row, Index col) const
{
if (IsRowMajor)
return ploadt<PacketScalar, LoadMode>(m_data + row * m_outerStride.value() + col);
return ploadt<PacketType, LoadMode>(m_data + row * m_outerStride.value() + col);
else
return ploadt<PacketScalar, LoadMode>(m_data + row + col * m_outerStride.value());
return ploadt<PacketType, LoadMode>(m_data + row + col * m_outerStride.value());
}
template<int LoadMode>
PacketReturnType packet(Index index) const
template<int LoadMode, typename PacketType>
PacketType packet(Index index) const
{
return ploadt<PacketScalar, LoadMode>(m_data + index);
return ploadt<PacketType, LoadMode>(m_data + index);
}
template<int StoreMode>
void writePacket(Index row, Index col, const PacketScalar& x)
template<int StoreMode,typename PacketType>
void writePacket(Index row, Index col, const PacketType& x)
{
if (IsRowMajor)
return pstoret<Scalar, PacketScalar, StoreMode>
return pstoret<Scalar, PacketType, StoreMode>
(const_cast<Scalar*>(m_data) + row * m_outerStride.value() + col, x);
else
return pstoret<Scalar, PacketScalar, StoreMode>
return pstoret<Scalar, PacketType, StoreMode>
(const_cast<Scalar*>(m_data) + row + col * m_outerStride.value(), x);
}
template<int StoreMode>
void writePacket(Index index, const PacketScalar& x)
template<int StoreMode, typename PacketType>
void writePacket(Index index, const PacketType& x)
{
return pstoret<Scalar, PacketScalar, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
}
protected:
@@ -229,7 +219,7 @@ struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
{
typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
evaluator() {}
EIGEN_DEVICE_FUNC evaluator() {}
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m)
: evaluator<PlainObjectBase<XprType> >(m)
@@ -242,7 +232,7 @@ struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
{
typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
evaluator() {}
EIGEN_DEVICE_FUNC evaluator() {}
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m)
: evaluator<PlainObjectBase<XprType> >(m)
@@ -259,15 +249,14 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased>
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
Flags = evaluator<ArgType>::Flags ^ RowMajorBit
Flags = evaluator<ArgType>::Flags ^ RowMajorBit,
Alignment = evaluator<ArgType>::Alignment
};
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {}
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketScalar PacketScalar;
typedef typename XprType::PacketReturnType PacketReturnType;
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{
@@ -289,32 +278,32 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased>
return m_argImpl.coeffRef(index);
}
template<int LoadMode>
PacketReturnType packet(Index row, Index col) const
template<int LoadMode, typename PacketType>
PacketType packet(Index row, Index col) const
{
return m_argImpl.template packet<LoadMode>(col, row);
return m_argImpl.template packet<LoadMode,PacketType>(col, row);
}
template<int LoadMode>
PacketReturnType packet(Index index) const
template<int LoadMode, typename PacketType>
PacketType packet(Index index) const
{
return m_argImpl.template packet<LoadMode>(index);
return m_argImpl.template packet<LoadMode,PacketType>(index);
}
template<int StoreMode>
void writePacket(Index row, Index col, const PacketScalar& x)
template<int StoreMode, typename PacketType>
void writePacket(Index row, Index col, const PacketType& x)
{
m_argImpl.template writePacket<StoreMode>(col, row, x);
m_argImpl.template writePacket<StoreMode,PacketType>(col, row, x);
}
template<int StoreMode>
void writePacket(Index index, const PacketScalar& x)
template<int StoreMode, typename PacketType>
void writePacket(Index index, const PacketType& x)
{
m_argImpl.template writePacket<StoreMode>(index, x);
m_argImpl.template writePacket<StoreMode,PacketType>(index, x);
}
protected:
typename evaluator<ArgType>::nestedType m_argImpl;
evaluator<ArgType> m_argImpl;
};
// -------------------- CwiseNullaryOp --------------------
@@ -335,7 +324,8 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
& ( HereditaryBits
| (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
| (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
| (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit) // FIXME EvalBeforeNestingBit should be needed anymore
| (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit), // FIXME EvalBeforeNestingBit should be needed anymore
Alignment = 0 // FIXME alignment should not matter here, perhaps we could set it to AlignMax??
};
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n)
@@ -343,7 +333,6 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
{ }
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketScalar PacketScalar;
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{
@@ -355,16 +344,16 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
return m_functor(index);
}
template<int LoadMode>
PacketScalar packet(Index row, Index col) const
template<int LoadMode, typename PacketType>
PacketType packet(Index row, Index col) const
{
return m_functor.packetOp(row, col);
return m_functor.template packetOp<Index,PacketType>(row, col);
}
template<int LoadMode>
PacketScalar packet(Index index) const
template<int LoadMode, typename PacketType>
PacketType packet(Index index) const
{
return m_functor.packetOp(index);
return m_functor.template packetOp<Index,PacketType>(index);
}
protected:
@@ -382,9 +371,9 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
Flags = evaluator<ArgType>::Flags & (
HereditaryBits | LinearAccessBit | AlignedBit
| (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0))
Flags = evaluator<ArgType>::Flags
& (HereditaryBits | LinearAccessBit | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
Alignment = evaluator<ArgType>::Alignment
};
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
@@ -393,7 +382,6 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
{ }
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketScalar PacketScalar;
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{
@@ -405,21 +393,21 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
return m_functor(m_argImpl.coeff(index));
}
template<int LoadMode>
PacketScalar packet(Index row, Index col) const
template<int LoadMode, typename PacketType>
PacketType packet(Index row, Index col) const
{
return m_functor.packetOp(m_argImpl.template packet<LoadMode>(row, col));
return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col));
}
template<int LoadMode>
PacketScalar packet(Index index) const
template<int LoadMode, typename PacketType>
PacketType packet(Index index) const
{
return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index));
}
protected:
const UnaryOp m_functor;
typename evaluator<ArgType>::nestedType m_argImpl;
evaluator<ArgType> m_argImpl;
};
// -------------------- CwiseBinaryOp --------------------
@@ -451,13 +439,13 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
HereditaryBits
| (int(LhsFlags) & int(RhsFlags) &
( AlignedBit
| (StorageOrdersAgree ? LinearAccessBit : 0)
( (StorageOrdersAgree ? LinearAccessBit : 0)
| (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
)
)
),
Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit)
Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<Lhs>::Alignment,evaluator<Rhs>::Alignment)
};
EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr)
@@ -467,7 +455,6 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
{ }
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketScalar PacketScalar;
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{
@@ -479,24 +466,24 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index));
}
template<int LoadMode>
PacketScalar packet(Index row, Index col) const
template<int LoadMode, typename PacketType>
PacketType packet(Index row, Index col) const
{
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode>(row, col),
m_rhsImpl.template packet<LoadMode>(row, col));
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col),
m_rhsImpl.template packet<LoadMode,PacketType>(row, col));
}
template<int LoadMode>
PacketScalar packet(Index index) const
template<int LoadMode, typename PacketType>
PacketType packet(Index index) const
{
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode>(index),
m_rhsImpl.template packet<LoadMode>(index));
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index),
m_rhsImpl.template packet<LoadMode,PacketType>(index));
}
protected:
const BinaryOp m_functor;
typename evaluator<Lhs>::nestedType m_lhsImpl;
typename evaluator<Rhs>::nestedType m_rhsImpl;
evaluator<Lhs> m_lhsImpl;
evaluator<Rhs> m_rhsImpl;
};
// -------------------- CwiseUnaryView --------------------
@@ -510,7 +497,9 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit))
Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)),
Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost...
};
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
@@ -543,7 +532,7 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
protected:
const UnaryOp m_unaryOp;
typename evaluator<ArgType>::nestedType m_argImpl;
evaluator<ArgType> m_argImpl;
};
// -------------------- Map --------------------
@@ -560,8 +549,6 @@ struct mapbase_evaluator : evaluator_base<Derived>
typedef typename XprType::PointerType PointerType;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketScalar PacketScalar;
typedef typename XprType::PacketReturnType PacketReturnType;
enum {
IsRowMajor = XprType::RowsAtCompileTime,
@@ -597,30 +584,30 @@ struct mapbase_evaluator : evaluator_base<Derived>
return m_data[index * m_xpr.innerStride()];
}
template<int LoadMode>
PacketReturnType packet(Index row, Index col) const
template<int LoadMode, typename PacketType>
PacketType packet(Index row, Index col) const
{
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
return internal::ploadt<PacketScalar, LoadMode>(ptr);
return internal::ploadt<PacketType, LoadMode>(ptr);
}
template<int LoadMode>
PacketReturnType packet(Index index) const
template<int LoadMode, typename PacketType>
PacketType packet(Index index) const
{
return internal::ploadt<PacketScalar, LoadMode>(m_data + index * m_xpr.innerStride());
return internal::ploadt<PacketType, LoadMode>(m_data + index * m_xpr.innerStride());
}
template<int StoreMode>
void writePacket(Index row, Index col, const PacketScalar& x)
template<int StoreMode, typename PacketType>
void writePacket(Index row, Index col, const PacketType& x)
{
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
return internal::pstoret<Scalar, PacketScalar, StoreMode>(ptr, x);
return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
}
template<int StoreMode>
void writePacket(Index index, const PacketScalar& x)
template<int StoreMode, typename PacketType>
void writePacket(Index index, const PacketType& x)
{
internal::pstoret<Scalar, PacketScalar, StoreMode>(m_data + index * m_xpr.innerStride(), x);
internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_xpr.innerStride(), x);
}
protected:
@@ -634,6 +621,8 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
{
typedef Map<PlainObjectType, MapOptions, StrideType> XprType;
typedef typename XprType::Scalar Scalar;
// TODO: should check for smaller packet types once we can handle multi-sized packet types
typedef typename packet_traits<Scalar>::type PacketScalar;
enum {
InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
@@ -645,18 +634,21 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
HasNoInnerStride = InnerStrideAtCompileTime == 1,
HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
HasNoStride = HasNoInnerStride && HasNoOuterStride,
IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
PacketAlignment = unpacket_traits<PacketScalar>::alignment,
KeepsPacketAccess = bool(HasNoInnerStride)
&& ( bool(IsDynamicSize)
|| HasNoOuterStride
|| ( OuterStrideAtCompileTime!=Dynamic
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ),
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime) % PacketAlignment)==0 ) ),
Flags0 = evaluator<PlainObjectType>::Flags,
Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
? int(Flags1) : int(Flags1 & ~LinearAccessBit),
Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit)
Flags1 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
? int(Flags0) : int(Flags0 & ~LinearAccessBit),
Flags = KeepsPacketAccess ? int(Flags1) : (int(Flags1) & ~PacketAccessBit),
Alignment = int(MapOptions)&int(AlignedMask)
};
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map)
@@ -673,7 +665,8 @@ struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> >
typedef Ref<PlainObjectType, RefOptions, StrideType> XprType;
enum {
Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags
Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags,
Alignment = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Alignment
};
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref)
@@ -691,7 +684,9 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
: block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel>
{
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::Scalar Scalar;
// TODO: should check for smaller packet types once we can handle multi-sized packet types
typedef typename packet_traits<Scalar>::type PacketScalar;
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
@@ -717,14 +712,16 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
FlagsRowMajorBit = XprType::Flags&RowMajorBit,
Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
DirectAccessBit |
MaskPacketAccessBit |
MaskAlignedBit),
Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit
MaskPacketAccessBit),
Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
PacketAlignment = unpacket_traits<PacketScalar>::alignment,
Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
};
typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) {}
@@ -756,8 +753,6 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketScalar PacketScalar;
typedef typename XprType::PacketReturnType PacketReturnType;
enum {
RowsAtCompileTime = XprType::RowsAtCompileTime
@@ -783,35 +778,35 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
}
template<int LoadMode>
PacketReturnType packet(Index row, Index col) const
template<int LoadMode, typename PacketType>
PacketType packet(Index row, Index col) const
{
return m_argImpl.template packet<LoadMode>(m_startRow.value() + row, m_startCol.value() + col);
return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col);
}
template<int LoadMode>
PacketReturnType packet(Index index) const
template<int LoadMode, typename PacketType>
PacketType packet(Index index) const
{
return packet<LoadMode>(RowsAtCompileTime == 1 ? 0 : index,
RowsAtCompileTime == 1 ? index : 0);
return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
RowsAtCompileTime == 1 ? index : 0);
}
template<int StoreMode>
void writePacket(Index row, Index col, const PacketScalar& x)
template<int StoreMode, typename PacketType>
void writePacket(Index row, Index col, const PacketType& x)
{
return m_argImpl.template writePacket<StoreMode>(m_startRow.value() + row, m_startCol.value() + col, x);
return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x);
}
template<int StoreMode>
void writePacket(Index index, const PacketScalar& x)
template<int StoreMode, typename PacketType>
void writePacket(Index index, const PacketType& x)
{
return writePacket<StoreMode>(RowsAtCompileTime == 1 ? 0 : index,
RowsAtCompileTime == 1 ? index : 0,
x);
return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
RowsAtCompileTime == 1 ? index : 0,
x);
}
protected:
typename evaluator<ArgType>::nestedType m_argImpl;
evaluator<ArgType> m_argImpl;
const variable_if_dynamic<Index, ArgType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
const variable_if_dynamic<Index, ArgType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
};
@@ -825,12 +820,13 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc
typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
{
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
typedef typename XprType::Scalar Scalar;
EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
: mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
{
// FIXME this should be an internal assertion
eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned");
eigen_assert(((size_t(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
}
};
@@ -849,7 +845,9 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
+ EIGEN_SIZE_MAX(evaluator<ThenMatrixType>::CoeffReadCost,
evaluator<ElseMatrixType>::CoeffReadCost),
Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits
Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits,
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment)
};
inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
@@ -877,9 +875,9 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
}
protected:
typename evaluator<ConditionMatrixType>::nestedType m_conditionImpl;
typename evaluator<ThenMatrixType>::nestedType m_thenImpl;
typename evaluator<ElseMatrixType>::nestedType m_elseImpl;
evaluator<ConditionMatrixType> m_conditionImpl;
evaluator<ThenMatrixType> m_thenImpl;
evaluator<ElseMatrixType> m_elseImpl;
};
@@ -891,7 +889,6 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
{
typedef Replicate<ArgType, RowFactor, ColFactor> XprType;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketReturnType PacketReturnType;
enum {
Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor
};
@@ -901,7 +898,9 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
enum {
CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit)
Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit),
Alignment = evaluator<ArgTypeNestedCleaned>::Alignment
};
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate)
@@ -923,9 +922,19 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
return m_argImpl.coeff(actual_row, actual_col);
}
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
{
// try to avoid using modulo; this is a pure optimization strategy
const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
? (ColFactor==1 ? index : index%m_cols.value())
: (RowFactor==1 ? index : index%m_rows.value());
return m_argImpl.coeff(actual_index);
}
template<int LoadMode>
PacketReturnType packet(Index row, Index col) const
template<int LoadMode, typename PacketType>
PacketType packet(Index row, Index col) const
{
const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
: RowFactor==1 ? row
@@ -934,12 +943,22 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
: ColFactor==1 ? col
: col % m_cols.value();
return m_argImpl.template packet<LoadMode>(actual_row, actual_col);
return m_argImpl.template packet<LoadMode,PacketType>(actual_row, actual_col);
}
template<int LoadMode, typename PacketType>
PacketType packet(Index index) const
{
const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
? (ColFactor==1 ? index : index%m_cols.value())
: (RowFactor==1 ? index : index%m_rows.value());
return m_argImpl.template packet<LoadMode,PacketType>(actual_index);
}
protected:
const ArgTypeNested m_arg; // FIXME is it OK to store both the argument and its evaluator?? (we have the same situation in evaluator_product)
typename evaluator<ArgTypeNestedCleaned>::nestedType m_argImpl;
evaluator<ArgTypeNestedCleaned> m_argImpl;
const variable_if_dynamic<Index, ArgType::RowsAtCompileTime> m_rows;
const variable_if_dynamic<Index, ArgType::ColsAtCompileTime> m_cols;
};
@@ -965,7 +984,9 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
CoeffReadCost = TraversalSize==Dynamic ? Dynamic
: TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits)
Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits),
Alignment = 0 // FIXME this could be improved
};
EIGEN_DEVICE_FUNC explicit evaluator(const XprType expr)
@@ -1001,15 +1022,14 @@ struct evaluator_wrapper_base
typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType;
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
Flags = evaluator<ArgType>::Flags
Flags = evaluator<ArgType>::Flags,
Alignment = evaluator<ArgType>::Alignment
};
EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
typedef typename ArgType::Scalar Scalar;
typedef typename ArgType::CoeffReturnType CoeffReturnType;
typedef typename ArgType::PacketScalar PacketScalar;
typedef typename ArgType::PacketReturnType PacketReturnType;
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{
@@ -1031,32 +1051,32 @@ struct evaluator_wrapper_base
return m_argImpl.coeffRef(index);
}
template<int LoadMode>
PacketReturnType packet(Index row, Index col) const
template<int LoadMode, typename PacketType>
PacketType packet(Index row, Index col) const
{
return m_argImpl.template packet<LoadMode>(row, col);
return m_argImpl.template packet<LoadMode,PacketType>(row, col);
}
template<int LoadMode>
PacketReturnType packet(Index index) const
template<int LoadMode, typename PacketType>
PacketType packet(Index index) const
{
return m_argImpl.template packet<LoadMode>(index);
return m_argImpl.template packet<LoadMode,PacketType>(index);
}
template<int StoreMode>
void writePacket(Index row, Index col, const PacketScalar& x)
template<int StoreMode, typename PacketType>
void writePacket(Index row, Index col, const PacketType& x)
{
m_argImpl.template writePacket<StoreMode>(row, col, x);
}
template<int StoreMode>
void writePacket(Index index, const PacketScalar& x)
template<int StoreMode, typename PacketType>
void writePacket(Index index, const PacketType& x)
{
m_argImpl.template writePacket<StoreMode>(index, x);
}
protected:
typename evaluator<ArgType>::nestedType m_argImpl;
evaluator<ArgType> m_argImpl;
};
template<typename TArgType>
@@ -1085,7 +1105,7 @@ struct unary_evaluator<ArrayWrapper<TArgType> >
// -------------------- Reverse --------------------
// defined in Reverse.h:
template<typename PacketScalar, bool ReversePacket> struct reverse_packet_cond;
template<typename PacketType, bool ReversePacket> struct reverse_packet_cond;
template<typename ArgType, int Direction>
struct unary_evaluator<Reverse<ArgType, Direction> >
@@ -1094,17 +1114,12 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
typedef Reverse<ArgType, Direction> XprType;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketScalar PacketScalar;
typedef typename XprType::PacketReturnType PacketReturnType;
enum {
PacketSize = internal::packet_traits<Scalar>::size,
IsRowMajor = XprType::IsRowMajor,
IsColMajor = !IsRowMajor,
ReverseRow = (Direction == Vertical) || (Direction == BothDirections),
ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),
OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1,
ReversePacket = (Direction == BothDirections)
|| ((Direction == Vertical) && IsColMajor)
|| ((Direction == Horizontal) && IsRowMajor),
@@ -1117,9 +1132,10 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) )
? LinearAccessBit : 0,
Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess)
Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess),
Alignment = 0 // FIXME in some rare cases, Alignment could be preserved, like a Vector4f.
};
typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse)
: m_argImpl(reverse.nestedExpression()),
@@ -1149,38 +1165,53 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1);
}
template<int LoadMode>
PacketScalar packet(Index row, Index col) const
template<int LoadMode, typename PacketType>
PacketType packet(Index row, Index col) const
{
return reverse_packet::run(m_argImpl.template packet<LoadMode>(
enum {
PacketSize = unpacket_traits<PacketType>::size,
OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1
};
typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;
return reverse_packet::run(m_argImpl.template packet<LoadMode,PacketType>(
ReverseRow ? m_rows.value() - row - OffsetRow : row,
ReverseCol ? m_cols.value() - col - OffsetCol : col));
}
template<int LoadMode>
PacketScalar packet(Index index) const
template<int LoadMode, typename PacketType>
PacketType packet(Index index) const
{
return preverse(m_argImpl.template packet<LoadMode>(m_rows.value() * m_cols.value() - index - PacketSize));
enum { PacketSize = unpacket_traits<PacketType>::size };
return preverse(m_argImpl.template packet<LoadMode,PacketType>(m_rows.value() * m_cols.value() - index - PacketSize));
}
template<int LoadMode>
void writePacket(Index row, Index col, const PacketScalar& x)
template<int LoadMode, typename PacketType>
void writePacket(Index row, Index col, const PacketType& x)
{
// FIXME we could factorize some code with packet(i,j)
enum {
PacketSize = unpacket_traits<PacketType>::size,
OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1
};
typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;
m_argImpl.template writePacket<LoadMode>(
ReverseRow ? m_rows.value() - row - OffsetRow : row,
ReverseCol ? m_cols.value() - col - OffsetCol : col,
reverse_packet::run(x));
}
template<int LoadMode>
void writePacket(Index index, const PacketScalar& x)
template<int LoadMode, typename PacketType>
void writePacket(Index index, const PacketType& x)
{
enum { PacketSize = unpacket_traits<PacketType>::size };
m_argImpl.template writePacket<LoadMode>
(m_rows.value() * m_cols.value() - index - PacketSize, preverse(x));
}
protected:
typename evaluator<ArgType>::nestedType m_argImpl;
evaluator<ArgType> m_argImpl;
// If we do not reverse rows, then we do not need to know the number of rows; same for columns
const variable_if_dynamic<Index, ReverseRow ? ArgType::RowsAtCompileTime : 0> m_rows;
@@ -1199,7 +1230,9 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit
Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit,
Alignment = 0
};
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal)
@@ -1233,7 +1266,7 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
}
protected:
typename evaluator<ArgType>::nestedType m_argImpl;
evaluator<ArgType> m_argImpl;
const internal::variable_if_dynamicindex<Index, XprType::DiagIndex> m_index;
private:
@@ -1291,15 +1324,12 @@ class EvalToTemp
template<typename ArgType>
struct evaluator<EvalToTemp<ArgType> >
: public evaluator<typename ArgType::PlainObject>::type
: public evaluator<typename ArgType::PlainObject>
{
typedef EvalToTemp<ArgType> XprType;
typedef typename ArgType::PlainObject PlainObject;
typedef typename evaluator<PlainObject>::type Base;
typedef evaluator<PlainObject> Base;
typedef evaluator type;
typedef evaluator nestedType;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
: m_result(xpr.rows(), xpr.cols())
{

View File

@@ -34,7 +34,7 @@ class InnerIterator
{
protected:
typedef internal::inner_iterator_selector<XprType, typename internal::evaluator_traits<XprType>::Kind> IteratorType;
typedef typename internal::evaluator<XprType>::type EvaluatorType;
typedef internal::evaluator<XprType> EvaluatorType;
typedef typename internal::traits<XprType>::Scalar Scalar;
public:
/** Construct an iterator over the \a outerId -th row or column of \a xpr */
@@ -74,7 +74,7 @@ template<typename XprType>
class inner_iterator_selector<XprType, IndexBased>
{
protected:
typedef typename evaluator<XprType>::type EvaluatorType;
typedef evaluator<XprType> EvaluatorType;
typedef typename traits<XprType>::Scalar Scalar;
enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit };
@@ -112,7 +112,7 @@ class inner_iterator_selector<XprType, IteratorBased>
{
protected:
typedef typename evaluator<XprType>::InnerIterator Base;
typedef typename evaluator<XprType>::type EvaluatorType;
typedef evaluator<XprType> EvaluatorType;
public:
EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &/*innerSize*/)

View File

@@ -95,8 +95,8 @@ class CwiseBinaryOp :
BinaryOp>::ret>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp)
typedef typename internal::nested<LhsType>::type LhsNested;
typedef typename internal::nested<RhsType>::type RhsNested;
typedef typename internal::ref_selector<LhsType>::type LhsNested;
typedef typename internal::ref_selector<RhsType>::type RhsNested;
typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
typedef typename internal::remove_reference<RhsNested>::type _RhsNested;

View File

@@ -49,13 +49,13 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)
EIGEN_DEVICE_FUNC
CwiseNullaryOp(Index nbRows, Index nbCols, const NullaryOp& func = NullaryOp())
: m_rows(nbRows), m_cols(nbCols), m_functor(func)
CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp())
: m_rows(rows), m_cols(cols), m_functor(func)
{
eigen_assert(nbRows >= 0
&& (RowsAtCompileTime == Dynamic || RowsAtCompileTime == nbRows)
&& nbCols >= 0
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols));
eigen_assert(rows >= 0
&& (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols >= 0
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
}
EIGEN_DEVICE_FUNC
@@ -113,10 +113,10 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
*/
template<typename Derived>
template<typename CustomNullaryOp>
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
{
return CwiseNullaryOp<CustomNullaryOp, Derived>(rows, cols, func);
return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);
}
/** \returns an expression of a matrix defined by a custom functor \a func
@@ -139,12 +139,12 @@ DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f
*/
template<typename Derived>
template<typename CustomNullaryOp>
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, Derived>(1, size, func);
else return CwiseNullaryOp<CustomNullaryOp, Derived>(size, 1, func);
if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, PlainObject>(1, size, func);
else return CwiseNullaryOp<CustomNullaryOp, PlainObject>(size, 1, func);
}
/** \returns an expression of a matrix defined by a custom functor \a func
@@ -158,19 +158,19 @@ DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
*/
template<typename Derived>
template<typename CustomNullaryOp>
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
{
return CwiseNullaryOp<CustomNullaryOp, Derived>(RowsAtCompileTime, ColsAtCompileTime, func);
return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);
}
/** \returns an expression of a constant matrix of value \a value
*
* The parameters \a nbRows and \a nbCols are the number of rows and of columns of
* The parameters \a rows and \a cols are the number of rows and of columns of
* the returned matrix. Must be compatible with this DenseBase type.
*
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
* it is redundant to pass \a nbRows and \a nbCols as arguments, so Zero() should be used
* it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used
* instead.
*
* The template parameter \a CustomNullaryOp is the type of the functor.
@@ -179,9 +179,9 @@ DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Constant(Index nbRows, Index nbCols, const Scalar& value)
DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
{
return DenseBase<Derived>::NullaryExpr(nbRows, nbCols, internal::scalar_constant_op<Scalar>(value));
return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_constant_op<Scalar>(value));
}
/** \returns an expression of a constant matrix of value \a value
@@ -245,7 +245,7 @@ EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturn
DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,false>(low,high,size));
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,size));
}
/**
@@ -258,7 +258,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,false>(low,high,Derived::SizeAtCompileTime));
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,Derived::SizeAtCompileTime));
}
/**
@@ -279,7 +279,7 @@ EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedRetu
DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,true>(low,high,size));
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar,true>(low,high,size));
}
/**
@@ -292,7 +292,7 @@ DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,true>(low,high,Derived::SizeAtCompileTime));
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar,true>(low,high,Derived::SizeAtCompileTime));
}
/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
@@ -300,9 +300,10 @@ template<typename Derived>
bool DenseBase<Derived>::isApproxToConstant
(const Scalar& val, const RealScalar& prec) const
{
typename internal::nested_eval<Derived,1>::type self(derived());
for(Index j = 0; j < cols(); ++j)
for(Index i = 0; i < rows(); ++i)
if(!internal::isApprox(this->coeff(i, j), val, prec))
if(!internal::isApprox(self.coeff(i, j), val, prec))
return false;
return true;
}
@@ -356,8 +357,8 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
/** Resizes to the given size, and sets all coefficients in this expression to the given \a value.
*
* \param nbRows the new number of rows
* \param nbCols the new number of columns
* \param rows the new number of rows
* \param cols the new number of columns
* \param val the value to which all coefficients are set
*
* Example: \include Matrix_setConstant_int_int.cpp
@@ -367,9 +368,9 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
PlainObjectBase<Derived>::setConstant(Index nbRows, Index nbCols, const Scalar& val)
PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
{
resize(nbRows, nbCols);
resize(rows, cols);
return setConstant(val);
}
@@ -390,7 +391,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,false>(low,high,newSize));
return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,newSize));
}
/**
@@ -428,9 +429,9 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low,
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Zero(Index nbRows, Index nbCols)
DenseBase<Derived>::Zero(Index rows, Index cols)
{
return Constant(nbRows, nbCols, Scalar(0));
return Constant(rows, cols, Scalar(0));
}
/** \returns an expression of a zero vector.
@@ -484,9 +485,10 @@ DenseBase<Derived>::Zero()
template<typename Derived>
bool DenseBase<Derived>::isZero(const RealScalar& prec) const
{
typename internal::nested_eval<Derived,1>::type self(derived());
for(Index j = 0; j < cols(); ++j)
for(Index i = 0; i < rows(); ++i)
if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<Scalar>(1), prec))
if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<Scalar>(1), prec))
return false;
return true;
}
@@ -523,8 +525,8 @@ PlainObjectBase<Derived>::setZero(Index newSize)
/** Resizes to the given size, and sets all coefficients in this expression to zero.
*
* \param nbRows the new number of rows
* \param nbCols the new number of columns
* \param rows the new number of rows
* \param cols the new number of columns
*
* Example: \include Matrix_setZero_int_int.cpp
* Output: \verbinclude Matrix_setZero_int_int.out
@@ -533,9 +535,9 @@ PlainObjectBase<Derived>::setZero(Index newSize)
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
PlainObjectBase<Derived>::setZero(Index nbRows, Index nbCols)
PlainObjectBase<Derived>::setZero(Index rows, Index cols)
{
resize(nbRows, nbCols);
resize(rows, cols);
return setConstant(Scalar(0));
}
@@ -543,7 +545,7 @@ PlainObjectBase<Derived>::setZero(Index nbRows, Index nbCols)
/** \returns an expression of a matrix where all coefficients equal one.
*
* The parameters \a nbRows and \a nbCols are the number of rows and of columns of
* The parameters \a rows and \a cols are the number of rows and of columns of
* the returned matrix. Must be compatible with this MatrixBase type.
*
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
@@ -557,9 +559,9 @@ PlainObjectBase<Derived>::setZero(Index nbRows, Index nbCols)
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Ones(Index nbRows, Index nbCols)
DenseBase<Derived>::Ones(Index rows, Index cols)
{
return Constant(nbRows, nbCols, Scalar(1));
return Constant(rows, cols, Scalar(1));
}
/** \returns an expression of a vector where all coefficients equal one.
@@ -649,8 +651,8 @@ PlainObjectBase<Derived>::setOnes(Index newSize)
/** Resizes to the given size, and sets all coefficients in this expression to one.
*
* \param nbRows the new number of rows
* \param nbCols the new number of columns
* \param rows the new number of rows
* \param cols the new number of columns
*
* Example: \include Matrix_setOnes_int_int.cpp
* Output: \verbinclude Matrix_setOnes_int_int.out
@@ -659,9 +661,9 @@ PlainObjectBase<Derived>::setOnes(Index newSize)
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
PlainObjectBase<Derived>::setOnes(Index nbRows, Index nbCols)
PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
{
resize(nbRows, nbCols);
resize(rows, cols);
return setConstant(Scalar(1));
}
@@ -669,7 +671,7 @@ PlainObjectBase<Derived>::setOnes(Index nbRows, Index nbCols)
/** \returns an expression of the identity matrix (not necessarily square).
*
* The parameters \a nbRows and \a nbCols are the number of rows and of columns of
* The parameters \a rows and \a cols are the number of rows and of columns of
* the returned matrix. Must be compatible with this MatrixBase type.
*
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
@@ -683,9 +685,9 @@ PlainObjectBase<Derived>::setOnes(Index nbRows, Index nbCols)
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
MatrixBase<Derived>::Identity(Index nbRows, Index nbCols)
MatrixBase<Derived>::Identity(Index rows, Index cols)
{
return DenseBase<Derived>::NullaryExpr(nbRows, nbCols, internal::scalar_identity_op<Scalar>());
return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());
}
/** \returns an expression of the identity matrix (not necessarily square).
@@ -719,18 +721,19 @@ template<typename Derived>
bool MatrixBase<Derived>::isIdentity
(const RealScalar& prec) const
{
typename internal::nested_eval<Derived,1>::type self(derived());
for(Index j = 0; j < cols(); ++j)
{
for(Index i = 0; i < rows(); ++i)
{
if(i == j)
{
if(!internal::isApprox(this->coeff(i, j), static_cast<Scalar>(1), prec))
if(!internal::isApprox(self.coeff(i, j), static_cast<Scalar>(1), prec))
return false;
}
else
{
if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<RealScalar>(1), prec))
if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<RealScalar>(1), prec))
return false;
}
}
@@ -780,8 +783,8 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
/** \brief Resizes to the given size, and writes the identity expression (not necessarily square) into *this.
*
* \param nbRows the new number of rows
* \param nbCols the new number of columns
* \param rows the new number of rows
* \param cols the new number of columns
*
* Example: \include Matrix_setIdentity_int_int.cpp
* Output: \verbinclude Matrix_setIdentity_int_int.out
@@ -789,9 +792,9 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
* \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity()
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index nbRows, Index nbCols)
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
{
derived().resize(nbRows, nbCols);
derived().resize(rows, cols);
return setIdentity();
}

View File

@@ -84,8 +84,7 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
nestedExpression() { return m_matrix.const_cast_derived(); }
protected:
// FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC
typename internal::nested<MatrixType>::type m_matrix;
typename internal::ref_selector<MatrixType>::type m_matrix;
ViewOp m_functor;
};

View File

@@ -49,6 +49,8 @@ template<typename Derived> class DenseBase
public:
using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator/;
/** Inner iterator type to iterate over the coefficients of a row or column.
@@ -66,8 +68,14 @@ template<typename Derived> class DenseBase
*/
typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
/** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc. */
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
/** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc.
*
* It is an alias for the Scalar type */
typedef Scalar value_type;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
@@ -169,8 +177,39 @@ template<typename Derived> class DenseBase
InnerStrideAtCompileTime = internal::inner_stride_at_compile_time<Derived>::ret,
OuterStrideAtCompileTime = internal::outer_stride_at_compile_time<Derived>::ret
};
typedef typename internal::find_best_packet<Scalar,SizeAtCompileTime>::type PacketScalar;
enum { IsPlainObjectBase = 0 };
/** The plain matrix type corresponding to this expression.
* \sa PlainObject */
typedef Matrix<typename internal::traits<Derived>::Scalar,
internal::traits<Derived>::RowsAtCompileTime,
internal::traits<Derived>::ColsAtCompileTime,
AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
internal::traits<Derived>::MaxRowsAtCompileTime,
internal::traits<Derived>::MaxColsAtCompileTime
> PlainMatrix;
/** The plain array type corresponding to this expression.
* \sa PlainObject */
typedef Array<typename internal::traits<Derived>::Scalar,
internal::traits<Derived>::RowsAtCompileTime,
internal::traits<Derived>::ColsAtCompileTime,
AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
internal::traits<Derived>::MaxRowsAtCompileTime,
internal::traits<Derived>::MaxColsAtCompileTime
> PlainArray;
/** \brief The plain matrix or array type corresponding to this expression.
*
* This is not necessarily exactly the return type of eval(). In the case of plain matrices,
* the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
* that the return type of eval() is either PlainObject or const PlainObject&.
*/
typedef typename internal::conditional<internal::is_same<typename internal::traits<Derived>::XprKind,MatrixXpr >::value,
PlainMatrix, PlainArray>::type PlainObject;
/** \returns the number of nonzero coefficients which is in practice the number
* of stored coefficients. */
@@ -221,22 +260,21 @@ template<typename Derived> class DenseBase
* nothing else.
*/
EIGEN_DEVICE_FUNC
void resize(Index nbRows, Index nbCols)
void resize(Index rows, Index cols)
{
EIGEN_ONLY_USED_FOR_DEBUG(nbRows);
EIGEN_ONLY_USED_FOR_DEBUG(nbCols);
eigen_assert(nbRows == this->rows() && nbCols == this->cols()
EIGEN_ONLY_USED_FOR_DEBUG(rows);
EIGEN_ONLY_USED_FOR_DEBUG(cols);
eigen_assert(rows == this->rows() && cols == this->cols()
&& "DenseBase::resize() does not actually allow to resize.");
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,false>,Derived> SequentialLinSpacedReturnType;
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar,false>,PlainObject> SequentialLinSpacedReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows random access. */
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,true>,Derived> RandomAccessLinSpacedReturnType;
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar,true>,PlainObject> RandomAccessLinSpacedReturnType;
/** \internal the return type of MatrixBase::eigenvalues() */
typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
@@ -269,18 +307,17 @@ template<typename Derived> class DenseBase
EIGEN_DEVICE_FUNC
Derived& operator=(const ReturnByValue<OtherDerived>& func);
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** Copies \a other into *this without evaluating other. \returns a reference to *this.
/** \ínternal
* Copies \a other into *this without evaluating other. \returns a reference to *this.
* \deprecated */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& lazyAssign(const DenseBase<OtherDerived>& other);
#endif // not EIGEN_PARSED_BY_DOXYGEN
EIGEN_DEVICE_FUNC
CommaInitializer<Derived> operator<< (const Scalar& s);
// TODO flagged is temporarly disabled. It seems useless now
/** \deprecated it now returns \c *this */
template<unsigned int Added,unsigned int Removed>
EIGEN_DEPRECATED
const Derived& flagged() const
@@ -316,13 +353,13 @@ template<typename Derived> class DenseBase
LinSpaced(const Scalar& low, const Scalar& high);
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
static const CwiseNullaryOp<CustomNullaryOp, Derived>
static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
static const CwiseNullaryOp<CustomNullaryOp, Derived>
static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
NullaryExpr(Index size, const CustomNullaryOp& func);
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
static const CwiseNullaryOp<CustomNullaryOp, Derived>
static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
NullaryExpr(const CustomNullaryOp& func);
EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
@@ -368,6 +405,8 @@ template<typename Derived> class DenseBase
*
* Notice that in the case of a plain matrix or vector (not an expression) this function just returns
* a const reference, in order to avoid a useless copy.
*
* \warning Be carefull with eval() and the auto C++ keyword, as detailed in this \link TopicPitfalls_auto_keyword page \endlink.
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE EvalReturnType eval() const
@@ -429,8 +468,7 @@ template<typename Derived> class DenseBase
template<typename BinaryOp>
EIGEN_DEVICE_FUNC
typename internal::result_of<BinaryOp(typename internal::traits<Derived>::Scalar)>::type
redux(const BinaryOp& func) const;
Scalar redux(const BinaryOp& func) const;
template<typename Visitor>
EIGEN_DEVICE_FUNC
@@ -456,14 +494,35 @@ template<typename Derived> class DenseBase
typedef VectorwiseOp<Derived, Vertical> ColwiseReturnType;
typedef const VectorwiseOp<const Derived, Vertical> ConstColwiseReturnType;
ConstRowwiseReturnType rowwise() const;
RowwiseReturnType rowwise();
ConstColwiseReturnType colwise() const;
ColwiseReturnType colwise();
/** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
*
* Example: \include MatrixBase_rowwise.cpp
* Output: \verbinclude MatrixBase_rowwise.out
*
* \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
*/
//Code moved here due to a CUDA compiler bug
EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const {
return ConstRowwiseReturnType(derived());
}
EIGEN_DEVICE_FUNC RowwiseReturnType rowwise();
static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index rows, Index cols);
static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index size);
static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random();
/** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
*
* Example: \include MatrixBase_colwise.cpp
* Output: \verbinclude MatrixBase_colwise.out
*
* \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
*/
EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const {
return ConstColwiseReturnType(derived());
}
EIGEN_DEVICE_FUNC ColwiseReturnType colwise();
typedef CwiseNullaryOp<internal::scalar_random_op<Scalar>,PlainObject> RandomReturnType;
static const RandomReturnType Random(Index rows, Index cols);
static const RandomReturnType Random(Index size);
static const RandomReturnType Random();
template<typename ThenDerived,typename ElseDerived>
const Select<Derived,ThenDerived,ElseDerived>
@@ -481,14 +540,33 @@ template<typename Derived> class DenseBase
template<int p> RealScalar lpNorm() const;
template<int RowFactor, int ColFactor>
EIGEN_DEVICE_FUNC
const Replicate<Derived,RowFactor,ColFactor> replicate() const;
const Replicate<Derived,Dynamic,Dynamic> replicate(Index rowFacor,Index colFactor) const;
/**
* \return an expression of the replication of \c *this
*
* Example: \include MatrixBase_replicate_int_int.cpp
* Output: \verbinclude MatrixBase_replicate_int_int.out
*
* \sa VectorwiseOp::replicate(), DenseBase::replicate<int,int>(), class Replicate
*/
//Code moved here due to a CUDA compiler bug
EIGEN_DEVICE_FUNC
const Replicate<Derived, Dynamic, Dynamic> replicate(Index rowFactor, Index colFactor) const
{
return Replicate<Derived, Dynamic, Dynamic>(derived(), rowFactor, colFactor);
}
typedef Reverse<Derived, BothDirections> ReverseReturnType;
typedef const Reverse<const Derived, BothDirections> ConstReverseReturnType;
ReverseReturnType reverse();
ConstReverseReturnType reverse() const;
void reverseInPlace();
EIGEN_DEVICE_FUNC ReverseReturnType reverse();
/** This is the const version of reverse(). */
//Code moved here due to a CUDA compiler bug
EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const
{
return ConstReverseReturnType(derived());
}
EIGEN_DEVICE_FUNC void reverseInPlace();
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
# include "../plugins/BlockMethods.h"

View File

@@ -97,7 +97,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
{
eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
return typename internal::evaluator<Derived>::type(derived()).coeff(row,col);
return internal::evaluator<Derived>(derived()).coeff(row,col);
}
EIGEN_DEVICE_FUNC
@@ -139,7 +139,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
coeff(Index index) const
{
eigen_internal_assert(index >= 0 && index < size());
return typename internal::evaluator<Derived>::type(derived()).coeff(index);
return internal::evaluator<Derived>(derived()).coeff(index);
}
@@ -216,8 +216,9 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
template<int LoadMode>
EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const
{
typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
return typename internal::evaluator<Derived>::type(derived()).template packet<LoadMode>(row,col);
return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(row,col);
}
@@ -242,8 +243,9 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
template<int LoadMode>
EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
eigen_internal_assert(index >= 0 && index < size());
return typename internal::evaluator<Derived>::type(derived()).template packet<LoadMode>(index);
return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(index);
}
protected:
@@ -323,7 +325,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
{
eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
return typename internal::evaluator<Derived>::type(derived()).coeffRef(row,col);
return internal::evaluator<Derived>(derived()).coeffRef(row,col);
}
EIGEN_DEVICE_FUNC
@@ -369,7 +371,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
coeffRef(Index index)
{
eigen_internal_assert(index >= 0 && index < size());
return typename internal::evaluator<Derived>::type(derived()).coeffRef(index);
return internal::evaluator<Derived>(derived()).coeffRef(index);
}
/** \returns a reference to the coefficient at given index.
@@ -580,33 +582,42 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
namespace internal {
template<typename Derived, bool JustReturnZero>
template<int Alignment, typename Derived, bool JustReturnZero>
struct first_aligned_impl
{
static inline Index run(const Derived&)
{ return 0; }
};
template<typename Derived>
struct first_aligned_impl<Derived, false>
template<int Alignment, typename Derived>
struct first_aligned_impl<Alignment, Derived, false>
{
static inline Index run(const Derived& m)
{
return internal::first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
return internal::first_aligned<Alignment>(&m.const_cast_derived().coeffRef(0,0), m.size());
}
};
/** \internal \returns the index of the first element of the array that is well aligned for vectorization.
/** \internal \returns the index of the first element of the array stored by \a m that is properly aligned with respect to \a Alignment for vectorization.
*
* \tparam Alignment requested alignment in Bytes.
*
* There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more
* documentation.
*/
template<typename Derived>
static inline Index first_aligned(const Derived& m)
template<int Alignment, typename Derived>
static inline Index first_aligned(const DenseBase<Derived>& m)
{
return first_aligned_impl
<Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
::run(m);
enum { ReturnZero = (int(evaluator<Derived>::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) };
return first_aligned_impl<Alignment, Derived, ReturnZero>::run(m.derived());
}
template<typename Derived>
static inline Index first_default_aligned(const DenseBase<Derived>& m)
{
typedef typename Derived::Scalar Scalar;
typedef typename packet_traits<Scalar>::type DefaultPacketType;
return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(m);
}
template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>

View File

@@ -40,8 +40,7 @@ void check_static_allocation_size()
*/
template <typename T, int Size, int MatrixOrArrayOptions,
int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
: (((Size*sizeof(T))%EIGEN_ALIGN_BYTES)==0) ? EIGEN_ALIGN_BYTES
: 0 >
: compute_default_alignment<T,Size>::value >
struct plain_array
{
T array[Size];
@@ -81,14 +80,71 @@ struct plain_array
#endif
template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES>
struct plain_array<T, Size, MatrixOrArrayOptions, 8>
{
EIGEN_USER_ALIGN_DEFAULT T array[Size];
EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];
EIGEN_DEVICE_FUNC
plain_array()
{
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
check_static_allocation_size<T,Size>();
}
EIGEN_DEVICE_FUNC
plain_array(constructor_without_unaligned_array_assert)
{
check_static_allocation_size<T,Size>();
}
};
template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, 16>
{
EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];
EIGEN_DEVICE_FUNC
plain_array()
{
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(EIGEN_ALIGN_BYTES-1);
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
check_static_allocation_size<T,Size>();
}
EIGEN_DEVICE_FUNC
plain_array(constructor_without_unaligned_array_assert)
{
check_static_allocation_size<T,Size>();
}
};
template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, 32>
{
EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];
EIGEN_DEVICE_FUNC
plain_array()
{
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
check_static_allocation_size<T,Size>();
}
EIGEN_DEVICE_FUNC
plain_array(constructor_without_unaligned_array_assert)
{
check_static_allocation_size<T,Size>();
}
};
template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, 64>
{
EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];
EIGEN_DEVICE_FUNC
plain_array()
{
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
check_static_allocation_size<T,Size>();
}
@@ -102,7 +158,7 @@ struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES>
template <typename T, int MatrixOrArrayOptions, int Alignment>
struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
{
EIGEN_USER_ALIGN_DEFAULT T array[1];
T array[1];
EIGEN_DEVICE_FUNC plain_array() {}
EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
};
@@ -140,7 +196,13 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
if (this != &other) m_data = other.m_data;
return *this;
}
EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {}
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) {
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols);
EIGEN_UNUSED_VARIABLE(size);
EIGEN_UNUSED_VARIABLE(rows);
EIGEN_UNUSED_VARIABLE(cols);
}
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
@@ -186,10 +248,10 @@ template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic
Index m_cols;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
DenseStorage& operator=(const DenseStorage& other)
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
{
@@ -199,13 +261,13 @@ template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic
}
return *this;
}
DenseStorage(Index, Index nbRows, Index nbCols) : m_rows(nbRows), m_cols(nbCols) {}
void swap(DenseStorage& other)
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
EIGEN_DEVICE_FUNC Index rows() const {return m_rows;}
EIGEN_DEVICE_FUNC Index cols() const {return m_cols;}
void conservativeResize(Index, Index nbRows, Index nbCols) { m_rows = nbRows; m_cols = nbCols; }
void resize(Index, Index nbRows, Index nbCols) { m_rows = nbRows; m_cols = nbCols; }
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
};
@@ -217,10 +279,10 @@ template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Si
Index m_rows;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
DenseStorage& operator=(const DenseStorage& other)
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
{
@@ -229,12 +291,12 @@ template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Si
}
return *this;
}
DenseStorage(Index, Index nbRows, Index) : m_rows(nbRows) {}
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
EIGEN_DEVICE_FUNC Index cols(void) const {return _Cols;}
void conservativeResize(Index, Index nbRows, Index) { m_rows = nbRows; }
void resize(Index, Index nbRows, Index) { m_rows = nbRows; }
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; }
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
};
@@ -246,10 +308,10 @@ template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Si
Index m_cols;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
DenseStorage& operator=(const DenseStorage& other)
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
{
@@ -258,12 +320,12 @@ template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Si
}
return *this;
}
DenseStorage(Index, Index, Index nbCols) : m_cols(nbCols) {}
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {}
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
EIGEN_DEVICE_FUNC Index rows(void) const {return _Rows;}
EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
void conservativeResize(Index, Index, Index nbCols) { m_cols = nbCols; }
void resize(Index, Index, Index nbCols) { m_cols = nbCols; }
void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
void resize(Index, Index, Index cols) { m_cols = cols; }
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
};
@@ -276,19 +338,22 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
Index m_cols;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(0), m_rows(0), m_cols(0) {}
DenseStorage(Index size, Index nbRows, Index nbCols)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows), m_cols(nbCols)
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
DenseStorage(const DenseStorage& other)
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
{
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0);
}
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols))
, m_rows(other.m_rows)
, m_cols(other.m_cols)
{
internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data);
}
DenseStorage& operator=(const DenseStorage& other)
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
{
@@ -298,6 +363,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
return *this;
}
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC
DenseStorage(DenseStorage&& other)
: m_data(std::move(other.m_data))
, m_rows(std::move(other.m_rows))
@@ -307,6 +373,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
other.m_rows = 0;
other.m_cols = 0;
}
EIGEN_DEVICE_FUNC
DenseStorage& operator=(DenseStorage&& other)
{
using std::swap;
@@ -316,18 +383,18 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
return *this;
}
#endif
~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
void swap(DenseStorage& other)
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
void conservativeResize(Index size, Index nbRows, Index nbCols)
void conservativeResize(Index size, Index rows, Index cols)
{
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
m_rows = nbRows;
m_cols = nbCols;
m_rows = rows;
m_cols = cols;
}
void resize(Index size, Index nbRows, Index nbCols)
EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols)
{
if(size != m_rows*m_cols)
{
@@ -338,8 +405,8 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
m_data = 0;
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
}
m_rows = nbRows;
m_cols = nbCols;
m_rows = rows;
m_cols = cols;
}
EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
EIGEN_DEVICE_FUNC T *data() { return m_data; }
@@ -353,15 +420,19 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
DenseStorage(Index size, Index, Index nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(nbCols)
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
DenseStorage(const DenseStorage& other)
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
{
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0);
EIGEN_UNUSED_VARIABLE(rows);
}
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
, m_cols(other.m_cols)
{
internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);
}
DenseStorage& operator=(const DenseStorage& other)
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
{
@@ -371,6 +442,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
return *this;
}
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC
DenseStorage(DenseStorage&& other)
: m_data(std::move(other.m_data))
, m_cols(std::move(other.m_cols))
@@ -378,6 +450,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
other.m_data = nullptr;
other.m_cols = 0;
}
EIGEN_DEVICE_FUNC
DenseStorage& operator=(DenseStorage&& other)
{
using std::swap;
@@ -386,16 +459,16 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
return *this;
}
#endif
~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
void conservativeResize(Index size, Index, Index nbCols)
EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols)
{
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
m_cols = nbCols;
m_cols = cols;
}
EIGEN_STRONG_INLINE void resize(Index size, Index, Index nbCols)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols)
{
if(size != _Rows*m_cols)
{
@@ -406,7 +479,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
m_data = 0;
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
}
m_cols = nbCols;
m_cols = cols;
}
EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
EIGEN_DEVICE_FUNC T *data() { return m_data; }
@@ -420,15 +493,19 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
DenseStorage(Index size, Index nbRows, Index) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows)
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
DenseStorage(const DenseStorage& other)
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
{
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols);
EIGEN_UNUSED_VARIABLE(cols);
}
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
, m_rows(other.m_rows)
{
internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);
}
DenseStorage& operator=(const DenseStorage& other)
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
{
@@ -438,6 +515,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
return *this;
}
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC
DenseStorage(DenseStorage&& other)
: m_data(std::move(other.m_data))
, m_rows(std::move(other.m_rows))
@@ -445,6 +523,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
other.m_data = nullptr;
other.m_rows = 0;
}
EIGEN_DEVICE_FUNC
DenseStorage& operator=(DenseStorage&& other)
{
using std::swap;
@@ -453,16 +532,16 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
return *this;
}
#endif
~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
void conservativeResize(Index size, Index nbRows, Index)
void conservativeResize(Index size, Index rows, Index)
{
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
m_rows = nbRows;
m_rows = rows;
}
EIGEN_STRONG_INLINE void resize(Index size, Index nbRows, Index)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index)
{
if(size != m_rows*_Cols)
{
@@ -473,7 +552,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
m_data = 0;
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
}
m_rows = nbRows;
m_rows = rows;
}
EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
EIGEN_DEVICE_FUNC T *data() { return m_data; }

View File

@@ -37,7 +37,7 @@ template<typename MatrixType, int DiagIndex>
struct traits<Diagonal<MatrixType,DiagIndex> >
: traits<MatrixType>
{
typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
typedef typename MatrixType::StorageKind StorageKind;
enum {
@@ -170,7 +170,7 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
// trigger a compile time error is someone try to call packet
// trigger a compile-time error if someone try to call packet
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index,Index) const;
};

View File

@@ -99,7 +99,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
{
using std::sqrt;
EIGEN_USING_STD_MATH(sqrt)
return sqrt(squaredNorm());
}
@@ -141,7 +141,7 @@ struct lpNorm_selector
EIGEN_DEVICE_FUNC
static inline RealScalar run(const MatrixBase<Derived>& m)
{
using std::pow;
EIGEN_USING_STD_MATH(pow)
return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
}
};
@@ -224,13 +224,13 @@ bool MatrixBase<Derived>::isOrthogonal
template<typename Derived>
bool MatrixBase<Derived>::isUnitary(const RealScalar& prec) const
{
typename Derived::Nested nested(derived());
typename internal::nested_eval<Derived,1>::type self(derived());
for(Index i = 0; i < cols(); ++i)
{
if(!internal::isApprox(nested.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
if(!internal::isApprox(self.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
return false;
for(Index j = 0; j < i; ++j)
if(!internal::isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast<Scalar>(1), prec))
if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast<Scalar>(1), prec))
return false;
}
return true;

View File

@@ -1,140 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_FLAGGED_H
#define EIGEN_FLAGGED_H
namespace Eigen {
/** \class Flagged
* \ingroup Core_Module
*
* \brief Expression with modified flags
*
* \param ExpressionType the type of the object of which we are modifying the flags
* \param Added the flags added to the expression
* \param Removed the flags removed from the expression (has priority over Added).
*
* This class represents an expression whose flags have been modified.
* It is the return type of MatrixBase::flagged()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::flagged()
*/
namespace internal {
template<typename ExpressionType, unsigned int Added, unsigned int Removed>
struct traits<Flagged<ExpressionType, Added, Removed> > : traits<ExpressionType>
{
enum { Flags = (ExpressionType::Flags | Added) & ~Removed };
};
}
template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged
: public MatrixBase<Flagged<ExpressionType, Added, Removed> >
{
public:
typedef MatrixBase<Flagged> Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Flagged)
typedef typename internal::conditional<internal::must_nest_by_value<ExpressionType>::ret,
ExpressionType, const ExpressionType&>::type ExpressionTypeNested;
typedef typename ExpressionType::InnerIterator InnerIterator;
explicit inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {}
EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_matrix.outerStride(); }
EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_matrix.innerStride(); }
EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index col) const
{
return m_matrix.coeff(row, col);
}
EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const
{
return m_matrix.coeff(index);
}
EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index row, Index col) const
{
return m_matrix.const_cast_derived().coeffRef(row, col);
}
EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const
{
return m_matrix.const_cast_derived().coeffRef(index);
}
EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
{
return m_matrix.const_cast_derived().coeffRef(row, col);
}
EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
{
return m_matrix.const_cast_derived().coeffRef(index);
}
template<int LoadMode>
inline const PacketScalar packet(Index row, Index col) const
{
return m_matrix.template packet<LoadMode>(row, col);
}
template<int LoadMode>
inline void writePacket(Index row, Index col, const PacketScalar& x)
{
m_matrix.const_cast_derived().template writePacket<LoadMode>(row, col, x);
}
template<int LoadMode>
inline const PacketScalar packet(Index index) const
{
return m_matrix.template packet<LoadMode>(index);
}
template<int LoadMode>
inline void writePacket(Index index, const PacketScalar& x)
{
m_matrix.const_cast_derived().template writePacket<LoadMode>(index, x);
}
EIGEN_DEVICE_FUNC const ExpressionType& _expression() const { return m_matrix; }
template<typename OtherDerived>
EIGEN_DEVICE_FUNC typename ExpressionType::PlainObject solveTriangular(const MatrixBase<OtherDerived>& other) const;
template<typename OtherDerived>
EIGEN_DEVICE_FUNC void solveTriangularInPlace(const MatrixBase<OtherDerived>& other) const;
protected:
ExpressionTypeNested m_matrix;
};
/** \returns an expression of *this with added and removed flags
*
* This is mostly for internal use.
*
* \sa class Flagged
*/
template<typename Derived>
template<unsigned int Added,unsigned int Removed>
inline const Flagged<Derived, Added, Removed>
DenseBase<Derived>::flagged() const
{
return Flagged<Derived, Added, Removed>(derived());
}
} // end namespace Eigen
#endif // EIGEN_FLAGGED_H

View File

@@ -183,7 +183,7 @@ struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
{
#if EIGEN_ALIGN_STATICALLY
#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
#else
@@ -196,7 +196,7 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() {
return ForceAlignment
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES)
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
: m_data.array;
}
#endif
@@ -249,8 +249,8 @@ template<> struct gemv_dense_sense_selector<OnTheRight,ColMajor,true>
gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);

View File

@@ -49,6 +49,7 @@ struct default_packet_traits
HasMul = 1,
HasNegate = 1,
HasAbs = 1,
HasArg = 0,
HasAbs2 = 1,
HasMin = 1,
HasMax = 1,
@@ -58,8 +59,10 @@ struct default_packet_traits
HasDiv = 0,
HasSqrt = 0,
HasRsqrt = 0,
HasExp = 0,
HasLog = 0,
HasLog10 = 0,
HasPow = 0,
HasSin = 0,
@@ -67,7 +70,14 @@ struct default_packet_traits
HasTan = 0,
HasASin = 0,
HasACos = 0,
HasATan = 0
HasATan = 0,
HasSinh = 0,
HasCosh = 0,
HasTanh = 0,
HasRound = 0,
HasFloor = 0,
HasCeil = 0
};
};
@@ -97,6 +107,28 @@ template<typename T> struct packet_traits : default_packet_traits
template<typename T> struct packet_traits<const T> : packet_traits<T> { };
template <typename Src, typename Tgt> struct type_casting_traits {
enum {
VectorizedCast = 0,
SrcCoeffRatio = 1,
TgtCoeffRatio = 1
};
};
/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
template <typename SrcPacket, typename TgtPacket>
EIGEN_DEVICE_FUNC inline TgtPacket
pcast(const SrcPacket& a) {
return static_cast<TgtPacket>(a);
}
template <typename SrcPacket, typename TgtPacket>
EIGEN_DEVICE_FUNC inline TgtPacket
pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
return static_cast<TgtPacket>(a);
}
/** \internal \returns a + b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
padd(const Packet& a,
@@ -140,6 +172,10 @@ pmax(const Packet& a,
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pabs(const Packet& a) { using std::abs; return abs(a); }
/** \internal \returns the phase angle of \a a */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
parg(const Packet& a) { using numext::arg; return arg(a); }
/** \internal \returns the bitwise and of \a a and \a b */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pand(const Packet& a, const Packet& b) { return a & b; }
@@ -225,8 +261,8 @@ inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
}
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
template<typename Scalar> inline typename packet_traits<Scalar>::type
plset(const Scalar& a) { return a; }
template<typename Packet> inline Packet
plset(const typename unpacket_traits<Packet>::type& a) { return a; }
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
@@ -245,7 +281,15 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
/** \internal tries to do cache prefetching of \a addr */
template<typename Scalar> inline void prefetch(const Scalar* addr)
{
#if !EIGEN_COMP_MSVC
#ifdef __CUDA_ARCH__
#if defined(__LP64__)
// 64-bit pointer operand constraint for inlined asm
asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
#else
// 32-bit pointer operand constraint for inlined asm
asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
#endif
#elif !EIGEN_COMP_MSVC
__builtin_prefetch(addr);
#endif
}
@@ -287,6 +331,21 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Pack
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
{ return a; }
template<size_t offset, typename Packet>
struct protate_impl
{
// Empty so attempts to use this unimplemented path will fail to compile.
// Only specializations of this template should be used.
};
/** \internal \returns a packet with the coefficients rotated to the right in little-endian convention,
* by the given offset, e.g. for offset == 1:
* (packet[3], packet[2], packet[1], packet[0]) becomes (packet[0], packet[3], packet[2], packet[1])
*/
template<size_t offset, typename Packet> EIGEN_DEVICE_FUNC inline Packet protate(const Packet& a)
{
return offset ? protate_impl<offset, Packet>::run(a) : a;
}
/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
@@ -321,10 +380,22 @@ Packet pasin(const Packet& a) { using std::asin; return asin(a); }
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pacos(const Packet& a) { using std::acos; return acos(a); }
/** \internal \returns the atan of \a a (coeff-wise) */
/** \internal \returns the arc tangent of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet patan(const Packet& a) { using std::atan; return atan(a); }
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
/** \internal \returns the exp of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pexp(const Packet& a) { using std::exp; return exp(a); }
@@ -333,10 +404,32 @@ Packet pexp(const Packet& a) { using std::exp; return exp(a); }
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet plog(const Packet& a) { using std::log; return log(a); }
/** \internal \returns the log10 of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet plog10(const Packet& a) { using std::log10; return log10(a); }
/** \internal \returns the square-root of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet prsqrt(const Packet& a) {
return pdiv(pset1<Packet>(1), psqrt(a));
}
/** \internal \returns the rounded value of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pround(const Packet& a) { using numext::round; return round(a); }
/** \internal \returns the floor of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
/** \internal \returns the ceil of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
/***************************************************************************
* The following functions might not have to be overwritten for vectorized types
***************************************************************************/
@@ -357,22 +450,22 @@ pmadd(const Packet& a,
{ return padd(pmul(a, b),c); }
/** \internal \returns a packet version of \a *from.
* If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
template<typename Packet, int LoadMode>
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
template<typename Packet, int Alignment>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
{
if(LoadMode == Aligned)
if(Alignment >= unpacket_traits<Packet>::alignment)
return pload<Packet>(from);
else
return ploadu<Packet>(from);
}
/** \internal copy the packet \a from to \a *to.
* If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
template<typename Scalar, typename Packet, int LoadMode>
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
template<typename Scalar, typename Packet, int Alignment>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
{
if(LoadMode == Aligned)
if(Alignment >= unpacket_traits<Packet>::alignment)
pstore(to, from);
else
pstoreu(to, from);

View File

@@ -14,7 +14,7 @@
#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR) \
template<typename Derived> \
inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
NAME(const Eigen::ArrayBase<Derived>& x) { \
(NAME)(const Eigen::ArrayBase<Derived>& x) { \
return Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived>(x.derived()); \
}
@@ -34,22 +34,36 @@
} \
};
namespace Eigen
{
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op)
template<typename Derived>
inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar>, const Derived>
@@ -57,16 +71,46 @@ namespace Eigen
return x.derived().pow(exponent);
}
template<typename Derived>
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const Derived, const Derived>
pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<Derived>& exponents)
/** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents.
*
* This function computes the coefficient-wise power.
*
* Example: \include Cwise_array_power_array.cpp
* Output: \verbinclude Cwise_array_power_array.out
*
* \sa ArrayBase::pow()
*/
template<typename Derived,typename ExponentDerived>
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>
pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<ExponentDerived>& exponents)
{
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const Derived, const Derived>(
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>(
x.derived(),
exponents.derived()
);
}
/** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents.
*
* This function computes the coefficient-wise power between a scalar and an array of exponents.
* Beaware that the scalar type of the input scalar \a x and the exponents \a exponents must be the same.
*
* Example: \include Cwise_scalar_power_array.cpp
* Output: \verbinclude Cwise_scalar_power_array.out
*
* \sa ArrayBase::pow()
*/
template<typename Derived>
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const typename Derived::ConstantReturnType, const Derived>
pow(const typename Derived::Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
{
typename Derived::ConstantReturnType constant_x(exponents.rows(), exponents.cols(), x);
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const typename Derived::ConstantReturnType, const Derived>(
constant_x,
exponents.derived()
);
}
/**
* \brief Component-wise division of a scalar by array elements.
**/

View File

@@ -47,7 +47,7 @@ class Inverse : public InverseImpl<XprType,typename internal::traits<XprType>::S
public:
typedef typename XprType::StorageIndex StorageIndex;
typedef typename XprType::PlainObject PlainObject;
typedef typename internal::nested<XprType>::type XprTypeNested;
typedef typename internal::ref_selector<XprType>::type XprTypeNested;
typedef typename internal::remove_all<XprTypeNested>::type XprTypeNestedCleaned;
explicit Inverse(const XprType &xpr)
@@ -100,14 +100,11 @@ namespace internal {
*/
template<typename ArgType>
struct unary_evaluator<Inverse<ArgType> >
: public evaluator<typename Inverse<ArgType>::PlainObject>::type
: public evaluator<typename Inverse<ArgType>::PlainObject>
{
typedef Inverse<ArgType> InverseType;
typedef typename InverseType::PlainObject PlainObject;
typedef typename evaluator<PlainObject>::type Base;
typedef evaluator<InverseType> type;
typedef evaluator<InverseType> nestedType;
typedef evaluator<PlainObject> Base;
enum { Flags = Base::Flags | EvalBeforeNestingBit };

View File

@@ -19,7 +19,7 @@ namespace Eigen {
* \brief A matrix or vector expression mapping an existing array of data.
*
* \tparam PlainObjectType the equivalent matrix type of the mapped data
* \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned.
* \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
* The default is \c #Unaligned.
* \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout
* of an ordinary, contiguous array. This can be overridden by specifying strides.
@@ -77,7 +77,7 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
? int(PlainObjectType::OuterStrideAtCompileTime)
: int(StrideType::OuterStrideAtCompileTime),
IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
Alignment = int(MapOptions)&int(AlignedMask),
Flags0 = TraitsBase::Flags & (~NestByRefBit),
Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
};
@@ -117,11 +117,11 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
/** Constructor in the fixed-size case.
*
* \param dataPtr pointer to the array to map
* \param a_stride optional Stride object, passing the strides.
* \param stride optional Stride object, passing the strides.
*/
EIGEN_DEVICE_FUNC
explicit inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType())
: Base(cast_to_pointer_type(dataPtr)), m_stride(a_stride)
explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType())
: Base(cast_to_pointer_type(dataPtr)), m_stride(stride)
{
PlainObjectType::Base::_check_template_params();
}
@@ -129,12 +129,12 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
/** Constructor in the dynamic-size vector case.
*
* \param dataPtr pointer to the array to map
* \param a_size the size of the vector expression
* \param a_stride optional Stride object, passing the strides.
* \param size the size of the vector expression
* \param stride optional Stride object, passing the strides.
*/
EIGEN_DEVICE_FUNC
inline Map(PointerArgType dataPtr, Index a_size, const StrideType& a_stride = StrideType())
: Base(cast_to_pointer_type(dataPtr), a_size), m_stride(a_stride)
inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType())
: Base(cast_to_pointer_type(dataPtr), size), m_stride(stride)
{
PlainObjectType::Base::_check_template_params();
}
@@ -142,13 +142,13 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
/** Constructor in the dynamic-size matrix case.
*
* \param dataPtr pointer to the array to map
* \param nbRows the number of rows of the matrix expression
* \param nbCols the number of columns of the matrix expression
* \param a_stride optional Stride object, passing the strides.
* \param rows the number of rows of the matrix expression
* \param cols the number of columns of the matrix expression
* \param stride optional Stride object, passing the strides.
*/
EIGEN_DEVICE_FUNC
inline Map(PointerArgType dataPtr, Index nbRows, Index nbCols, const StrideType& a_stride = StrideType())
: Base(cast_to_pointer_type(dataPtr), nbRows, nbCols), m_stride(a_stride)
inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType())
: Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride)
{
PlainObjectType::Base::_check_template_params();
}

View File

@@ -146,12 +146,12 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
}
EIGEN_DEVICE_FUNC
inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols)
: m_data(dataPtr), m_rows(nbRows), m_cols(nbCols)
inline MapBase(PointerType dataPtr, Index rows, Index cols)
: m_data(dataPtr), m_rows(rows), m_cols(cols)
{
eigen_assert( (dataPtr == 0)
|| ( nbRows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == nbRows)
&& nbCols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols)));
|| ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
checkSanity();
}
@@ -160,7 +160,9 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
EIGEN_DEVICE_FUNC
void checkSanity() const
{
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned");
#if EIGEN_MAX_ALIGN_BYTES>0
eigen_assert(((size_t(m_data) % EIGEN_PLAIN_ENUM_MAX(1,internal::traits<Derived>::Alignment)) == 0) && "data is not aligned");
#endif
}
PointerType m_data;
@@ -234,7 +236,7 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) : Base(dataPtr, nbRows, nbCols) {}
EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {}
EIGEN_DEVICE_FUNC
Derived& operator=(const MapBase& other)

View File

@@ -10,6 +10,9 @@
#ifndef EIGEN_MATHFUNCTIONS_H
#define EIGEN_MATHFUNCTIONS_H
// source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html
#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406
namespace Eigen {
// On WINCE, std::abs is defined for int only, so let's defined our own overloads:
@@ -276,7 +279,7 @@ struct norm1_default_impl
EIGEN_DEVICE_FUNC
static inline RealScalar run(const Scalar& x)
{
using std::abs;
EIGEN_USING_STD_MATH(abs);
return abs(real(x)) + abs(imag(x));
}
};
@@ -287,7 +290,7 @@ struct norm1_default_impl<Scalar, false>
EIGEN_DEVICE_FUNC
static inline Scalar run(const Scalar& x)
{
using std::abs;
EIGEN_USING_STD_MATH(abs);
return abs(x);
}
};
@@ -313,8 +316,8 @@ struct hypot_impl
{
EIGEN_USING_STD_MATH(max);
EIGEN_USING_STD_MATH(min);
using std::abs;
using std::sqrt;
EIGEN_USING_STD_MATH(abs);
EIGEN_USING_STD_MATH(sqrt);
RealScalar _x = abs(x);
RealScalar _y = abs(y);
Scalar p, qp;
@@ -328,6 +331,7 @@ struct hypot_impl
p = _y;
qp = _x / p;
}
if(p==RealScalar(0)) return RealScalar(0);
return p * sqrt(RealScalar(1) + qp*qp);
}
};
@@ -345,6 +349,7 @@ struct hypot_retval
template<typename OldType, typename NewType>
struct cast_impl
{
EIGEN_DEVICE_FUNC
static inline NewType run(const OldType& x)
{
return static_cast<NewType>(x);
@@ -354,35 +359,119 @@ struct cast_impl
// here, for once, we're plainly returning NewType: we don't want cast to do weird things.
template<typename OldType, typename NewType>
EIGEN_DEVICE_FUNC
inline NewType cast(const OldType& x)
{
return cast_impl<OldType, NewType>::run(x);
}
/****************************************************************************
* Implementation of logp1 *
* Implementation of round *
****************************************************************************/
#if EIGEN_HAS_CXX11_MATH
template<typename Scalar>
struct round_impl {
static inline Scalar run(const Scalar& x)
{
EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
using std::round;
return round(x);
}
};
#else
template<typename Scalar>
struct round_impl
{
static inline Scalar run(const Scalar& x)
{
EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
EIGEN_USING_STD_MATH(floor);
EIGEN_USING_STD_MATH(ceil);
return (x > Scalar(0)) ? floor(x + Scalar(0.5)) : ceil(x - Scalar(0.5));
}
};
#endif
template<typename Scalar>
struct round_retval
{
typedef Scalar type;
};
/****************************************************************************
* Implementation of arg *
****************************************************************************/
#if EIGEN_HAS_CXX11_MATH
template<typename Scalar>
struct arg_impl {
static inline Scalar run(const Scalar& x)
{
EIGEN_USING_STD_MATH(arg);
return arg(x);
}
};
#else
template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
struct arg_default_impl
{
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_DEVICE_FUNC
static inline RealScalar run(const Scalar& x)
{
return (x < Scalar(0)) ? Scalar(EIGEN_PI) : Scalar(0); }
};
template<typename Scalar>
struct arg_default_impl<Scalar,true>
{
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_DEVICE_FUNC
static inline RealScalar run(const Scalar& x)
{
EIGEN_USING_STD_MATH(arg);
return arg(x);
}
};
template<typename Scalar> struct arg_impl : arg_default_impl<Scalar> {};
#endif
template<typename Scalar>
struct arg_retval
{
typedef typename NumTraits<Scalar>::Real type;
};
/****************************************************************************
* Implementation of log1p *
****************************************************************************/
template<typename Scalar, bool isComplex = NumTraits<Scalar>::IsComplex >
struct log1p_impl
{
static inline Scalar run(const Scalar& x)
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
// Let's be conservative and enable the default C++11 implementation only if we are sure it exists
#if (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \
&& (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC)
using std::log1p;
return log1p(x);
#else
typedef typename NumTraits<Scalar>::Real RealScalar;
using std::log;
Scalar x1p = RealScalar(1) + x;
return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
#endif
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_USING_STD_MATH(log);
Scalar x1p = RealScalar(1) + x;
return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
}
};
#if EIGEN_HAS_CXX11_MATH
template<typename Scalar>
struct log1p_impl<Scalar, false> {
static inline Scalar run(const Scalar& x)
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
using std::log1p;
return log1p(x);
}
};
#endif
template<typename Scalar>
struct log1p_retval
{
@@ -399,7 +488,7 @@ struct pow_default_impl
typedef Scalar retval;
static inline Scalar run(const Scalar& x, const Scalar& y)
{
using std::pow;
EIGEN_USING_STD_MATH(pow);
return pow(x, y);
}
};
@@ -467,48 +556,48 @@ struct random_default_impl<Scalar, false, false>
};
enum {
floor_log2_terminate,
floor_log2_move_up,
floor_log2_move_down,
floor_log2_bogus
meta_floor_log2_terminate,
meta_floor_log2_move_up,
meta_floor_log2_move_down,
meta_floor_log2_bogus
};
template<unsigned int n, int lower, int upper> struct floor_log2_selector
template<unsigned int n, int lower, int upper> struct meta_floor_log2_selector
{
enum { middle = (lower + upper) / 2,
value = (upper <= lower + 1) ? int(floor_log2_terminate)
: (n < (1 << middle)) ? int(floor_log2_move_down)
: (n==0) ? int(floor_log2_bogus)
: int(floor_log2_move_up)
value = (upper <= lower + 1) ? int(meta_floor_log2_terminate)
: (n < (1 << middle)) ? int(meta_floor_log2_move_down)
: (n==0) ? int(meta_floor_log2_bogus)
: int(meta_floor_log2_move_up)
};
};
template<unsigned int n,
int lower = 0,
int upper = sizeof(unsigned int) * CHAR_BIT - 1,
int selector = floor_log2_selector<n, lower, upper>::value>
struct floor_log2 {};
int selector = meta_floor_log2_selector<n, lower, upper>::value>
struct meta_floor_log2 {};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_move_down>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down>
{
enum { value = floor_log2<n, lower, floor_log2_selector<n, lower, upper>::middle>::value };
enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value };
};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_move_up>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up>
{
enum { value = floor_log2<n, floor_log2_selector<n, lower, upper>::middle, upper>::value };
enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value };
};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_terminate>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate>
{
enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower };
};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_bogus>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
{
// no value, error at compile time
};
@@ -516,11 +605,24 @@ struct floor_log2<n, lower, upper, floor_log2_bogus>
template<typename Scalar>
struct random_default_impl<Scalar, false, true>
{
typedef typename NumTraits<Scalar>::NonInteger NonInteger;
static inline Scalar run(const Scalar& x, const Scalar& y)
{
return x + Scalar((NonInteger(y)-x+1) * std::rand() / (RAND_MAX + NonInteger(1)));
{
using std::max;
using std::min;
typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
if(y<x)
return x;
std::size_t range = ScalarX(y)-ScalarX(x);
std::size_t offset = 0;
// rejection sampling
std::size_t divisor = (range+RAND_MAX-1)/(range+1);
std::size_t multiplier = (range+RAND_MAX-1)/std::size_t(RAND_MAX);
do {
offset = ( (std::size_t(std::rand()) * multiplier) / divisor );
} while (offset > range);
return Scalar(ScalarX(x) + offset);
}
static inline Scalar run()
@@ -528,7 +630,7 @@ struct random_default_impl<Scalar, false, true>
#ifdef EIGEN_MAKING_DOCS
return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
#else
enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value,
enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
scalar_bits = sizeof(Scalar) * CHAR_BIT,
shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),
offset = NumTraits<Scalar>::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0
@@ -568,14 +670,15 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()
} // end namespace internal
/****************************************************************************
* Generic math function *
* Generic math functions *
****************************************************************************/
namespace numext {
#ifndef __CUDA_ARCH__
template<typename T>
EIGEN_DEVICE_FUNC
inline T mini(const T& x, const T& y)
EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
{
EIGEN_USING_STD_MATH(min);
return min EIGEN_NOT_A_MACRO (x,y);
@@ -583,11 +686,38 @@ inline T mini(const T& x, const T& y)
template<typename T>
EIGEN_DEVICE_FUNC
inline T maxi(const T& x, const T& y)
EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
{
EIGEN_USING_STD_MATH(max);
return max EIGEN_NOT_A_MACRO (x,y);
}
#else
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
{
return y < x ? y : x;
}
template<>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y)
{
return fmin(x, y);
}
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
{
return x < y ? y : x;
}
template<>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y)
{
return fmax(x, y);
}
#endif
template<typename Scalar>
EIGEN_DEVICE_FUNC
@@ -617,6 +747,13 @@ inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x)
return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);
}
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x);
}
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x)
@@ -673,22 +810,81 @@ inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y)
return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y);
}
// std::isfinite is non standard, so let's define our own version,
// even though it is not very efficient.
template<typename T>
EIGEN_DEVICE_FUNC
bool (isfinite)(const T& x)
{
return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
#if EIGEN_HAS_CXX11_MATH
using std::isfinite;
return isfinite EIGEN_NOT_A_MACRO (x);
#else
return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
#endif
}
template<typename T>
EIGEN_DEVICE_FUNC
bool (isnan)(const T& x)
{
#if EIGEN_HAS_CXX11_MATH
using std::isnan;
return isnan EIGEN_NOT_A_MACRO (x);
#else
return x != x;
#endif
}
template<typename T>
EIGEN_DEVICE_FUNC
bool (isinf)(const T& x)
{
#if EIGEN_HAS_CXX11_MATH
using std::isinf;
return isinf EIGEN_NOT_A_MACRO (x);
#else
return x>NumTraits<T>::highest() || x<NumTraits<T>::lowest();
#endif
}
template<typename T>
bool (isfinite)(const std::complex<T>& x)
{
using std::real;
using std::imag;
return isfinite(real(x)) && isfinite(imag(x));
return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x));
}
template<typename T>
bool (isnan)(const std::complex<T>& x)
{
return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x));
}
template<typename T>
bool (isinf)(const std::complex<T>& x)
{
return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x));
}
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x)
{
return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x);
}
template<typename T>
EIGEN_DEVICE_FUNC
T (floor)(const T& x)
{
EIGEN_USING_STD_MATH(floor);
return floor(x);
}
template<typename T>
EIGEN_DEVICE_FUNC
T (ceil)(const T& x)
{
EIGEN_USING_STD_MATH(ceil);
return ceil(x);
}
// Log base 2 for 32 bits positive integers.
@@ -726,14 +922,14 @@ struct scalar_fuzzy_default_impl<Scalar, false, false>
template<typename OtherScalar> EIGEN_DEVICE_FUNC
static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
{
using std::abs;
EIGEN_USING_STD_MATH(abs);
return abs(x) <= abs(y) * prec;
}
EIGEN_DEVICE_FUNC
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
{
EIGEN_USING_STD_MATH(min);
using std::abs;
EIGEN_USING_STD_MATH(abs);
return abs(x - y) <= (min)(abs(x), abs(y)) * prec;
}
EIGEN_DEVICE_FUNC

View File

@@ -24,13 +24,13 @@ namespace Eigen {
* The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note").
*
* The first three template parameters are required:
* \tparam _Scalar \anchor matrix_tparam_scalar Numeric type, e.g. float, double, int or std::complex<float>.
* User defined sclar types are supported as well (see \ref user_defined_scalars "here").
* \tparam _Scalar Numeric type, e.g. float, double, int or std::complex<float>.
* User defined scalar types are supported as well (see \ref user_defined_scalars "here").
* \tparam _Rows Number of rows, or \b Dynamic
* \tparam _Cols Number of columns, or \b Dynamic
*
* The remaining template parameters are optional -- in most cases you don't have to worry about them.
* \tparam _Options \anchor matrix_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either
* \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of either
* \b #AutoAlign or \b #DontAlign.
* The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required
* for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size.
@@ -97,6 +97,40 @@ namespace Eigen {
* are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.</dd>
* </dl>
*
* <i><b>ABI and storage layout</b></i>
*
* The table below summarizes the ABI of some possible Matrix instances which is fixed thorough the lifetime of Eigen 3.
* <table class="manual">
* <tr><th>Matrix type</th><th>Equivalent C structure</th></tr>
* <tr><td>\code Matrix<T,Dynamic,Dynamic> \endcode</td><td>\code
* struct {
* T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0
* Eigen::Index rows, cols;
* };
* \endcode</td></tr>
* <tr class="alt"><td>\code
* Matrix<T,Dynamic,1>
* Matrix<T,1,Dynamic> \endcode</td><td>\code
* struct {
* T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0
* Eigen::Index size;
* };
* \endcode</td></tr>
* <tr><td>\code Matrix<T,Rows,Cols> \endcode</td><td>\code
* struct {
* T data[Rows*Cols]; // with (size_t(data)%A(Rows*Cols*sizeof(T)))==0
* };
* \endcode</td></tr>
* <tr class="alt"><td>\code Matrix<T,Dynamic,Dynamic,0,MaxRows,MaxCols> \endcode</td><td>\code
* struct {
* T data[MaxRows*MaxCols]; // with (size_t(data)%A(MaxRows*MaxCols*sizeof(T)))==0
* Eigen::Index rows, cols;
* };
* \endcode</td></tr>
* </table>
* Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined to the largest possible power-of-two
* smaller to EIGEN_MAX_STATIC_ALIGN_BYTES.
*
* \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
* \ref TopicStorageOrders
*/
@@ -105,6 +139,20 @@ namespace internal {
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{
private:
enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
enum {
row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
default_alignment = compute_default_alignment<_Scalar,max_size>::value,
actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
required_alignment = unpacket_traits<PacketScalar>::alignment,
packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
};
public:
typedef _Scalar Scalar;
typedef Dense StorageKind;
typedef Eigen::Index StorageIndex;
@@ -115,11 +163,13 @@ struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
MaxRowsAtCompileTime = _MaxRows,
MaxColsAtCompileTime = _MaxCols,
Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
// FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
EvaluatorFlags = compute_matrix_evaluator_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
Options = _Options,
InnerStrideAtCompileTime = 1,
OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime
OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
// FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
Alignment = actual_alignment
};
};
}
@@ -170,7 +220,7 @@ class Matrix
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix& operator=(const MatrixBase<OtherDerived>& other)
EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase<OtherDerived>& other)
{
return Base::_set(other);
}
@@ -219,6 +269,7 @@ class Matrix
{ Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
#ifdef EIGEN_HAVE_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC
Matrix(Matrix&& other)
: Base(std::move(other))
{
@@ -226,6 +277,7 @@ class Matrix
if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
Base::_set_noalias(other);
}
EIGEN_DEVICE_FUNC
Matrix& operator=(Matrix&& other)
{
other.swap(*this);
@@ -264,8 +316,8 @@ class Matrix
*
* \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance,
* calling Matrix<double,1,1>(1) will call the initialization constructor: Matrix(const Scalar&).
* For fixed-size \c 1x1 matrices it is thefore recommended to use the default
* constructor Matrix() instead, especilly when using one of the non standard
* For fixed-size \c 1x1 matrices it is therefore recommended to use the default
* constructor Matrix() instead, especially when using one of the non standard
* \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
*/
EIGEN_STRONG_INLINE explicit Matrix(Index dim);
@@ -279,8 +331,8 @@ class Matrix
*
* \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance,
* calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y).
* For fixed-size \c 1x2 or \c 2x1 vectors it is thefore recommended to use the default
* constructor Matrix() instead, especilly when using one of the non standard
* For fixed-size \c 1x2 or \c 2x1 vectors it is therefore recommended to use the default
* constructor Matrix() instead, especially when using one of the non standard
* \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
*/
EIGEN_DEVICE_FUNC
@@ -313,37 +365,10 @@ class Matrix
}
/** \brief Constructor copying the value of the expression \a other */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix(const MatrixBase<OtherDerived>& other)
: Base(other.rows() * other.cols(), other.rows(), other.cols())
{
// This test resides here, to bring the error messages closer to the user. Normally, these checks
// are performed deeply within the library, thus causing long and scary error traces.
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
Base::_check_template_params();
Base::_set_noalias(other);
}
/** \brief Copy constructor */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix(const Matrix& other)
: Base(other.rows() * other.cols(), other.rows(), other.cols())
{
Base::_check_template_params();
Base::_set_noalias(other);
}
/** \brief Copy constructor with in-place evaluation */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix(const ReturnByValue<OtherDerived>& other)
{
Base::_check_template_params();
Base::resize(other.rows(), other.cols());
other.evalTo(*this);
}
EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other)
{ }
/** \brief Copy constructor for generic expressions.
* \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
@@ -351,14 +376,8 @@ class Matrix
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
: Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
{
Base::_check_template_params();
Base::_resize_to_match(other);
// FIXME/CHECK: isn't *this = other.derived() more efficient. it allows to
// go for pure _set() implementations, right?
*this = other;
}
: Base(other.derived())
{ }
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }

View File

@@ -81,6 +81,7 @@ template<typename Derived> class MatrixBase
using Base::operator*=;
using Base::operator/=;
using Base::operator*;
using Base::operator/;
typedef typename Base::CoeffReturnType CoeffReturnType;
typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
@@ -101,23 +102,11 @@ template<typename Derived> class MatrixBase
EIGEN_DEVICE_FUNC
inline Index diagonalSize() const { return (std::min)(rows(),cols()); }
/** \brief The plain matrix type corresponding to this expression.
*
* This is not necessarily exactly the return type of eval(). In the case of plain matrices,
* the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
* that the return type of eval() is either PlainObject or const PlainObject&.
*/
typedef Matrix<typename internal::traits<Derived>::Scalar,
internal::traits<Derived>::RowsAtCompileTime,
internal::traits<Derived>::ColsAtCompileTime,
AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
internal::traits<Derived>::MaxRowsAtCompileTime,
internal::traits<Derived>::MaxColsAtCompileTime
> PlainObject;
typedef typename Base::PlainObject PlainObject;
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
/** \internal the return type of MatrixBase::adjoint() */
typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
@@ -126,7 +115,7 @@ template<typename Derived> class MatrixBase
/** \internal Return type of eigenvalues() */
typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType;
/** \internal the return type of identity */
typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,Derived> IdentityReturnType;
typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,PlainObject> IdentityReturnType;
/** \internal the return type of unit vectors */
typedef Block<const CwiseNullaryOp<internal::scalar_identity_op<Scalar>, SquareMatrixType>,
internal::traits<Derived>::RowsAtCompileTime,
@@ -164,12 +153,6 @@ template<typename Derived> class MatrixBase
EIGEN_DEVICE_FUNC
Derived& operator=(const ReturnByValue<OtherDerived>& other);
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_DEVICE_FUNC
Derived& lazyAssign(const ProductBase<ProductDerived, Lhs,Rhs>& other);
#endif // not EIGEN_PARSED_BY_DOXYGEN
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& operator+=(const MatrixBase<OtherDerived>& other);
@@ -329,7 +312,7 @@ template<typename Derived> class MatrixBase
template<bool Enable> inline const Derived& forceAlignedAccessIf() const { return derived(); }
template<bool Enable> inline Derived& forceAlignedAccessIf() { return derived(); }
Scalar trace() const;
EIGEN_DEVICE_FUNC Scalar trace() const;
template<int p> EIGEN_DEVICE_FUNC RealScalar lpNorm() const;
@@ -412,7 +395,8 @@ template<typename Derived> class MatrixBase
ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
// put this as separate enum value to work around possible GCC 4.3 bug (?)
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1?Vertical:Horizontal };
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
: ColsAtCompileTime==1 ? Vertical : Horizontal };
typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
HomogeneousReturnType homogeneous() const;

View File

@@ -83,8 +83,25 @@ template<typename T> struct GenericNumTraits
// make sure to override this for floating-point types
return Real(0);
}
static inline T highest() { return (std::numeric_limits<T>::max)(); }
static inline T lowest() { return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)()); }
EIGEN_DEVICE_FUNC
static inline T highest() {
#if defined(__CUDA_ARCH__)
return (internal::device::numeric_limits<T>::max)();
#else
return (std::numeric_limits<T>::max)();
#endif
}
EIGEN_DEVICE_FUNC
static inline T lowest() {
#if defined(__CUDA_ARCH__)
return IsInteger ? (internal::device::numeric_limits<T>::min)() : (-(internal::device::numeric_limits<T>::max)());
#else
return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)());
#endif
}
};
template<typename T> struct NumTraits : GenericNumTraits<T>

View File

@@ -42,10 +42,6 @@ namespace Eigen {
namespace internal {
template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false>
struct permut_matrix_product_retval;
template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false>
struct permut_sparsematrix_product_retval;
enum PermPermProduct_t {PermPermProduct};
} // end namespace internal
@@ -353,7 +349,7 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
* array's size.
*/
template<typename Other>
explicit inline PermutationMatrix(const MatrixBase<Other>& a_indices) : m_indices(a_indices)
explicit inline PermutationMatrix(const MatrixBase<Other>& indices) : m_indices(indices)
{}
/** Convert the Transpositions \a tr to a permutation matrix */
@@ -401,12 +397,12 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename Other>
PermutationMatrix(const Transpose<PermutationBase<Other> >& other)
: m_indices(other.nestedPermutation().size())
: m_indices(other.nestedExpression().size())
{
eigen_internal_assert(m_indices.size() <= NumTraits<StorageIndex>::highest());
StorageIndex end = StorageIndex(m_indices.size());
for (StorageIndex i=0; i<end;++i)
m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i;
m_indices.coeffRef(other.nestedExpression().indices().coeff(i)) = i;
}
template<typename Lhs,typename Rhs>
PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs)
@@ -527,8 +523,8 @@ class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesTyp
typedef typename Traits::IndicesType IndicesType;
#endif
inline PermutationWrapper(const IndicesType& a_indices)
: m_indices(a_indices)
inline PermutationWrapper(const IndicesType& indices)
: m_indices(indices)
{}
/** const version of indices(). */
@@ -541,18 +537,15 @@ class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesTyp
};
// TODO: Do we need to define these operator* functions? Would it be better to have them inherited
// from MatrixBase?
/** \returns the matrix with the permutation applied to the columns.
*/
template<typename MatrixDerived, typename PermutationDerived>
EIGEN_DEVICE_FUNC
const Product<MatrixDerived, PermutationDerived, DefaultProduct>
const Product<MatrixDerived, PermutationDerived, AliasFreeProduct>
operator*(const MatrixBase<MatrixDerived> &matrix,
const PermutationBase<PermutationDerived>& permutation)
{
return Product<MatrixDerived, PermutationDerived, DefaultProduct>
return Product<MatrixDerived, PermutationDerived, AliasFreeProduct>
(matrix.derived(), permutation.derived());
}
@@ -560,89 +553,16 @@ operator*(const MatrixBase<MatrixDerived> &matrix,
*/
template<typename PermutationDerived, typename MatrixDerived>
EIGEN_DEVICE_FUNC
const Product<PermutationDerived, MatrixDerived, DefaultProduct>
const Product<PermutationDerived, MatrixDerived, AliasFreeProduct>
operator*(const PermutationBase<PermutationDerived> &permutation,
const MatrixBase<MatrixDerived>& matrix)
{
return Product<PermutationDerived, MatrixDerived, DefaultProduct>
return Product<PermutationDerived, MatrixDerived, AliasFreeProduct>
(permutation.derived(), matrix.derived());
}
namespace internal {
template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
struct traits<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
: traits<typename MatrixType::PlainObject>
{
typedef typename MatrixType::PlainObject ReturnType;
};
template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
struct permut_matrix_product_retval
: public ReturnByValue<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
{
typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
typedef typename MatrixType::StorageIndex StorageIndex;
permut_matrix_product_retval(const PermutationType& perm, const MatrixType& matrix)
: m_permutation(perm), m_matrix(matrix)
{}
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
template<typename Dest> inline void evalTo(Dest& dst) const
{
const Index n = Side==OnTheLeft ? rows() : cols();
// FIXME we need an is_same for expression that is not sensitive to constness. For instance
// is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
if(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix))
{
// apply the permutation inplace
Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
mask.fill(false);
Index r = 0;
while(r < m_permutation.size())
{
// search for the next seed
while(r<m_permutation.size() && mask[r]) r++;
if(r>=m_permutation.size())
break;
// we got one, let's follow it until we are back to the seed
Index k0 = r++;
Index kPrev = k0;
mask.coeffRef(k0) = true;
for(Index k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k))
{
Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
.swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
(dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
mask.coeffRef(k) = true;
kPrev = k;
}
}
}
else
{
for(Index i = 0; i < n; ++i)
{
Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
(dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i)
=
Block<const MatrixTypeNestedCleaned,Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime>
(m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i);
}
}
}
protected:
const PermutationType& m_permutation;
typename MatrixType::Nested m_matrix;
};
/* Template partial specialization for transposed/inverse permutations */
template<typename Derived>
@@ -700,22 +620,22 @@ class Transpose<PermutationBase<Derived> >
/** \returns the matrix with the inverse permutation applied to the columns.
*/
template<typename OtherDerived> friend
const Product<OtherDerived, Transpose, DefaultProduct>
const Product<OtherDerived, Transpose, AliasFreeProduct>
operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trPerm)
{
return Product<OtherDerived, Transpose, DefaultProduct>(matrix.derived(), trPerm.derived());
return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trPerm.derived());
}
/** \returns the matrix with the inverse permutation applied to the rows.
*/
template<typename OtherDerived>
const Product<Transpose, OtherDerived, DefaultProduct>
const Product<Transpose, OtherDerived, AliasFreeProduct>
operator*(const MatrixBase<OtherDerived>& matrix) const
{
return Product<Transpose, OtherDerived, DefaultProduct>(*this, matrix.derived());
return Product<Transpose, OtherDerived, AliasFreeProduct>(*this, matrix.derived());
}
const PermutationType& nestedPermutation() const { return m_permutation; }
const PermutationType& nestedExpression() const { return m_permutation; }
protected:
const PermutationType& m_permutation;
@@ -728,32 +648,6 @@ const PermutationWrapper<const Derived> MatrixBase<Derived>::asPermutation() con
}
namespace internal {
// TODO currently a permutation matrix expression has the form PermutationMatrix or PermutationWrapper
// or their transpose; in the future shape should be defined by the expression traits
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
struct evaluator_traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
{
typedef typename storage_kind_to_evaluator_kind<Dense>::Kind Kind;
typedef PermutationShape Shape;
static const int AssumeAliasing = 0;
};
template<typename IndicesType>
struct evaluator_traits<PermutationWrapper<IndicesType> >
{
typedef typename storage_kind_to_evaluator_kind<Dense>::Kind Kind;
typedef PermutationShape Shape;
static const int AssumeAliasing = 0;
};
template<typename Derived>
struct evaluator_traits<Transpose<PermutationBase<Derived> > >
{
typedef typename storage_kind_to_evaluator_kind<Dense>::Kind Kind;
typedef PermutationShape Shape;
static const int AssumeAliasing = 0;
};
template<> struct AssignmentKind<DenseShape,PermutationShape> { typedef EigenBase2EigenBase Kind; };

View File

@@ -69,8 +69,9 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m
#ifdef EIGEN_PARSED_BY_DOXYGEN
namespace internal {
// this is a warkaround to doxygen not being able to understand the inheritence logic
// this is a workaround to doxygen not being able to understand the inheritance logic
// when it is hidden by the dense_xpr_base helper struct.
/** This class is just a workaround for Doxygen and it does not not actually exist. */
template<typename Derived> struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase<Derived> {};
/** This class is just a workaround for Doxygen and it does not not actually exist. */
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
@@ -96,6 +97,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Derived DenseType;
@@ -114,20 +116,23 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
typedef Eigen::Map<Derived, Unaligned> MapType;
friend class Eigen::Map<const Derived, Unaligned>;
typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;
friend class Eigen::Map<Derived, Aligned>;
typedef Eigen::Map<Derived, Aligned> AlignedMapType;
friend class Eigen::Map<const Derived, Aligned>;
typedef const Eigen::Map<const Derived, Aligned> ConstAlignedMapType;
#if EIGEN_MAX_ALIGN_BYTES>0
// for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice.
friend class Eigen::Map<Derived, AlignedMax>;
friend class Eigen::Map<const Derived, AlignedMax>;
#endif
typedef Eigen::Map<Derived, AlignedMax> AlignedMapType;
typedef const Eigen::Map<const Derived, AlignedMax> ConstAlignedMapType;
template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };
template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, Aligned, StrideType> type; };
template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, Aligned, StrideType> type; };
template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, AlignedMax, StrideType> type; };
template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, AlignedMax, StrideType> type; };
protected:
DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
public:
enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::EvaluatorFlags & AlignedBit) != 0 };
enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits<Derived>::Alignment>0) };
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
EIGEN_DEVICE_FUNC
@@ -244,22 +249,22 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t)
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void resize(Index nbRows, Index nbCols)
EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
{
eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,nbRows==RowsAtCompileTime)
&& EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,nbCols==ColsAtCompileTime)
&& EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,nbRows<=MaxRowsAtCompileTime)
&& EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,nbCols<=MaxColsAtCompileTime)
&& nbRows>=0 && nbCols>=0 && "Invalid sizes when resizing a matrix or array.");
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(nbRows, nbCols);
eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,rows==RowsAtCompileTime)
&& EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,cols==ColsAtCompileTime)
&& EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,rows<=MaxRowsAtCompileTime)
&& EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,cols<=MaxColsAtCompileTime)
&& rows>=0 && cols>=0 && "Invalid sizes when resizing a matrix or array.");
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(rows, cols);
#ifdef EIGEN_INITIALIZE_COEFFS
Index size = nbRows*nbCols;
Index size = rows*cols;
bool size_changed = size != this->size();
m_storage.resize(size, nbRows, nbCols);
m_storage.resize(size, rows, cols);
if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
#else
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(nbRows, nbCols);
m_storage.resize(nbRows*nbCols, nbRows, nbCols);
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(rows, cols);
m_storage.resize(rows*cols, rows, cols);
#endif
}
@@ -300,9 +305,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* \sa resize(Index,Index)
*/
EIGEN_DEVICE_FUNC
inline void resize(NoChange_t, Index nbCols)
inline void resize(NoChange_t, Index cols)
{
resize(rows(), nbCols);
resize(rows(), cols);
}
/** Resizes the matrix, changing only the number of rows. For the parameter of type NoChange_t, just pass the special value \c NoChange
@@ -314,9 +319,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* \sa resize(Index,Index)
*/
EIGEN_DEVICE_FUNC
inline void resize(Index nbRows, NoChange_t)
inline void resize(Index rows, NoChange_t)
{
resize(nbRows, cols());
resize(rows, cols());
}
/** Resizes \c *this to have the same dimensions as \a other.
@@ -356,9 +361,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* appended to the matrix they will be uninitialized.
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, Index nbCols)
EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols)
{
internal::conservative_resize_like_impl<Derived>::run(*this, nbRows, nbCols);
internal::conservative_resize_like_impl<Derived>::run(*this, rows, cols);
}
/** Resizes the matrix to \a rows x \a cols while leaving old values untouched.
@@ -369,10 +374,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* In case the matrix is growing, new rows will be uninitialized.
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, NoChange_t)
EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t)
{
// Note: see the comment in conservativeResize(Index,Index)
conservativeResize(nbRows, cols());
conservativeResize(rows, cols());
}
/** Resizes the matrix to \a rows x \a cols while leaving old values untouched.
@@ -383,10 +388,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* In case the matrix is growing, new columns will be uninitialized.
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index nbCols)
EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols)
{
// Note: see the comment in conservativeResize(Index,Index)
conservativeResize(rows(), nbCols);
conservativeResize(rows(), cols);
}
/** Resizes the vector to \a size while retaining old values.
@@ -479,9 +484,13 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
}
#endif
/** Copy constructor */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE PlainObjectBase(Index a_size, Index nbRows, Index nbCols)
: m_storage(a_size, nbRows, nbCols)
EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other)
: Base(), m_storage(other.m_storage) { }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)
: m_storage(size, rows, cols)
{
// _check_template_params();
// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
@@ -498,15 +507,36 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
return this->derived();
}
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
/** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived> &other)
: m_storage()
{
_check_template_params();
resizeLike(other);
_set_noalias(other);
}
/** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
: m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
: m_storage()
{
_check_template_params();
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(other.derived().rows(), other.derived().cols());
Base::operator=(other.derived());
resizeLike(other);
*this = other.derived();
}
/** \brief Copy constructor with in-place evaluation */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue<OtherDerived>& other)
{
_check_template_params();
// FIXME this does not automatically transpose vectors if necessary
resize(other.rows(), other.cols());
other.evalTo(this->derived());
}
/** \name Map
@@ -668,12 +698,12 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
template<typename T0, typename T1>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init2(Index nbRows, Index nbCols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
{
EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
bool(NumTraits<T1>::IsInteger),
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
resize(nbRows,nbCols);
resize(rows,cols);
}
template<typename T0, typename T1>

View File

@@ -25,7 +25,7 @@ template<typename Lhs, typename Rhs, int Option, typename StorageKind> class Pro
* This class represents an expression of the product of two arbitrary matrices.
*
* The other template parameters are:
* \tparam Option can be DefaultProduct or LazyProduct
* \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct
*
*/
@@ -53,6 +53,18 @@ template<typename Lhs, typename Rhs, typename LhsShape>
typedef typename Lhs::Scalar Scalar;
};
template<typename Lhs, typename Rhs, typename RhsShape>
struct product_result_scalar<Lhs, Rhs, TranspositionsShape, RhsShape>
{
typedef typename Rhs::Scalar Scalar;
};
template<typename Lhs, typename Rhs, typename LhsShape>
struct product_result_scalar<Lhs, Rhs, LhsShape, TranspositionsShape>
{
typedef typename Lhs::Scalar Scalar;
};
template<typename Lhs, typename Rhs, int Option>
struct traits<Product<Lhs, Rhs, Option> >
{
@@ -80,10 +92,11 @@ struct traits<Product<Lhs, Rhs, Option> >
InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime),
// The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator.
Flags = ( (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1)
|| ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit))
|| ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) )
? RowMajorBit : (MaxColsAtCompileTime==1 ? 0 : NoPreferredStorageOrderBit)
Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit
: (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
: ( ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit))
|| ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) ? RowMajorBit
: NoPreferredStorageOrderBit
};
};
@@ -108,8 +121,8 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option,
internal::product_type<Lhs,Rhs>::ret>::ret>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
typedef typename internal::nested<Lhs>::type LhsNested;
typedef typename internal::nested<Rhs>::type RhsNested;
typedef typename internal::ref_selector<Lhs>::type LhsNested;
typedef typename internal::ref_selector<Rhs>::type RhsNested;
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
@@ -152,7 +165,7 @@ public:
operator const Scalar() const
{
return typename internal::evaluator<ProductXpr>::type(derived()).coeff(0,0);
return internal::evaluator<ProductXpr>(derived()).coeff(0,0);
}
};
@@ -190,7 +203,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
return typename internal::evaluator<Derived>::type(derived()).coeff(row,col);
return internal::evaluator<Derived>(derived()).coeff(row,col);
}
EIGEN_DEVICE_FUNC Scalar coeff(Index i) const
@@ -198,7 +211,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
return typename internal::evaluator<Derived>::type(derived()).coeff(i);
return internal::evaluator<Derived>(derived()).coeff(i);
}

View File

@@ -1,27 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_PRODUCTBASE_H
#define EIGEN_PRODUCTBASE_H
namespace Eigen {
/** \internal
* Overloaded to perform an efficient C = (A*B).lazy() */
template<typename Derived>
template<typename ProductDerived, typename Lhs, typename Rhs>
Derived& MatrixBase<Derived>::lazyAssign(const ProductBase<ProductDerived, Lhs,Rhs>& other)
{
other.derived().evalTo(derived());
return derived();
}
} // end namespace Eigen
#endif // EIGEN_PRODUCTBASE_H

374
Eigen/src/Core/ProductEvaluators.h Normal file → Executable file
View File

@@ -32,9 +32,6 @@ struct evaluator<Product<Lhs, Rhs, Options> >
typedef Product<Lhs, Rhs, Options> XprType;
typedef product_evaluator<XprType> Base;
typedef evaluator type;
typedef evaluator nestedType;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
};
@@ -47,9 +44,6 @@ struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Produ
typedef CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > XprType;
typedef evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> > Base;
typedef evaluator type;
typedef evaluator nestedType;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
: Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs())
{}
@@ -63,9 +57,6 @@ struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
typedef evaluator type;
typedef evaluator nestedType;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
: Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
@@ -90,18 +81,25 @@ struct evaluator_traits<Product<Lhs, Rhs, DefaultProduct> >
enum { AssumeAliasing = 1 };
};
template<typename Lhs, typename Rhs>
struct evaluator_traits<Product<Lhs, Rhs, AliasFreeProduct> >
: evaluator_traits_base<Product<Lhs, Rhs, AliasFreeProduct> >
{
enum { AssumeAliasing = 0 };
};
// This is the default evaluator implementation for products:
// It creates a temporary and call generic_product_impl
template<typename Lhs, typename Rhs, int ProductTag, typename LhsShape, typename RhsShape>
struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, LhsShape, RhsShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar>
: public evaluator<typename Product<Lhs, Rhs, DefaultProduct>::PlainObject>::type
template<typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar,
EnableIf<(Options==DefaultProduct || Options==AliasFreeProduct)> >
: public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject>
{
typedef Product<Lhs, Rhs, DefaultProduct> XprType;
typedef Product<Lhs, Rhs, Options> XprType;
typedef typename XprType::PlainObject PlainObject;
typedef typename evaluator<PlainObject>::type Base;
typedef evaluator<PlainObject> Base;
enum {
Flags = Base::Flags | EvalBeforeNestingBit
// CoeffReadCost = 0 // FIXME why is it needed? (this was already the case before the evaluators, see traits<ProductBase>)
};
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
@@ -109,7 +107,8 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, LhsShape
{
::new (static_cast<Base*>(this)) Base(m_result);
// FIXME shall we handle nested_eval here?
// FIXME shall we handle nested_eval here?,
// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
// typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
// typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
// typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
@@ -128,10 +127,11 @@ protected:
};
// Dense = Product
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar>, Dense2Dense,
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
{
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
typedef Product<Lhs,Rhs,Options> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
{
// FIXME shall we handle nested_eval here?
@@ -140,10 +140,11 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_
};
// Dense += Product
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar>, Dense2Dense, Scalar>
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar>, Dense2Dense,
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
{
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
typedef Product<Lhs,Rhs,Options> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &)
{
// FIXME shall we handle nested_eval here?
@@ -152,10 +153,11 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_ass
};
// Dense -= Product
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar>, Dense2Dense, Scalar>
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar>, Dense2Dense,
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
{
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
typedef Product<Lhs,Rhs,Options> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &)
{
// FIXME shall we handle nested_eval here?
@@ -210,7 +212,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
template<typename Dst, typename Lhs, typename Rhs, typename Func>
EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
{
typename evaluator<Rhs>::type rhsEval(rhs);
evaluator<Rhs> rhsEval(rhs);
// FIXME make sure lhs is sequentially stored
// FIXME not very good if rhs is real and lhs complex while alpha is real too
// FIXME we should probably build an evaluator for dst
@@ -223,7 +225,7 @@ EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, cons
template<typename Dst, typename Lhs, typename Rhs, typename Func>
EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
{
typename evaluator<Lhs>::type lhsEval(lhs);
evaluator<Lhs> lhsEval(lhs);
// FIXME make sure rhs is sequentially stored
// FIXME not very good if lhs is real and rhs complex while alpha is real too
// FIXME we should probably build an evaluator for dst
@@ -395,8 +397,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
typedef typename evaluator<LhsNestedCleaned>::type LhsEtorType;
typedef typename evaluator<RhsNestedCleaned>::type RhsEtorType;
typedef evaluator<LhsNestedCleaned> LhsEtorType;
typedef evaluator<RhsNestedCleaned> RhsEtorType;
enum {
RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
@@ -409,7 +411,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
CoeffReadCost = (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits<Scalar>::AddCost==Dynamic || NumTraits<Scalar>::MulCost==Dynamic) ? Dynamic
CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
: (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits<Scalar>::AddCost==Dynamic || NumTraits<Scalar>::MulCost==Dynamic) ? Dynamic
: InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
@@ -418,24 +421,22 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
LhsFlags = LhsEtorType::Flags,
RhsFlags = RhsEtorType::Flags,
LhsAlignment = LhsEtorType::Alignment,
RhsAlignment = RhsEtorType::Alignment,
LhsIsAligned = int(LhsAlignment) >= int(unpacket_traits<PacketScalar>::alignment),
RhsIsAligned = int(RhsAlignment) >= int(unpacket_traits<PacketScalar>::alignment),
LhsRowMajor = LhsFlags & RowMajorBit,
RhsRowMajor = RhsFlags & RowMajorBit,
SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
&& (ColsAtCompileTime == Dynamic
|| ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
&& (RhsFlags&AlignedBit)
)
),
&& (ColsAtCompileTime == Dynamic || ( (ColsAtCompileTime % PacketSize) == 0 && RhsIsAligned ) ),
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
&& (RowsAtCompileTime == Dynamic
|| ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
&& (LhsFlags&AlignedBit)
)
),
&& (RowsAtCompileTime == Dynamic || ( (RowsAtCompileTime % PacketSize) == 0 && LhsIsAligned ) ),
EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
@@ -443,11 +444,13 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
| (EvalToRowMajor ? RowMajorBit : 0)
| (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0)
| (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0)
// TODO enable vectorization for mixed types
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
Alignment = CanVectorizeLhs ? LhsAlignment
: CanVectorizeRhs ? RhsAlignment
: 0,
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
* of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
@@ -457,7 +460,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
&& LhsRowMajor
&& (!RhsRowMajor)
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
&& (LhsFlags & RhsFlags & AlignedBit)
&& (LhsIsAligned && RhsIsAligned)
&& (InnerSize % packet_traits<Scalar>::size == 0)
};
@@ -479,13 +482,13 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
}
template<int LoadMode>
const PacketReturnType packet(Index row, Index col) const
template<int LoadMode, typename PacketType>
const PacketType packet(Index row, Index col) const
{
PacketScalar res;
PacketType res;
typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
Unroll ? InnerSize-1 : Dynamic,
LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl;
Unroll ? InnerSize : Dynamic,
LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl;
PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
return res;
@@ -527,7 +530,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode,Packet>(UnrollingIndex-1, col), res);
}
};
@@ -537,25 +540,43 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
res = pmadd(lhs.template packet<LoadMode,Packet>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode,Packet>(0, col));
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
res = pmul(lhs.template packet<LoadMode,Packet>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
{
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
res = pset1<Packet>(0);
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
{
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
res = pset1<Packet>(0);
}
};
@@ -564,10 +585,9 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
for(Index i = 1; i < innerDim; ++i)
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
res = pset1<Packet>(0);
for(Index i = 0; i < innerDim; ++i)
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
}
};
@@ -576,10 +596,9 @@ struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
for(Index i = 1; i < innerDim; ++i)
res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
res = pset1<Packet>(0);
for(Index i = 0; i < innerDim; ++i)
res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
}
};
@@ -663,7 +682,6 @@ struct diagonal_product_evaluator_base
: evaluator_base<Derived>
{
typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
public:
enum {
CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
@@ -678,8 +696,8 @@ public:
//_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
_LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit
//(int(MatrixFlags)&int(DiagFlags)&AlignedBit),
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
Alignment = evaluator<MatrixType>::Alignment
};
diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
@@ -693,26 +711,26 @@ public:
}
protected:
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const
template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
{
return internal::pmul(m_matImpl.template packet<LoadMode>(row, col),
internal::pset1<PacketScalar>(m_diagImpl.coeff(id)));
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
internal::pset1<PacketType>(m_diagImpl.coeff(id)));
}
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const
template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
{
enum {
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagFlags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned)
DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
};
return internal::pmul(m_matImpl.template packet<LoadMode>(row, col),
m_diagImpl.template packet<DiagonalPacketLoadMode>(id));
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
}
typename evaluator<DiagonalType>::nestedType m_diagImpl;
typename evaluator<MatrixType>::nestedType m_matImpl;
evaluator<DiagonalType> m_diagImpl;
evaluator<MatrixType> m_matImpl;
};
// diagonal * dense
@@ -724,9 +742,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
using Base::m_diagImpl;
using Base::m_matImpl;
using Base::coeff;
using Base::packet_impl;
typedef typename Base::Scalar Scalar;
typedef typename Base::PacketScalar PacketScalar;
typedef Product<Lhs, Rhs, ProductKind> XprType;
typedef typename XprType::PlainObject PlainObject;
@@ -746,18 +762,19 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
}
#ifndef __CUDACC__
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
{
// NVCC complains about template keyword, so we disable this function in CUDA mode
return this->template packet_impl<LoadMode>(row,col, row,
// FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
// See also similar calls below.
return this->template packet_impl<LoadMode,PacketType>(row,col, row,
typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
}
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const
template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet(Index idx) const
{
return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
}
#endif
};
@@ -771,9 +788,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
using Base::m_diagImpl;
using Base::m_matImpl;
using Base::coeff;
using Base::packet_impl;
typedef typename Base::Scalar Scalar;
typedef typename Base::PacketScalar PacketScalar;
typedef Product<Lhs, Rhs, ProductKind> XprType;
typedef typename XprType::PlainObject PlainObject;
@@ -791,17 +806,17 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
}
#ifndef __CUDACC__
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
{
return this->template packet_impl<LoadMode>(row,col, col,
return this->template packet_impl<LoadMode,PacketType>(row,col, col,
typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
}
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const
template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet(Index idx) const
{
return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
}
#endif
};
@@ -809,48 +824,187 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
/***************************************************************************
* Products with permutation matrices
***************************************************************************/
template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs, Rhs, PermutationShape, DenseShape, ProductTag>
/** \internal
* \class permutation_matrix_product
* Internal helper class implementing the product between a permutation matrix and a matrix.
* This class is specialized for DenseShape below and for SparseShape in SparseCore/SparsePermutation.h
*/
template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
struct permutation_matrix_product;
template<typename ExpressionType, int Side, bool Transposed>
struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
{
typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
template<typename Dest, typename PermutationType>
static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
{
MatrixType mat(xpr);
const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
// FIXME we need an is_same for expression that is not sensitive to constness. For instance
// is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
//if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat))
if(is_same_dense(dst, mat))
{
// apply the permutation inplace
Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(perm.size());
mask.fill(false);
Index r = 0;
while(r < perm.size())
{
// search for the next seed
while(r<perm.size() && mask[r]) r++;
if(r>=perm.size())
break;
// we got one, let's follow it until we are back to the seed
Index k0 = r++;
Index kPrev = k0;
mask.coeffRef(k0) = true;
for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k))
{
Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
.swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
(dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
mask.coeffRef(k) = true;
kPrev = k;
}
}
}
else
{
for(Index i = 0; i < n; ++i)
{
Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
(dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i)
=
Block<const MatrixTypeCleaned,Side==OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixTypeCleaned::ColsAtCompileTime>
(mat, ((Side==OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i);
}
}
}
};
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
{
permut_matrix_product_retval<Lhs, Rhs, OnTheLeft, false> pmpr(lhs, rhs);
pmpr.evalTo(dst);
permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
}
};
template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs, Rhs, DenseShape, PermutationShape, ProductTag>
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
{
permut_matrix_product_retval<Rhs, Lhs, OnTheRight, false> pmpr(rhs, lhs);
pmpr.evalTo(dst);
permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
}
};
template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Transpose<Lhs>, Rhs, PermutationShape, DenseShape, ProductTag>
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Transpose<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
{
permut_matrix_product_retval<Lhs, Rhs, OnTheLeft, true> pmpr(lhs.nestedPermutation(), rhs);
pmpr.evalTo(dst);
permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
}
};
template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs, Transpose<Rhs>, DenseShape, PermutationShape, ProductTag>
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, PermutationShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
{
permut_matrix_product_retval<Rhs, Lhs, OnTheRight, true> pmpr(rhs.nestedPermutation(), lhs);
pmpr.evalTo(dst);
permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
}
};
/***************************************************************************
* Products with transpositions matrices
***************************************************************************/
// FIXME could we unify Transpositions and Permutation into a single "shape"??
/** \internal
* \class transposition_matrix_product
* Internal helper class implementing the product between a permutation matrix and a matrix.
*/
template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
struct transposition_matrix_product
{
typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
template<typename Dest, typename TranspositionType>
static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
{
MatrixType mat(xpr);
typedef typename TranspositionType::StorageIndex StorageIndex;
const Index size = tr.size();
StorageIndex j = 0;
if(!(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat)))
dst = mat;
for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
if(Index(j=tr.coeff(k))!=k)
{
if(Side==OnTheLeft) dst.row(k).swap(dst.row(j));
else if(Side==OnTheRight) dst.col(k).swap(dst.col(j));
}
}
};
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
{
transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
}
};
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
{
transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
}
};
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
{
transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
}
};
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
{
transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
}
};

View File

@@ -53,7 +53,7 @@ struct functor_traits<scalar_random_op<Scalar> >
* \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random()
*/
template<typename Derived>
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
inline const typename DenseBase<Derived>::RandomReturnType
DenseBase<Derived>::Random(Index rows, Index cols)
{
return NullaryExpr(rows, cols, internal::scalar_random_op<Scalar>());
@@ -84,7 +84,7 @@ DenseBase<Derived>::Random(Index rows, Index cols)
* \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random()
*/
template<typename Derived>
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
inline const typename DenseBase<Derived>::RandomReturnType
DenseBase<Derived>::Random(Index size)
{
return NullaryExpr(size, internal::scalar_random_op<Scalar>());
@@ -110,7 +110,7 @@ DenseBase<Derived>::Random(Index size)
* \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index)
*/
template<typename Derived>
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
inline const typename DenseBase<Derived>::RandomReturnType
DenseBase<Derived>::Random()
{
return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op<Scalar>());
@@ -162,8 +162,8 @@ PlainObjectBase<Derived>::setRandom(Index newSize)
*
* \not_reentrant
*
* \param nbRows the new number of rows
* \param nbCols the new number of columns
* \param rows the new number of rows
* \param cols the new number of columns
*
* Example: \include Matrix_setRandom_int_int.cpp
* Output: \verbinclude Matrix_setRandom_int_int.out
@@ -172,9 +172,9 @@ PlainObjectBase<Derived>::setRandom(Index newSize)
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
PlainObjectBase<Derived>::setRandom(Index nbRows, Index nbCols)
PlainObjectBase<Derived>::setRandom(Index rows, Index cols)
{
resize(nbRows, nbCols);
resize(rows, cols);
return setRandom();
}

View File

@@ -165,7 +165,7 @@ struct redux_vec_unroller<Func, Derived, Start, 1>
index = Start * packet_traits<typename Derived::Scalar>::size,
outer = index / int(Derived::InnerSizeAtCompileTime),
inner = index % int(Derived::InnerSizeAtCompileTime),
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
alignment = Derived::Alignment
};
typedef typename Derived::Scalar Scalar;
@@ -173,7 +173,7 @@ struct redux_vec_unroller<Func, Derived, Start, 1>
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
{
return mat.template packetByOuterInner<alignment>(outer, inner);
return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
}
};
@@ -222,11 +222,12 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
const Index size = mat.size();
const Index packetSize = packet_traits<Scalar>::size;
const Index alignedStart = internal::first_aligned(mat);
const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
enum {
alignment = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) || bool(Derived::Flags & AlignedBit)
? Aligned : Unaligned
alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
};
const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
const Index alignedEnd2 = alignedStart + alignedSize2;
@@ -234,19 +235,19 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
Scalar res;
if(alignedSize)
{
PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
{
PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
{
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
}
packet_res0 = func.packetOp(packet_res0,packet_res1);
if(alignedEnd>alignedEnd2)
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
}
res = func.predux(packet_res0);
@@ -272,7 +273,7 @@ template<typename Func, typename Derived>
struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename packet_traits<Scalar>::type PacketScalar;
typedef typename packet_traits<Scalar>::type PacketType;
EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
{
@@ -286,10 +287,10 @@ struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
Scalar res;
if(packetedInnerSize)
{
PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
for(Index j=0; j<outerSize; ++j)
for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i));
packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));
res = func.predux(packet_res);
for(Index j=0; j<outerSize; ++j)
@@ -352,7 +353,8 @@ public:
IsRowMajor = XprType::IsRowMajor,
SizeAtCompileTime = XprType::SizeAtCompileTime,
InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
CoeffReadCost = evaluator<XprType>::CoeffReadCost
CoeffReadCost = evaluator<XprType>::CoeffReadCost,
Alignment = evaluator<XprType>::Alignment
};
EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
@@ -369,24 +371,26 @@ public:
CoeffReturnType coeff(Index index) const
{ return m_evaluator.coeff(index); }
template<int LoadMode>
template<int LoadMode, typename PacketType>
PacketReturnType packet(Index row, Index col) const
{ return m_evaluator.template packet<LoadMode>(row, col); }
{ return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
template<int LoadMode>
template<int LoadMode, typename PacketType>
PacketReturnType packet(Index index) const
{ return m_evaluator.template packet<LoadMode>(index); }
{ return m_evaluator.template packet<LoadMode,PacketType>(index); }
EIGEN_DEVICE_FUNC
CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
{ return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
template<int LoadMode>
template<int LoadMode, typename PacketType>
PacketReturnType packetByOuterInner(Index outer, Index inner) const
{ return m_evaluator.template packet<LoadMode>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
{ return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
const XprType & nestedExpression() const { return m_xpr; }
protected:
typename internal::evaluator<XprType>::nestedType m_evaluator;
internal::evaluator<XprType> m_evaluator;
const XprType &m_xpr;
};
@@ -406,7 +410,7 @@ protected:
*/
template<typename Derived>
template<typename Func>
EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
typename internal::traits<Derived>::Scalar
DenseBase<Derived>::redux(const Func& func) const
{
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");

View File

@@ -18,7 +18,7 @@ namespace Eigen {
* \brief A matrix or vector expression mapping an existing expression
*
* \tparam PlainObjectType the equivalent matrix type of the mapped data
* \tparam Options specifies whether the pointer is \c #Aligned, or \c #Unaligned.
* \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
* The default is \c #Unaligned.
* \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
* but accepts a variable outer stride (leading dimension).
@@ -48,8 +48,9 @@ namespace Eigen {
* VectorXf a;
* foo1(a.head()); // OK
* foo1(A.col()); // OK
* foo1(A.row()); // compilation error because here innerstride!=1
* foo2(A.row()); // The row is copied into a contiguous temporary
* foo1(A.row()); // Compilation error because here innerstride!=1
* foo2(A.row()); // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object
* foo2(A.row().transpose()); // The row is copied into a contiguous temporary
* foo2(2*a); // The expression is evaluated into a temporary
* foo2(A.col().segment(2,4)); // No temporary
* \endcode
@@ -91,7 +92,8 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
typedef _StrideType StrideType;
enum {
Options = _Options,
Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit
Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit,
Alignment = traits<Map<_PlainObjectType, _Options, _StrideType> >::Alignment
};
template<typename Derived> struct match {
@@ -103,8 +105,9 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
|| (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1),
OuterStrideMatch = Derived::IsVectorAtCompileTime
|| int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits<Derived>::Flags&AlignedBit)==AlignedBit),
MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch
AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (int(evaluator<Derived>::Alignment) >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment
ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
};
typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;
};
@@ -183,9 +186,11 @@ protected:
template<typename PlainObjectType, int Options, typename StrideType> class Ref
: public RefBase<Ref<PlainObjectType, Options, StrideType> >
{
private:
typedef internal::traits<Ref> Traits;
template<typename Derived>
EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr);
EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr,
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0);
public:
typedef RefBase<Ref> Base;
@@ -194,13 +199,15 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename Derived>
EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr)
EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr,
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
{
EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
Base::construct(expr.derived());
}
template<typename Derived>
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr)
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
#else
template<typename Derived>
inline Ref(DenseBase<Derived>& expr)
@@ -227,7 +234,8 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<
EIGEN_DENSE_PUBLIC_INTERFACE(Ref)
template<typename Derived>
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr)
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
typename internal::enable_if<bool(Traits::template match<Derived>::ScalarTypeMatch),Derived>::type* = 0)
{
// std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n";
// std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n";

View File

@@ -35,10 +35,7 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
typedef typename MatrixType::Scalar Scalar;
typedef typename traits<MatrixType>::StorageKind StorageKind;
typedef typename traits<MatrixType>::XprKind XprKind;
enum {
Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor
};
typedef typename nested<MatrixType,Factor>::type MatrixTypeNested;
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
enum {
RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic
@@ -72,8 +69,9 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
template<typename OriginalMatrixType>
inline explicit Replicate(const OriginalMatrixType& a_matrix)
: m_matrix(a_matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
EIGEN_DEVICE_FUNC
inline explicit Replicate(const OriginalMatrixType& matrix)
: m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
{
EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
@@ -81,41 +79,20 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
}
template<typename OriginalMatrixType>
inline Replicate(const OriginalMatrixType& a_matrix, Index rowFactor, Index colFactor)
: m_matrix(a_matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
EIGEN_DEVICE_FUNC
inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor)
: m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
{
EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
}
EIGEN_DEVICE_FUNC
inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); }
EIGEN_DEVICE_FUNC
inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }
inline Scalar coeff(Index rowId, Index colId) const
{
// try to avoid using modulo; this is a pure optimization strategy
const Index actual_row = internal::traits<MatrixType>::RowsAtCompileTime==1 ? 0
: RowFactor==1 ? rowId
: rowId%m_matrix.rows();
const Index actual_col = internal::traits<MatrixType>::ColsAtCompileTime==1 ? 0
: ColFactor==1 ? colId
: colId%m_matrix.cols();
return m_matrix.coeff(actual_row, actual_col);
}
template<int LoadMode>
inline PacketScalar packet(Index rowId, Index colId) const
{
const Index actual_row = internal::traits<MatrixType>::RowsAtCompileTime==1 ? 0
: RowFactor==1 ? rowId
: rowId%m_matrix.rows();
const Index actual_col = internal::traits<MatrixType>::ColsAtCompileTime==1 ? 0
: ColFactor==1 ? colId
: colId%m_matrix.cols();
return m_matrix.template packet<LoadMode>(actual_row, actual_col);
}
EIGEN_DEVICE_FUNC
const _MatrixTypeNested& nestedExpression() const
{
return m_matrix;
@@ -137,27 +114,12 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
*/
template<typename Derived>
template<int RowFactor, int ColFactor>
inline const Replicate<Derived,RowFactor,ColFactor>
const Replicate<Derived,RowFactor,ColFactor>
DenseBase<Derived>::replicate() const
{
return Replicate<Derived,RowFactor,ColFactor>(derived());
}
/**
* \return an expression of the replication of \c *this
*
* Example: \include MatrixBase_replicate_int_int.cpp
* Output: \verbinclude MatrixBase_replicate_int_int.out
*
* \sa VectorwiseOp::replicate(), DenseBase::replicate<int,int>(), class Replicate
*/
template<typename Derived>
inline const Replicate<Derived,Dynamic,Dynamic>
DenseBase<Derived>::replicate(Index rowFactor,Index colFactor) const
{
return Replicate<Derived,Dynamic,Dynamic>(derived(),rowFactor,colFactor);
}
/**
* \return an expression of the replication of each column (or row) of \c *this
*

View File

@@ -94,15 +94,12 @@ namespace internal {
template<typename Derived>
struct evaluator<ReturnByValue<Derived> >
: public evaluator<typename internal::traits<Derived>::ReturnType>::type
: public evaluator<typename internal::traits<Derived>::ReturnType>
{
typedef ReturnByValue<Derived> XprType;
typedef typename internal::traits<Derived>::ReturnType PlainObject;
typedef typename evaluator<PlainObject>::type Base;
typedef evaluator<PlainObject> Base;
typedef evaluator type;
typedef evaluator nestedType;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
: m_result(xpr.rows(), xpr.cols())
{

View File

@@ -37,7 +37,7 @@ struct traits<Reverse<MatrixType, Direction> >
typedef typename MatrixType::Scalar Scalar;
typedef typename traits<MatrixType>::StorageKind StorageKind;
typedef typename traits<MatrixType>::XprKind XprKind;
typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
enum {
RowsAtCompileTime = MatrixType::RowsAtCompileTime,
@@ -48,14 +48,14 @@ struct traits<Reverse<MatrixType, Direction> >
};
};
template<typename PacketScalar, bool ReversePacket> struct reverse_packet_cond
template<typename PacketType, bool ReversePacket> struct reverse_packet_cond
{
static inline PacketScalar run(const PacketScalar& x) { return preverse(x); }
static inline PacketType run(const PacketType& x) { return preverse(x); }
};
template<typename PacketScalar> struct reverse_packet_cond<PacketScalar,false>
template<typename PacketType> struct reverse_packet_cond<PacketType,false>
{
static inline PacketScalar run(const PacketScalar& x) { return x; }
static inline PacketType run(const PacketType& x) { return x; }
};
} // end namespace internal
@@ -70,10 +70,6 @@ template<typename MatrixType, int Direction> class Reverse
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
using Base::IsRowMajor;
// next line is necessary because otherwise const version of operator()
// is hidden by non-const version defined in this file
using Base::operator();
protected:
enum {
PacketSize = internal::packet_traits<Scalar>::size,
@@ -101,69 +97,6 @@ template<typename MatrixType, int Direction> class Reverse
return -m_matrix.innerStride();
}
EIGEN_DEVICE_FUNC inline Scalar& operator()(Index row, Index col)
{
eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
return coeffRef(row, col);
}
EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
{
return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row,
ReverseCol ? m_matrix.cols() - col - 1 : col);
}
EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index col) const
{
return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row,
ReverseCol ? m_matrix.cols() - col - 1 : col);
}
EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const
{
return m_matrix.coeff(m_matrix.size() - index - 1);
}
EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
{
return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1);
}
EIGEN_DEVICE_FUNC inline Scalar& operator()(Index index)
{
eigen_assert(index >= 0 && index < m_matrix.size());
return coeffRef(index);
}
template<int LoadMode>
inline const PacketScalar packet(Index row, Index col) const
{
return reverse_packet::run(m_matrix.template packet<LoadMode>(
ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
ReverseCol ? m_matrix.cols() - col - OffsetCol : col));
}
template<int LoadMode>
inline void writePacket(Index row, Index col, const PacketScalar& x)
{
m_matrix.const_cast_derived().template writePacket<LoadMode>(
ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
ReverseCol ? m_matrix.cols() - col - OffsetCol : col,
reverse_packet::run(x));
}
template<int LoadMode>
inline const PacketScalar packet(Index index) const
{
return internal::preverse(m_matrix.template packet<LoadMode>( m_matrix.size() - index - PacketSize ));
}
template<int LoadMode>
inline void writePacket(Index index, const PacketScalar& x)
{
m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x));
}
EIGEN_DEVICE_FUNC const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const
{
@@ -187,30 +120,90 @@ DenseBase<Derived>::reverse()
return ReverseReturnType(derived());
}
/** This is the const version of reverse(). */
template<typename Derived>
inline const typename DenseBase<Derived>::ConstReverseReturnType
DenseBase<Derived>::reverse() const
{
return ConstReverseReturnType(derived());
}
//reverse const overload moved DenseBase.h due to a CUDA compiler bug
/** This is the "in place" version of reverse: it reverses \c *this.
*
* In most cases it is probably better to simply use the reversed expression
* of a matrix. However, when reversing the matrix data itself is really needed,
* then this "in-place" version is probably the right choice because it provides
* the following additional features:
* the following additional benefits:
* - less error prone: doing the same operation with .reverse() requires special care:
* \code m = m.reverse().eval(); \endcode
* - this API allows to avoid creating a temporary (the current implementation creates a temporary, but that could be avoided using swap)
* - this API enables reverse operations without the need for a temporary
* - it allows future optimizations (cache friendliness, etc.)
*
* \sa reverse() */
* \sa VectorwiseOp::reverseInPlace(), reverse() */
template<typename Derived>
inline void DenseBase<Derived>::reverseInPlace()
{
derived() = derived().reverse().eval();
if(cols()>rows())
{
Index half = cols()/2;
leftCols(half).swap(rightCols(half).reverse());
if((cols()%2)==1)
{
Index half2 = rows()/2;
col(half).head(half2).swap(col(half).tail(half2).reverse());
}
}
else
{
Index half = rows()/2;
topRows(half).swap(bottomRows(half).reverse());
if((rows()%2)==1)
{
Index half2 = cols()/2;
row(half).head(half2).swap(row(half).tail(half2).reverse());
}
}
}
namespace internal {
template<int Direction>
struct vectorwise_reverse_inplace_impl;
template<>
struct vectorwise_reverse_inplace_impl<Vertical>
{
template<typename ExpressionType>
static void run(ExpressionType &xpr)
{
Index half = xpr.rows()/2;
xpr.topRows(half).swap(xpr.bottomRows(half).colwise().reverse());
}
};
template<>
struct vectorwise_reverse_inplace_impl<Horizontal>
{
template<typename ExpressionType>
static void run(ExpressionType &xpr)
{
Index half = xpr.cols()/2;
xpr.leftCols(half).swap(xpr.rightCols(half).rowwise().reverse());
}
};
} // end namespace internal
/** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this.
*
* In most cases it is probably better to simply use the reversed expression
* of a matrix. However, when reversing the matrix data itself is really needed,
* then this "in-place" version is probably the right choice because it provides
* the following additional benefits:
* - less error prone: doing the same operation with .reverse() requires special care:
* \code m = m.reverse().eval(); \endcode
* - this API enables reverse operations without the need for a temporary
*
* \sa DenseBase::reverseInPlace(), reverse() */
template<typename ExpressionType, int Direction>
void VectorwiseOp<ExpressionType,Direction>::reverseInPlace()
{
internal::vectorwise_reverse_inplace_impl<Direction>::run(_expression().const_cast_derived());
}
} // end namespace Eigen

View File

@@ -32,7 +32,7 @@ namespace internal {
template<typename MatrixType, unsigned int UpLo>
struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
{
typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
typedef MatrixType ExpressionType;
typedef typename MatrixType::PlainObject FullMatrixType;

View File

@@ -113,15 +113,12 @@ namespace internal {
// Evaluator of Solve -> eval into a temporary
template<typename Decomposition, typename RhsType>
struct evaluator<Solve<Decomposition,RhsType> >
: public evaluator<typename Solve<Decomposition,RhsType>::PlainObject>::type
: public evaluator<typename Solve<Decomposition,RhsType>::PlainObject>
{
typedef Solve<Decomposition,RhsType> SolveType;
typedef typename SolveType::PlainObject PlainObject;
typedef typename evaluator<PlainObject>::type Base;
typedef evaluator<PlainObject> Base;
typedef evaluator type;
typedef evaluator nestedType;
EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve)
: m_result(solve.rows(), solve.cols())
{

View File

@@ -198,8 +198,8 @@ void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<Ot
* diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this
* is an upper (resp. lower) triangular matrix.
*
* Example: \include MatrixBase_marked.cpp
* Output: \verbinclude MatrixBase_marked.out
* Example: \include Triangular_solve.cpp
* Output: \verbinclude Triangular_solve.out
*
* This function returns an expression of the inverse-multiply and can works in-place if it is assigned
* to the same matrix or vector \a other.

View File

@@ -157,19 +157,32 @@ inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::stableNorm() const
{
using std::sqrt;
using std::abs;
const Index blockSize = 4096;
RealScalar scale(0);
RealScalar invScale(1);
RealScalar ssq(0); // sum of square
typedef typename internal::nested_eval<Derived,2>::type DerivedCopy;
typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean;
DerivedCopy copy(derived());
enum {
Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0
CanAlign = (int(Flags)&DirectAccessBit) || (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME
};
typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
typename DerivedCopyClean
::ConstSegmentReturnType>::type SegmentWrapper;
Index n = size();
Index bi = internal::first_aligned(derived());
if(n==1)
return abs(this->coeff(0));
Index bi = internal::first_default_aligned(copy);
if (bi>0)
internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale);
internal::stable_norm_kernel(copy.head(bi), ssq, scale, invScale);
for (; bi<n; bi+=blockSize)
internal::stable_norm_kernel(this->segment(bi,numext::mini(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
internal::stable_norm_kernel(SegmentWrapper(copy.segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale);
return scale * sqrt(ssq);
}

View File

@@ -21,7 +21,6 @@ class generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap
{
protected:
typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn> Base;
typedef typename DstEvaluatorTypeT::PacketScalar PacketScalar;
using Base::m_dst;
using Base::m_src;
using Base::m_functor;
@@ -35,25 +34,29 @@ public:
: Base(dst, src, func, dstExpr)
{}
template<int StoreMode, int LoadMode>
template<int StoreMode, int LoadMode, typename PacketType>
void assignPacket(Index row, Index col)
{
m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(row,col), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(row,col));
PacketType tmp = m_src.template packet<LoadMode,PacketType>(row,col);
const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(row,col, m_dst.template packet<StoreMode,PacketType>(row,col));
m_dst.template writePacket<StoreMode>(row,col,tmp);
}
template<int StoreMode, int LoadMode>
template<int StoreMode, int LoadMode, typename PacketType>
void assignPacket(Index index)
{
m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(index), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(index));
PacketType tmp = m_src.template packet<LoadMode,PacketType>(index);
const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(index, m_dst.template packet<StoreMode,PacketType>(index));
m_dst.template writePacket<StoreMode>(index,tmp);
}
// TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
template<int StoreMode, int LoadMode>
template<int StoreMode, int LoadMode, typename PacketType>
void assignPacketByOuterInner(Index outer, Index inner)
{
Index row = Base::rowIndexByOuterInner(outer, inner);
Index col = Base::colIndexByOuterInner(outer, inner);
assignPacket<StoreMode,LoadMode>(row, col);
assignPacket<StoreMode,LoadMode,PacketType>(row, col);
}
};

View File

@@ -31,7 +31,7 @@ namespace internal {
template<typename MatrixType>
struct traits<Transpose<MatrixType> > : public traits<MatrixType>
{
typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain;
enum {
RowsAtCompileTime = MatrixType::ColsAtCompileTime,
@@ -60,7 +60,7 @@ template<typename MatrixType> class Transpose
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
EIGEN_DEVICE_FUNC
explicit inline Transpose(MatrixType& a_matrix) : m_matrix(a_matrix) {}
explicit inline Transpose(MatrixType& matrix) : m_matrix(matrix) {}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
@@ -233,7 +233,7 @@ struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x Packet
typedef typename MatrixType::Scalar Scalar;
typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;
const Index PacketSize = internal::packet_traits<Scalar>::size;
const Index Alignment = internal::evaluator<MatrixType>::Flags&AlignedBit ? Aligned : Unaligned;
const Index Alignment = internal::evaluator<MatrixType>::Alignment;
PacketBlock<Packet> A;
for (Index i=0; i<PacketSize; ++i)
A.packet[i] = m.template packetByOuterInner<Alignment>(i,0);
@@ -317,14 +317,6 @@ inline void MatrixBase<Derived>::adjointInPlace()
namespace internal {
template<typename BinOp,typename NestedXpr,typename Rhs>
struct blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> >
: blas_traits<NestedXpr>
{
typedef SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> XprType;
static inline const XprType extract(const XprType& x) { return x; }
};
template<bool DestIsTransposed, typename OtherDerived>
struct check_transpose_aliasing_compile_time_selector
{

View File

@@ -41,10 +41,6 @@ namespace Eigen {
* \sa class PermutationMatrix
*/
namespace internal {
template<typename TranspositionType, typename MatrixType, int Side, bool Transposed=false> struct transposition_matrix_product_retval;
}
template<typename Derived>
class TranspositionsBase
{
@@ -66,7 +62,7 @@ class TranspositionsBase
indices() = other.indices();
return derived();
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
@@ -79,7 +75,11 @@ class TranspositionsBase
#endif
/** \returns the number of transpositions */
inline Index size() const { return indices().size(); }
Index size() const { return indices().size(); }
/** \returns the number of rows of the equivalent permutation matrix */
Index rows() const { return indices().size(); }
/** \returns the number of columns of the equivalent permutation matrix */
Index cols() const { return indices().size(); }
/** Direct access to the underlying index vector */
inline const StorageIndex& coeff(Index i) const { return indices().coeff(i); }
@@ -147,9 +147,10 @@ class TranspositionsBase
namespace internal {
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
: traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
{
typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
typedef _StorageIndex StorageIndex;
typedef TranspositionsStorage StorageKind;
};
}
@@ -178,7 +179,7 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim
/** Generic constructor from expression of the transposition indices. */
template<typename Other>
explicit inline Transpositions(const MatrixBase<Other>& a_indices) : m_indices(a_indices)
explicit inline Transpositions(const MatrixBase<Other>& indices) : m_indices(indices)
{}
/** Copies the \a other transpositions into \c *this */
@@ -218,9 +219,11 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim
namespace internal {
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>
struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,_PacketAccess> >
: traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
{
typedef Map<const Matrix<_StorageIndex,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;
typedef _StorageIndex StorageIndex;
typedef TranspositionsStorage StorageKind;
};
}
@@ -275,9 +278,9 @@ class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,P
namespace internal {
template<typename _IndicesType>
struct traits<TranspositionsWrapper<_IndicesType> >
: traits<PermutationWrapper<_IndicesType> >
{
typedef typename _IndicesType::Scalar StorageIndex;
typedef _IndicesType IndicesType;
typedef TranspositionsStorage StorageKind;
};
}
@@ -292,8 +295,8 @@ class TranspositionsWrapper
typedef typename Traits::IndicesType IndicesType;
typedef typename IndicesType::Scalar StorageIndex;
explicit inline TranspositionsWrapper(IndicesType& a_indices)
: m_indices(a_indices)
explicit inline TranspositionsWrapper(IndicesType& indices)
: m_indices(indices)
{}
/** Copies the \a other transpositions into \c *this */
@@ -325,80 +328,43 @@ class TranspositionsWrapper
const typename IndicesType::Nested m_indices;
};
/** \returns the \a matrix with the \a transpositions applied to the columns.
*/
template<typename Derived, typename TranspositionsDerived>
inline const internal::transposition_matrix_product_retval<TranspositionsDerived, Derived, OnTheRight>
operator*(const MatrixBase<Derived>& matrix,
const TranspositionsBase<TranspositionsDerived> &transpositions)
template<typename MatrixDerived, typename TranspositionsDerived>
EIGEN_DEVICE_FUNC
const Product<MatrixDerived, TranspositionsDerived, AliasFreeProduct>
operator*(const MatrixBase<MatrixDerived> &matrix,
const TranspositionsBase<TranspositionsDerived>& transpositions)
{
return internal::transposition_matrix_product_retval
<TranspositionsDerived, Derived, OnTheRight>
(transpositions.derived(), matrix.derived());
return Product<MatrixDerived, TranspositionsDerived, AliasFreeProduct>
(matrix.derived(), transpositions.derived());
}
/** \returns the \a matrix with the \a transpositions applied to the rows.
*/
template<typename Derived, typename TranspositionDerived>
inline const internal::transposition_matrix_product_retval
<TranspositionDerived, Derived, OnTheLeft>
operator*(const TranspositionsBase<TranspositionDerived> &transpositions,
const MatrixBase<Derived>& matrix)
template<typename TranspositionsDerived, typename MatrixDerived>
EIGEN_DEVICE_FUNC
const Product<TranspositionsDerived, MatrixDerived, AliasFreeProduct>
operator*(const TranspositionsBase<TranspositionsDerived> &transpositions,
const MatrixBase<MatrixDerived>& matrix)
{
return internal::transposition_matrix_product_retval
<TranspositionDerived, Derived, OnTheLeft>
(transpositions.derived(), matrix.derived());
return Product<TranspositionsDerived, MatrixDerived, AliasFreeProduct>
(transpositions.derived(), matrix.derived());
}
// Template partial specialization for transposed/inverse transpositions
namespace internal {
template<typename TranspositionType, typename MatrixType, int Side, bool Transposed>
struct traits<transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> >
{
typedef typename MatrixType::PlainObject ReturnType;
};
template<typename TranspositionType, typename MatrixType, int Side, bool Transposed>
struct transposition_matrix_product_retval
: public ReturnByValue<transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> >
{
typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
typedef typename TranspositionType::StorageIndex StorageIndex;
transposition_matrix_product_retval(const TranspositionType& tr, const MatrixType& matrix)
: m_transpositions(tr), m_matrix(matrix)
{}
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
template<typename Dest> inline void evalTo(Dest& dst) const
{
const Index size = m_transpositions.size();
StorageIndex j = 0;
if(!(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix)))
dst = m_matrix;
for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
if(Index(j=m_transpositions.coeff(k))!=k)
{
if(Side==OnTheLeft)
dst.row(k).swap(dst.row(j));
else if(Side==OnTheRight)
dst.col(k).swap(dst.col(j));
}
}
protected:
const TranspositionType& m_transpositions;
typename MatrixType::Nested m_matrix;
};
template<typename Derived>
struct traits<Transpose<TranspositionsBase<Derived> > >
: traits<Derived>
{};
} // end namespace internal
/* Template partial specialization for transposed/inverse transpositions */
template<typename TranspositionsDerived>
class Transpose<TranspositionsBase<TranspositionsDerived> >
{
@@ -408,25 +374,29 @@ class Transpose<TranspositionsBase<TranspositionsDerived> >
explicit Transpose(const TranspositionType& t) : m_transpositions(t) {}
inline int size() const { return m_transpositions.size(); }
Index size() const { return m_transpositions.size(); }
Index rows() const { return m_transpositions.size(); }
Index cols() const { return m_transpositions.size(); }
/** \returns the \a matrix with the inverse transpositions applied to the columns.
*/
template<typename Derived> friend
inline const internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true>
operator*(const MatrixBase<Derived>& matrix, const Transpose& trt)
template<typename OtherDerived> friend
const Product<OtherDerived, Transpose, AliasFreeProduct>
operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trt)
{
return internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true>(trt.m_transpositions, matrix.derived());
return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt.derived());
}
/** \returns the \a matrix with the inverse transpositions applied to the rows.
*/
template<typename Derived>
inline const internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true>
operator*(const MatrixBase<Derived>& matrix) const
template<typename OtherDerived>
const Product<Transpose, OtherDerived, AliasFreeProduct>
operator*(const MatrixBase<OtherDerived>& matrix) const
{
return internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true>(m_transpositions, matrix.derived());
return Product<Transpose, OtherDerived, AliasFreeProduct>(*this, matrix.derived());
}
const TranspositionType& nestedExpression() const { return m_transpositions; }
protected:
const TranspositionType& m_transpositions;

View File

@@ -19,9 +19,7 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv
}
/** \internal
*
* \class TriangularBase
/** \class TriangularBase
* \ingroup Core_Module
*
* \brief Base class for triangular part in a matrix
@@ -38,10 +36,14 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
internal::traits<Derived>::ColsAtCompileTime>::ret)
/**< This is equal to the number of coefficients, i.e. the number of
internal::traits<Derived>::ColsAtCompileTime>::ret),
/**< This is equal to the number of coefficients, i.e. the number of
* rows times the number of columns, or to \a Dynamic if this is not
* known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
internal::traits<Derived>::MaxColsAtCompileTime>::ret)
};
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::traits<Derived>::StorageKind StorageKind;
@@ -63,11 +65,11 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
inline Index innerStride() const { return derived().innerStride(); }
// dummy resize function
void resize(Index nbRows, Index nbCols)
void resize(Index rows, Index cols)
{
EIGEN_UNUSED_VARIABLE(nbRows);
EIGEN_UNUSED_VARIABLE(nbCols);
eigen_assert(nbRows==rows() && nbCols==nbCols);
EIGEN_UNUSED_VARIABLE(rows);
EIGEN_UNUSED_VARIABLE(cols);
eigen_assert(rows==this->rows() && cols==this->cols());
}
EIGEN_DEVICE_FUNC
@@ -148,17 +150,17 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
/** \class TriangularView
* \ingroup Core_Module
*
* \brief Base class for triangular part in a matrix
* \brief Expression of a triangular part in a matrix
*
* \param MatrixType the type of the object in which we are taking the triangular part
* \param Mode the kind of triangular matrix expression to construct. Can be #Upper,
* #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower.
* This is in fact a bit field; it must have either #Upper or #Lower,
* and additionnaly it may have #UnitDiag or #ZeroDiag or neither.
* and additionally it may have #UnitDiag or #ZeroDiag or neither.
*
* This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular
* matrices one should speak of "trapezoid" parts. This class is the return type
* of MatrixBase::triangularView() and most of the time this is the only way it is used.
* of MatrixBase::triangularView() and SparseMatrixBase::triangularView(), and most of the time this is the only way it is used.
*
* \sa MatrixBase::triangularView()
*/
@@ -166,7 +168,7 @@ namespace internal {
template<typename MatrixType, unsigned int _Mode>
struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType>
{
typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
typedef typename MatrixType::PlainObject FullMatrixType;
@@ -306,6 +308,15 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
MatrixTypeNested m_matrix;
};
/** \ingroup Core_Module
*
* \brief Base class for a triangular part in a \b dense matrix
*
* This class is an abstract base class of class TriangularView, and objects of type TriangularViewImpl cannot be instantiated.
* It extends class TriangularView with additional methods which available for dense expressions only.
*
* \sa class TriangularView, MatrixBase::triangularView()
*/
template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_MatrixType,_Mode,Dense>
: public TriangularBase<TriangularView<_MatrixType, _Mode> >
{
@@ -549,8 +560,8 @@ void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
* The parameter \a Mode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
* \c #Lower, \c #StrictlyLower, \c #UnitLower.
*
* Example: \include MatrixBase_extract.cpp
* Output: \verbinclude MatrixBase_extract.out
* Example: \include MatrixBase_triangularView.cpp
* Output: \verbinclude MatrixBase_triangularView.out
*
* \sa class TriangularView
*/
@@ -653,7 +664,6 @@ struct unary_evaluator<TriangularView<MatrixType,Mode>, IndexBased>
{
typedef TriangularView<MatrixType,Mode> XprType;
typedef evaluator<typename internal::remove_all<MatrixType>::type> Base;
typedef evaluator<XprType> type;
unary_evaluator(const XprType &xpr) : Base(xpr.nestedExpression()) {}
};
@@ -723,8 +733,8 @@ EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, co
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
typedef evaluator<DstXprType> DstEvaluatorType;
typedef evaluator<SrcXprType> SrcEvaluatorType;
DstEvaluatorType dstEvaluator(dst);
SrcEvaluatorType srcEvaluator(src);

View File

@@ -41,7 +41,7 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
typedef typename traits<MatrixType>::StorageKind StorageKind;
typedef typename traits<MatrixType>::XprKind XprKind;
typedef typename MatrixType::Scalar InputScalar;
typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
enum {
RowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::RowsAtCompileTime,
@@ -65,13 +65,16 @@ class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<Matri
typedef typename internal::traits<PartialReduxExpr>::MatrixTypeNested MatrixTypeNested;
typedef typename internal::traits<PartialReduxExpr>::_MatrixTypeNested _MatrixTypeNested;
EIGEN_DEVICE_FUNC
explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())
: m_matrix(mat), m_functor(func) {}
EIGEN_DEVICE_FUNC
Index rows() const { return (Direction==Vertical ? 1 : m_matrix.rows()); }
EIGEN_DEVICE_FUNC
Index cols() const { return (Direction==Horizontal ? 1 : m_matrix.cols()); }
EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const
{
if (Direction==Vertical)
return m_functor(m_matrix.col(j));
@@ -79,7 +82,7 @@ class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<Matri
return m_functor(m_matrix.row(i));
}
const Scalar coeff(Index index) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
{
if (Direction==Vertical)
return m_functor(m_matrix.col(index));
@@ -100,7 +103,8 @@ class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<Matri
template<typename Scalar, int Size> struct Cost \
{ enum { value = COST }; }; \
template<typename XprType> \
EIGEN_STRONG_INLINE ResultType operator()(const XprType& mat) const \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
ResultType operator()(const XprType& mat) const \
{ return mat.MEMBER(); } \
}
@@ -124,13 +128,13 @@ EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost);
template <typename BinaryOp, typename Scalar>
struct member_redux {
typedef typename result_of<
BinaryOp(Scalar)
BinaryOp(Scalar,Scalar)
>::type result_type;
template<typename _Scalar, int Size> struct Cost
{ enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };
explicit member_redux(const BinaryOp func) : m_functor(func) {}
EIGEN_DEVICE_FUNC explicit member_redux(const BinaryOp func) : m_functor(func) {}
template<typename Derived>
inline result_type operator()(const DenseBase<Derived>& mat) const
EIGEN_DEVICE_FUNC inline result_type operator()(const DenseBase<Derived>& mat) const
{ return mat.redux(m_functor); }
const BinaryOp m_functor;
};
@@ -160,8 +164,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
typedef typename ExpressionType::Scalar Scalar;
typedef typename ExpressionType::RealScalar RealScalar;
typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
typedef typename internal::conditional<internal::must_nest_by_value<ExpressionType>::ret,
ExpressionType, ExpressionType&>::type ExpressionTypeNested;
typedef typename internal::ref_selector<ExpressionType>::non_const_type ExpressionTypeNested;
typedef typename internal::remove_all<ExpressionTypeNested>::type ExpressionTypeNestedCleaned;
template<template<typename _Scalar> class Functor,
@@ -182,17 +185,18 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
};
enum {
IsVertical = (Direction==Vertical) ? 1 : 0,
IsHorizontal = (Direction==Horizontal) ? 1 : 0
isVertical = (Direction==Vertical) ? 1 : 0,
isHorizontal = (Direction==Horizontal) ? 1 : 0
};
protected:
/** \internal
* \returns the i-th subvector according to the \c Direction */
typedef typename internal::conditional<Direction==Vertical,
typedef typename internal::conditional<isVertical,
typename ExpressionType::ColXpr,
typename ExpressionType::RowXpr>::type SubVector;
EIGEN_DEVICE_FUNC
SubVector subVector(Index i)
{
return SubVector(m_matrix.derived(),i);
@@ -200,58 +204,62 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \internal
* \returns the number of subvectors in the direction \c Direction */
EIGEN_DEVICE_FUNC
Index subVectors() const
{ return Direction==Vertical?m_matrix.cols():m_matrix.rows(); }
{ return isVertical?m_matrix.cols():m_matrix.rows(); }
template<typename OtherDerived> struct ExtendedType {
typedef Replicate<OtherDerived,
Direction==Vertical ? 1 : ExpressionType::RowsAtCompileTime,
Direction==Horizontal ? 1 : ExpressionType::ColsAtCompileTime> Type;
isVertical ? 1 : ExpressionType::RowsAtCompileTime,
isHorizontal ? 1 : ExpressionType::ColsAtCompileTime> Type;
};
/** \internal
* Replicates a vector to match the size of \c *this */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
typename ExtendedType<OtherDerived>::Type
extendedTo(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxColsAtCompileTime==1),
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxColsAtCompileTime==1),
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxRowsAtCompileTime==1),
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxRowsAtCompileTime==1),
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
return typename ExtendedType<OtherDerived>::Type
(other.derived(),
Direction==Vertical ? 1 : m_matrix.rows(),
Direction==Horizontal ? 1 : m_matrix.cols());
isVertical ? 1 : m_matrix.rows(),
isHorizontal ? 1 : m_matrix.cols());
}
template<typename OtherDerived> struct OppositeExtendedType {
typedef Replicate<OtherDerived,
Direction==Horizontal ? 1 : ExpressionType::RowsAtCompileTime,
Direction==Vertical ? 1 : ExpressionType::ColsAtCompileTime> Type;
isHorizontal ? 1 : ExpressionType::RowsAtCompileTime,
isVertical ? 1 : ExpressionType::ColsAtCompileTime> Type;
};
/** \internal
* Replicates a vector in the opposite direction to match the size of \c *this */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
typename OppositeExtendedType<OtherDerived>::Type
extendedToOpposite(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxColsAtCompileTime==1),
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxColsAtCompileTime==1),
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxRowsAtCompileTime==1),
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxRowsAtCompileTime==1),
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
return typename OppositeExtendedType<OtherDerived>::Type
(other.derived(),
Direction==Horizontal ? 1 : m_matrix.rows(),
Direction==Vertical ? 1 : m_matrix.cols());
isHorizontal ? 1 : m_matrix.rows(),
isVertical ? 1 : m_matrix.cols());
}
public:
EIGEN_DEVICE_FUNC
explicit inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {}
/** \internal */
EIGEN_DEVICE_FUNC
inline const ExpressionType& _expression() const { return m_matrix; }
/** \returns a row or column vector expression of \c *this reduxed by \a func
@@ -262,6 +270,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* \sa class VectorwiseOp, DenseBase::colwise(), DenseBase::rowwise()
*/
template<typename BinaryOp>
EIGEN_DEVICE_FUNC
const typename ReduxReturnType<BinaryOp>::Type
redux(const BinaryOp& func = BinaryOp()) const
{ return typename ReduxReturnType<BinaryOp>::Type(_expression(), internal::member_redux<BinaryOp,Scalar>(func)); }
@@ -290,6 +299,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_minCoeff.out
*
* \sa DenseBase::minCoeff() */
EIGEN_DEVICE_FUNC
const MinCoeffReturnType minCoeff() const
{ return MinCoeffReturnType(_expression()); }
@@ -302,6 +312,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_maxCoeff.out
*
* \sa DenseBase::maxCoeff() */
EIGEN_DEVICE_FUNC
const MaxCoeffReturnType maxCoeff() const
{ return MaxCoeffReturnType(_expression()); }
@@ -313,6 +324,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_squaredNorm.out
*
* \sa DenseBase::squaredNorm() */
EIGEN_DEVICE_FUNC
const SquaredNormReturnType squaredNorm() const
{ return SquaredNormReturnType(_expression()); }
@@ -324,6 +336,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_norm.out
*
* \sa DenseBase::norm() */
EIGEN_DEVICE_FUNC
const NormReturnType norm() const
{ return NormReturnType(_expression()); }
@@ -334,6 +347,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* This is a vector with real entries, even if the original matrix has complex entries.
*
* \sa DenseBase::blueNorm() */
EIGEN_DEVICE_FUNC
const BlueNormReturnType blueNorm() const
{ return BlueNormReturnType(_expression()); }
@@ -344,6 +358,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* This is a vector with real entries, even if the original matrix has complex entries.
*
* \sa DenseBase::stableNorm() */
EIGEN_DEVICE_FUNC
const StableNormReturnType stableNorm() const
{ return StableNormReturnType(_expression()); }
@@ -354,6 +369,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* This is a vector with real entries, even if the original matrix has complex entries.
*
* \sa DenseBase::hypotNorm() */
EIGEN_DEVICE_FUNC
const HypotNormReturnType hypotNorm() const
{ return HypotNormReturnType(_expression()); }
@@ -364,6 +380,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_sum.out
*
* \sa DenseBase::sum() */
EIGEN_DEVICE_FUNC
const SumReturnType sum() const
{ return SumReturnType(_expression()); }
@@ -371,6 +388,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* of each column (or row) of the referenced expression.
*
* \sa DenseBase::mean() */
EIGEN_DEVICE_FUNC
const MeanReturnType mean() const
{ return MeanReturnType(_expression()); }
@@ -379,6 +397,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* This expression can be assigned to a vector with entries of type \c bool.
*
* \sa DenseBase::all() */
EIGEN_DEVICE_FUNC
const AllReturnType all() const
{ return AllReturnType(_expression()); }
@@ -387,6 +406,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* This expression can be assigned to a vector with entries of type \c bool.
*
* \sa DenseBase::any() */
EIGEN_DEVICE_FUNC
const AnyReturnType any() const
{ return Any(_expression()); }
@@ -399,6 +419,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_count.out
*
* \sa DenseBase::count() */
EIGEN_DEVICE_FUNC
const CountReturnType count() const
{ return CountReturnType(_expression()); }
@@ -409,6 +430,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_prod.out
*
* \sa DenseBase::prod() */
EIGEN_DEVICE_FUNC
const ProdReturnType prod() const
{ return ProdReturnType(_expression()); }
@@ -420,10 +442,12 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude Vectorwise_reverse.out
*
* \sa DenseBase::reverse() */
EIGEN_DEVICE_FUNC
const ReverseReturnType reverse() const
{ return ReverseReturnType( _expression() ); }
typedef Replicate<ExpressionType,Direction==Vertical?Dynamic:1,Direction==Horizontal?Dynamic:1> ReplicateReturnType;
typedef Replicate<ExpressionType,(isVertical?Dynamic:1),(isHorizontal?Dynamic:1)> ReplicateReturnType;
EIGEN_DEVICE_FUNC
const ReplicateReturnType replicate(Index factor) const;
/**
@@ -435,17 +459,20 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* \sa VectorwiseOp::replicate(Index), DenseBase::replicate(), class Replicate
*/
// NOTE implemented here because of sunstudio's compilation errors
template<int Factor> const Replicate<ExpressionType,(IsVertical?Factor:1),(IsHorizontal?Factor:1)>
// isVertical*Factor+isHorizontal instead of (isVertical?Factor:1) to handle CUDA bug with ternary operator
template<int Factor> const Replicate<ExpressionType,isVertical*Factor+isHorizontal,isHorizontal*Factor+isVertical>
EIGEN_DEVICE_FUNC
replicate(Index factor = Factor) const
{
return Replicate<ExpressionType,Direction==Vertical?Factor:1,Direction==Horizontal?Factor:1>
(_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);
return Replicate<ExpressionType,(isVertical?Factor:1),(isHorizontal?Factor:1)>
(_expression(),isVertical?factor:1,isHorizontal?factor:1);
}
/////////// Artithmetic operators ///////////
/** Copies the vector \a other to each subvector of \c *this */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
ExpressionType& operator=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
@@ -456,6 +483,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Adds the vector \a other to each subvector of \c *this */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
ExpressionType& operator+=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
@@ -465,6 +493,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Substracts the vector \a other to each subvector of \c *this */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
ExpressionType& operator-=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
@@ -474,6 +503,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Multiples each subvector of \c *this by the vector \a other */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
ExpressionType& operator*=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
@@ -485,6 +515,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Divides each subvector of \c *this by the vector \a other */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
ExpressionType& operator/=(const DenseBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
@@ -495,7 +526,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
}
/** Returns the expression of the sum of the vector \a other to each subvector of \c *this */
template<typename OtherDerived> EIGEN_STRONG_INLINE
template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
CwiseBinaryOp<internal::scalar_sum_op<Scalar>,
const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type>
@@ -508,6 +539,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Returns the expression of the difference between each subvector of \c *this and the vector \a other */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
CwiseBinaryOp<internal::scalar_difference_op<Scalar>,
const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type>
@@ -520,10 +552,11 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Returns the expression where each subvector is the product of the vector \a other
* by the corresponding subvector of \c *this */
template<typename OtherDerived> EIGEN_STRONG_INLINE
template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
CwiseBinaryOp<internal::scalar_product_op<Scalar>,
const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type>
EIGEN_DEVICE_FUNC
operator*(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
@@ -535,6 +568,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Returns the expression where each subvector is the quotient of the corresponding
* subvector of \c *this by the vector \a other */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type>
@@ -550,6 +584,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* The referenced matrix is \b not modified.
* \sa MatrixBase::normalized(), normalize()
*/
EIGEN_DEVICE_FUNC
CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
const ExpressionTypeNestedCleaned,
const typename OppositeExtendedType<typename ReturnType<internal::member_norm,RealScalar>::Type>::Type>
@@ -559,10 +594,12 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Normalize in-place each row or columns of the referenced matrix.
* \sa MatrixBase::normalize(), normalized()
*/
void normalize() {
EIGEN_DEVICE_FUNC void normalize() {
m_matrix = this->normalized();
}
EIGEN_DEVICE_FUNC inline void reverseInPlace();
/////////// Geometry module ///////////
typedef Homogeneous<ExpressionType,Direction> HomogeneousReturnType;
@@ -570,6 +607,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
typedef typename ExpressionType::PlainObject CrossReturnType;
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const;
enum {
@@ -600,19 +638,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
ExpressionTypeNested m_matrix;
};
/** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
*
* Example: \include MatrixBase_colwise.cpp
* Output: \verbinclude MatrixBase_colwise.out
*
* \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
*/
template<typename Derived>
inline const typename DenseBase<Derived>::ConstColwiseReturnType
DenseBase<Derived>::colwise() const
{
return ConstColwiseReturnType(derived());
}
//const colwise moved to DenseBase.h due to CUDA compiler bug
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
*
@@ -625,19 +652,8 @@ DenseBase<Derived>::colwise()
return ColwiseReturnType(derived());
}
/** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
*
* Example: \include MatrixBase_rowwise.cpp
* Output: \verbinclude MatrixBase_rowwise.out
*
* \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
*/
template<typename Derived>
inline const typename DenseBase<Derived>::ConstRowwiseReturnType
DenseBase<Derived>::rowwise() const
{
return ConstRowwiseReturnType(derived());
}
//const rowwise moved to DenseBase.h due to CUDA compiler bug
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
*

View File

@@ -22,6 +22,7 @@ struct visitor_impl
row = (UnrollCount-1) % Derived::RowsAtCompileTime
};
EIGEN_DEVICE_FUNC
static inline void run(const Derived &mat, Visitor& visitor)
{
visitor_impl<Visitor, Derived, UnrollCount-1>::run(mat, visitor);
@@ -32,6 +33,7 @@ struct visitor_impl
template<typename Visitor, typename Derived>
struct visitor_impl<Visitor, Derived, 1>
{
EIGEN_DEVICE_FUNC
static inline void run(const Derived &mat, Visitor& visitor)
{
return visitor.init(mat.coeff(0, 0), 0, 0);
@@ -41,6 +43,7 @@ struct visitor_impl<Visitor, Derived, 1>
template<typename Visitor, typename Derived>
struct visitor_impl<Visitor, Derived, Dynamic>
{
EIGEN_DEVICE_FUNC
static inline void run(const Derived& mat, Visitor& visitor)
{
visitor.init(mat.coeff(0,0), 0, 0);
@@ -57,6 +60,7 @@ template<typename XprType>
class visitor_evaluator
{
public:
EIGEN_DEVICE_FUNC
explicit visitor_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
typedef typename XprType::Scalar Scalar;
@@ -67,15 +71,15 @@ public:
CoeffReadCost = internal::evaluator<XprType>::CoeffReadCost
};
Index rows() const { return m_xpr.rows(); }
Index cols() const { return m_xpr.cols(); }
Index size() const { return m_xpr.size(); }
EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
CoeffReturnType coeff(Index row, Index col) const
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{ return m_evaluator.coeff(row, col); }
protected:
typename internal::evaluator<XprType>::nestedType m_evaluator;
internal::evaluator<XprType> m_evaluator;
const XprType &m_xpr;
};
} // end namespace internal
@@ -99,6 +103,7 @@ protected:
*/
template<typename Derived>
template<typename Visitor>
EIGEN_DEVICE_FUNC
void DenseBase<Derived>::visit(Visitor& visitor) const
{
typedef typename internal::visitor_evaluator<Derived> ThisEvaluator;
@@ -125,6 +130,7 @@ struct coeff_visitor
typedef typename Derived::Scalar Scalar;
Index row, col;
Scalar res;
EIGEN_DEVICE_FUNC
inline void init(const Scalar& value, Index i, Index j)
{
res = value;
@@ -142,6 +148,7 @@ template <typename Derived>
struct min_coeff_visitor : coeff_visitor<Derived>
{
typedef typename Derived::Scalar Scalar;
EIGEN_DEVICE_FUNC
void operator() (const Scalar& value, Index i, Index j)
{
if(value < this->res)
@@ -168,7 +175,8 @@ struct functor_traits<min_coeff_visitor<Scalar> > {
template <typename Derived>
struct max_coeff_visitor : coeff_visitor<Derived>
{
typedef typename Derived::Scalar Scalar;
typedef typename Derived::Scalar Scalar;
EIGEN_DEVICE_FUNC
void operator() (const Scalar& value, Index i, Index j)
{
if(value > this->res)
@@ -196,6 +204,7 @@ struct functor_traits<max_coeff_visitor<Scalar> > {
*/
template<typename Derived>
template<typename IndexType>
EIGEN_DEVICE_FUNC
typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
{
@@ -213,6 +222,7 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
*/
template<typename Derived>
template<typename IndexType>
EIGEN_DEVICE_FUNC
typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff(IndexType* index) const
{
@@ -230,6 +240,7 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
*/
template<typename Derived>
template<typename IndexType>
EIGEN_DEVICE_FUNC
typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
{
@@ -247,6 +258,7 @@ DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
*/
template<typename Derived>
template<typename IndexType>
EIGEN_DEVICE_FUNC
typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff(IndexType* index) const
{

View File

@@ -45,7 +45,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
};
template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4}; typedef Packet2cf half; };
template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
@@ -267,7 +267,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
};
template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2}; typedef Packet1cd half; };
template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }

View File

@@ -271,6 +271,86 @@ pexp<Packet8f>(const Packet8f& _x) {
return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
pexp<Packet4d>(const Packet4d& _x) {
Packet4d x = _x;
_EIGEN_DECLARE_CONST_Packet4d(1, 1.0);
_EIGEN_DECLARE_CONST_Packet4d(2, 2.0);
_EIGEN_DECLARE_CONST_Packet4d(half, 0.5);
_EIGEN_DECLARE_CONST_Packet4d(exp_hi, 709.437);
_EIGEN_DECLARE_CONST_Packet4d(exp_lo, -709.436139303);
_EIGEN_DECLARE_CONST_Packet4d(cephes_LOG2EF, 1.4426950408889634073599);
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p0, 1.26177193074810590878e-4);
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p1, 3.02994407707441961300e-2);
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p2, 9.99999999999999999910e-1);
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q0, 3.00198505138664455042e-6);
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q1, 2.52448340349684104192e-3);
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q2, 2.27265548208155028766e-1);
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q3, 2.00000000000000000009e0);
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C1, 0.693145751953125);
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C2, 1.42860682030941723212e-6);
_EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
Packet4d tmp, fx;
// clamp x
x = pmax(pmin(x, p4d_exp_hi), p4d_exp_lo);
// Express exp(x) as exp(g + n*log(2)).
fx = pmadd(p4d_cephes_LOG2EF, x, p4d_half);
// Get the integer modulus of log(2), i.e. the "n" described above.
fx = _mm256_floor_pd(fx);
// Get the remainder modulo log(2), i.e. the "g" described above. Subtract
// n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
// digits right.
tmp = pmul(fx, p4d_cephes_exp_C1);
Packet4d z = pmul(fx, p4d_cephes_exp_C2);
x = psub(x, tmp);
x = psub(x, z);
Packet4d x2 = pmul(x, x);
// Evaluate the numerator polynomial of the rational interpolant.
Packet4d px = p4d_cephes_exp_p0;
px = pmadd(px, x2, p4d_cephes_exp_p1);
px = pmadd(px, x2, p4d_cephes_exp_p2);
px = pmul(px, x);
// Evaluate the denominator polynomial of the rational interpolant.
Packet4d qx = p4d_cephes_exp_q0;
qx = pmadd(qx, x2, p4d_cephes_exp_q1);
qx = pmadd(qx, x2, p4d_cephes_exp_q2);
qx = pmadd(qx, x2, p4d_cephes_exp_q3);
// I don't really get this bit, copied from the SSE2 routines, so...
// TODO(gonnet): Figure out what is going on here, perhaps find a better
// rational interpolant?
x = _mm256_div_pd(px, psub(qx, px));
x = pmadd(p4d_2, x, p4d_1);
// Build e=2^n by constructing the exponents in a 128-bit vector and
// shifting them to where they belong in double-precision values.
__m128i emm0 = _mm256_cvtpd_epi32(fx);
emm0 = _mm_add_epi32(emm0, p4i_1023);
emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0));
__m128i lo = _mm_slli_epi64(emm0, 52);
__m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52);
__m256i e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0);
e = _mm256_insertf128_si256(e, hi, 1);
// Construct the result 2^n * exp(g) = e * x. The max is used to catch
// non-finite values in the input.
return pmax(pmul(x, Packet4d(e)), _x);
}
// Functions for sqrt.
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
@@ -300,15 +380,59 @@ psqrt<Packet8f>(const Packet8f& _x) {
return pmul(_x, x);
}
#else
template <>
EIGEN_STRONG_INLINE Packet8f psqrt<Packet8f>(const Packet8f& x) {
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet8f psqrt<Packet8f>(const Packet8f& x) {
return _mm256_sqrt_ps(x);
}
#endif
template <>
EIGEN_STRONG_INLINE Packet4d psqrt<Packet4d>(const Packet4d& x) {
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4d psqrt<Packet4d>(const Packet4d& x) {
return _mm256_sqrt_pd(x);
}
#if EIGEN_FAST_MATH
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(nan, 0x7fc00000);
_EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
_EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
Packet8f neg_half = pmul(_x, p8f_minus_half);
// select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well).
Packet8f le_zero_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_LT_OQ);
Packet8f x = _mm256_andnot_ps(le_zero_mask, _mm256_rsqrt_ps(_x));
// Fill in NaNs and Infs for the negative/zero entries.
Packet8f neg_mask = _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_LT_OQ);
Packet8f zero_mask = _mm256_andnot_ps(neg_mask, le_zero_mask);
Packet8f infs_and_nans = _mm256_or_ps(_mm256_and_ps(neg_mask, p8f_nan),
_mm256_and_ps(zero_mask, p8f_inf));
// Do a single step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p8f_one_point_five));
// Insert NaNs and Infs in all the right places.
return _mm256_or_ps(x, infs_and_nans);
}
#else
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet8f prsqrt<Packet8f>(const Packet8f& x) {
_EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(x));
}
#endif
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4d prsqrt<Packet4d>(const Packet4d& x) {
_EIGEN_DECLARE_CONST_Packet4d(one, 1.0);
return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(x));
}
} // end namespace internal

View File

@@ -60,11 +60,12 @@ template<> struct packet_traits<float> : default_packet_traits
HasHalfPacket = 1,
HasDiv = 1,
HasSin = 1,
HasSin = EIGEN_FAST_MATH,
HasCos = 0,
HasLog = 1,
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasBlend = 1
};
};
@@ -79,8 +80,9 @@ template<> struct packet_traits<double> : default_packet_traits
HasHalfPacket = 1,
HasDiv = 1,
HasExp = 0,
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasBlend = 1
};
};
@@ -98,9 +100,9 @@ template<> struct packet_traits<int> : default_packet_traits
};
*/
template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; enum {size=8}; };
template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4}; };
template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8}; };
template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; enum {size=8, alignment=Aligned32}; };
template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4, alignment=Aligned32}; };
template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32}; };
template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
@@ -109,8 +111,8 @@ template<> EIGEN_STRONG_INLINE Packet8i pset1<Packet8i>(const int& from) { re
template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float* from) { return _mm256_broadcast_ss(from); }
template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }
template<> EIGEN_STRONG_INLINE Packet8f plset<float>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
template<> EIGEN_STRONG_INLINE Packet4d plset<double>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
template<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
template<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); }
@@ -432,26 +434,30 @@ struct palign_impl<Offset,Packet8f>
if (Offset==1)
{
first = _mm256_blend_ps(first, second, 1);
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0x88);
Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
first = _mm256_blend_ps(tmp1, tmp2, 0x88);
}
else if (Offset==2)
{
first = _mm256_blend_ps(first, second, 3);
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0xcc);
Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
first = _mm256_blend_ps(tmp1, tmp2, 0xcc);
}
else if (Offset==3)
{
first = _mm256_blend_ps(first, second, 7);
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0xee);
Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
first = _mm256_blend_ps(tmp1, tmp2, 0xee);
}
else if (Offset==4)
{
first = _mm256_blend_ps(first, second, 15);
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
first = _mm256_permute_ps(_mm256_permute2f128_ps (tmp, tmp, 1), _MM_SHUFFLE(3,2,1,0));
Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
first = _mm256_permute_ps(tmp2, _MM_SHUFFLE(3,2,1,0));
}
else if (Offset==5)
{

View File

@@ -0,0 +1,51 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_TYPE_CASTING_AVX_H
#define EIGEN_TYPE_CASTING_AVX_H
namespace Eigen {
namespace internal {
// For now we use SSE to handle integers, so we can't use AVX instructions to cast
// from int to float
template <>
struct type_casting_traits<float, int> {
enum {
VectorizedCast = 0,
SrcCoeffRatio = 1,
TgtCoeffRatio = 1
};
};
template <>
struct type_casting_traits<int, float> {
enum {
VectorizedCast = 0,
SrcCoeffRatio = 1,
TgtCoeffRatio = 1
};
};
template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
return _mm256_cvtps_epi32(a);
}
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
return _mm256_cvtepi32_ps(a);
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TYPE_CASTING_AVX_H

View File

@@ -53,7 +53,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
};
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
@@ -275,7 +275,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
};
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; };
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
@@ -408,7 +408,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, con
// TODO optimize it for AltiVec
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
return Packet1cd(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX32_REV))));
return Packet1cd(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_REVERSE64))));
}
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)

View File

@@ -0,0 +1,290 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2007 Julien Pommier
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/* The sin, cos, exp, and log functions of this file come from
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
*/
#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
namespace Eigen {
namespace internal {
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f plog<Packet4f>(const Packet4f& _x)
{
Packet4f x = _x;
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
_EIGEN_DECLARE_CONST_Packet4i(23, 23);
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
/* the smallest non denormalized float number */
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000); // -1.f/0.f
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan, 0xffffffff);
/* natural logarithm computed for 4 simultaneous float
return NaN for x <= 0
*/
_EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
Packet4i emm0;
/* isvalid_mask is 0 if x < 0 or x is NaN. */
Packet4ui isvalid_mask = reinterpret_cast<Packet4ui>(vec_cmpge(x, p4f_ZERO));
Packet4ui iszero_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(x, p4f_ZERO));
x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
emm0 = vec_sr(reinterpret_cast<Packet4i>(x),
reinterpret_cast<Packet4ui>(p4i_23));
/* keep only the fractional part */
x = pand(x, p4f_inv_mant_mask);
x = por(x, p4f_half);
emm0 = psub(emm0, p4i_0x7f);
Packet4f e = padd(vec_ctf(emm0, 0), p4f_1);
/* part2:
if( x < SQRTHF ) {
e -= 1;
x = x + x - 1.0;
} else { x = x - 1.0; }
*/
Packet4f mask = reinterpret_cast<Packet4f>(vec_cmplt(x, p4f_cephes_SQRTHF));
Packet4f tmp = pand(x, mask);
x = psub(x, p4f_1);
e = psub(e, pand(p4f_1, mask));
x = padd(x, tmp);
Packet4f x2 = pmul(x,x);
Packet4f x3 = pmul(x2,x);
Packet4f y, y1, y2;
y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
y = pmadd(y , x, p4f_cephes_log_p2);
y1 = pmadd(y1, x, p4f_cephes_log_p5);
y2 = pmadd(y2, x, p4f_cephes_log_p8);
y = pmadd(y, x3, y1);
y = pmadd(y, x3, y2);
y = pmul(y, x3);
y1 = pmul(e, p4f_cephes_log_q1);
tmp = pmul(x2, p4f_half);
y = padd(y, y1);
x = psub(x, tmp);
y2 = pmul(e, p4f_cephes_log_q2);
x = padd(x, y);
x = padd(x, y2);
// negative arg will be NAN, 0 will be -INF
x = vec_sel(x, p4f_minus_inf, iszero_mask);
x = vec_sel(p4f_minus_nan, x, isvalid_mask);
return x;
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f pexp<Packet4f>(const Packet4f& _x)
{
Packet4f x = _x;
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
_EIGEN_DECLARE_CONST_Packet4i(23, 23);
_EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
_EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
Packet4f tmp, fx;
Packet4i emm0;
// clamp x
x = vec_max(vec_min(x, p4f_exp_hi), p4f_exp_lo);
/* express exp(x) as exp(g + n*log(2)) */
fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
fx = vec_floor(fx);
tmp = pmul(fx, p4f_cephes_exp_C1);
Packet4f z = pmul(fx, p4f_cephes_exp_C2);
x = psub(x, tmp);
x = psub(x, z);
z = pmul(x,x);
Packet4f y = p4f_cephes_exp_p0;
y = pmadd(y, x, p4f_cephes_exp_p1);
y = pmadd(y, x, p4f_cephes_exp_p2);
y = pmadd(y, x, p4f_cephes_exp_p3);
y = pmadd(y, x, p4f_cephes_exp_p4);
y = pmadd(y, x, p4f_cephes_exp_p5);
y = pmadd(y, z, x);
y = padd(y, p4f_1);
// build 2^n
emm0 = vec_cts(fx, 0);
emm0 = vec_add(emm0, p4i_0x7f);
emm0 = vec_sl(emm0, reinterpret_cast<Packet4ui>(p4i_23));
// Altivec's max & min operators just drop silent NaNs. Check NaNs in
// inputs and return them unmodified.
Packet4ui isnumber_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(_x, _x));
return vec_sel(_x, pmax(pmul(y, reinterpret_cast<Packet4f>(emm0)), _x),
isnumber_mask);
}
#ifdef __VSX__
// VSX support varies between different compilers and even different
// versions of the same compiler. For gcc version >= 4.9.3, we can use
// vec_cts to efficiently convert Packet2d to Packet2l. Otherwise, use
// a slow version that works with older compilers.
static inline Packet2l ConvertToPacket2l(const Packet2d& x) {
#if EIGEN_GNUC_AT_LEAST(5, 0) || \
(EIGEN_GNUC_AT(4, 9) && __GNUC_PATCHLEVEL__ >= 3)
return vec_cts(x, 0); // TODO: check clang version.
#else
double tmp[2];
memcpy(tmp, &x, sizeof(tmp));
Packet2l l = { static_cast<long long>(tmp[0]),
static_cast<long long>(tmp[1]) };
return l;
#endif
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d pexp<Packet2d>(const Packet2d& _x)
{
Packet2d x = _x;
_EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
_EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
_EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
_EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
_EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
_EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
Packet2d tmp, fx;
Packet2l emm0;
// clamp x
x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
/* express exp(x) as exp(g + n*log(2)) */
fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
fx = vec_floor(fx);
tmp = pmul(fx, p2d_cephes_exp_C1);
Packet2d z = pmul(fx, p2d_cephes_exp_C2);
x = psub(x, tmp);
x = psub(x, z);
Packet2d x2 = pmul(x,x);
Packet2d px = p2d_cephes_exp_p0;
px = pmadd(px, x2, p2d_cephes_exp_p1);
px = pmadd(px, x2, p2d_cephes_exp_p2);
px = pmul (px, x);
Packet2d qx = p2d_cephes_exp_q0;
qx = pmadd(qx, x2, p2d_cephes_exp_q1);
qx = pmadd(qx, x2, p2d_cephes_exp_q2);
qx = pmadd(qx, x2, p2d_cephes_exp_q3);
x = pdiv(px,psub(qx,px));
x = pmadd(p2d_2,x,p2d_1);
// build 2^n
emm0 = ConvertToPacket2l(fx);
#ifdef __POWER8_VECTOR__
static const Packet2l p2l_1023 = { 1023, 1023 };
static const Packet2ul p2ul_52 = { 52, 52 };
emm0 = vec_add(emm0, p2l_1023);
emm0 = vec_sl(emm0, p2ul_52);
#else
// Code is a bit complex for POWER7. There is actually a
// vec_xxsldi intrinsic but it is not supported by some gcc versions.
// So we shift (52-32) bits and do a word swap with zeros.
_EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
_EIGEN_DECLARE_CONST_Packet4i(20, 20); // 52 - 32
Packet4i emm04i = reinterpret_cast<Packet4i>(emm0);
emm04i = vec_add(emm04i, p4i_1023);
emm04i = vec_sl(emm04i, reinterpret_cast<Packet4ui>(p4i_20));
static const Packet16uc perm = {
0x14, 0x15, 0x16, 0x17, 0x00, 0x01, 0x02, 0x03,
0x1c, 0x1d, 0x1e, 0x1f, 0x08, 0x09, 0x0a, 0x0b };
#ifdef _BIG_ENDIAN
emm0 = reinterpret_cast<Packet2l>(vec_perm(p4i_ZERO, emm04i, perm));
#else
emm0 = reinterpret_cast<Packet2l>(vec_perm(emm04i, p4i_ZERO, perm));
#endif
#endif
// Altivec's max & min operators just drop silent NaNs. Check NaNs in
// inputs and return them unmodified.
Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));
return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),
isnumber_mask);
}
#endif
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_MATH_FUNCTIONS_ALTIVEC_H

View File

@@ -59,6 +59,9 @@ typedef __vector unsigned char Packet16uc;
#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
Packet2l p2l_##NAME = pset1<Packet2l>(X)
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
const Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(pset1<Packet4i>(X))
#define DST_CHAN 1
#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
@@ -66,10 +69,12 @@ typedef __vector unsigned char Packet16uc;
// These constants are endian-agnostic
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
#ifndef __VSX__
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1}
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}
#endif
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16}
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}
static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000}
static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
@@ -130,8 +135,8 @@ template<> struct packet_traits<float> : default_packet_traits
HasDiv = 1,
HasSin = 0,
HasCos = 0,
HasLog = 0,
HasExp = 0,
HasLog = 1,
HasExp = 1,
HasSqrt = 0
};
};
@@ -148,8 +153,8 @@ template<> struct packet_traits<int> : default_packet_traits
};
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v)
{
@@ -289,8 +294,8 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const
to[3*stride] = ai[3];
}
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return vec_add(pset1<Packet4f>(a), p4f_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return vec_add(pset1<Packet4i>(a), p4i_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return vec_add(pset1<Packet4f>(a), p4f_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return vec_add(pset1<Packet4i>(a), p4i_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); }
@@ -751,12 +756,12 @@ template<> struct packet_traits<double> : default_packet_traits
HasHalfPacket = 0,
HasDiv = 1,
HasExp = 0,
HasExp = 1,
HasSqrt = 0
};
};
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
@@ -807,7 +812,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to,
to[0*stride] = af[0];
to[1*stride] = af[1];
}
template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return vec_add(pset1<Packet2d>(a), p2d_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return vec_add(pset1<Packet2d>(a), p2d_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_add(a,b); }

View File

@@ -1,5 +1,9 @@
ADD_SUBDIRECTORY(SSE)
ADD_SUBDIRECTORY(AltiVec)
ADD_SUBDIRECTORY(NEON)
ADD_SUBDIRECTORY(AVX)
ADD_SUBDIRECTORY(CUDA)
ADD_SUBDIRECTORY(Default)
ADD_SUBDIRECTORY(NEON)
ADD_SUBDIRECTORY(SSE)

View File

@@ -0,0 +1,6 @@
FILE(GLOB Eigen_Core_arch_CUDA_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_arch_CUDA_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/CUDA COMPONENT Devel
)

View File

@@ -18,49 +18,49 @@ namespace internal {
// introduce conflicts between these packet_traits definitions and the ones
// we'll use on the host side (SSE, AVX, ...)
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
template<> EIGEN_STRONG_INLINE
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 plog<float4>(const float4& a)
{
return make_float4(logf(a.x), logf(a.y), logf(a.z), logf(a.w));
}
template<> EIGEN_STRONG_INLINE
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 plog<double2>(const double2& a)
{
return make_double2(log(a.x), log(a.y));
}
template<> EIGEN_STRONG_INLINE
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 pexp<float4>(const float4& a)
{
return make_float4(expf(a.x), expf(a.y), expf(a.z), expf(a.w));
}
template<> EIGEN_STRONG_INLINE
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 pexp<double2>(const double2& a)
{
return make_double2(exp(a.x), exp(a.y));
}
template<> EIGEN_STRONG_INLINE
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 psqrt<float4>(const float4& a)
{
return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w));
}
template<> EIGEN_STRONG_INLINE
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 psqrt<double2>(const double2& a)
{
return make_double2(sqrt(a.x), sqrt(a.y));
}
template<> EIGEN_STRONG_INLINE
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 prsqrt<float4>(const float4& a)
{
return make_float4(rsqrtf(a.x), rsqrtf(a.y), rsqrtf(a.z), rsqrtf(a.w));
}
template<> EIGEN_STRONG_INLINE
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 prsqrt<double2>(const double2& a)
{
return make_double2(rsqrt(a.x), rsqrt(a.y));

View File

@@ -65,8 +65,8 @@ template<> struct packet_traits<double> : default_packet_traits
};
template<> struct unpacket_traits<float4> { typedef float type; enum {size=4}; typedef float4 half; };
template<> struct unpacket_traits<double2> { typedef double type; enum {size=2}; typedef double2 half; };
template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16}; typedef float4 half; };
template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16}; typedef double2 half; };
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) {
return make_float4(from, from, from, from);
@@ -76,10 +76,10 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const do
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float>(const float& a) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
return make_float4(a, a+1, a+2, a+3);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double>(const double& a) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double2>(const double& a) {
return make_double2(a, a+1);
}
@@ -197,21 +197,21 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(cons
}
#endif
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, int stride) {
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
}
template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, int stride) {
template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
return make_double2(from[0*stride], from[1*stride]);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, int stride) {
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
to[stride*0] = from.x;
to[stride*1] = from.y;
to[stride*2] = from.z;
to[stride*3] = from.w;
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, int stride) {
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
to[stride*0] = from.x;
to[stride*1] = from.y;
}
@@ -245,14 +245,14 @@ template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a)
}
template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
return make_float4(fabs(a.x), fabs(a.y), fabs(a.z), fabs(a.w));
return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
}
template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
return make_double2(abs(a.x), abs(a.y));
return make_double2(fabs(a.x), fabs(a.y));
}
template<> EIGEN_DEVICE_FUNC inline void
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<float4,4>& kernel) {
double tmp = kernel.packet[0].y;
kernel.packet[0].y = kernel.packet[1].x;
@@ -279,7 +279,7 @@ ptranspose(PacketBlock<float4,4>& kernel) {
kernel.packet[3].z = tmp;
}
template<> EIGEN_DEVICE_FUNC inline void
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<double2,2>& kernel) {
double tmp = kernel.packet[0].y;
kernel.packet[0].y = kernel.packet[1].x;

View File

@@ -48,7 +48,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
};
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
@@ -114,7 +114,7 @@ template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<f
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
{
Packet4f res;
Packet4f res = pset1<Packet4f>(0.f);
res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
@@ -272,7 +272,7 @@ ptranspose(PacketBlock<Packet2cf,2>& kernel) {
}
//---------- double ----------
#if EIGEN_ARCH_ARM64
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
static uint64x2_t p2ul_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x0, 0x8000000000000000);
@@ -306,7 +306,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
};
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; };
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
@@ -365,7 +365,7 @@ template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::c
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
{
Packet2d res;
Packet2d res = pset1<Packet2d>(0.0);
res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
return Packet1cd(res);

View File

@@ -76,12 +76,12 @@ typedef uint32x4_t Packet4ui;
template<> struct packet_traits<float> : default_packet_traits
{
typedef Packet4f type;
typedef Packet2f half;
typedef Packet4f half; // Packet2f intrinsics not implemented yet
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
HasHalfPacket=1,
HasHalfPacket=0, // Packet2f intrinsics not implemented yet
HasDiv = 1,
// FIXME check the Has*
@@ -95,12 +95,12 @@ template<> struct packet_traits<float> : default_packet_traits
template<> struct packet_traits<int> : default_packet_traits
{
typedef Packet4i type;
typedef Packet2i half;
typedef Packet4i half; // Packet2i intrinsics not implemented yet
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=4,
HasHalfPacket=1
HasHalfPacket=0 // Packet2i intrinsics not implemented yet
// FIXME check the Has*
};
};
@@ -114,18 +114,18 @@ EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q
EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
#endif
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); }
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a)
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)
{
Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
return vaddq_f32(pset1<Packet4f>(a), countdown);
}
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a)
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a)
{
Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
return vaddq_s32(pset1<Packet4i>(a), countdown);
@@ -252,7 +252,7 @@ template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& f
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
{
Packet4f res;
Packet4f res = pset1<Packet4f>(0.f);
res = vsetq_lane_f32(from[0*stride], res, 0);
res = vsetq_lane_f32(from[1*stride], res, 1);
res = vsetq_lane_f32(from[2*stride], res, 2);
@@ -261,7 +261,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const floa
}
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
{
Packet4i res;
Packet4i res = pset1<Packet4i>(0);
res = vsetq_lane_s32(from[0*stride], res, 0);
res = vsetq_lane_s32(from[1*stride], res, 1);
res = vsetq_lane_s32(from[2*stride], res, 2);
@@ -309,6 +309,23 @@ template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
a_hi = vget_high_s32(a_r64);
return vcombine_s32(a_hi, a_lo);
}
template<size_t offset>
struct protate_impl<offset, Packet4f>
{
static Packet4f run(const Packet4f& a) {
return vextq_f32(a, a, offset);
}
};
template<size_t offset>
struct protate_impl<offset, Packet4i>
{
static Packet4i run(const Packet4i& a) {
return vextq_s32(a, a, offset);
}
};
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); }
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); }
@@ -501,7 +518,19 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) {
}
//---------- double ----------
#if EIGEN_ARCH_ARM64
// Clang 3.5 in the iOS toolchain has an ICE triggered by NEON intrisics for double.
// Confirmed at least with __apple_build_version__ = 6000054.
#ifdef __apple_build_version__
// Let's hope that by the time __apple_build_version__ hits the 601* range, the bug will be fixed.
// https://gist.github.com/yamaya/2924292 suggests that the 3 first digits are only updated with
// major toolchain updates.
#define EIGEN_APPLE_DOUBLE_NEON_BUG (__apple_build_version__ < 6010000)
#else
#define EIGEN_APPLE_DOUBLE_NEON_BUG 0
#endif
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
#if (EIGEN_COMP_GNUC_STRICT && defined(__ANDROID__)) || defined(__apple_build_version__)
// Bug 907: workaround missing declarations of the following two functions in the ADK
@@ -524,12 +553,12 @@ typedef float64x1_t Packet1d;
template<> struct packet_traits<double> : default_packet_traits
{
typedef Packet2d type;
typedef Packet1d half;
typedef Packet2d half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
HasHalfPacket=1,
HasHalfPacket=0,
HasDiv = 1,
// FIXME check the Has*
@@ -541,11 +570,11 @@ template<> struct packet_traits<double> : default_packet_traits
};
};
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return vdupq_n_f64(from); }
template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a)
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a)
{
Packet2d countdown = EIGEN_INIT_NEON_PACKET2(0, 1);
return vaddq_f64(pset1<Packet2d>(a), countdown);
@@ -608,7 +637,7 @@ template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d&
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
{
Packet2d res;
Packet2d res = pset1<Packet2d>(0.0);
res = vsetq_lane_f64(from[0*stride], res, 0);
res = vsetq_lane_f64(from[1*stride], res, 1);
return res;
@@ -625,6 +654,14 @@ template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { retu
template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); }
template<size_t offset>
struct protate_impl<offset, Packet2d>
{
static Packet2d run(const Packet2d& a) {
return vextq_f64(a, a, offset);
}
};
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); }
#if EIGEN_COMP_CLANG && defined(__apple_build_version__)

View File

@@ -50,7 +50,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
#endif
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
@@ -297,7 +297,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
#endif
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; };
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
@@ -474,7 +474,7 @@ ptranspose(PacketBlock<Packet2cf,2>& kernel) {
}
template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
__m128d result = pblend(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
__m128d result = pblend<Packet2d>(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
return Packet2cf(_mm_castpd_ps(result));
}

View File

@@ -138,7 +138,6 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
#ifdef EIGEN_VECTORIZE_SSE4_1
fx = _mm_floor_ps(fx);
#else
tmp = _mm_setzero_ps();
emm0 = _mm_cvttps_epi32(fx);
tmp = _mm_cvtepi32_ps(emm0);
/* if greater, substract 1 */
@@ -207,7 +206,6 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
#ifdef EIGEN_VECTORIZE_SSE4_1
fx = _mm_floor_pd(fx);
#else
tmp = _mm_setzero_pd();
emm0 = _mm_cvttpd_epi32(fx);
tmp = _mm_cvtepi32_pd(emm0);
/* if greater, substract 1 */
@@ -464,11 +462,59 @@ Packet4f psqrt<Packet4f>(const Packet4f& _x)
#else
template<> EIGEN_STRONG_INLINE Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
template<>EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
#endif
template<> EIGEN_STRONG_INLINE Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
#if EIGEN_FAST_MATH
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000);
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(nan, 0x7fc00000);
_EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f);
_EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f);
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000);
Packet4f neg_half = pmul(_x, p4f_minus_half);
// select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well).
Packet4f le_zero_mask = _mm_cmple_ps(_x, p4f_flt_min);
Packet4f x = _mm_andnot_ps(le_zero_mask, _mm_rsqrt_ps(_x));
// Fill in NaNs and Infs for the negative/zero entries.
Packet4f neg_mask = _mm_cmplt_ps(_x, _mm_setzero_ps());
Packet4f zero_mask = _mm_andnot_ps(neg_mask, le_zero_mask);
Packet4f infs_and_nans = _mm_or_ps(_mm_and_ps(neg_mask, p4f_nan),
_mm_and_ps(zero_mask, p4f_inf));
// Do a single step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p4f_one_point_five));
// Insert NaNs and Infs in all the right places.
return _mm_or_ps(x, infs_and_nans);
}
#else
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f prsqrt<Packet4f>(const Packet4f& x) {
// Unfortunately we can't use the much faster mm_rqsrt_ps since it only provides an approximation.
return _mm_div_ps(pset1<Packet4f>(1.0f), _mm_sqrt_ps(x));
}
#endif
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d prsqrt<Packet2d>(const Packet2d& x) {
// Unfortunately we can't use the much faster mm_rqsrt_pd since it only provides an approximation.
return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));
}
} // end namespace internal

View File

@@ -28,13 +28,12 @@ namespace internal {
#endif
#endif
#if defined EIGEN_VECTORIZE_AVX && EIGEN_COMP_GNUC_STRICT
#if (defined EIGEN_VECTORIZE_AVX) && EIGEN_COMP_GNUC_STRICT && (__GXX_ABI_VERSION < 1004)
// With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
// have overloads for both types without linking error.
// One solution is to increase ABI version using -fabi-version=4 (or greater).
// To workaround this inconvenince, we rather wrap 128bit types into the following helper
// Otherwise, we workaround this inconvenience by wrapping 128bit types into the following helper
// structure:
// TODO disable this wrapper if abi-versio>=4, but to detect that without asking the user to define a macro?
template<typename T>
struct eigen_packet_wrapper
{
@@ -109,6 +108,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasLog = 1,
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasBlend = 1
};
};
@@ -125,6 +125,7 @@ template<> struct packet_traits<double> : default_packet_traits
HasDiv = 1,
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasBlend = 1
};
};
@@ -143,9 +144,9 @@ template<> struct packet_traits<int> : default_packet_traits
};
};
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
#if EIGEN_COMP_MSVC==1500
// Workaround MSVC 9 internal compiler error.
@@ -171,11 +172,9 @@ template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
}
#endif
#ifndef EIGEN_VECTORIZE_AVX
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
#endif
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
@@ -463,6 +462,29 @@ template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
{ return _mm_shuffle_epi32(a,0x1B); }
template<size_t offset>
struct protate_impl<offset, Packet4f>
{
static Packet4f run(const Packet4f& a) {
return vec4f_swizzle1(a, offset, (offset + 1) % 4, (offset + 2) % 4, (offset + 3) % 4);
}
};
template<size_t offset>
struct protate_impl<offset, Packet4i>
{
static Packet4i run(const Packet4i& a) {
return vec4i_swizzle1(a, offset, (offset + 1) % 4, (offset + 2) % 4, (offset + 3) % 4);
}
};
template<size_t offset>
struct protate_impl<offset, Packet2d>
{
static Packet2d run(const Packet2d& a) {
return vec2d_swizzle1(a, offset, (offset + 1) % 2);
}
};
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
{

View File

@@ -0,0 +1,77 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_TYPE_CASTING_SSE_H
#define EIGEN_TYPE_CASTING_SSE_H
namespace Eigen {
namespace internal {
template <>
struct type_casting_traits<float, int> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 1,
TgtCoeffRatio = 1
};
};
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
return _mm_cvttps_epi32(a);
}
template <>
struct type_casting_traits<int, float> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 1,
TgtCoeffRatio = 1
};
};
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
return _mm_cvtepi32_ps(a);
}
template <>
struct type_casting_traits<double, float> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 2,
TgtCoeffRatio = 1
};
};
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
}
template <>
struct type_casting_traits<float, double> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 1,
TgtCoeffRatio = 2
};
};
template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
// Simply discard the second half of the input
return _mm_cvtps_pd(a);
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TYPE_CASTING_SSE_H

View File

@@ -150,14 +150,6 @@ template<typename Scalar> struct swap_assign_op {
swap(a,const_cast<Scalar&>(b));
#endif
}
template<int LhsAlignment, int RhsAlignment, typename Packet>
EIGEN_STRONG_INLINE void swapPacket(Scalar* a, Scalar* b) const
{
Packet tmp = internal::ploadt<Packet,RhsAlignment>(b);
internal::pstoret<Scalar,Packet,RhsAlignment>(b, internal::ploadt<Packet,LhsAlignment>(a));
internal::pstoret<Scalar,Packet,LhsAlignment>(a, tmp);
}
};
template<typename Scalar>
struct functor_traits<swap_assign_op<Scalar> > {

View File

@@ -154,6 +154,48 @@ struct functor_traits<scalar_max_op<Scalar> > {
};
};
/** \internal
* \brief Template functors for comparison of two scalars
* \todo Implement packet-comparisons
*/
template<typename Scalar, ComparisonName cmp> struct scalar_cmp_op;
template<typename Scalar, ComparisonName cmp>
struct functor_traits<scalar_cmp_op<Scalar, cmp> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = false
};
};
template<ComparisonName Cmp, typename Scalar>
struct result_of<scalar_cmp_op<Scalar, Cmp>(Scalar,Scalar)> {
typedef bool type;
};
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_EQ> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;}
};
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LT> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<b;}
};
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LE> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;}
};
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_UNORD> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);}
};
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_NEQ> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;}
};
/** \internal
* \brief Template functor to compute the hypot of two scalars
*
@@ -299,7 +341,6 @@ template<> struct functor_traits<scalar_boolean_or_op> {
*/
template<typename Scalar>
struct scalar_multiple_op {
typedef typename packet_traits<Scalar>::type Packet;
// FIXME default copy constructors seems bugged with std::complex<>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { }
@@ -307,6 +348,7 @@ struct scalar_multiple_op {
EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
template <typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::pmul(a, pset1<Packet>(m_other)); }
typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
@@ -337,11 +379,11 @@ struct functor_traits<scalar_multiple2_op<Scalar1,Scalar2> >
*/
template<typename Scalar>
struct scalar_quotient1_op {
typedef typename packet_traits<Scalar>::type Packet;
// FIXME default copy constructors seems bugged with std::complex<>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; }
template <typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::pdiv(a, pset1<Packet>(m_other)); }
typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
@@ -350,6 +392,18 @@ template<typename Scalar>
struct functor_traits<scalar_quotient1_op<Scalar> >
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
template<typename Scalar1, typename Scalar2>
struct scalar_quotient2_op {
typedef typename scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const scalar_quotient2_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const Scalar2& other) : m_other(other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a / m_other; }
typename add_const_on_value_type<typename NumTraits<Scalar2>::Nested>::type m_other;
};
template<typename Scalar1,typename Scalar2>
struct functor_traits<scalar_quotient2_op<Scalar1,Scalar2> >
{ enum { Cost = 2 * NumTraits<Scalar1>::MulCost, PacketAccess = false }; };
// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication
// where the mixing of different types is handled by scalar_product_traits
// In particular, real * complex<real> is allowed.
@@ -367,11 +421,11 @@ template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<s
/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */
template<typename Scalar>
struct scalar_add_op {
typedef typename packet_traits<Scalar>::type Packet;
// FIXME default copy constructors seems bugged with std::complex<>
EIGEN_DEVICE_FUNC inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC inline scalar_add_op(const Scalar& other) : m_other(other) { }
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a + m_other; }
template <typename Packet>
inline const Packet packetOp(const Packet& a) const
{ return internal::padd(a, pset1<Packet>(m_other)); }
const Scalar m_other;
@@ -386,10 +440,10 @@ struct functor_traits<scalar_add_op<Scalar> >
*/
template<typename Scalar>
struct scalar_sub_op {
typedef typename packet_traits<Scalar>::type Packet;
inline scalar_sub_op(const scalar_sub_op& other) : m_other(other.m_other) { }
inline scalar_sub_op(const Scalar& other) : m_other(other) { }
inline Scalar operator() (const Scalar& a) const { return a - m_other; }
template <typename Packet>
inline const Packet packetOp(const Packet& a) const
{ return internal::psub(a, pset1<Packet>(m_other)); }
const Scalar m_other;
@@ -404,10 +458,10 @@ struct functor_traits<scalar_sub_op<Scalar> >
*/
template<typename Scalar>
struct scalar_rsub_op {
typedef typename packet_traits<Scalar>::type Packet;
inline scalar_rsub_op(const scalar_rsub_op& other) : m_other(other.m_other) { }
inline scalar_rsub_op(const Scalar& other) : m_other(other) { }
inline Scalar operator() (const Scalar& a) const { return m_other - a; }
template <typename Packet>
inline const Packet packetOp(const Packet& a) const
{ return internal::psub(pset1<Packet>(m_other), a); }
const Scalar m_other;

View File

@@ -16,13 +16,12 @@ namespace internal {
template<typename Scalar>
struct scalar_constant_op {
typedef typename packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; }
template<typename Index>
EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return internal::pset1<Packet>(m_other); }
template<typename Index, typename PacketType>
EIGEN_STRONG_INLINE const PacketType packetOp(Index, Index = 0) const { return internal::pset1<PacketType>(m_other); }
const Scalar m_other;
};
template<typename Scalar>
@@ -39,7 +38,7 @@ template<typename Scalar>
struct functor_traits<scalar_identity_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
template <typename Scalar, typename Packet, bool RandomAccess> struct linspaced_op_impl;
// linear access for packet ops:
// 1) initialization
@@ -49,15 +48,13 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
//
// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
// in order to avoid the padd() in operator() ?
template <typename Scalar>
struct linspaced_op_impl<Scalar,false>
template <typename Scalar, typename Packet>
struct linspaced_op_impl<Scalar,Packet,false>
{
typedef typename packet_traits<Scalar>::type Packet;
linspaced_op_impl(const Scalar& low, const Scalar& step) :
m_low(low), m_step(step),
m_packetStep(pset1<Packet>(packet_traits<Scalar>::size*step)),
m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*step)),
m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Packet>(-unpacket_traits<Packet>::size)))) {}
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const
@@ -78,14 +75,12 @@ struct linspaced_op_impl<Scalar,false>
// random access for packet ops:
// 1) each step
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
template <typename Scalar>
struct linspaced_op_impl<Scalar,true>
template <typename Scalar, typename Packet>
struct linspaced_op_impl<Scalar,Packet,true>
{
typedef typename packet_traits<Scalar>::type Packet;
linspaced_op_impl(const Scalar& low, const Scalar& step) :
m_low(low), m_step(step),
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Scalar>(0)) {}
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {}
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
@@ -106,12 +101,11 @@ struct linspaced_op_impl<Scalar,true>
// Forward declaration (we default to random access which does not really give
// us a speed gain when using packet access but it allows to use the functor in
// nested expressions).
template <typename Scalar, bool RandomAccess = true> struct linspaced_op;
template <typename Scalar, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,RandomAccess> >
template <typename Scalar, typename PacketType, bool RandomAccess = true> struct linspaced_op;
template <typename Scalar, typename PacketType, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,PacketType,RandomAccess> >
{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
template <typename Scalar, bool RandomAccess> struct linspaced_op
template <typename Scalar, typename PacketType, bool RandomAccess> struct linspaced_op
{
typedef typename packet_traits<Scalar>::type Packet;
linspaced_op(const Scalar& low, const Scalar& high, Index num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {}
template<typename Index>
@@ -126,12 +120,12 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op
return impl(col + row);
}
template<typename Index>
template<typename Index, typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); }
// We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
// there row==0 and col is used for the actual iteration.
template<typename Index>
template<typename Index, typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
{
eigen_assert(col==0 || row==0);
@@ -141,7 +135,8 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op
// This proxy object handles the actual required temporaries, the different
// implementations (random vs. sequential access) as well as the
// correct piping to size 2/4 packet operations.
const linspaced_op_impl<Scalar,RandomAccess> impl;
// TODO find a way to make the packet type configurable
const linspaced_op_impl<Scalar,PacketType,RandomAccess> impl;
};
// all functors allow linear access, except scalar_identity_op. So we fix here a quick meta

View File

@@ -72,6 +72,8 @@ template<typename T>
struct functor_traits<std::not_equal_to<T> >
{ enum { Cost = 1, PacketAccess = false }; };
#if(__cplusplus < 201103L)
// std::binder* are deprecated since c++11 and will be removed in c++17
template<typename T>
struct functor_traits<std::binder2nd<T> >
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
@@ -79,6 +81,7 @@ struct functor_traits<std::binder2nd<T> >
template<typename T>
struct functor_traits<std::binder1st<T> >
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
#endif
template<typename T>
struct functor_traits<std::unary_negate<T> >

View File

@@ -55,6 +55,34 @@ struct functor_traits<scalar_abs_op<Scalar> >
};
};
/** \internal
* \brief Template functor to compute the score of a scalar, to chose a pivot
*
* \sa class CwiseUnaryOp
*/
template<typename Scalar> struct scalar_score_coeff_op : scalar_abs_op<Scalar>
{
typedef void Score_is_abs;
};
template<typename Scalar>
struct functor_traits<scalar_score_coeff_op<Scalar> > : functor_traits<scalar_abs_op<Scalar> > {};
/* Avoid recomputing abs when we know the score and they are the same. Not a true Eigen functor. */
template<typename Scalar, typename=void> struct abs_knowing_score
{
EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)
typedef typename NumTraits<Scalar>::Real result_type;
template<typename Score>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a, const Score&) const { using std::abs; return abs(a); }
};
template<typename Scalar> struct abs_knowing_score<Scalar, typename scalar_score_coeff_op<Scalar>::Score_is_abs>
{
EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)
typedef typename NumTraits<Scalar>::Real result_type;
template<typename Scal>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scal&, const result_type& a) const { return a; }
};
/** \internal
* \brief Template functor to compute the squared absolute value of a scalar
*
@@ -94,6 +122,27 @@ struct functor_traits<scalar_conjugate_op<Scalar> >
};
};
/** \internal
* \brief Template functor to compute the phase angle of a complex
*
* \sa class CwiseUnaryOp, Cwise::arg
*/
template<typename Scalar> struct scalar_arg_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_arg_op)
typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using numext::arg; return arg(a); }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::parg(a); }
};
template<typename Scalar>
struct functor_traits<scalar_arg_op<Scalar> >
{
enum {
Cost = NumTraits<Scalar>::IsComplex ? 5 * NumTraits<Scalar>::MulCost : NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasArg
};
};
/** \internal
* \brief Template functor to cast a scalar to another type
*
@@ -182,7 +231,7 @@ struct functor_traits<scalar_imag_ref_op<Scalar> >
template<typename Scalar> struct scalar_exp_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); }
typedef typename packet_traits<Scalar>::type Packet;
template <typename Packet>
inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
};
template<typename Scalar>
@@ -198,13 +247,28 @@ struct functor_traits<scalar_exp_op<Scalar> >
template<typename Scalar> struct scalar_log_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); }
typedef typename packet_traits<Scalar>::type Packet;
template <typename Packet>
inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
};
template<typename Scalar>
struct functor_traits<scalar_log_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog }; };
/** \internal
*
* \brief Template functor to compute the base-10 logarithm of a scalar
*
* \sa class CwiseUnaryOp, Cwise::log10()
*/
template<typename Scalar> struct scalar_log10_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_log10_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log10; return log10(a); }
template <typename Packet>
inline Packet packetOp(const Packet& a) const { return internal::plog10(a); }
};
template<typename Scalar>
struct functor_traits<scalar_log10_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog10 }; };
/** \internal
* \brief Template functor to compute the square root of a scalar
@@ -213,7 +277,7 @@ struct functor_traits<scalar_log_op<Scalar> >
template<typename Scalar> struct scalar_sqrt_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); }
typedef typename packet_traits<Scalar>::type Packet;
template <typename Packet>
inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
};
template<typename Scalar>
@@ -224,6 +288,25 @@ struct functor_traits<scalar_sqrt_op<Scalar> >
};
};
/** \internal
* \brief Template functor to compute the reciprocal square root of a scalar
* \sa class CwiseUnaryOp, Cwise::rsqrt()
*/
template<typename Scalar> struct scalar_rsqrt_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return Scalar(1)/sqrt(a); }
template <typename Packet>
inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); }
};
template<typename Scalar>
struct functor_traits<scalar_rsqrt_op<Scalar> >
{ enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasRsqrt
};
};
/** \internal
* \brief Template functor to compute the cosine of a scalar
* \sa class CwiseUnaryOp, ArrayBase::cos()
@@ -231,7 +314,7 @@ struct functor_traits<scalar_sqrt_op<Scalar> >
template<typename Scalar> struct scalar_cos_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); }
typedef typename packet_traits<Scalar>::type Packet;
template <typename Packet>
inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
};
template<typename Scalar>
@@ -250,7 +333,7 @@ struct functor_traits<scalar_cos_op<Scalar> >
template<typename Scalar> struct scalar_sin_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); }
typedef typename packet_traits<Scalar>::type Packet;
template <typename Packet>
inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
};
template<typename Scalar>
@@ -270,7 +353,7 @@ struct functor_traits<scalar_sin_op<Scalar> >
template<typename Scalar> struct scalar_tan_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); }
typedef typename packet_traits<Scalar>::type Packet;
template <typename Packet>
inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
};
template<typename Scalar>
@@ -289,7 +372,7 @@ struct functor_traits<scalar_tan_op<Scalar> >
template<typename Scalar> struct scalar_acos_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); }
typedef typename packet_traits<Scalar>::type Packet;
template <typename Packet>
inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
};
template<typename Scalar>
@@ -308,7 +391,7 @@ struct functor_traits<scalar_acos_op<Scalar> >
template<typename Scalar> struct scalar_asin_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); }
typedef typename packet_traits<Scalar>::type Packet;
template <typename Packet>
inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
};
template<typename Scalar>
@@ -320,7 +403,6 @@ struct functor_traits<scalar_asin_op<Scalar> >
};
};
/** \internal
* \brief Template functor to compute the atan of a scalar
* \sa class CwiseUnaryOp, ArrayBase::atan()
@@ -328,7 +410,7 @@ struct functor_traits<scalar_asin_op<Scalar> >
template<typename Scalar> struct scalar_atan_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op)
inline const Scalar operator() (const Scalar& a) const { using std::atan; return atan(a); }
typedef typename packet_traits<Scalar>::type Packet;
template <typename Packet>
inline Packet packetOp(const Packet& a) const { return internal::patan(a); }
};
template<typename Scalar>
@@ -340,6 +422,63 @@ struct functor_traits<scalar_atan_op<Scalar> >
};
};
/** \internal
* \brief Template functor to compute the tanh of a scalar
* \sa class CwiseUnaryOp, ArrayBase::tanh()
*/
template<typename Scalar> struct scalar_tanh_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op)
inline const Scalar operator() (const Scalar& a) const { using std::tanh; return tanh(a); }
template <typename Packet>
inline Packet packetOp(const Packet& a) const { return internal::ptanh(a); }
};
template<typename Scalar>
struct functor_traits<scalar_tanh_op<Scalar> >
{
enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasTanh
};
};
/** \internal
* \brief Template functor to compute the sinh of a scalar
* \sa class CwiseUnaryOp, ArrayBase::sinh()
*/
template<typename Scalar> struct scalar_sinh_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sinh_op)
inline const Scalar operator() (const Scalar& a) const { using std::sinh; return sinh(a); }
template <typename Packet>
inline Packet packetOp(const Packet& a) const { return internal::psinh(a); }
};
template<typename Scalar>
struct functor_traits<scalar_sinh_op<Scalar> >
{
enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasSinh
};
};
/** \internal
* \brief Template functor to compute the cosh of a scalar
* \sa class CwiseUnaryOp, ArrayBase::cosh()
*/
template<typename Scalar> struct scalar_cosh_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cosh_op)
inline const Scalar operator() (const Scalar& a) const { using std::cosh; return cosh(a); }
template <typename Packet>
inline Packet packetOp(const Packet& a) const { return internal::pcosh(a); }
};
template<typename Scalar>
struct functor_traits<scalar_cosh_op<Scalar> >
{
enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasCosh
};
};
/** \internal
* \brief Template functor to compute the inverse of a scalar
* \sa class CwiseUnaryOp, Cwise::inverse()
@@ -388,6 +527,134 @@ template<typename Scalar>
struct functor_traits<scalar_cube_op<Scalar> >
{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
/** \internal
* \brief Template functor to compute the rounded value of a scalar
* \sa class CwiseUnaryOp, ArrayBase::round()
*/
template<typename Scalar> struct scalar_round_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_round_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::round(a); }
template <typename Packet>
inline Packet packetOp(const Packet& a) const { return internal::pround(a); }
};
template<typename Scalar>
struct functor_traits<scalar_round_op<Scalar> >
{
enum {
Cost = NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasRound
};
};
/** \internal
* \brief Template functor to compute the floor of a scalar
* \sa class CwiseUnaryOp, ArrayBase::floor()
*/
template<typename Scalar> struct scalar_floor_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_floor_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::floor(a); }
template <typename Packet>
inline Packet packetOp(const Packet& a) const { return internal::pfloor(a); }
};
template<typename Scalar>
struct functor_traits<scalar_floor_op<Scalar> >
{
enum {
Cost = NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasFloor
};
};
/** \internal
* \brief Template functor to compute the ceil of a scalar
* \sa class CwiseUnaryOp, ArrayBase::ceil()
*/
template<typename Scalar> struct scalar_ceil_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); }
typedef typename packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return internal::pceil(a); }
};
template<typename Scalar>
struct functor_traits<scalar_ceil_op<Scalar> >
{
enum {
Cost = NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasCeil
};
};
/** \internal
* \brief Template functor to compute whether a scalar is NaN
* \sa class CwiseUnaryOp, ArrayBase::isnan()
*/
template<typename Scalar> struct scalar_isnan_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_isnan_op)
typedef bool result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isnan)(a); }
};
template<typename Scalar>
struct functor_traits<scalar_isnan_op<Scalar> >
{
enum {
Cost = NumTraits<Scalar>::MulCost,
PacketAccess = false
};
};
/** \internal
* \brief Template functor to check whether a scalar is +/-inf
* \sa class CwiseUnaryOp, ArrayBase::isinf()
*/
template<typename Scalar> struct scalar_isinf_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_isinf_op)
typedef bool result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isinf)(a); }
};
template<typename Scalar>
struct functor_traits<scalar_isinf_op<Scalar> >
{
enum {
Cost = NumTraits<Scalar>::MulCost,
PacketAccess = false
};
};
/** \internal
* \brief Template functor to check whether a scalar has a finite value
* \sa class CwiseUnaryOp, ArrayBase::isfinite()
*/
template<typename Scalar> struct scalar_isfinite_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_isfinite_op)
typedef bool result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isfinite)(a); }
};
template<typename Scalar>
struct functor_traits<scalar_isfinite_op<Scalar> >
{
enum {
Cost = NumTraits<Scalar>::MulCost,
PacketAccess = false
};
};
/** \internal
* \brief Template functor to compute the logical not of a boolean
*
* \sa class CwiseUnaryOp, ArrayBase::operator!
*/
template<typename Scalar> struct scalar_boolean_not_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_not_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a) const { return !a; }
};
template<typename Scalar>
struct functor_traits<scalar_boolean_not_op<Scalar> > {
enum {
Cost = NumTraits<bool>::AddCost,
PacketAccess = false
};
};
} // end namespace internal

View File

@@ -25,21 +25,31 @@ inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff
return a<=0 ? b : a;
}
#if EIGEN_ARCH_i386_OR_x86_64
const std::ptrdiff_t defaultL1CacheSize = 32*1024;
const std::ptrdiff_t defaultL2CacheSize = 256*1024;
const std::ptrdiff_t defaultL3CacheSize = 2*1024*1024;
#else
const std::ptrdiff_t defaultL1CacheSize = 16*1024;
const std::ptrdiff_t defaultL2CacheSize = 512*1024;
const std::ptrdiff_t defaultL3CacheSize = 512*1024;
#endif
/** \internal */
inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3)
{
static bool m_cache_sizes_initialized = false;
static std::ptrdiff_t m_l1CacheSize = 32*1024;
static std::ptrdiff_t m_l2CacheSize = 256*1024;
static std::ptrdiff_t m_l3CacheSize = 2*1024*1024;
static std::ptrdiff_t m_l1CacheSize = 0;
static std::ptrdiff_t m_l2CacheSize = 0;
static std::ptrdiff_t m_l3CacheSize = 0;
if(!m_cache_sizes_initialized)
{
int l1CacheSize, l2CacheSize, l3CacheSize;
queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize);
m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, 8*1024);
m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, 256*1024);
m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, 8*1024*1024);
m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize);
m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize);
m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize);
m_cache_sizes_initialized = true;
}
@@ -64,6 +74,211 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
}
}
/* Helper for computeProductBlockingSizes.
*
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
* this function computes the blocking size parameters along the respective dimensions
* for matrix products and related algorithms. The blocking sizes depends on various
* parameters:
* - the L1 and L2 cache sizes,
* - the register level blocking sizes defined by gebp_traits,
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */
template<typename LhsScalar, typename RhsScalar, int KcFactor>
void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1)
{
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
// Explanations:
// Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and
// kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed
// per mr x kc horizontal small panels where mr is the blocking size along the m dimension
// at the register level. This small horizontal panel has to stay within L1 cache.
std::ptrdiff_t l1, l2, l3;
manage_caching_sizes(GetAction, &l1, &l2, &l3);
if (num_threads > 1) {
typedef typename Traits::ResScalar ResScalar;
enum {
kdiv = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
ksub = Traits::mr * Traits::nr * sizeof(ResScalar),
k_mask = -8,
mr = Traits::mr,
mr_mask = -mr,
nr = Traits::nr,
nr_mask = -nr
};
// Increasing k gives us more time to prefetch the content of the "C"
// registers. However once the latency is hidden there is no point in
// increasing the value of k, so we'll cap it at 320 (value determined
// experimentally).
const Index k_cache = (std::min<Index>)((l1-ksub)/kdiv, 320);
if (k_cache < k) {
k = k_cache & k_mask;
eigen_internal_assert(k > 0);
}
const Index n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k);
const Index n_per_thread = numext::div_ceil(n, num_threads);
if (n_cache <= n_per_thread) {
// Don't exceed the capacity of the l2 cache.
eigen_internal_assert(n_cache >= static_cast<Index>(nr));
n = n_cache & nr_mask;
eigen_internal_assert(n > 0);
} else {
n = (std::min<Index>)(n, (n_per_thread + nr - 1) & nr_mask);
}
if (l3 > l2) {
// l3 is shared between all cores, so we'll give each thread its own chunk of l3.
const Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);
const Index m_per_thread = numext::div_ceil(m, num_threads);
if(m_cache < m_per_thread && m_cache >= static_cast<Index>(mr)) {
m = m_cache & mr_mask;
eigen_internal_assert(m > 0);
} else {
m = (std::min<Index>)(m, (m_per_thread + mr - 1) & mr_mask);
}
}
}
else {
// In unit tests we do not want to use extra large matrices,
// so we reduce the cache size to check the blocking strategy is not flawed
#ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
l1 = 9*1024;
l2 = 32*1024;
l3 = 512*1024;
#endif
// Early return for small problems because the computation below are time consuming for small problems.
// Perhaps it would make more sense to consider k*n*m??
// Note that for very tiny problem, this function should be bypassed anyway
// because we use the coefficient-based implementation for them.
if((std::max)(k,(std::max)(m,n))<48)
return;
typedef typename Traits::ResScalar ResScalar;
enum {
k_peeling = 8,
k_div = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
k_sub = Traits::mr * Traits::nr * sizeof(ResScalar)
};
// ---- 1st level of blocking on L1, yields kc ----
// Blocking on the third dimension (i.e., k) is chosen so that an horizontal panel
// of size mr x kc of the lhs plus a vertical panel of kc x nr of the rhs both fits within L1 cache.
// We also include a register-level block of the result (mx x nr).
// (In an ideal world only the lhs panel would stay in L1)
// Moreover, kc has to be a multiple of 8 to be compatible with loop peeling, leading to a maximum blocking size of:
const Index max_kc = ((l1-k_sub)/k_div) & (~(k_peeling-1));
const Index old_k = k;
if(k>max_kc)
{
// We are really blocking on the third dimension:
// -> reduce blocking size to make sure the last block is as large as possible
// while keeping the same number of sweeps over the result.
k = (k%max_kc)==0 ? max_kc
: max_kc - k_peeling * ((max_kc-1-(k%max_kc))/(k_peeling*(k/max_kc+1)));
eigen_internal_assert(((old_k/k) == (old_k/max_kc)) && "the number of sweeps has to remain the same");
}
// ---- 2nd level of blocking on max(L2,L3), yields nc ----
// TODO find a reliable way to get the actual amount of cache per core to use for 2nd level blocking, that is:
// actual_l2 = max(l2, l3/nb_core_sharing_l3)
// The number below is quite conservative: it is better to underestimate the cache size rather than overestimating it)
// For instance, it corresponds to 6MB of L3 shared among 4 cores.
#ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
const Index actual_l2 = l3;
#else
const Index actual_l2 = 1572864; // == 1.5 MB
#endif
// Here, nc is chosen such that a block of kc x nc of the rhs fit within half of L2.
// The second half is implicitly reserved to access the result and lhs coefficients.
// When k<max_kc, then nc can arbitrarily growth. In practice, it seems to be fruitful
// to limit this growth: we bound nc to growth by a factor x1.5.
// However, if the entire lhs block fit within L1, then we are not going to block on the rows at all,
// and it becomes fruitful to keep the packed rhs blocks in L1 if there is enough remaining space.
Index max_nc;
const Index lhs_bytes = m * k * sizeof(LhsScalar);
const Index remaining_l1 = l1- k_sub - lhs_bytes;
if(remaining_l1 >= Index(Traits::nr*sizeof(RhsScalar))*k)
{
// L1 blocking
max_nc = remaining_l1 / (k*sizeof(RhsScalar));
}
else
{
// L2 blocking
max_nc = (3*actual_l2)/(2*2*max_kc*sizeof(RhsScalar));
}
// WARNING Below, we assume that Traits::nr is a power of two.
Index nc = std::min<Index>(actual_l2/(2*k*sizeof(RhsScalar)), max_nc) & (~(Traits::nr-1));
if(n>nc)
{
// We are really blocking over the columns:
// -> reduce blocking size to make sure the last block is as large as possible
// while keeping the same number of sweeps over the packed lhs.
// Here we allow one more sweep if this gives us a perfect match, thus the commented "-1"
n = (n%nc)==0 ? nc
: (nc - Traits::nr * ((nc/*-1*/-(n%nc))/(Traits::nr*(n/nc+1))));
}
else if(old_k==k)
{
// So far, no blocking at all, i.e., kc==k, and nc==n.
// In this case, let's perform a blocking over the rows such that the packed lhs data is kept in cache L1/L2
// TODO: part of this blocking strategy is now implemented within the kernel itself, so the L1-based heuristic here should be obsolete.
Index problem_size = k*n*sizeof(LhsScalar);
Index actual_lm = actual_l2;
Index max_mc = m;
if(problem_size<=1024)
{
// problem is small enough to keep in L1
// Let's choose m such that lhs's block fit in 1/3 of L1
actual_lm = l1;
}
else if(l3!=0 && problem_size<=32768)
{
// we have both L2 and L3, and problem is small enough to be kept in L2
// Let's choose m such that lhs's block fit in 1/3 of L2
actual_lm = l2;
max_mc = 576;
}
Index mc = (std::min<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc);
if (mc > Traits::mr) mc -= mc % Traits::mr;
else if (mc==0) return;
m = (m%mc)==0 ? mc
: (mc - Traits::mr * ((mc/*-1*/-(m%mc))/(Traits::mr*(m/mc+1))));
}
}
}
inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)
{
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
return true;
}
#else
EIGEN_UNUSED_VARIABLE(k)
EIGEN_UNUSED_VARIABLE(m)
EIGEN_UNUSED_VARIABLE(n)
#endif
return false;
}
/** \brief Computes the blocking parameters for a m x k times k x n matrix product
*
* \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
@@ -72,88 +287,34 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
*
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
* this function computes the blocking size parameters along the respective dimensions
* for matrix products and related algorithms. The blocking sizes depends on various
* parameters:
* - the L1 and L2 cache sizes,
* - the register level blocking sizes defined by gebp_traits,
* - the number of scalars that fit into a packet (when vectorization is enabled).
* for matrix products and related algorithms.
*
* The blocking size parameters may be evaluated:
* - either by a heuristic based on cache sizes;
* - or using fixed prescribed values (for testing purposes).
*
* \sa setCpuCacheSizes */
#define CEIL(a, b) ((a)+(b)-1)/(b)
template<typename LhsScalar, typename RhsScalar, int KcFactor, typename SizeType>
void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, int num_threads)
template<typename LhsScalar, typename RhsScalar, int KcFactor>
void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
{
// Explanations:
// Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
// mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed
// per kc x nr vertical small panels where nr is the blocking size along the n dimension
// at the register level. For vectorization purpose, these small vertical panels are unpacked,
// e.g., each coefficient is replicated to fit a packet. This small vertical panel has to
// stay in L1 cache.
std::ptrdiff_t l1, l2, l3;
manage_caching_sizes(GetAction, &l1, &l2, &l3);
if (num_threads > 1) {
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
typedef typename Traits::ResScalar ResScalar;
enum {
kdiv = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
ksub = Traits::mr * Traits::nr * sizeof(ResScalar),
k_mask = (0xffffffff/8)*8,
mr = Traits::mr,
mr_mask = (0xffffffff/mr)*mr,
nr = Traits::nr,
nr_mask = (0xffffffff/nr)*nr
};
SizeType k_cache = (l1-ksub)/kdiv;
if (k_cache < k) {
k = k_cache & k_mask;
eigen_assert(k > 0);
}
SizeType n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k);
SizeType n_per_thread = CEIL(n, num_threads);
if (n_cache <= n_per_thread) {
// Don't exceed the capacity of the l2 cache.
eigen_assert(n_cache >= static_cast<SizeType>(nr));
n = n_cache & nr_mask;
eigen_assert(n > 0);
} else {
n = (std::min<SizeType>)(n, (n_per_thread + nr - 1) & nr_mask);
}
if (l3 > l2) {
// l3 is shared between all cores, so we'll give each thread its own chunk of l3.
SizeType m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);
SizeType m_per_thread = CEIL(m, num_threads);
if(m_cache < m_per_thread && m_cache >= static_cast<SizeType>(mr)) {
m = m_cache & mr_mask;
eigen_assert(m > 0);
} else {
m = (std::min<SizeType>)(m, (m_per_thread + mr - 1) & mr_mask);
}
}
}
else {
// In unit tests we do not want to use extra large matrices,
// so we reduce the block size to check the blocking strategy is not flawed
#ifndef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
k = std::min<SizeType>(k,sizeof(LhsScalar)<=4 ? 360 : 240);
n = std::min<SizeType>(n,3840/sizeof(RhsScalar));
m = std::min<SizeType>(m,3840/sizeof(RhsScalar));
#else
k = std::min<SizeType>(k,24);
n = std::min<SizeType>(n,384/sizeof(RhsScalar));
m = std::min<SizeType>(m,384/sizeof(RhsScalar));
#endif
if (!useSpecificBlockingSizes(k, m, n)) {
evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor>(k, m, n, num_threads);
}
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
enum {
kr = 8,
mr = Traits::mr,
nr = Traits::nr
};
if (k > kr) k -= k % kr;
if (m > mr) m -= m % mr;
if (n > nr) n -= n % nr;
}
template<typename LhsScalar, typename RhsScalar, typename SizeType>
inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, int num_threads)
template<typename LhsScalar, typename RhsScalar>
inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
{
computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n, num_threads);
}
@@ -220,11 +381,14 @@ public:
nr = 4,
// register block size along the M direction (currently, this one cannot be modified)
default_mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
// we assume 16 registers
mr = 3*LhsPacketSize,
// See bug 992, if the scalar type is not vectorizable but that EIGEN_HAS_SINGLE_INSTRUCTION_MADD is defined,
// then using 3*LhsPacketSize triggers non-implemented paths in syrk.
mr = Vectorizable ? 3*LhsPacketSize : default_mr,
#else
mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
mr = default_mr,
#endif
LhsProgress = LhsPacketSize,
@@ -698,6 +862,80 @@ protected:
conj_helper<ResPacket,ResPacket,false,ConjRhs> cj;
};
// helper for the rotating kernel below
template <typename GebpKernel, bool UseRotatingKernel = GebpKernel::UseRotatingKernel>
struct PossiblyRotatingKernelHelper
{
// default implementation, not rotating
typedef typename GebpKernel::Traits Traits;
typedef typename Traits::RhsScalar RhsScalar;
typedef typename Traits::RhsPacket RhsPacket;
typedef typename Traits::AccPacket AccPacket;
const Traits& traits;
PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {}
template <size_t K, size_t Index>
void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const
{
traits.loadRhs(from + (Index+4*K)*Traits::RhsProgress, to);
}
void unrotateResult(AccPacket&,
AccPacket&,
AccPacket&,
AccPacket&)
{
}
};
// rotating implementation
template <typename GebpKernel>
struct PossiblyRotatingKernelHelper<GebpKernel, true>
{
typedef typename GebpKernel::Traits Traits;
typedef typename Traits::RhsScalar RhsScalar;
typedef typename Traits::RhsPacket RhsPacket;
typedef typename Traits::AccPacket AccPacket;
const Traits& traits;
PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {}
template <size_t K, size_t Index>
void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const
{
if (Index == 0) {
to = pload<RhsPacket>(from + 4*K*Traits::RhsProgress);
} else {
EIGEN_ASM_COMMENT("Do not reorder code, we're very tight on registers");
to = protate<1>(to);
}
}
void unrotateResult(AccPacket& res0,
AccPacket& res1,
AccPacket& res2,
AccPacket& res3)
{
PacketBlock<AccPacket> resblock;
resblock.packet[0] = res0;
resblock.packet[1] = res1;
resblock.packet[2] = res2;
resblock.packet[3] = res3;
ptranspose(resblock);
resblock.packet[3] = protate<1>(resblock.packet[3]);
resblock.packet[2] = protate<2>(resblock.packet[2]);
resblock.packet[1] = protate<3>(resblock.packet[1]);
ptranspose(resblock);
res0 = resblock.packet[0];
res1 = resblock.packet[1];
res2 = resblock.packet[2];
res3 = resblock.packet[3];
}
};
/* optimized GEneral packed Block * packed Panel product kernel
*
* Mixing type logic: C += A * B
@@ -731,6 +969,16 @@ struct gebp_kernel
ResPacketSize = Traits::ResPacketSize
};
static const bool UseRotatingKernel =
EIGEN_ARCH_ARM &&
internal::is_same<LhsScalar, float>::value &&
internal::is_same<RhsScalar, float>::value &&
internal::is_same<ResScalar, float>::value &&
Traits::LhsPacketSize == 4 &&
Traits::RhsPacketSize == 4 &&
Traits::ResPacketSize == 4;
EIGEN_DONT_INLINE
void operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
Index rows, Index depth, Index cols, ResScalar alpha,
@@ -758,22 +1006,36 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
const Index peeled_kc = depth & ~(pk-1);
const Index prefetch_res_offset = 32/sizeof(ResScalar);
// const Index depth2 = depth & ~1;
//---------- Process 3 * LhsProgress rows at once ----------
// This corresponds to 3*LhsProgress x nr register blocks.
// Usually, make sense only with FMA
if(mr>=3*Traits::LhsProgress)
{
// loops on each largest micro horizontal panel of lhs (3*Traits::LhsProgress x depth)
for(Index i=0; i<peeled_mc3; i+=3*Traits::LhsProgress)
{
PossiblyRotatingKernelHelper<gebp_kernel> possiblyRotatingKernelHelper(traits);
// Here, the general idea is to loop on each largest micro horizontal panel of the lhs (3*Traits::LhsProgress x depth)
// and on each largest micro vertical panel of the rhs (depth * nr).
// Blocking sizes, i.e., 'depth' has been computed so that the micro horizontal panel of the lhs fit in L1.
// However, if depth is too small, we can extend the number of rows of these horizontal panels.
// This actual number of rows is computed as follow:
const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function.
// The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size
// suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess),
// or because we are testing specific blocking sizes.
const Index actual_panel_rows = (3*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) ));
for(Index i1=0; i1<peeled_mc3; i1+=actual_panel_rows)
{
// loops on each largest micro vertical panel of rhs (depth * nr)
const Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc3);
for(Index j2=0; j2<packet_cols4; j2+=nr)
{
// We select a 3*Traits::LhsProgress x nr micro block of res which is entirely
for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
{
// We selected a 3*Traits::LhsProgress x nr micro block of res which is entirely
// stored into 3 x nr registers.
const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*Traits::LhsProgress)];
const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*LhsProgress)];
prefetch(&blA[0]);
// gets res block as register
@@ -798,50 +1060,50 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
prefetch(&blB[0]);
LhsPacket A0, A1;
for(Index k=0; k<peeled_kc; k+=pk)
{
EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX4");
RhsPacket B_0, T0;
LhsPacket A2;
#define EIGEN_GEBGP_ONESTEP(K) \
#define EIGEN_GEBP_ONESTEP(K) \
do { \
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
internal::prefetch(blA+(3*K+16)*LhsProgress); \
internal::prefetch(blB+(4*K+16)*RhsProgress); /* Bug 953 */ \
if (EIGEN_ARCH_ARM) internal::prefetch(blB+(4*K+16)*RhsProgress); /* Bug 953 */ \
traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
traits.loadRhs(&blB[(0+4*K)*RhsProgress], B_0); \
possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 0>(B_0, blB); \
traits.madd(A0, B_0, C0, T0); \
traits.madd(A1, B_0, C4, T0); \
traits.madd(A2, B_0, C8, B_0); \
traits.loadRhs(&blB[1+4*K*RhsProgress], B_0); \
possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 1>(B_0, blB); \
traits.madd(A0, B_0, C1, T0); \
traits.madd(A1, B_0, C5, T0); \
traits.madd(A2, B_0, C9, B_0); \
traits.loadRhs(&blB[2+4*K*RhsProgress], B_0); \
possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 2>(B_0, blB); \
traits.madd(A0, B_0, C2, T0); \
traits.madd(A1, B_0, C6, T0); \
traits.madd(A2, B_0, C10, B_0); \
traits.loadRhs(&blB[3+4*K*RhsProgress], B_0); \
possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 3>(B_0, blB); \
traits.madd(A0, B_0, C3 , T0); \
traits.madd(A1, B_0, C7, T0); \
traits.madd(A2, B_0, C11, B_0); \
EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX4"); \
} while(false)
internal::prefetch(blB + 4 * pk * sizeof(RhsScalar)); /* Bug 953 */
EIGEN_GEBGP_ONESTEP(0);
EIGEN_GEBGP_ONESTEP(1);
EIGEN_GEBGP_ONESTEP(2);
EIGEN_GEBGP_ONESTEP(3);
EIGEN_GEBGP_ONESTEP(4);
EIGEN_GEBGP_ONESTEP(5);
EIGEN_GEBGP_ONESTEP(6);
EIGEN_GEBGP_ONESTEP(7);
internal::prefetch(blB);
EIGEN_GEBP_ONESTEP(0);
EIGEN_GEBP_ONESTEP(1);
EIGEN_GEBP_ONESTEP(2);
EIGEN_GEBP_ONESTEP(3);
EIGEN_GEBP_ONESTEP(4);
EIGEN_GEBP_ONESTEP(5);
EIGEN_GEBP_ONESTEP(6);
EIGEN_GEBP_ONESTEP(7);
blB += pk*4*RhsProgress;
blA += pk*3*Traits::LhsProgress;
@@ -853,12 +1115,17 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
{
RhsPacket B_0, T0;
LhsPacket A2;
EIGEN_GEBGP_ONESTEP(0);
EIGEN_GEBP_ONESTEP(0);
blB += 4*RhsProgress;
blA += 3*Traits::LhsProgress;
}
#undef EIGEN_GEBGP_ONESTEP
#undef EIGEN_GEBP_ONESTEP
possiblyRotatingKernelHelper.unrotateResult(C0, C1, C2, C3);
possiblyRotatingKernelHelper.unrotateResult(C4, C5, C6, C7);
possiblyRotatingKernelHelper.unrotateResult(C8, C9, C10, C11);
ResPacket R0, R1, R2;
ResPacket alphav = pset1<ResPacket>(alpha);
@@ -900,12 +1167,15 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
traits.acc(C11, alphav, R2);
r3.storePacket(0 * Traits::ResPacketSize, R0);
r3.storePacket(1 * Traits::ResPacketSize, R1);
r3.storePacket(2 * Traits::ResPacketSize, R2);
r3.storePacket(2 * Traits::ResPacketSize, R2);
}
}
// Deal with remaining columns of the rhs
for(Index j2=packet_cols4; j2<cols; j2++)
{
for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
{
// One column at a time
const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*Traits::LhsProgress)];
prefetch(&blA[0]);
@@ -976,7 +1246,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
traits.acc(C8, alphav, R2);
r0.storePacket(0 * Traits::ResPacketSize, R0);
r0.storePacket(1 * Traits::ResPacketSize, R1);
r0.storePacket(2 * Traits::ResPacketSize, R2);
r0.storePacket(2 * Traits::ResPacketSize, R2);
}
}
}
}
@@ -984,13 +1255,21 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
//---------- Process 2 * LhsProgress rows at once ----------
if(mr>=2*Traits::LhsProgress)
{
// loops on each largest micro horizontal panel of lhs (2*LhsProgress x depth)
for(Index i=peeled_mc3; i<peeled_mc2; i+=2*LhsProgress)
const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function.
// The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size
// suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess),
// or because we are testing specific blocking sizes.
Index actual_panel_rows = (2*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) ));
for(Index i1=peeled_mc3; i1<peeled_mc2; i1+=actual_panel_rows)
{
// loops on each largest micro vertical panel of rhs (depth * nr)
Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc2);
for(Index j2=0; j2<packet_cols4; j2+=nr)
{
// We select a 2*Traits::LhsProgress x nr micro block of res which is entirely
for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
{
// We selected a 2*Traits::LhsProgress x nr micro block of res which is entirely
// stored into 2 x nr registers.
const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
@@ -1094,11 +1373,14 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
r2.storePacket(1 * Traits::ResPacketSize, R1);
r3.storePacket(0 * Traits::ResPacketSize, R2);
r3.storePacket(1 * Traits::ResPacketSize, R3);
}
}
// Deal with remaining columns of the rhs
for(Index j2=packet_cols4; j2<cols; j2++)
{
for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
{
// One column at a time
const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
prefetch(&blA[0]);
@@ -1165,6 +1447,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
traits.acc(C4, alphav, R1);
r0.storePacket(0 * Traits::ResPacketSize, R0);
r0.storePacket(1 * Traits::ResPacketSize, R1);
}
}
}
}
@@ -1286,17 +1569,17 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
for(Index k=0; k<peeled_kc; k+=pk)
{
EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX1");
EIGEN_ASM_COMMENT("begin gebp micro kernel 1pX1");
RhsPacket B_0;
#define EIGEN_GEBGP_ONESTEP(K) \
do { \
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX1"); \
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX1"); \
EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
traits.madd(A0, B_0, C0, B_0); \
EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX1"); \
EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX1"); \
} while(false);
EIGEN_GEBGP_ONESTEP(0);
@@ -1311,7 +1594,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
blB += pk*RhsProgress;
blA += pk*1*Traits::LhsProgress;
EIGEN_ASM_COMMENT("end gebp micro kernel 2pX1");
EIGEN_ASM_COMMENT("end gebp micro kernel 1pX1");
}
// process remaining peeled loop
@@ -1783,19 +2066,19 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Co
const LinearMapper dm3 = rhs.getLinearMapper(0, j2 + 3);
Index k=0;
if((PacketSize%4)==0) // TODO enbale vectorized transposition for PacketSize==2 ??
if((PacketSize%4)==0) // TODO enable vectorized transposition for PacketSize==2 ??
{
for(; k<peeled_k; k+=PacketSize) {
PacketBlock<Packet,(PacketSize%4)==0?4:PacketSize> kernel;
kernel.packet[0] = dm0.loadPacket(k);
kernel.packet[1] = dm1.loadPacket(k);
kernel.packet[2] = dm2.loadPacket(k);
kernel.packet[3] = dm3.loadPacket(k);
kernel.packet[1%PacketSize] = dm1.loadPacket(k);
kernel.packet[2%PacketSize] = dm2.loadPacket(k);
kernel.packet[3%PacketSize] = dm3.loadPacket(k);
ptranspose(kernel);
pstoreu(blockB+count+0*PacketSize, cj.pconj(kernel.packet[0]));
pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel.packet[1]));
pstoreu(blockB+count+2*PacketSize, cj.pconj(kernel.packet[2]));
pstoreu(blockB+count+3*PacketSize, cj.pconj(kernel.packet[3]));
pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel.packet[1%PacketSize]));
pstoreu(blockB+count+2*PacketSize, cj.pconj(kernel.packet[2%PacketSize]));
pstoreu(blockB+count+3*PacketSize, cj.pconj(kernel.packet[3%PacketSize]));
count+=4*PacketSize;
}
}

View File

@@ -164,6 +164,8 @@ static void run(Index rows, Index cols, Index depth,
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols;
// For each horizontal panel of the rhs, and corresponding panel of the lhs...
for(Index i2=0; i2<rows; i2+=mc)
@@ -188,7 +190,8 @@ static void run(Index rows, Index cols, Index depth,
// We pack the rhs's block into a sequential chunk of memory (L2 caching)
// Note that this block will be read a very high number of times, which is equal to the number of
// micro horizontal panel of the large rhs's panel (e.g., rows/12 times).
pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);
if((!pack_rhs_once) || i2==0)
pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);
// Everything is packed, we can now call the panel * block kernel:
gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
@@ -201,15 +204,10 @@ static void run(Index rows, Index cols, Index depth,
};
/*********************************************************************************
* Specialization of GeneralProduct<> for "large" GEMM, i.e.,
* Specialization of generic_product_impl for "large" GEMM, i.e.,
* implementation of the high level wrapper to general_matrix_matrix_product
**********************************************************************************/
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,GemmProduct> >
: traits<ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs> >
{};
template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
struct gemm_functor
{
@@ -217,8 +215,9 @@ struct gemm_functor
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
{}
void initParallelSession() const
void initParallelSession(Index num_threads) const
{
m_blocking.initParallel(m_lhs.rows(), m_rhs.cols(), m_lhs.cols(), num_threads);
m_blocking.allocateA();
}
@@ -276,7 +275,7 @@ class level3_blocking
};
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true>
class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true /* == FiniteAtCompileTime */>
: public level3_blocking<
typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
@@ -294,19 +293,32 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
SizeB = ActualCols * MaxDepth
};
EIGEN_ALIGN_DEFAULT LhsScalar m_staticA[SizeA];
EIGEN_ALIGN_DEFAULT RhsScalar m_staticB[SizeB];
#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];
EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];
#else
EIGEN_ALIGN_MAX char m_staticA[SizeA * sizeof(LhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
EIGEN_ALIGN_MAX char m_staticB[SizeB * sizeof(RhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
#endif
public:
gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, int /*num_threads*/, bool /*full_rows = false*/)
gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, Index /*num_threads*/, bool /*full_rows = false*/)
{
this->m_mc = ActualRows;
this->m_nc = ActualCols;
this->m_kc = MaxDepth;
#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
this->m_blockA = m_staticA;
this->m_blockB = m_staticB;
#else
this->m_blockA = reinterpret_cast<LhsScalar*>((std::size_t(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
this->m_blockB = reinterpret_cast<RhsScalar*>((std::size_t(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
#endif
}
void initParallel(Index, Index, Index, Index)
{}
inline void allocateA() {}
inline void allocateB() {}
@@ -331,7 +343,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
public:
gemm_blocking_space(Index rows, Index cols, Index depth, int num_threads, bool l3_blocking)
gemm_blocking_space(Index rows, Index cols, Index depth, Index num_threads, bool l3_blocking)
{
this->m_mc = Transpose ? cols : rows;
this->m_nc = Transpose ? rows : cols;
@@ -351,6 +363,19 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
m_sizeA = this->m_mc * this->m_kc;
m_sizeB = this->m_kc * this->m_nc;
}
void initParallel(Index rows, Index cols, Index depth, Index num_threads)
{
this->m_mc = Transpose ? cols : rows;
this->m_nc = Transpose ? rows : cols;
this->m_kc = depth;
eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
Index m = this->m_mc;
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
m_sizeA = this->m_mc * this->m_kc;
m_sizeB = this->m_kc * this->m_nc;
}
void allocateA()
{
@@ -437,6 +462,8 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
{
eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0)
return;
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);

View File

@@ -125,7 +125,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,C
// How many coeffs of the result do we have to skip to be aligned.
// Here we assume data are at least aligned on the base scalar type.
Index alignedStart = internal::first_aligned(res,size);
Index alignedStart = internal::first_default_aligned(res,size);
Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
@@ -463,7 +463,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R
Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
{
EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
// FIXME: what is the purpose of this EIGEN_ALIGN_DEFAULT ??
EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
// this helps the compiler generating good binary code
@@ -572,7 +573,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R
{
for (Index i=start; i<end; ++i)
{
EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
ResPacket ptmp0 = pset1<ResPacket>(tmp0);
const LhsScalars lhs0 = lhs.getVectorMapper(i, 0);
// process first unaligned result's coeffs

25
Eigen/src/Core/products/GeneralMatrixVector_MKL.h Normal file → Executable file
View File

@@ -46,38 +46,37 @@ namespace internal {
// gemv specialization
template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product_gemv :
general_matrix_vector_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,ConjugateRhs,BuiltIn> {};
template<typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product_gemv;
#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,Specialized> { \
static void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
const const_blas_data_mapper<Scalar,Index,ColMajor> &lhs, \
const const_blas_data_mapper<Scalar,Index,RowMajor> &rhs, \
Scalar* res, Index resIncr, Scalar alpha) \
{ \
if (ConjugateLhs) { \
general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,BuiltIn>::run( \
rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,BuiltIn>::run( \
rows, cols, lhs, rhs, res, resIncr, alpha); \
} else { \
general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
} \
} \
}; \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,RowMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ConjugateRhs,Specialized> { \
static void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
const const_blas_data_mapper<Scalar,Index,RowMajor> &lhs, \
const const_blas_data_mapper<Scalar,Index,ColMajor> &rhs, \
Scalar* res, Index resIncr, Scalar alpha) \
{ \
general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
} \
}; \

View File

@@ -120,25 +120,28 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
return func(0,rows, 0,cols);
Eigen::initParallel();
func.initParallelSession();
func.initParallelSession(threads);
if(transpose)
std::swap(rows,cols);
Index blockCols = (cols / threads) & ~Index(0x3);
Index blockRows = (rows / threads);
blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
#pragma omp parallel num_threads(threads)
{
Index i = omp_get_thread_num();
// Note that the actual number of threads might be lower than the number of request ones.
Index actual_threads = omp_get_num_threads();
Index blockCols = (cols / actual_threads) & ~Index(0x3);
Index blockRows = (rows / actual_threads);
blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
Index r0 = i*blockRows;
Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows;
Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
Index c0 = i*blockCols;
Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols;
Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
info[i].lhs_start = r0;
info[i].lhs_length = actualBlockRows;

View File

@@ -94,7 +94,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
size_t starti = FirstTriangular ? 0 : j+2;
size_t endi = FirstTriangular ? j : size;
size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti);
size_t alignedStart = (starti) + internal::first_default_aligned(&res[starti], endi-starti);
size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed

View File

@@ -257,6 +257,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Scalar* _res, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
const Index PacketBytes = packet_traits<Scalar>::size*sizeof(Scalar);
// strip zeros
Index diagSize = (std::min)(_cols,_depth);
Index rows = _rows;
@@ -274,7 +275,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols+EIGEN_ALIGN_BYTES/sizeof(Scalar);
std::size_t sizeB = kc*cols+EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar);
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
@@ -311,7 +312,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
Scalar* geb = blockB+ts*ts;
geb = geb + internal::first_aligned(geb,EIGEN_ALIGN_BYTES/sizeof(Scalar));
geb = geb + internal::first_aligned<PacketBytes>(geb,PacketBytes/sizeof(Scalar));
pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 0 : k2), actual_kc, rs);

4
Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h Normal file → Executable file
View File

@@ -122,7 +122,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
MKL_INT aStride = aa_tmp.outerStride(); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \
\
@@ -236,7 +236,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
MKL_INT aStride = aa_tmp.outerStride(); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \
\

View File

@@ -117,8 +117,9 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
{
// TODO write a small kernel handling this (can be shared with trsv)
Index i = IsLower ? k2+k1+k : k2-k1-k-1;
Index s = IsLower ? k2+k1 : i+1;
Index rs = actualPanelWidth - k - 1; // remaining size
Index s = TriStorageOrder==RowMajor ? (IsLower ? k2+k1 : i+1)
: IsLower ? i+1 : i-rs;
Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
for (Index j=j2; j<j2+actual_cols; ++j)
@@ -135,7 +136,6 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
}
else
{
Index s = IsLower ? i+1 : i-rs;
Scalar b = (other(i,j) *= a);
Scalar* r = &other(s,j);
const Scalar* l = &tri(s,i);

7
Eigen/src/Core/util/BlasUtil.h Normal file → Executable file
View File

@@ -166,7 +166,7 @@ class BlasLinearMapper {
return ploadt<HalfPacket, AlignmentType>(m_data + i);
}
EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const {
EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
pstoret<Scalar, Packet, AlignmentType>(m_data + i, p);
}
@@ -214,7 +214,7 @@ class blas_data_mapper {
}
template<typename SubPacket>
EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, SubPacket p) const {
EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
}
@@ -224,12 +224,13 @@ class blas_data_mapper {
}
const Index stride() const { return m_stride; }
const Scalar* data() const { return m_data; }
Index firstAligned(Index size) const {
if (size_t(m_data)%sizeof(Scalar)) {
return -1;
}
return internal::first_aligned(m_data, size);
return internal::first_default_aligned(m_data, size);
}
protected:

View File

@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
@@ -140,7 +140,7 @@ const unsigned int LvalueBit = 0x20;
*/
const unsigned int DirectAccessBit = 0x40;
/** \ingroup flags
/** \deprecated \ingroup flags
*
* means the first coefficient packet is guaranteed to be aligned.
* An expression cannot has the AlignedBit without the PacketAccessBit flag.
@@ -215,12 +215,31 @@ enum {
};
/** \ingroup enums
* Enum for indicating whether an object is aligned or not. */
* Enum for indicating whether a buffer is aligned or not. */
enum {
/** Object is not correctly aligned for vectorization. */
Unaligned=0,
/** Object is aligned for vectorization. */
Aligned=1
Unaligned=0, /**< Data pointer has no specific alignment. */
Aligned8=8, /**< Data pointer is aligned on a 8 bytes boundary. */
Aligned16=16, /**< Data pointer is aligned on a 16 bytes boundary. */
Aligned32=32, /**< Data pointer is aligned on a 32 bytes boundary. */
Aligned64=64, /**< Data pointer is aligned on a 64 bytes boundary. */
Aligned128=128, /**< Data pointer is aligned on a 128 bytes boundary. */
AlignedMask=255,
Aligned=16, /**< \deprecated Synonym for Aligned16. */
#if EIGEN_MAX_ALIGN_BYTES==128
AlignedMax = Aligned128
#elif EIGEN_MAX_ALIGN_BYTES==64
AlignedMax = Aligned64
#elif EIGEN_MAX_ALIGN_BYTES==32
AlignedMax = Aligned32
#elif EIGEN_MAX_ALIGN_BYTES==16
AlignedMax = Aligned16
#elif EIGEN_MAX_ALIGN_BYTES==8
AlignedMax = Aligned8
#elif EIGEN_MAX_ALIGN_BYTES==0
AlignedMax = Unaligned
#else
#error Invalid value for EIGEN_MAX_ALIGN_BYTES
#endif
};
/** \ingroup enums
@@ -452,8 +471,8 @@ namespace Architecture
}
/** \internal \ingroup enums
* Enum used as template parameter in GeneralProduct. */
enum { DefaultProduct=0, CoeffBasedProductMode, LazyCoeffBasedProductMode, LazyProduct, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
* Enum used as template parameter in Product and product evalautors. */
enum { DefaultProduct=0, LazyProduct, AliasFreeProduct, CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
/** \internal \ingroup enums
* Enum used in experimental parallel implementation. */
@@ -468,6 +487,9 @@ struct Sparse {};
/** The type used to identify a permutation storage. */
struct PermutationStorage {};
/** The type used to identify a permutation storage. */
struct TranspositionsStorage {};
/** The type used to identify a matrix expression */
struct MatrixXpr {};
@@ -482,6 +504,7 @@ struct BandShape { static std::string debugName() { return "BandSha
struct TriangularShape { static std::string debugName() { return "TriangularShape"; } };
struct SelfAdjointShape { static std::string debugName() { return "SelfAdjointShape"; } };
struct PermutationShape { static std::string debugName() { return "PermutationShape"; } };
struct TranspositionsShape { static std::string debugName() { return "TranspositionsShape"; } };
struct SparseShape { static std::string debugName() { return "SparseShape"; } };
namespace internal {
@@ -492,6 +515,16 @@ struct IndexBased {};
// evaluator based on iterators to access coefficients.
struct IteratorBased {};
/** \internal
* Constants for comparison functors
*/
enum ComparisonName {
cmp_EQ = 0,
cmp_LT = 1,
cmp_LE = 2,
cmp_UNORD = 3,
cmp_NEQ = 4
};
} // end namespace internal
} // end namespace Eigen

View File

@@ -91,8 +91,6 @@ template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
template<typename ViewOp, typename MatrixType> class CwiseUnaryView;
template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
template<typename BinOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp; // TODO deprecated
template<typename Derived, typename Lhs, typename Rhs> class ProductBase; // TODO deprecated
template<typename Decomposition, typename Rhstype> class Solve;
template<typename XprType> class Inverse;
@@ -102,9 +100,6 @@ namespace internal {
template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product;
template<typename Lhs, typename Rhs, int Mode> class GeneralProduct; // TODO deprecated
template<typename Lhs, typename Rhs, int NestingFlags> class CoeffBasedProduct; // TODO deprecated
template<typename Derived> class DiagonalBase;
template<typename _DiagonalVectorType> class DiagonalWrapper;
template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeAtCompileTime> class DiagonalMatrix;
@@ -152,6 +147,9 @@ template<typename _Scalar, int Rows=Dynamic, int Cols=Dynamic, int Supers=Dynami
namespace internal {
template<typename Lhs, typename Rhs> struct product_type;
template<bool> struct EnableIf;
/** \internal
* \class product_evaluator
* Products need their own evaluator with more template arguments allowing for
@@ -162,7 +160,8 @@ template< typename T,
typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape,
typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape,
typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
typename RhsScalar = typename traits<typename T::Rhs>::Scalar
typename RhsScalar = typename traits<typename T::Rhs>::Scalar,
typename = EnableIf<true> // extra template parameter for SFINAE-based specialization
> struct product_evaluator;
}
@@ -189,6 +188,7 @@ template<typename Scalar> struct scalar_imag_op;
template<typename Scalar> struct scalar_abs_op;
template<typename Scalar> struct scalar_abs2_op;
template<typename Scalar> struct scalar_sqrt_op;
template<typename Scalar> struct scalar_rsqrt_op;
template<typename Scalar> struct scalar_exp_op;
template<typename Scalar> struct scalar_log_op;
template<typename Scalar> struct scalar_cos_op;
@@ -213,6 +213,7 @@ template<typename Scalar> struct scalar_identity_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_multiple2_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_quotient_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_quotient2_op;
} // end namespace internal
@@ -264,6 +265,7 @@ template<typename Derived> class QuaternionBase;
template<typename Scalar> class Rotation2D;
template<typename Scalar> class AngleAxis;
template<typename Scalar,int Dim> class Translation;
template<typename Scalar,int Dim> class AlignedBox;
template<typename Scalar, int Options = AutoAlign> class Quaternion;
template<typename Scalar,int Dim,int Mode,int _Options=AutoAlign> class Transform;

View File

@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
@@ -13,7 +13,7 @@
#define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 2
#define EIGEN_MINOR_VERSION 90
#define EIGEN_MINOR_VERSION 91
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@@ -101,7 +101,7 @@
/// \internal EIGEN_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.)
#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_CLANG || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM )
#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM )
#define EIGEN_COMP_GNUC_STRICT 1
#else
#define EIGEN_COMP_GNUC_STRICT 0
@@ -213,7 +213,8 @@
#endif
/// \internal EIGEN_OS_ANDROID set to 1 if the OS is Android
#if defined(__ANDROID__)
// note: ANDROID is defined when using ndk_build, __ANDROID__ is defined when using a standalone toolchain.
#if defined(__ANDROID__) || defined(ANDROID)
#define EIGEN_OS_ANDROID 1
#else
#define EIGEN_OS_ANDROID 0
@@ -282,6 +283,19 @@
#define EIGEN_OS_WIN_STRICT 0
#endif
/// \internal EIGEN_OS_SUN set to 1 if the OS is SUN
#if (defined(sun) || defined(__sun)) && !(defined(__SVR4) || defined(__svr4__))
#define EIGEN_OS_SUN 1
#else
#define EIGEN_OS_SUN 0
#endif
/// \internal EIGEN_OS_SOLARIS set to 1 if the OS is Solaris
#if (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__))
#define EIGEN_OS_SOLARIS 1
#else
#define EIGEN_OS_SOLARIS 0
#endif
@@ -292,65 +306,10 @@
#define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
#endif
// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
// certain common platform (compiler+architecture combinations) to avoid these problems.
// Only static alignment is really problematic (relies on nonstandard compiler extensions that don't
// work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even
// when we have to disable static alignment.
#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
#else
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
#endif
// static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
&& !EIGEN_GCC3_OR_OLDER \
&& !EIGEN_COMP_SUNCC \
&& !EIGEN_OS_QNX
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
#else
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
#endif
// Defined the boundary (in bytes) on which the data needs to be aligned. Note
// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
// aligned at all regardless of the value of this #define.
#define EIGEN_ALIGN_BYTES 16
#ifdef EIGEN_DONT_ALIGN
#ifndef EIGEN_DONT_ALIGN_STATICALLY
#define EIGEN_DONT_ALIGN_STATICALLY
#endif
#define EIGEN_ALIGN 0
#elif !defined(EIGEN_DONT_VECTORIZE)
#if defined(__AVX__)
#undef EIGEN_ALIGN_BYTES
#define EIGEN_ALIGN_BYTES 32
#endif
#define EIGEN_ALIGN 1
#else
#define EIGEN_ALIGN 0
#endif
// This macro can be used to prevent from macro expansion, e.g.:
// std::max EIGEN_NOT_A_MACRO(a,b)
#define EIGEN_NOT_A_MACRO
// EIGEN_ALIGN_STATICALLY is the true test whether we want to align arrays on the stack or not. It takes into account both the user choice to explicitly disable
// alignment (EIGEN_DONT_ALIGN_STATICALLY) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Henceforth, only EIGEN_ALIGN_STATICALLY should be used.
#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT && !defined(EIGEN_DONT_ALIGN_STATICALLY)
#define EIGEN_ALIGN_STATICALLY 1
#else
#define EIGEN_ALIGN_STATICALLY 0
#ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#endif
#endif
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor
#else
@@ -382,17 +341,44 @@
#define EIGEN_HAVE_RVALUE_REFERENCES
#endif
// Does the compiler support result_of?
#if (__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L))
#define EIGEN_HAS_STD_RESULT_OF 1
#endif
// Does the compiler support variadic templates?
#if __cplusplus > 199711L
#define EIGEN_HAS_VARIADIC_TEMPLATES 1
#endif
// Does the compiler support const expressions?
#if (defined(__plusplus) && __cplusplus >= 201402L) || \
#ifdef __CUDACC__
// Const expressions are not supported regardless of what host compiler is used
#elif (defined(__cplusplus) && __cplusplus >= 201402L) || \
EIGEN_GNUC_AT_LEAST(4,9)
#define EIGEN_HAS_CONSTEXPR 1
#endif
// Does the compiler support C++11 math?
// Let's be conservative and enable the default C++11 implementation only if we are sure it exists
#ifndef EIGEN_HAS_CXX11_MATH
#if (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \
&& (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC)
#define EIGEN_HAS_CXX11_MATH 1
#else
#define EIGEN_HAS_CXX11_MATH 0
#endif
#endif
// Does the compiler support proper C++11 containers?
#ifndef EIGEN_HAS_CXX11_CONTAINERS
#if ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG)) || EIGEN_COMP_MSVC >= 1900
#define EIGEN_HAS_CXX11_CONTAINERS 1
#else
#define EIGEN_HAS_CXX11_CONTAINERS 0
#endif
#endif
/** Allows to disable some optimizations which might affect the accuracy of the result.
* Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them.
* They currently include:
@@ -550,6 +536,20 @@ namespace Eigen {
#endif
#endif
//------------------------------------------------------------------------------------------
// Static and dynamic alignment control
//
// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES
// as the maximal boundary in bytes on which dynamically and statically allocated data may be alignment respectively.
// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,
// a default value is automatically computed based on architecture, compiler, and OS.
//
// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX}
// to be used to declare statically aligned buffers.
//------------------------------------------------------------------------------------------
/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
* However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
* so that vectorization doesn't affect binary compatibility.
@@ -570,22 +570,124 @@ namespace Eigen {
#error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
#endif
// If the user explicitly disable vectorization, then we also disable alignment
#if defined(EIGEN_DONT_VECTORIZE)
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
#elif defined(__AVX__)
// 32 bytes static alignmeent is preferred only if really required
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
#else
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
#endif
// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense
#define EIGEN_MIN_ALIGN_BYTES 16
// Defined the boundary (in bytes) on which the data needs to be aligned. Note
// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
// aligned at all regardless of the value of this #define.
#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
#endif
// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprectated
// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0
#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
#ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
#undef EIGEN_MAX_STATIC_ALIGN_BYTES
#endif
#define EIGEN_MAX_STATIC_ALIGN_BYTES 0
#endif
#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
// Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES
// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
// certain common platform (compiler+architecture combinations) to avoid these problems.
// Only static alignment is really problematic (relies on nonstandard compiler extensions that don't
// work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even
// when we have to disable static alignment.
#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
#else
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
#endif
// static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
&& !EIGEN_GCC3_OR_OLDER \
&& !EIGEN_COMP_SUNCC \
&& !EIGEN_OS_QNX
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
#else
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
#endif
#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#else
#define EIGEN_MAX_STATIC_ALIGN_BYTES 0
#endif
#endif
// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_ALIGN_BYTES
#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
#undef EIGEN_MAX_STATIC_ALIGN_BYTES
#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
#endif
#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
#define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#endif
// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not.
// It takes into account both the user choice to explicitly enable/disable alignment (by settting EIGEN_MAX_STATIC_ALIGN_BYTES)
// and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT).
// Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used.
// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY
#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
#define EIGEN_ALIGN_DEFAULT EIGEN_ALIGN_TO_BOUNDARY(EIGEN_ALIGN_BYTES)
#if EIGEN_ALIGN_STATICALLY
#define EIGEN_USER_ALIGN_TO_BOUNDARY(n) EIGEN_ALIGN_TO_BOUNDARY(n)
#define EIGEN_USER_ALIGN16 EIGEN_ALIGN16
#define EIGEN_USER_ALIGN32 EIGEN_ALIGN32
#define EIGEN_USER_ALIGN_DEFAULT EIGEN_ALIGN_DEFAULT
#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
#else
#define EIGEN_USER_ALIGN_TO_BOUNDARY(n)
#define EIGEN_USER_ALIGN16
#define EIGEN_USER_ALIGN32
#define EIGEN_USER_ALIGN_DEFAULT
#define EIGEN_ALIGN_MAX
#endif
// Dynamic alignment control
#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
#endif
#ifdef EIGEN_DONT_ALIGN
#ifdef EIGEN_MAX_ALIGN_BYTES
#undef EIGEN_MAX_ALIGN_BYTES
#endif
#define EIGEN_MAX_ALIGN_BYTES 0
#elif !defined(EIGEN_MAX_ALIGN_BYTES)
#define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#endif
#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#else
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
#endif
//----------------------------------------------------------------------
#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
#define EIGEN_RESTRICT
#endif
@@ -611,7 +713,7 @@ namespace Eigen {
// just an empty macro !
#define EIGEN_EMPTY
#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1900
#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1900 // for older MSVC versions using the base operator is sufficient (cf Bug 1000)
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
using Base::operator =;
#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
@@ -630,6 +732,11 @@ namespace Eigen {
}
#endif
/** \internal
* \brief Macro to manually inherit assignment operators.
* This is necessary, because the implicitly defined assignment operator gets deleted when a custom operator= is defined.
*/
#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived)
/**
@@ -646,7 +753,7 @@ namespace Eigen {
typedef typename Eigen::internal::traits<Derived>::Scalar Scalar; /*!< \brief Numeric type, e.g. float, double, int or std::complex<float>. */ \
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; /*!< \brief The underlying numeric type for composed scalar types. \details In cases where Scalar is e.g. std::complex<T>, T were corresponding to RealScalar. */ \
typedef typename Base::CoeffReturnType CoeffReturnType; /*!< \brief The return type for coefficient access. \details Depending on whether the object allows direct coefficient access (e.g. for a MatrixXd), this type is either 'const Scalar&' or simply 'Scalar' for objects that do not allow direct coefficient access. */ \
typedef typename Eigen::internal::nested<Derived>::type Nested; \
typedef typename Eigen::internal::ref_selector<Derived>::type Nested; \
typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \
typedef typename Eigen::internal::traits<Derived>::StorageIndex StorageIndex; \
enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \
@@ -719,8 +826,13 @@ namespace Eigen {
# define EIGEN_TRY try
# define EIGEN_CATCH(X) catch (X)
#else
# define EIGEN_THROW_X(X) std::abort()
# define EIGEN_THROW std::abort()
# ifdef __CUDA_ARCH__
# define EIGEN_THROW_X(X) asm("trap;") return {}
# define EIGEN_THROW asm("trap;"); return {}
# else
# define EIGEN_THROW_X(X) std::abort()
# define EIGEN_THROW std::abort()
# endif
# define EIGEN_TRY if (true)
# define EIGEN_CATCH(X) else
#endif

View File

@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
@@ -32,7 +32,7 @@
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
&& defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_ALIGN_BYTES == 16)
&& defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
@@ -42,14 +42,14 @@
// See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
// See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_ALIGN_BYTES == 16)
#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
#else
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
#endif
#if (EIGEN_OS_MAC && (EIGEN_ALIGN_BYTES == 16)) \
|| (EIGEN_OS_WIN64 && (EIGEN_ALIGN_BYTES == 16)) \
#if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \
|| (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \
|| EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \
|| EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
#define EIGEN_MALLOC_ALREADY_ALIGNED 1
@@ -59,18 +59,20 @@
#endif
// See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554)
// It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first.
// Currently, let's include it only on unix systems:
#if EIGEN_OS_UNIX
#include <unistd.h>
#if (EIGEN_OS_QNX || (defined _GNU_SOURCE) || EIGEN_COMP_PGI || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
#define EIGEN_HAS_POSIX_MEMALIGN 1
#endif
#endif
#ifndef EIGEN_HAS_POSIX_MEMALIGN
#define EIGEN_HAS_POSIX_MEMALIGN 0
// See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554)
// It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first.
// Currently, let's include it only on unix systems:
#if EIGEN_OS_UNIX && !(EIGEN_OS_SUN || EIGEN_OS_SOLARIS)
#include <unistd.h>
#if (EIGEN_OS_QNX || (defined _GNU_SOURCE) || EIGEN_COMP_PGI || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
#define EIGEN_HAS_POSIX_MEMALIGN 1
#endif
#endif
#ifndef EIGEN_HAS_POSIX_MEMALIGN
#define EIGEN_HAS_POSIX_MEMALIGN 0
#endif
#endif
#if defined EIGEN_VECTORIZE_SSE || defined EIGEN_VECTORIZE_AVX
@@ -105,9 +107,9 @@ inline void throw_std_bad_alloc()
*/
inline void* handmade_aligned_malloc(std::size_t size)
{
void *original = std::malloc(size+EIGEN_ALIGN_BYTES);
void *original = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES);
if (original == 0) return 0;
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES);
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
@@ -128,9 +130,9 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t =
if (ptr == 0) return handmade_aligned_malloc(size);
void *original = *(reinterpret_cast<void**>(ptr) - 1);
std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
original = std::realloc(original,size+EIGEN_ALIGN_BYTES);
original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES);
if (original == 0) return 0;
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES);
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
void *previous_aligned = static_cast<char *>(original)+previous_offset;
if(aligned!=previous_aligned)
std::memmove(aligned, previous_aligned, size);
@@ -216,16 +218,16 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
check_that_malloc_is_allowed();
void *result;
#if !EIGEN_ALIGN
#if EIGEN_DEFAULT_ALIGN_BYTES==0
result = std::malloc(size);
#elif EIGEN_MALLOC_ALREADY_ALIGNED
result = std::malloc(size);
#elif EIGEN_HAS_POSIX_MEMALIGN
if(posix_memalign(&result, EIGEN_ALIGN_BYTES, size)) result = 0;
if(posix_memalign(&result, EIGEN_DEFAULT_ALIGN_BYTES, size)) result = 0;
#elif EIGEN_HAS_MM_MALLOC
result = _mm_malloc(size, EIGEN_ALIGN_BYTES);
result = _mm_malloc(size, EIGEN_DEFAULT_ALIGN_BYTES);
#elif EIGEN_OS_WIN_STRICT
result = _aligned_malloc(size, EIGEN_ALIGN_BYTES);
result = _aligned_malloc(size, EIGEN_DEFAULT_ALIGN_BYTES);
#else
result = handmade_aligned_malloc(size);
#endif
@@ -239,7 +241,7 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
/** \internal Frees memory allocated with aligned_malloc. */
EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
{
#if !EIGEN_ALIGN
#if EIGEN_DEFAULT_ALIGN_BYTES==0
std::free(ptr);
#elif EIGEN_MALLOC_ALREADY_ALIGNED
std::free(ptr);
@@ -264,7 +266,7 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
EIGEN_UNUSED_VARIABLE(old_size);
void *result;
#if !EIGEN_ALIGN
#if EIGEN_DEFAULT_ALIGN_BYTES==0
result = std::realloc(ptr,new_size);
#elif EIGEN_MALLOC_ALREADY_ALIGNED
result = std::realloc(ptr,new_size);
@@ -275,12 +277,12 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
// implements _mm_malloc/_mm_free based on the corresponding _aligned_
// functions. This may not always be the case and we just try to be safe.
#if EIGEN_OS_WIN_STRICT && defined(_mm_free)
result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES);
result = _aligned_realloc(ptr,new_size,EIGEN_DEFAULT_ALIGN_BYTES);
#else
result = generic_aligned_realloc(ptr,new_size,old_size);
#endif
#elif EIGEN_OS_WIN_STRICT
result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES);
result = _aligned_realloc(ptr,new_size,EIGEN_DEFAULT_ALIGN_BYTES);
#else
result = handmade_aligned_realloc(ptr,new_size,old_size);
#endif
@@ -504,47 +506,57 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_align
/****************************************************************************/
/** \internal Returns the index of the first element of the array that is well aligned for vectorization.
/** \internal Returns the index of the first element of the array that is well aligned with respect to the requested \a Alignment.
*
* \tparam Alignment requested alignment in Bytes.
* \param array the address of the start of the array
* \param size the size of the array
*
* \note If no element of the array is well aligned, the size of the array is returned. Typically,
* for example with SSE, "well aligned" means 16-byte-aligned. If vectorization is disabled or if the
* \note If no element of the array is well aligned or the requested alignment is not a multiple of a scalar,
* the size of the array is returned. For example with SSE, the requested alignment is typically 16-bytes. If
* packet size for the given scalar type is 1, then everything is considered well-aligned.
*
* \note If the scalar type is vectorizable, we rely on the following assumptions: sizeof(Scalar) is a
* power of 2, the packet size in bytes is also a power of 2, and is a multiple of sizeof(Scalar). On the
* other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for
* \note Otherwise, if the Alignment is larger that the scalar size, we rely on the assumptions that sizeof(Scalar) is a
* power of 2. On the other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for
* example with Scalar=double on certain 32-bit platforms, see bug #79.
*
* There is also the variant first_aligned(const MatrixBase&) defined in DenseCoeffsBase.h.
* \sa first_default_aligned()
*/
template<typename Scalar, typename Index>
template<int Alignment, typename Scalar, typename Index>
inline Index first_aligned(const Scalar* array, Index size)
{
static const Index PacketSize = packet_traits<Scalar>::size;
static const Index PacketAlignedMask = PacketSize-1;
static const Index ScalarSize = sizeof(Scalar);
static const Index AlignmentSize = Alignment / ScalarSize;
static const Index AlignmentMask = AlignmentSize-1;
if(PacketSize==1)
if(AlignmentSize<=1)
{
// Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements
// of the array have the same alignment.
// Either the requested alignment if smaller than a scalar, or it exactly match a 1 scalar
// so that all elements of the array have the same alignment.
return 0;
}
else if(size_t(array) & (sizeof(Scalar)-1))
else if( (std::size_t(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0)
{
// There is vectorization for this scalar type, but the array is not aligned to the size of a single scalar.
// The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size.
// Consequently, no element of the array is well aligned.
return size;
}
else
{
return std::min<Index>( (PacketSize - (Index((size_t(array)/sizeof(Scalar))) & PacketAlignedMask))
& PacketAlignedMask, size);
return std::min<Index>( (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask, size);
}
}
/** \internal Returns the index of the first element of the array that is well aligned with respect the largest packet requirement.
* \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */
template<typename Scalar, typename Index>
inline Index first_default_aligned(const Scalar* array, Index size)
{
typedef typename packet_traits<Scalar>::type DefaultPacketType;
return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
}
/** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size
*/
template<typename Index>
@@ -557,18 +569,18 @@ inline Index first_multiple(Index size, Index base)
// use memcpy on trivial types, i.e., on types that does not require an initialization ctor.
template<typename T, bool UseMemcpy> struct smart_copy_helper;
template<typename T> void smart_copy(const T* start, const T* end, T* target)
template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target)
{
smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
}
template<typename T> struct smart_copy_helper<T,true> {
static inline void run(const T* start, const T* end, T* target)
EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
{ memcpy(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); }
};
template<typename T> struct smart_copy_helper<T,false> {
static inline void run(const T* start, const T* end, T* target)
EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
{ std::copy(start, end, target); }
};
@@ -687,9 +699,14 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
* The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token.
*/
#ifdef EIGEN_ALLOCA
// We always manually re-align the result of EIGEN_ALLOCA.
// If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
#define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+EIGEN_ALIGN_BYTES-1)) + EIGEN_ALIGN_BYTES-1) & ~(size_t(EIGEN_ALIGN_BYTES-1)))
#if EIGEN_DEFAULT_ALIGN_BYTES>0
// We always manually re-align the result of EIGEN_ALLOCA.
// If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
#define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<std::size_t>(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))
#else
#define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
#endif
#define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
@@ -713,7 +730,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] ***
*****************************************************************************/
#if EIGEN_ALIGN
#if EIGEN_MAX_ALIGN_BYTES!=0
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
void* operator new(size_t size, const std::nothrow_t&) throw() { \
EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
@@ -749,7 +766,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_ALIGN_BYTES==0)))
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0)))
/****************************************************************************/

View File

@@ -117,6 +117,10 @@ template<typename T> struct enable_if<true,T>
{ typedef T type; };
#if defined(__CUDA_ARCH__)
#if !defined(__FLT_EPSILON__)
#define __FLT_EPSILON__ FLT_EPSILON
#define __DBL_EPSILON__ DBL_EPSILON
#endif
namespace device {
@@ -124,16 +128,53 @@ template<typename T> struct numeric_limits
{
EIGEN_DEVICE_FUNC
static T epsilon() { return 0; }
static T (max)() { assert(false && "Highest not supported for this type"); }
static T (min)() { assert(false && "Lowest not supported for this type"); }
};
template<> struct numeric_limits<float>
{
EIGEN_DEVICE_FUNC
static float epsilon() { return __FLT_EPSILON__; }
EIGEN_DEVICE_FUNC
static float (max)() { return CUDART_MAX_NORMAL_F; }
EIGEN_DEVICE_FUNC
static float (min)() { return __FLT_EPSILON__; }
};
template<> struct numeric_limits<double>
{
EIGEN_DEVICE_FUNC
static double epsilon() { return __DBL_EPSILON__; }
EIGEN_DEVICE_FUNC
static double (max)() { return CUDART_INF; }
EIGEN_DEVICE_FUNC
static double (min)() { return __DBL_EPSILON__; }
};
template<> struct numeric_limits<int>
{
EIGEN_DEVICE_FUNC
static int epsilon() { return 0; }
EIGEN_DEVICE_FUNC
static int (max)() { return INT_MAX; }
EIGEN_DEVICE_FUNC
static int (min)() { return INT_MIN; }
};
template<> struct numeric_limits<long>
{
EIGEN_DEVICE_FUNC
static long epsilon() { return 0; }
EIGEN_DEVICE_FUNC
static long (max)() { return LONG_MAX; }
EIGEN_DEVICE_FUNC
static long (min)() { return LONG_MIN; }
};
template<> struct numeric_limits<long long>
{
EIGEN_DEVICE_FUNC
static long long epsilon() { return 0; }
EIGEN_DEVICE_FUNC
static long long (max)() { return LLONG_MAX; }
EIGEN_DEVICE_FUNC
static long long (min)() { return LLONG_MIN; }
};
}
@@ -145,11 +186,11 @@ template<> struct numeric_limits<double>
*/
class noncopyable
{
noncopyable(const noncopyable&);
const noncopyable& operator=(const noncopyable&);
EIGEN_DEVICE_FUNC noncopyable(const noncopyable&);
EIGEN_DEVICE_FUNC const noncopyable& operator=(const noncopyable&);
protected:
noncopyable() {}
~noncopyable() {}
EIGEN_DEVICE_FUNC noncopyable() {}
EIGEN_DEVICE_FUNC ~noncopyable() {}
};
@@ -160,7 +201,13 @@ protected:
* upcoming next STL generation (using a templated result member).
* If none of these members is provided, then the type of the first argument is returned. FIXME, that behavior is a pretty bad hack.
*/
template<typename T> struct result_of {};
#ifdef EIGEN_HAS_STD_RESULT_OF
template<typename T> struct result_of {
typedef typename std::result_of<T>::type type1;
typedef typename remove_all<type1>::type type;
};
#else
template<typename T> struct result_of { };
struct has_none {int a[1];};
struct has_std_result_type {int a[2];};
@@ -178,10 +225,10 @@ struct unary_result_of_select<Func, ArgType, sizeof(has_tr1_result)> {typedef ty
template<typename Func, typename ArgType>
struct result_of<Func(ArgType)> {
template<typename T>
static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
template<typename T>
static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType)>::type const * = 0);
static has_none testFunctor(...);
static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType)>::type const * = 0);
static has_none testFunctor(...);
// note that the following indirection is needed for gcc-3.3
enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
@@ -202,15 +249,16 @@ struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_tr1_result)>
template<typename Func, typename ArgType0, typename ArgType1>
struct result_of<Func(ArgType0,ArgType1)> {
template<typename T>
static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
template<typename T>
static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1)>::type const * = 0);
static has_none testFunctor(...);
static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1)>::type const * = 0);
static has_none testFunctor(...);
// note that the following indirection is needed for gcc-3.3
enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
typedef typename binary_result_of_select<Func, ArgType0, ArgType1, FunctorType>::type type;
};
#endif
/** \internal In short, it computes int(sqrt(\a Y)) with \a Y an integer.
* Usage example: \code meta_sqrt<1023>::ret \endcode
@@ -284,6 +332,14 @@ template<typename T> EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b =
template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); }
#endif
// Integer division with rounding up.
// T is assumed to be an integer type with a>=0, and b>0
template<typename T>
T div_ceil(const T &a, const T &b)
{
return (a+b-1) / b;
}
} // end namespace numext
} // end namespace Eigen

View File

@@ -119,7 +119,72 @@ template<typename T> struct unpacket_traits
{
typedef T type;
typedef T half;
enum {size=1};
enum
{
size = 1,
alignment = 1
};
};
template<int Size, typename PacketType,
bool Stop = Size==Dynamic || (Size%unpacket_traits<PacketType>::size)==0 || is_same<PacketType,typename unpacket_traits<PacketType>::half>::value>
struct find_best_packet_helper;
template< int Size, typename PacketType>
struct find_best_packet_helper<Size,PacketType,true>
{
typedef PacketType type;
};
template<int Size, typename PacketType>
struct find_best_packet_helper<Size,PacketType,false>
{
typedef typename find_best_packet_helper<Size,typename unpacket_traits<PacketType>::half>::type type;
};
template<typename T, int Size>
struct find_best_packet
{
typedef typename find_best_packet_helper<Size,typename packet_traits<T>::type>::type type;
};
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
template<int ArrayBytes, int AlignmentBytes,
bool Match = bool((ArrayBytes%AlignmentBytes)==0),
bool TryHalf = bool(AlignmentBytes>EIGEN_MIN_ALIGN_BYTES) >
struct compute_default_alignment_helper
{
enum { value = 0 };
};
template<int ArrayBytes, int AlignmentBytes, bool TryHalf>
struct compute_default_alignment_helper<ArrayBytes, AlignmentBytes, true, TryHalf> // Match
{
enum { value = AlignmentBytes };
};
template<int ArrayBytes, int AlignmentBytes>
struct compute_default_alignment_helper<ArrayBytes, AlignmentBytes, false, true> // Try-half
{
// current packet too large, try with an half-packet
enum { value = compute_default_alignment_helper<ArrayBytes, AlignmentBytes/2>::value };
};
#else
// If static alignment is disabled, no need to bother.
// This also avoids a division by zero in "bool Match = bool((ArrayBytes%AlignmentBytes)==0)"
template<int ArrayBytes, int AlignmentBytes>
struct compute_default_alignment_helper
{
enum { value = 0 };
};
#endif
template<typename T, int Size> struct compute_default_alignment {
enum { value = compute_default_alignment_helper<Size*sizeof(T),EIGEN_MAX_STATIC_ALIGN_BYTES>::value };
};
template<typename T> struct compute_default_alignment<T,Dynamic> {
enum { value = EIGEN_MAX_ALIGN_BYTES };
};
template<typename _Scalar, int _Rows, int _Cols,
@@ -153,40 +218,6 @@ class compute_matrix_flags
enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit };
};
template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
class compute_matrix_evaluator_flags
{
enum {
row_major_bit = Options&RowMajor ? RowMajorBit : 0,
is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,
aligned_bit =
(
((Options&DontAlign)==0)
&& (
#if EIGEN_ALIGN_STATICALLY
((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0))
#else
0
#endif
||
#if EIGEN_ALIGN
is_dynamic_size_storage
#else
0
#endif
)
) ? AlignedBit : 0,
packet_access_bit = packet_traits<Scalar>::Vectorizable && aligned_bit ? PacketAccessBit : 0
};
public:
enum { ret = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit | aligned_bit };
};
template<int _Rows, int _Cols> struct size_at_compile_time
{
enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };
@@ -309,9 +340,6 @@ template<typename T> struct plain_matrix_type_row_major
> type;
};
// we should be able to get rid of this one too
template<typename T> struct must_nest_by_value { enum { ret = false }; };
/** \internal The reference selector for template expressions. The idea is that we don't
* need to use references for expressions since they are light weight proxy
* objects which should generate no copying overhead. */
@@ -323,6 +351,12 @@ struct ref_selector
T const&,
const T
>::type type;
typedef typename conditional<
bool(traits<T>::Flags & NestByRefBit),
T &,
T
>::type non_const_type;
};
/** \internal Adds the const qualifier on the value-type of T2 if and only if T1 is a const type */
@@ -337,13 +371,6 @@ struct transfer_constness
};
// When using evaluators, we never evaluate when assembling the expression!!
// TODO: get rid of this nested class since it's just an alias for ref_selector.
template<typename T, int n=1, typename PlainObject = void> struct nested
{
typedef typename ref_selector<T>::type type;
};
// However, we still need a mechanism to detect whether an expression which is evaluated multiple time
// has to be evaluated into a temporary.
// That's the purpose of this new nested_eval helper:
@@ -428,7 +455,9 @@ struct special_scalar_op_base : public DenseCoeffsBase<Derived>
{
// dummy operator* so that the
// "using special_scalar_op_base::operator*" compiles
void operator*() const;
struct dummy {};
void operator*(dummy) const;
void operator/(dummy) const;
};
template<typename Derived,typename Scalar,typename OtherScalar>
@@ -452,6 +481,16 @@ struct special_scalar_op_base<Derived,Scalar,OtherScalar,true> : public DenseCo
#endif
return static_cast<const special_scalar_op_base&>(matrix).operator*(scalar);
}
const CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, Derived>
operator/(const OtherScalar& scalar) const
{
#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
#endif
return CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, Derived>
(*static_cast<const Derived*>(this), scalar_quotient2_op<Scalar,OtherScalar>(scalar));
}
};
template<typename XprType, typename CastType> struct cast_return_type
@@ -603,6 +642,18 @@ template<typename T, int S> struct is_diagonal<DiagonalMatrix<T,S> >
template<typename S1, typename S2> struct glue_shapes;
template<> struct glue_shapes<DenseShape,TriangularShape> { typedef TriangularShape type; };
template<typename T1, typename T2>
bool is_same_dense(const T1 &mat1, const T2 &mat2, typename enable_if<has_direct_access<T1>::ret&&has_direct_access<T2>::ret, T1>::type * = 0)
{
return (mat1.data()==mat2.data()) && (mat1.innerStride()==mat2.innerStride()) && (mat1.outerStride()==mat2.outerStride());
}
template<typename T1, typename T2>
bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_access<T1>::ret&&has_direct_access<T2>::ret), T1>::type * = 0)
{
return false;
}
} // end namespace internal
// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor

View File

@@ -234,6 +234,12 @@ template<typename _MatrixType> class ComplexEigenSolver
}
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
EigenvectorType m_eivec;
EigenvalueType m_eivalues;
ComplexSchur<MatrixType> m_schur;
@@ -251,6 +257,8 @@ template<typename MatrixType>
ComplexEigenSolver<MatrixType>&
ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
{
check_template_parameters();
// this code is inspired from Jampack
eigen_assert(matrix.cols() == matrix.rows());

View File

@@ -299,6 +299,13 @@ template<typename _MatrixType> class EigenSolver
void doComputeEigenvectors();
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
}
MatrixType m_eivec;
EigenvalueType m_eivalues;
bool m_isInitialized;
@@ -366,6 +373,8 @@ template<typename MatrixType>
EigenSolver<MatrixType>&
EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
{
check_template_parameters();
using std::sqrt;
using std::abs;
using numext::isfinite;
@@ -390,7 +399,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect
if (i == matrix.cols() - 1 || m_matT.coeff(i+1, i) == Scalar(0))
{
m_eivalues.coeffRef(i) = m_matT.coeff(i, i);
if(!isfinite(m_eivalues.coeffRef(i)))
if(!(isfinite)(m_eivalues.coeffRef(i)))
{
m_isInitialized = true;
m_eigenvectorsOk = false;
@@ -408,7 +417,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect
{
Scalar t0 = m_matT.coeff(i+1, i);
Scalar t1 = m_matT.coeff(i, i+1);
Scalar maxval = numext::maxi(abs(p),numext::maxi(abs(t0),abs(t1)));
Scalar maxval = numext::maxi<Scalar>(abs(p),numext::maxi<Scalar>(abs(t0),abs(t1)));
t0 /= maxval;
t1 /= maxval;
Scalar p0 = p/maxval;
@@ -417,7 +426,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect
m_eivalues.coeffRef(i) = ComplexScalar(m_matT.coeff(i+1, i+1) + p, z);
m_eivalues.coeffRef(i+1) = ComplexScalar(m_matT.coeff(i+1, i+1) + p, -z);
if(!(isfinite(m_eivalues.coeffRef(i)) && isfinite(m_eivalues.coeffRef(i+1))))
if(!((isfinite)(m_eivalues.coeffRef(i)) && (isfinite)(m_eivalues.coeffRef(i+1))))
{
m_isInitialized = true;
m_eigenvectorsOk = false;
@@ -475,7 +484,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
}
// Backsubstitute to find vectors of upper triangular form
if (norm == 0.0)
if (norm == Scalar(0))
{
return;
}
@@ -497,7 +506,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
Scalar w = m_matT.coeff(i,i) - p;
Scalar r = m_matT.row(i).segment(l,n-l+1).dot(m_matT.col(n).segment(l, n-l+1));
if (m_eivalues.coeff(i).imag() < 0.0)
if (m_eivalues.coeff(i).imag() < Scalar(0))
{
lastw = w;
lastr = r;
@@ -505,9 +514,9 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
else
{
l = i;
if (m_eivalues.coeff(i).imag() == 0.0)
if (m_eivalues.coeff(i).imag() == Scalar(0))
{
if (w != 0.0)
if (w != Scalar(0))
m_matT.coeffRef(i,n) = -r / w;
else
m_matT.coeffRef(i,n) = -r / (eps * norm);
@@ -545,19 +554,19 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
}
else
{
std::complex<Scalar> cc = cdiv<Scalar>(0.0,-m_matT.coeff(n-1,n),m_matT.coeff(n-1,n-1)-p,q);
std::complex<Scalar> cc = cdiv<Scalar>(Scalar(0),-m_matT.coeff(n-1,n),m_matT.coeff(n-1,n-1)-p,q);
m_matT.coeffRef(n-1,n-1) = numext::real(cc);
m_matT.coeffRef(n-1,n) = numext::imag(cc);
}
m_matT.coeffRef(n,n-1) = 0.0;
m_matT.coeffRef(n,n) = 1.0;
m_matT.coeffRef(n,n-1) = Scalar(0);
m_matT.coeffRef(n,n) = Scalar(1);
for (Index i = n-2; i >= 0; i--)
{
Scalar ra = m_matT.row(i).segment(l, n-l+1).dot(m_matT.col(n-1).segment(l, n-l+1));
Scalar sa = m_matT.row(i).segment(l, n-l+1).dot(m_matT.col(n).segment(l, n-l+1));
Scalar w = m_matT.coeff(i,i) - p;
if (m_eivalues.coeff(i).imag() < 0.0)
if (m_eivalues.coeff(i).imag() < Scalar(0))
{
lastw = w;
lastra = ra;
@@ -579,7 +588,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
Scalar y = m_matT.coeff(i+1,i);
Scalar vr = (m_eivalues.coeff(i).real() - p) * (m_eivalues.coeff(i).real() - p) + m_eivalues.coeff(i).imag() * m_eivalues.coeff(i).imag() - q * q;
Scalar vi = (m_eivalues.coeff(i).real() - p) * Scalar(2) * q;
if ((vr == 0.0) && (vi == 0.0))
if ((vr == Scalar(0)) && (vi == Scalar(0)))
vr = eps * norm * (abs(w) + abs(q) + abs(x) + abs(y) + abs(lastw));
std::complex<Scalar> cc = cdiv(x*lastra-lastw*ra+q*sa,x*lastsa-lastw*sa-q*ra,vr,vi);
@@ -599,7 +608,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
}
// Overflow control
Scalar t = numext::maxi(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n)));
Scalar t = numext::maxi<Scalar>(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n)));
if ((eps * t) * t > Scalar(1))
m_matT.block(i, n-1, size-i, 2) /= t;

View File

@@ -263,6 +263,13 @@ template<typename _MatrixType> class GeneralizedEigenSolver
}
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
}
MatrixType m_eivec;
ComplexVectorType m_alphas;
VectorType m_betas;
@@ -290,6 +297,8 @@ template<typename MatrixType>
GeneralizedEigenSolver<MatrixType>&
GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixType& B, bool computeEigenvectors)
{
check_template_parameters();
using std::sqrt;
using std::abs;
eigen_assert(A.cols() == A.rows() && B.cols() == A.rows() && B.cols() == B.rows());

Some files were not shown because too many files have changed in this diff Show More